openmetadata-ingestion 1.7.0.1__py3-none-any.whl → 1.7.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of openmetadata-ingestion might be problematic. Click here for more details.
- metadata/applications/example.py +74 -0
- metadata/cli/classify.py +2 -4
- metadata/cli/common.py +26 -0
- metadata/cli/dataquality.py +2 -4
- metadata/cli/ingest.py +2 -4
- metadata/cli/profile.py +2 -4
- metadata/cli/usage.py +2 -4
- metadata/data_quality/source/test_suite.py +11 -1
- metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeUnique.py +14 -2
- metadata/data_quality/validations/column/sqlalchemy/columnValuesToMatchRegex.py +1 -1
- metadata/data_quality/validations/models.py +3 -0
- metadata/data_quality/validations/runtime_param_setter/base_diff_params_setter.py +120 -0
- metadata/data_quality/validations/runtime_param_setter/table_diff_params_setter.py +61 -47
- metadata/data_quality/validations/table/sqlalchemy/tableDiff.py +24 -0
- metadata/examples/workflows/tableau.yaml +0 -3
- metadata/generated/schema/analytics/__init__.py +1 -1
- metadata/generated/schema/analytics/basic.py +1 -1
- metadata/generated/schema/analytics/reportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/__init__.py +1 -1
- metadata/generated/schema/analytics/reportDataType/aggregatedCostAnalysisReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/entityReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/rawCostAnalysisReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/webAnalyticEntityViewReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/webAnalyticUserActivityReportData.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEvent.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventData.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/__init__.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/customEvent.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/pageViewEvent.py +1 -1
- metadata/generated/schema/api/__init__.py +1 -1
- metadata/generated/schema/api/addGlossaryToAssetsRequest.py +1 -1
- metadata/generated/schema/api/addTagToAssetsRequest.py +1 -1
- metadata/generated/schema/api/analytics/__init__.py +1 -1
- metadata/generated/schema/api/analytics/createWebAnalyticEvent.py +1 -1
- metadata/generated/schema/api/automations/__init__.py +1 -1
- metadata/generated/schema/api/automations/createWorkflow.py +1 -1
- metadata/generated/schema/api/bulkAssets.py +1 -1
- metadata/generated/schema/api/classification/__init__.py +1 -1
- metadata/generated/schema/api/classification/createClassification.py +1 -1
- metadata/generated/schema/api/classification/createTag.py +1 -1
- metadata/generated/schema/api/classification/loadTags.py +1 -1
- metadata/generated/schema/api/createBot.py +1 -1
- metadata/generated/schema/api/createEventPublisherJob.py +1 -1
- metadata/generated/schema/api/createType.py +1 -1
- metadata/generated/schema/api/data/__init__.py +1 -1
- metadata/generated/schema/api/data/createAPICollection.py +1 -1
- metadata/generated/schema/api/data/createAPIEndpoint.py +1 -1
- metadata/generated/schema/api/data/createChart.py +1 -1
- metadata/generated/schema/api/data/createContainer.py +1 -1
- metadata/generated/schema/api/data/createCustomProperty.py +1 -1
- metadata/generated/schema/api/data/createDashboard.py +1 -1
- metadata/generated/schema/api/data/createDashboardDataModel.py +1 -1
- metadata/generated/schema/api/data/createDatabase.py +1 -1
- metadata/generated/schema/api/data/createDatabaseSchema.py +1 -1
- metadata/generated/schema/api/data/createGlossary.py +1 -1
- metadata/generated/schema/api/data/createGlossaryTerm.py +1 -1
- metadata/generated/schema/api/data/createMetric.py +1 -1
- metadata/generated/schema/api/data/createMlModel.py +1 -1
- metadata/generated/schema/api/data/createPipeline.py +1 -1
- metadata/generated/schema/api/data/createQuery.py +1 -1
- metadata/generated/schema/api/data/createQueryCostRecord.py +1 -1
- metadata/generated/schema/api/data/createSearchIndex.py +1 -1
- metadata/generated/schema/api/data/createStoredProcedure.py +1 -1
- metadata/generated/schema/api/data/createTable.py +1 -1
- metadata/generated/schema/api/data/createTableProfile.py +1 -1
- metadata/generated/schema/api/data/createTopic.py +1 -1
- metadata/generated/schema/api/data/loadGlossary.py +1 -1
- metadata/generated/schema/api/data/restoreEntity.py +1 -1
- metadata/generated/schema/api/dataInsight/__init__.py +1 -1
- metadata/generated/schema/api/dataInsight/createDataInsightChart.py +1 -1
- metadata/generated/schema/api/dataInsight/custom/__init__.py +1 -1
- metadata/generated/schema/api/dataInsight/custom/createDataInsightCustomChart.py +1 -1
- metadata/generated/schema/api/dataInsight/kpi/__init__.py +1 -1
- metadata/generated/schema/api/dataInsight/kpi/createKpiRequest.py +1 -1
- metadata/generated/schema/api/docStore/__init__.py +1 -1
- metadata/generated/schema/api/docStore/createDocument.py +1 -1
- metadata/generated/schema/api/domains/__init__.py +1 -1
- metadata/generated/schema/api/domains/createDataProduct.py +1 -1
- metadata/generated/schema/api/domains/createDomain.py +1 -1
- metadata/generated/schema/api/feed/__init__.py +1 -1
- metadata/generated/schema/api/feed/closeTask.py +1 -1
- metadata/generated/schema/api/feed/createPost.py +1 -1
- metadata/generated/schema/api/feed/createSuggestion.py +1 -1
- metadata/generated/schema/api/feed/createThread.py +1 -1
- metadata/generated/schema/api/feed/resolveTask.py +1 -1
- metadata/generated/schema/api/feed/threadCount.py +1 -1
- metadata/generated/schema/api/governance/__init__.py +1 -1
- metadata/generated/schema/api/governance/createWorkflowDefinition.py +1 -1
- metadata/generated/schema/api/governance/createWorkflowInstanceState.py +1 -1
- metadata/generated/schema/api/lineage/__init__.py +1 -1
- metadata/generated/schema/api/lineage/addLineage.py +1 -1
- metadata/generated/schema/api/lineage/esLineageData.py +1 -1
- metadata/generated/schema/api/lineage/lineageDirection.py +1 -1
- metadata/generated/schema/api/lineage/nodeInformation.py +1 -1
- metadata/generated/schema/api/lineage/searchLineageRequest.py +1 -1
- metadata/generated/schema/api/lineage/searchLineageResult.py +1 -1
- metadata/generated/schema/api/openMetadataServerVersion.py +1 -1
- metadata/generated/schema/api/policies/__init__.py +1 -1
- metadata/generated/schema/api/policies/createPolicy.py +1 -1
- metadata/generated/schema/api/search/__init__.py +1 -1
- metadata/generated/schema/api/search/previewSearchRequest.py +1 -1
- metadata/generated/schema/api/services/__init__.py +1 -1
- metadata/generated/schema/api/services/createApiService.py +1 -1
- metadata/generated/schema/api/services/createDashboardService.py +1 -1
- metadata/generated/schema/api/services/createDatabaseService.py +1 -1
- metadata/generated/schema/api/services/createMessagingService.py +1 -1
- metadata/generated/schema/api/services/createMetadataService.py +1 -1
- metadata/generated/schema/api/services/createMlModelService.py +1 -1
- metadata/generated/schema/api/services/createPipelineService.py +1 -1
- metadata/generated/schema/api/services/createSearchService.py +1 -1
- metadata/generated/schema/api/services/createStorageService.py +1 -1
- metadata/generated/schema/api/services/ingestionPipelines/__init__.py +1 -1
- metadata/generated/schema/api/services/ingestionPipelines/createIngestionPipeline.py +8 -1
- metadata/generated/schema/api/setOwner.py +1 -1
- metadata/generated/schema/api/teams/__init__.py +1 -1
- metadata/generated/schema/api/teams/createPersona.py +1 -1
- metadata/generated/schema/api/teams/createRole.py +1 -1
- metadata/generated/schema/api/teams/createTeam.py +1 -1
- metadata/generated/schema/api/teams/createUser.py +1 -1
- metadata/generated/schema/api/tests/__init__.py +1 -1
- metadata/generated/schema/api/tests/createCustomMetric.py +1 -1
- metadata/generated/schema/api/tests/createLogicalTestCases.py +1 -1
- metadata/generated/schema/api/tests/createTestCase.py +1 -1
- metadata/generated/schema/api/tests/createTestCaseResolutionStatus.py +1 -1
- metadata/generated/schema/api/tests/createTestCaseResult.py +1 -1
- metadata/generated/schema/api/tests/createTestDefinition.py +1 -1
- metadata/generated/schema/api/tests/createTestSuite.py +1 -1
- metadata/generated/schema/api/voteRequest.py +1 -1
- metadata/generated/schema/auth/__init__.py +1 -1
- metadata/generated/schema/auth/basicAuth.py +1 -1
- metadata/generated/schema/auth/basicLoginRequest.py +1 -1
- metadata/generated/schema/auth/changePasswordRequest.py +1 -1
- metadata/generated/schema/auth/createPersonalToken.py +1 -1
- metadata/generated/schema/auth/emailRequest.py +1 -1
- metadata/generated/schema/auth/emailVerificationToken.py +1 -1
- metadata/generated/schema/auth/generateToken.py +1 -1
- metadata/generated/schema/auth/jwtAuth.py +1 -1
- metadata/generated/schema/auth/loginRequest.py +1 -1
- metadata/generated/schema/auth/logoutRequest.py +1 -1
- metadata/generated/schema/auth/passwordResetRequest.py +1 -1
- metadata/generated/schema/auth/passwordResetToken.py +1 -1
- metadata/generated/schema/auth/personalAccessToken.py +1 -1
- metadata/generated/schema/auth/refreshToken.py +1 -1
- metadata/generated/schema/auth/registrationRequest.py +1 -1
- metadata/generated/schema/auth/revokePersonalToken.py +1 -1
- metadata/generated/schema/auth/revokeToken.py +1 -1
- metadata/generated/schema/auth/serviceTokenEnum.py +1 -1
- metadata/generated/schema/auth/ssoAuth.py +1 -1
- metadata/generated/schema/auth/tokenRefreshRequest.py +1 -1
- metadata/generated/schema/configuration/__init__.py +1 -1
- metadata/generated/schema/configuration/appsPrivateConfiguration.py +1 -1
- metadata/generated/schema/configuration/assetCertificationSettings.py +1 -1
- metadata/generated/schema/configuration/authConfig.py +1 -1
- metadata/generated/schema/configuration/authenticationConfiguration.py +1 -1
- metadata/generated/schema/configuration/authorizerConfiguration.py +1 -1
- metadata/generated/schema/configuration/changeEventConfiguration.py +1 -1
- metadata/generated/schema/configuration/dataQualityConfiguration.py +1 -1
- metadata/generated/schema/configuration/elasticSearchConfiguration.py +1 -1
- metadata/generated/schema/configuration/eventHandlerConfiguration.py +1 -1
- metadata/generated/schema/configuration/fernetConfiguration.py +1 -1
- metadata/generated/schema/configuration/jwtTokenConfiguration.py +1 -1
- metadata/generated/schema/configuration/kafkaEventConfiguration.py +1 -1
- metadata/generated/schema/configuration/ldapConfiguration.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/__init__.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/customTrustManagerConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/hostNameConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/jvmDefaultConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/trustAllConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/truststoreConfig.py +1 -1
- metadata/generated/schema/configuration/limitsConfiguration.py +1 -1
- metadata/generated/schema/configuration/lineageSettings.py +1 -1
- metadata/generated/schema/configuration/loginConfiguration.py +1 -1
- metadata/generated/schema/configuration/logoConfiguration.py +1 -1
- metadata/generated/schema/configuration/openMetadataBaseUrlConfiguration.py +3 -2
- metadata/generated/schema/configuration/opertionalConfiguration.py +1 -1
- metadata/generated/schema/configuration/opsConfig.py +1 -1
- metadata/generated/schema/configuration/pipelineServiceClientConfiguration.py +1 -1
- metadata/generated/schema/configuration/profilerConfiguration.py +1 -1
- metadata/generated/schema/configuration/searchSettings.py +1 -1
- metadata/generated/schema/configuration/slackAppConfiguration.py +1 -1
- metadata/generated/schema/configuration/taskNotificationConfiguration.py +1 -1
- metadata/generated/schema/configuration/testResultNotificationConfiguration.py +1 -1
- metadata/generated/schema/configuration/themeConfiguration.py +1 -1
- metadata/generated/schema/configuration/uiThemePreference.py +1 -1
- metadata/generated/schema/configuration/workflowSettings.py +3 -3
- metadata/generated/schema/dataInsight/__init__.py +1 -1
- metadata/generated/schema/dataInsight/custom/__init__.py +1 -1
- metadata/generated/schema/dataInsight/custom/dataInsightCustomChart.py +1 -1
- metadata/generated/schema/dataInsight/custom/dataInsightCustomChartResult.py +1 -1
- metadata/generated/schema/dataInsight/custom/dataInsightCustomChartResultList.py +1 -1
- metadata/generated/schema/dataInsight/custom/formulaHolder.py +1 -1
- metadata/generated/schema/dataInsight/custom/lineChart.py +1 -1
- metadata/generated/schema/dataInsight/custom/summaryCard.py +1 -1
- metadata/generated/schema/dataInsight/dataInsightChart.py +1 -1
- metadata/generated/schema/dataInsight/dataInsightChartResult.py +1 -1
- metadata/generated/schema/dataInsight/kpi/__init__.py +1 -1
- metadata/generated/schema/dataInsight/kpi/basic.py +1 -1
- metadata/generated/schema/dataInsight/kpi/kpi.py +1 -1
- metadata/generated/schema/dataInsight/type/__init__.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsCount.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsSize.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsCount.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsSize.py +1 -1
- metadata/generated/schema/dataInsight/type/dailyActiveUsers.py +1 -1
- metadata/generated/schema/dataInsight/type/mostActiveUsers.py +1 -1
- metadata/generated/schema/dataInsight/type/mostViewedEntities.py +1 -1
- metadata/generated/schema/dataInsight/type/pageViewsByEntities.py +1 -1
- metadata/generated/schema/dataInsight/type/unusedAssets.py +1 -1
- metadata/generated/schema/email/__init__.py +1 -1
- metadata/generated/schema/email/emailRequest.py +1 -1
- metadata/generated/schema/email/emailTemplate.py +1 -1
- metadata/generated/schema/email/emailTemplatePlaceholder.py +1 -1
- metadata/generated/schema/email/smtpSettings.py +1 -1
- metadata/generated/schema/email/templateValidationReponse.py +1 -1
- metadata/generated/schema/entity/__init__.py +1 -1
- metadata/generated/schema/entity/applications/__init__.py +1 -1
- metadata/generated/schema/entity/applications/app.py +4 -3
- metadata/generated/schema/entity/applications/appExtension.py +1 -1
- metadata/generated/schema/entity/applications/appRunRecord.py +1 -1
- metadata/generated/schema/entity/applications/configuration/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/applicationConfig.py +10 -4
- metadata/generated/schema/entity/applications/configuration/external/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addCustomProperties.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addDataProductAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addDescriptionAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addDomainAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addOwnerAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addTagsAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addTestCaseAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addTierAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/lineagePropagationAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/mlTaggingAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeCustomPropertiesAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeDataProductAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeDescriptionAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeDomainAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeOwnerAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeTagsAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeTestCaseAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeTierAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automatorAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/collateAIAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/slackAppTokenConfiguration.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/autoPilotAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/collateAIQualityAgentAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/collateAITierAgentAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/dataInsightsAppConfig.py +3 -1
- metadata/generated/schema/entity/applications/configuration/internal/dataInsightsReportAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/dataRetentionConfiguration.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/helloPipelinesConfiguration.py +18 -0
- metadata/generated/schema/entity/applications/configuration/internal/searchIndexingAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/external/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/external/collateAIAppPrivateConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/internal/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/internal/collateAITierAgentAppPrivateConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/limits.py +1 -1
- metadata/generated/schema/entity/applications/createAppRequest.py +1 -1
- metadata/generated/schema/entity/applications/jobStatus.py +1 -1
- metadata/generated/schema/entity/applications/liveExecutionContext.py +1 -1
- metadata/generated/schema/entity/applications/marketplace/__init__.py +1 -1
- metadata/generated/schema/entity/applications/marketplace/appMarketPlaceDefinition.py +5 -2
- metadata/generated/schema/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.py +12 -2
- metadata/generated/schema/entity/applications/scheduledExecutionContext.py +1 -1
- metadata/generated/schema/entity/automations/__init__.py +1 -1
- metadata/generated/schema/entity/automations/testServiceConnection.py +1 -1
- metadata/generated/schema/entity/automations/workflow.py +1 -1
- metadata/generated/schema/entity/bot.py +1 -1
- metadata/generated/schema/entity/classification/__init__.py +1 -1
- metadata/generated/schema/entity/classification/classification.py +1 -1
- metadata/generated/schema/entity/classification/tag.py +1 -1
- metadata/generated/schema/entity/data/__init__.py +1 -1
- metadata/generated/schema/entity/data/apiCollection.py +1 -1
- metadata/generated/schema/entity/data/apiEndpoint.py +1 -1
- metadata/generated/schema/entity/data/chart.py +1 -1
- metadata/generated/schema/entity/data/container.py +1 -1
- metadata/generated/schema/entity/data/dashboard.py +1 -1
- metadata/generated/schema/entity/data/dashboardDataModel.py +2 -1
- metadata/generated/schema/entity/data/database.py +5 -1
- metadata/generated/schema/entity/data/databaseSchema.py +5 -1
- metadata/generated/schema/entity/data/glossary.py +1 -1
- metadata/generated/schema/entity/data/glossaryTerm.py +1 -1
- metadata/generated/schema/entity/data/metric.py +1 -1
- metadata/generated/schema/entity/data/mlmodel.py +1 -1
- metadata/generated/schema/entity/data/pipeline.py +1 -1
- metadata/generated/schema/entity/data/query.py +1 -1
- metadata/generated/schema/entity/data/queryCostRecord.py +1 -1
- metadata/generated/schema/entity/data/queryCostSearchResult.py +1 -1
- metadata/generated/schema/entity/data/report.py +1 -1
- metadata/generated/schema/entity/data/searchIndex.py +1 -1
- metadata/generated/schema/entity/data/storedProcedure.py +1 -1
- metadata/generated/schema/entity/data/table.py +1 -1
- metadata/generated/schema/entity/data/topic.py +1 -1
- metadata/generated/schema/entity/docStore/__init__.py +1 -1
- metadata/generated/schema/entity/docStore/document.py +1 -1
- metadata/generated/schema/entity/domains/__init__.py +1 -1
- metadata/generated/schema/entity/domains/dataProduct.py +1 -1
- metadata/generated/schema/entity/domains/domain.py +1 -1
- metadata/generated/schema/entity/events/__init__.py +1 -1
- metadata/generated/schema/entity/events/webhook.py +1 -1
- metadata/generated/schema/entity/feed/__init__.py +1 -1
- metadata/generated/schema/entity/feed/assets.py +1 -1
- metadata/generated/schema/entity/feed/customProperty.py +1 -1
- metadata/generated/schema/entity/feed/description.py +1 -1
- metadata/generated/schema/entity/feed/domain.py +1 -1
- metadata/generated/schema/entity/feed/entityInfo.py +1 -1
- metadata/generated/schema/entity/feed/owner.py +1 -1
- metadata/generated/schema/entity/feed/suggestion.py +1 -1
- metadata/generated/schema/entity/feed/tag.py +1 -1
- metadata/generated/schema/entity/feed/testCaseResult.py +1 -1
- metadata/generated/schema/entity/feed/thread.py +1 -1
- metadata/generated/schema/entity/policies/__init__.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/__init__.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/resourceDescriptor.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/resourcePermission.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/rule.py +1 -1
- metadata/generated/schema/entity/policies/filters.py +1 -1
- metadata/generated/schema/entity/policies/policy.py +1 -1
- metadata/generated/schema/entity/services/__init__.py +1 -1
- metadata/generated/schema/entity/services/apiService.py +5 -1
- metadata/generated/schema/entity/services/connections/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/api/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/api/restConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/common/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslCertPaths.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslCertValues.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/connectionBasicType.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/customDashboardConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/domoDashboardConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/lightdashConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/lookerConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/metabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/microStrategyConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/modeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerBIConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerBIReportServerConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerbi/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerbi/azureConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerbi/bucketDetails.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerbi/gcsConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerbi/s3Config.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/qlikCloudConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/qlikSenseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/quickSightConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/redashConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/sigmaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/supersetConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/tableauConnection.py +2 -11
- metadata/generated/schema/entity/services/connections/database/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/athenaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/azureSQLConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/bigQueryConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/bigTableConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/cassandra/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/cassandra/cloudConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/cassandraConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/clickhouseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/cockroachConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/azureConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/basicAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/iamAuthConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/jwtAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/noConfigAuthenticationTypes.py +1 -1
- metadata/generated/schema/entity/services/connections/database/couchbaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/customDatabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/databricksConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/azureConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/gcsConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/s3Config.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/db2Connection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/deltaLakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/deltalake/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/deltalake/metastoreConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/deltalake/storageConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/domoDatabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/dorisConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/druidConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/dynamoDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/exasolConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/glueConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/greenplumConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/hiveConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/dynamoDbCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/glueCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/hiveCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/icebergCatalog.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/icebergFileSystem.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/restCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/icebergConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/impalaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mariaDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mongoDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mssqlConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mysqlConnection.py +9 -1
- metadata/generated/schema/entity/services/connections/database/oracleConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/pinotDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/postgresConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/prestoConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/redshiftConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/salesforceConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapErpConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapHana/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapHana/sapHanaHDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapHana/sapHanaSQLConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapHanaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sasConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/singleStoreConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/snowflakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sqliteConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/synapseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/teradataConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/trinoConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/unityCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/verticaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/customMessagingConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/kafkaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/kinesisConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/pulsarConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/redpandaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/saslMechanismType.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/alationConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/alationSinkConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/amundsenConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/atlasConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/metadataESConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/openMetadataConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/customMlModelConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/mlflowConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/sageMakerConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/sklearnConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/vertexaiConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/airbyteConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/airflowConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/backendConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/customPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/dagsterConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/databricksPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/datafactoryConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/dbtCloudConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/domoPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/fivetranConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/flinkConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/gluePipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/kafkaConnectConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/matillion/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/matillion/matillionETL.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/matillionConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/nifi/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/nifi/basicAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/nifi/clientCertificateAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/nifiConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/openLineageConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/sparkConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/splineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/stitchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/wherescapeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/search/customSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/apiAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/basicAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/openSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/serviceConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/adlsConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/customStorageConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/gcsConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/s3Connection.py +1 -1
- metadata/generated/schema/entity/services/connections/testConnectionDefinition.py +1 -1
- metadata/generated/schema/entity/services/connections/testConnectionResult.py +1 -1
- metadata/generated/schema/entity/services/dashboardService.py +5 -1
- metadata/generated/schema/entity/services/databaseService.py +5 -1
- metadata/generated/schema/entity/services/ingestionPipelines/__init__.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/ingestionPipeline.py +12 -1
- metadata/generated/schema/entity/services/ingestionPipelines/pipelineServiceClientResponse.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/reverseIngestionResponse.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/status.py +1 -1
- metadata/generated/schema/entity/services/messagingService.py +5 -1
- metadata/generated/schema/entity/services/metadataService.py +5 -1
- metadata/generated/schema/entity/services/mlmodelService.py +5 -1
- metadata/generated/schema/entity/services/pipelineService.py +5 -1
- metadata/generated/schema/entity/services/searchService.py +5 -1
- metadata/generated/schema/entity/services/serviceType.py +1 -1
- metadata/generated/schema/entity/services/storageService.py +5 -1
- metadata/generated/schema/entity/teams/__init__.py +1 -1
- metadata/generated/schema/entity/teams/persona.py +1 -1
- metadata/generated/schema/entity/teams/role.py +1 -1
- metadata/generated/schema/entity/teams/team.py +1 -1
- metadata/generated/schema/entity/teams/teamHierarchy.py +1 -1
- metadata/generated/schema/entity/teams/user.py +1 -1
- metadata/generated/schema/entity/type.py +1 -1
- metadata/generated/schema/entity/utils/__init__.py +1 -1
- metadata/generated/schema/entity/utils/entitiesCount.py +1 -1
- metadata/generated/schema/entity/utils/servicesCount.py +1 -1
- metadata/generated/schema/entity/utils/supersetApiConnection.py +1 -1
- metadata/generated/schema/events/__init__.py +1 -1
- metadata/generated/schema/events/alertMetrics.py +1 -1
- metadata/generated/schema/events/api/__init__.py +1 -1
- metadata/generated/schema/events/api/createEventSubscription.py +1 -1
- metadata/generated/schema/events/api/eventSubscriptionDiagnosticInfo.py +1 -1
- metadata/generated/schema/events/api/eventsRecord.py +1 -1
- metadata/generated/schema/events/api/testEventSubscriptionDestination.py +1 -1
- metadata/generated/schema/events/api/typedEvent.py +1 -1
- metadata/generated/schema/events/emailAlertConfig.py +1 -1
- metadata/generated/schema/events/eventFilterRule.py +1 -1
- metadata/generated/schema/events/eventSubscription.py +1 -1
- metadata/generated/schema/events/eventSubscriptionOffset.py +1 -1
- metadata/generated/schema/events/failedEvent.py +1 -1
- metadata/generated/schema/events/failedEventResponse.py +1 -1
- metadata/generated/schema/events/filterResourceDescriptor.py +1 -1
- metadata/generated/schema/events/statusContext.py +1 -1
- metadata/generated/schema/events/subscriptionResourceDescriptor.py +1 -1
- metadata/generated/schema/events/subscriptionStatus.py +1 -1
- metadata/generated/schema/events/testDestinationStatus.py +1 -1
- metadata/generated/schema/governance/workflows/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/edge.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodeSubType.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodeType.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/checkEntityAttributesTask.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/createAndRunIngestionPipelineTask.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/runAppTask.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/setEntityCertificationTask.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/setGlossaryTermStatusTask.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/endEvent/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/endEvent/endEvent.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/gateway/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/gateway/parallelGateway.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/startEvent/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/startEvent/startEvent.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/userTask/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/userTask/userApprovalTask.py +1 -1
- metadata/generated/schema/governance/workflows/elements/triggers/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/triggers/eventBasedEntityTrigger.py +1 -1
- metadata/generated/schema/governance/workflows/elements/triggers/noOpTrigger.py +1 -1
- metadata/generated/schema/governance/workflows/elements/triggers/periodicBatchEntityTrigger.py +1 -1
- metadata/generated/schema/governance/workflows/workflowDefinition.py +1 -1
- metadata/generated/schema/governance/workflows/workflowInstance.py +1 -1
- metadata/generated/schema/governance/workflows/workflowInstanceState.py +1 -1
- metadata/generated/schema/jobs/__init__.py +1 -1
- metadata/generated/schema/jobs/backgroundJob.py +1 -1
- metadata/generated/schema/jobs/enumCleanupArgs.py +1 -1
- metadata/generated/schema/metadataIngestion/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/apiServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/application.py +1 -1
- metadata/generated/schema/metadataIngestion/applicationPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dashboardServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dataInsightPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceAutoClassificationPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceProfilerPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceQueryLineagePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceQueryUsagePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtAzureConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtBucketDetails.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtCloudConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtGCSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtHttpConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtLocalConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtS3Config.py +1 -1
- metadata/generated/schema/metadataIngestion/messagingServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/metadataToElasticSearchPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/mlmodelServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/pipelineServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/reverseIngestionPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/reverseingestionconfig/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/reverseingestionconfig/descriptionConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/reverseingestionconfig/ownerConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/reverseingestionconfig/tagsConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/searchServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/containerMetadataConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/manifestMetadataConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageBucketDetails.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataADLSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataGCSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataHttpConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataLocalConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataS3Config.py +1 -1
- metadata/generated/schema/metadataIngestion/storageServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/testSuitePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/workflow.py +8 -1
- metadata/generated/schema/monitoring/__init__.py +1 -1
- metadata/generated/schema/monitoring/eventMonitorProvider.py +1 -1
- metadata/generated/schema/search/__init__.py +1 -1
- metadata/generated/schema/search/aggregationRequest.py +1 -1
- metadata/generated/schema/search/searchRequest.py +1 -1
- metadata/generated/schema/security/__init__.py +1 -1
- metadata/generated/schema/security/client/__init__.py +1 -1
- metadata/generated/schema/security/client/auth0SSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/azureSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/customOidcSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/googleSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/oidcClientConfig.py +4 -1
- metadata/generated/schema/security/client/oktaSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/openMetadataJWTClientConfig.py +1 -1
- metadata/generated/schema/security/client/samlSSOClientConfig.py +1 -1
- metadata/generated/schema/security/credentials/__init__.py +1 -1
- metadata/generated/schema/security/credentials/accessTokenAuth.py +1 -1
- metadata/generated/schema/security/credentials/apiAccessTokenAuth.py +1 -1
- metadata/generated/schema/security/credentials/awsCredentials.py +1 -1
- metadata/generated/schema/security/credentials/azureCredentials.py +1 -1
- metadata/generated/schema/security/credentials/basicAuth.py +1 -1
- metadata/generated/schema/security/credentials/bitbucketCredentials.py +1 -1
- metadata/generated/schema/security/credentials/gcpCredentials.py +1 -1
- metadata/generated/schema/security/credentials/gcpExternalAccount.py +1 -1
- metadata/generated/schema/security/credentials/gcpValues.py +1 -1
- metadata/generated/schema/security/credentials/gitCredentials.py +1 -1
- metadata/generated/schema/security/credentials/githubCredentials.py +1 -1
- metadata/generated/schema/security/credentials/gitlabCredentials.py +1 -1
- metadata/generated/schema/security/sasl/__init__.py +1 -1
- metadata/generated/schema/security/sasl/saslClientConfig.py +1 -1
- metadata/generated/schema/security/secrets/__init__.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerClientLoader.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerConfiguration.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerProvider.py +1 -1
- metadata/generated/schema/security/securityConfiguration.py +1 -1
- metadata/generated/schema/security/ssl/__init__.py +1 -1
- metadata/generated/schema/security/ssl/validateSSLClientConfig.py +1 -1
- metadata/generated/schema/security/ssl/verifySSLConfig.py +1 -1
- metadata/generated/schema/settings/__init__.py +1 -1
- metadata/generated/schema/settings/settings.py +1 -1
- metadata/generated/schema/system/__init__.py +1 -1
- metadata/generated/schema/system/entityError.py +1 -1
- metadata/generated/schema/system/eventPublisherJob.py +17 -5
- metadata/generated/schema/system/indexingError.py +1 -1
- metadata/generated/schema/system/limitsResponse.py +1 -1
- metadata/generated/schema/system/ui/__init__.py +1 -1
- metadata/generated/schema/system/ui/knowledgePanel.py +1 -1
- metadata/generated/schema/system/ui/navigationItem.py +1 -1
- metadata/generated/schema/system/ui/page.py +1 -1
- metadata/generated/schema/system/ui/tab.py +1 -1
- metadata/generated/schema/system/ui/uiCustomization.py +1 -1
- metadata/generated/schema/system/validationResponse.py +1 -1
- metadata/generated/schema/tests/__init__.py +1 -1
- metadata/generated/schema/tests/assigned.py +1 -1
- metadata/generated/schema/tests/basic.py +20 -20
- metadata/generated/schema/tests/customMetric.py +1 -1
- metadata/generated/schema/tests/dataQualityReport.py +1 -1
- metadata/generated/schema/tests/resolved.py +1 -1
- metadata/generated/schema/tests/testCase.py +1 -1
- metadata/generated/schema/tests/testCaseResolutionStatus.py +1 -1
- metadata/generated/schema/tests/testDefinition.py +1 -1
- metadata/generated/schema/tests/testSuite.py +1 -1
- metadata/generated/schema/type/__init__.py +1 -1
- metadata/generated/schema/type/apiSchema.py +1 -1
- metadata/generated/schema/type/assetCertification.py +1 -1
- metadata/generated/schema/type/auditLog.py +1 -1
- metadata/generated/schema/type/basic.py +1 -1
- metadata/generated/schema/type/bulkOperationResult.py +1 -1
- metadata/generated/schema/type/changeEvent.py +1 -1
- metadata/generated/schema/type/changeEventType.py +1 -1
- metadata/generated/schema/type/changeSummaryMap.py +1 -1
- metadata/generated/schema/type/collectionDescriptor.py +1 -1
- metadata/generated/schema/type/csvDocumentation.py +1 -1
- metadata/generated/schema/type/csvErrorType.py +1 -1
- metadata/generated/schema/type/csvFile.py +1 -1
- metadata/generated/schema/type/csvImportResult.py +1 -1
- metadata/generated/schema/type/customProperties/__init__.py +1 -1
- metadata/generated/schema/type/customProperties/complexTypes.py +1 -1
- metadata/generated/schema/type/customProperties/enumConfig.py +1 -1
- metadata/generated/schema/type/customProperties/tableConfig.py +1 -1
- metadata/generated/schema/type/customProperty.py +1 -1
- metadata/generated/schema/type/dailyCount.py +1 -1
- metadata/generated/schema/type/databaseConnectionConfig.py +1 -1
- metadata/generated/schema/type/entityHierarchy.py +1 -1
- metadata/generated/schema/type/entityHistory.py +1 -1
- metadata/generated/schema/type/entityLineage.py +1 -1
- metadata/generated/schema/type/entityReference.py +1 -1
- metadata/generated/schema/type/entityReferenceList.py +1 -1
- metadata/generated/schema/type/entityRelationship.py +1 -1
- metadata/generated/schema/type/entityUsage.py +1 -1
- metadata/generated/schema/type/filterPattern.py +1 -1
- metadata/generated/schema/type/function.py +1 -1
- metadata/generated/schema/type/include.py +1 -1
- metadata/generated/schema/type/jdbcConnection.py +1 -1
- metadata/generated/schema/type/lifeCycle.py +1 -1
- metadata/generated/schema/type/paging.py +1 -1
- metadata/generated/schema/type/profile.py +1 -1
- metadata/generated/schema/type/queryParserData.py +1 -1
- metadata/generated/schema/type/reaction.py +1 -1
- metadata/generated/schema/type/schedule.py +1 -1
- metadata/generated/schema/type/schema.py +1 -1
- metadata/generated/schema/type/tableQuery.py +1 -1
- metadata/generated/schema/type/tableUsageCount.py +1 -1
- metadata/generated/schema/type/tagLabel.py +2 -1
- metadata/generated/schema/type/usageDetails.py +1 -1
- metadata/generated/schema/type/usageRequest.py +1 -1
- metadata/generated/schema/type/votes.py +1 -1
- metadata/ingestion/api/topology_runner.py +5 -1
- metadata/ingestion/models/patch_request.py +71 -3
- metadata/ingestion/ometa/mixins/es_mixin.py +11 -5
- metadata/ingestion/source/api/rest/metadata.py +15 -2
- metadata/ingestion/source/dashboard/powerbi/metadata.py +131 -39
- metadata/ingestion/source/dashboard/powerbi/models.py +23 -1
- metadata/ingestion/source/dashboard/tableau/client.py +152 -171
- metadata/ingestion/source/dashboard/tableau/connection.py +23 -48
- metadata/ingestion/source/dashboard/tableau/metadata.py +73 -99
- metadata/ingestion/source/dashboard/tableau/models.py +8 -18
- metadata/ingestion/source/dashboard/tableau/queries.py +2 -2
- metadata/ingestion/source/database/athena/metadata.py +26 -0
- metadata/ingestion/source/database/bigquery/connection.py +8 -3
- metadata/ingestion/source/database/bigquery/helper.py +8 -6
- metadata/ingestion/source/database/bigquery/metadata.py +13 -5
- metadata/ingestion/source/database/dbt/metadata.py +30 -17
- metadata/ingestion/source/database/life_cycle_query_mixin.py +9 -0
- metadata/ingestion/source/database/mysql/connection.py +11 -3
- metadata/ingestion/source/database/mysql/lineage.py +4 -4
- metadata/ingestion/source/database/mysql/queries.py +29 -0
- metadata/ingestion/source/database/mysql/query_parser.py +31 -0
- metadata/ingestion/source/database/oracle/queries.py +2 -2
- metadata/ingestion/source/database/postgres/metadata.py +3 -1
- metadata/ingestion/source/database/postgres/queries.py +7 -0
- metadata/ingestion/source/database/postgres/utils.py +28 -19
- metadata/ingestion/source/database/snowflake/data_diff/__init__.py +0 -0
- metadata/ingestion/source/database/snowflake/data_diff/data_diff.py +37 -0
- metadata/ingestion/source/database/snowflake/metadata.py +14 -0
- metadata/ingestion/source/database/snowflake/queries.py +11 -7
- metadata/ingestion/source/database/snowflake/service_spec.py +4 -0
- metadata/ingestion/source/database/snowflake/utils.py +32 -4
- metadata/ingestion/source/database/vertica/queries.py +5 -20
- metadata/ingestion/source/pipeline/airbyte/constants.py +29 -0
- metadata/ingestion/source/pipeline/airbyte/metadata.py +67 -26
- metadata/ingestion/source/pipeline/airbyte/utils.py +99 -0
- metadata/ingestion/source/pipeline/openlineage/models.py +3 -2
- metadata/ingestion/source/pipeline/pipeline_service.py +2 -3
- metadata/ingestion/source/storage/s3/metadata.py +7 -8
- metadata/pii/algorithms/classifiers.py +180 -0
- metadata/pii/algorithms/column_patterns.py +61 -0
- metadata/pii/algorithms/feature_extraction.py +154 -0
- metadata/pii/algorithms/preprocessing.py +62 -0
- metadata/pii/algorithms/presidio_patches.py +45 -0
- metadata/pii/algorithms/presidio_utils.py +119 -0
- metadata/pii/algorithms/tags.py +111 -0
- metadata/pii/algorithms/utils.py +38 -0
- metadata/pii/base_processor.py +125 -0
- metadata/pii/constants.py +8 -0
- metadata/pii/processor.py +47 -138
- metadata/profiler/interface/sqlalchemy/profiler_interface.py +66 -36
- metadata/profiler/processor/runner.py +29 -6
- metadata/profiler/source/database/mssql/profiler_source.py +86 -0
- metadata/profiler/source/fetcher/profiler_source_factory.py +13 -0
- metadata/readers/dataframe/json.py +5 -1
- metadata/readers/dataframe/parquet.py +10 -2
- metadata/readers/dataframe/reader_factory.py +8 -0
- metadata/sampler/processor.py +1 -1
- metadata/sampler/sampler_interface.py +3 -0
- metadata/sampler/sqlalchemy/bigquery/sampler.py +2 -63
- metadata/sampler/sqlalchemy/sampler.py +32 -40
- metadata/utils/datalake/datalake_utils.py +9 -3
- metadata/utils/fqn.py +4 -4
- metadata/utils/service_spec/default.py +4 -0
- metadata/utils/service_spec/service_spec.py +1 -0
- metadata/utils/sqa_utils.py +15 -0
- metadata/workflow/base.py +8 -1
- metadata/workflow/profiler.py +9 -9
- metadata/workflow/workflow_status_mixin.py +1 -7
- {openmetadata_ingestion-1.7.0.1.dist-info → openmetadata_ingestion-1.7.1.1.dist-info}/METADATA +427 -421
- {openmetadata_ingestion-1.7.0.1.dist-info → openmetadata_ingestion-1.7.1.1.dist-info}/RECORD +780 -762
- {openmetadata_ingestion-1.7.0.1.dist-info → openmetadata_ingestion-1.7.1.1.dist-info}/LICENSE +0 -0
- {openmetadata_ingestion-1.7.0.1.dist-info → openmetadata_ingestion-1.7.1.1.dist-info}/WHEEL +0 -0
- {openmetadata_ingestion-1.7.0.1.dist-info → openmetadata_ingestion-1.7.1.1.dist-info}/entry_points.txt +0 -0
- {openmetadata_ingestion-1.7.0.1.dist-info → openmetadata_ingestion-1.7.1.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
7
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
9
|
+
# See the License for the specific language governing permissions and
|
|
10
|
+
# limitations under the License.
|
|
11
|
+
"""
|
|
12
|
+
Extraction of PII features (from text, column names, and data types) to be used
|
|
13
|
+
for the PII classification model.
|
|
14
|
+
"""
|
|
15
|
+
import logging
|
|
16
|
+
import re
|
|
17
|
+
from collections import defaultdict
|
|
18
|
+
from typing import DefaultDict, Dict, Iterable, List, Mapping, Optional, Sequence, Set
|
|
19
|
+
|
|
20
|
+
from presidio_analyzer import AnalyzerEngine
|
|
21
|
+
|
|
22
|
+
from metadata.generated.schema.entity.data.table import DataType
|
|
23
|
+
from metadata.pii.algorithms.presidio_patches import PresidioRecognizerResultPatcher
|
|
24
|
+
from metadata.pii.algorithms.tags import PIITag
|
|
25
|
+
from metadata.pii.scanners.ner_scanner import SUPPORTED_LANG
|
|
26
|
+
from metadata.utils.logger import pii_logger
|
|
27
|
+
|
|
28
|
+
logger = pii_logger()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def extract_pii_tags(
    analyzer: AnalyzerEngine,
    texts: Sequence[str],
    context: Optional[List[str]] = None,
    recognizer_result_patcher: Optional[PresidioRecognizerResultPatcher] = None,
) -> Dict[PIITag, float]:
    """
    Extract PII entities from a batch of texts.

    Every text is analyzed independently; the scores of all results are summed
    per PII tag and then divided by the number of texts in the batch. In
    general, the larger the batch, the better the results, as some single
    texts might be noisy or contain false positives. Note that a text yielding
    several results for the same tag contributes each of their scores to the sum.

    Args:
        analyzer (AnalyzerEngine): The analyzer engine to use for PII detection.
        texts (Sequence[str]): A sequence of texts to analyze.
        context (Optional[List[str]]): Optional context to provide to the analyzer.
            This can be used to improve the accuracy of the PII detection.
            For example, keywords extracted from column names.
        recognizer_result_patcher (Optional[PresidioRecognizerResultPatcher]): A callable
            invoked as ``patcher(results, text)`` for each text; its return value
            replaces the analyzer results for that text.
    Returns:
        Dict[PIITag, float]: A mapping of PII tags to their batch-normalized scores.
            Only tags with at least one result are present; the mapping is empty
            when ``texts`` is empty.
    Raises:
        ValueError: If the analyzer does not support ``SUPPORTED_LANG``.
    """
    entity_scores: DefaultDict[PIITag, float] = defaultdict(float)

    if SUPPORTED_LANG not in analyzer.supported_languages:
        raise ValueError(
            f"The analyzer does not support {SUPPORTED_LANG}, which is required for this function."
        )

    for text in texts:
        results = analyzer.analyze(
            text, language=SUPPORTED_LANG, context=context, entities=PIITag.values()
        )
        if recognizer_result_patcher is not None:
            results = recognizer_result_patcher(results, text)

        for result in results:
            try:
                # This should be safe because the analyzer only considers the entities that we passed
                pii_entity = PIITag[result.entity_type]
            except KeyError:
                # Report through the module-level PII logger (not the root logger)
                # so the message follows the PII logging configuration.
                logger.error(f"Unrecognized PII entity type: {result.entity_type}.")
                continue
            entity_scores[pii_entity] += result.score

    # normalize the scores if the batch is not empty
    batch_size = len(texts)
    if batch_size:
        for entity in entity_scores:
            entity_scores[entity] /= batch_size

    return entity_scores
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# Common delimiters found in column names: "_", "-", " ", "." and "/".
_COLUMN_NAME_DELIMITER_RE = re.compile(r"[-_ ./]")


def split_column_name(column_name: str) -> List[str]:
    """
    Split a column name into its lower-cased components.
    This is used for passing column names to the analyzer as context.

    The name is split on the delimiters "_", "-", " ", "." and "/".
    Empty components, produced by leading, trailing or consecutive
    delimiters (or by an empty name), are dropped since they carry no
    information as context words.

    Example: "User_Email" -> ["user", "email"]
    """
    return [
        token
        for token in _COLUMN_NAME_DELIMITER_RE.split(column_name.lower())
        if token
    ]
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def extract_pii_from_column_names(
    column_name: str, patterns: Mapping[PIITag, Iterable[re.Pattern[str]]]
) -> Set[PIITag]:
    """
    Collect the PII tags whose regex patterns match the given column name.

    Each PII tag maps to a collection of compiled patterns. A tag is included
    in the result as soon as one of its patterns matches the column name
    (using ``Pattern.match``, i.e. anchored at the start of the name); the
    remaining patterns of that tag are not evaluated.

    Example: "user_email" might match the EMAIL_ADDRESS pattern, returning
    a set containing the PII tag PIITag.EMAIL_ADDRESS.
    """
    return {
        tag
        for tag, tag_patterns in patterns.items()
        # any() stops at the first matching pattern for this tag
        if any(regex.match(column_name) is not None for regex in tag_patterns)
    }
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def is_non_pii_datatype(dtype: DataType) -> bool:
    """
    Tell whether columns of the given data type can be left out of PII detection.

    Returns True when ``dtype`` belongs to one of the two groups below, which
    are considered unlikely to contain PII; False otherwise.
    """
    # Geographic / spatial types.
    spatial_types = {
        DataType.GEOGRAPHY,
        DataType.GEOMETRY,
        DataType.SPATIAL,
        DataType.POINT,
        DataType.POLYGON,
    }
    # Remaining types treated as unlikely to hold PII.
    unlikely_pii_types = {
        DataType.BOOLEAN,
        DataType.BIT,
        DataType.NULL,
        DataType.ERROR,
        DataType.FIXED,
        DataType.AGGREGATEFUNCTION,
        DataType.HLLSKETCH,
        DataType.QUANTILE_STATE,
        DataType.AGG_STATE,
        DataType.BITMAP,
        DataType.PG_LSN,
        DataType.PG_SNAPSHOT,
        DataType.TXID_SNAPSHOT,
        DataType.TSQUERY,
        DataType.TSVECTOR,
        DataType.UNKNOWN,
        DataType.LOWCARDINALITY,
        DataType.MEASURE_HIDDEN,
        DataType.MEASURE_VISIBLE,
    }

    return dtype in unlikely_pii_types or dtype in spatial_types
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
7
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
9
|
+
# See the License for the specific language governing permissions and
|
|
10
|
+
# limitations under the License.
|
|
11
|
+
"""
|
|
12
|
+
Preprocessing functions for the classification tasks.
|
|
13
|
+
"""
|
|
14
|
+
import datetime
|
|
15
|
+
import json
|
|
16
|
+
from typing import Any, List, Mapping, Optional, Sequence
|
|
17
|
+
|
|
18
|
+
from metadata.utils.logger import pii_logger
|
|
19
|
+
|
|
20
|
+
logger = pii_logger()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# pylint: disable=too-many-return-statements
|
|
24
|
+
def convert_to_str(value: Any) -> Optional[str]:
    """
    Turn a sampled value into text, or return None when it should be skipped.

    This conversion is tailored to our use case, it is not a generic one:

    - ``str`` is returned unchanged;
    - ``int``, ``float``, ``datetime.datetime`` and ``datetime.date`` go through ``str()``;
    - ``bytes`` is decoded as UTF-8, dropping undecodable bytes;
    - any other sequence or mapping is serialized to JSON, with ``str`` as the
      fallback for non-serializable members;
    - everything else, including ``None``, yields ``None``.
    """
    if isinstance(value, str):
        return value

    # Values we want to convert to string out of the box
    if isinstance(value, (int, float, datetime.datetime, datetime.date)):
        return str(value)

    if isinstance(value, bytes):
        return value.decode("utf-8", errors="ignore")

    # None must be skipped rather than rendered as "None"; it ends up here
    # together with every other unsupported type.
    if not isinstance(value, (Sequence, Mapping)):
        return None

    try:
        serialized = json.dumps(value, default=str)
    except (TypeError, ValueError, OverflowError) as e:
        # If the value cannot be serialized to JSON, return None
        logger.warning(f"Failed to convert value to JSON: {e}")
        return None
    return serialized
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def preprocess_values(values: Sequence[Any]) -> List[str]:
    """
    Convert the given values to strings, dropping the unusable ones.

    Every value goes through ``convert_to_str``. Values that convert to None
    and values whose text is empty or whitespace-only are skipped; all other
    converted strings are kept as is, in their original order.
    """
    converted = (convert_to_str(value) for value in values)
    return [text for text in converted if text is not None and text.strip()]
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
7
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
9
|
+
# See the License for the specific language governing permissions and
|
|
10
|
+
# limitations under the License.
|
|
11
|
+
"""
|
|
12
|
+
Patch the Presidio recognizer results to adapt them to specific use cases.
|
|
13
|
+
"""
|
|
14
|
+
from typing import List, Protocol, Sequence
|
|
15
|
+
|
|
16
|
+
from presidio_analyzer import RecognizerResult
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class PresidioRecognizerResultPatcher(Protocol):
    """
    Callable protocol for post-processing Presidio recognizer results.

    An implementation receives the recognizer results together with the
    analyzed text and returns the (possibly modified) results. Patching is
    sometimes needed to make the results fit our use case; for instance,
    Presidio yields URL false positives on email addresses.
    """

    def __call__(
        self, recognizer_results: Sequence[RecognizerResult], text: str
    ) -> Sequence[RecognizerResult]:
        """Return the patched recognizer results for ``text``."""
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def url_patcher(
    recognizer_results: Sequence[RecognizerResult], text: str
) -> Sequence[RecognizerResult]:
    """
    Drop URL results that are most likely the domain part of an email address.

    A result of entity type "URL" is discarded when the character right
    before its start offset in ``text`` is "@". Every other result is kept,
    in the original order.
    """

    def _follows_at_sign(candidate: RecognizerResult) -> bool:
        # Looks only at the text that precedes the match; the bounded
        # endswith() avoids building the prefix slice.
        return candidate.entity_type == "URL" and text.endswith(
            "@", 0, candidate.start
        )

    return [
        candidate
        for candidate in recognizer_results
        if not _follows_at_sign(candidate)
    ]
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
7
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
9
|
+
# See the License for the specific language governing permissions and
|
|
10
|
+
# limitations under the License.
|
|
11
|
+
"""
|
|
12
|
+
Utilities for working with the Presidio Library.
|
|
13
|
+
"""
|
|
14
|
+
import inspect
|
|
15
|
+
import logging
|
|
16
|
+
from typing import Iterable, Optional, Type
|
|
17
|
+
|
|
18
|
+
import spacy
|
|
19
|
+
from presidio_analyzer import (
|
|
20
|
+
AnalyzerEngine,
|
|
21
|
+
EntityRecognizer,
|
|
22
|
+
PatternRecognizer,
|
|
23
|
+
predefined_recognizers,
|
|
24
|
+
)
|
|
25
|
+
from presidio_analyzer.nlp_engine import SpacyNlpEngine
|
|
26
|
+
from spacy.cli.download import download # pyright: ignore[reportUnknownVariableType]
|
|
27
|
+
|
|
28
|
+
from metadata.pii.constants import PRESIDIO_LOGGER, SPACY_EN_MODEL, SUPPORTED_LANG
|
|
29
|
+
from metadata.utils.logger import METADATA_LOGGER, pii_logger
|
|
30
|
+
|
|
31
|
+
logger = pii_logger()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def build_analyzer_engine(
    model_name: str = SPACY_EN_MODEL,
) -> AnalyzerEngine:
    """
    Create a Presidio analyzer engine backed by the spaCy model ``model_name``
    and tailored to our use case.

    The model is downloaded first if it is not available locally. Every
    recognizer produced by ``_get_all_pattern_recognizers`` is then registered
    in the engine under the single supported language.
    """
    # Make sure the model can be loaded before Presidio needs it.
    _load_spacy_model(model_name)

    spacy_engine = SpacyNlpEngine(
        models=[{"lang_code": SUPPORTED_LANG, "model_name": model_name}]
    )
    engine = AnalyzerEngine(
        nlp_engine=spacy_engine, supported_languages=[SUPPORTED_LANG]
    )

    for pattern_recognizer in _get_all_pattern_recognizers():
        # Presidio recognizers are language-dependent: when analyzing text,
        # Presidio only uses the recognizers registered for the requested
        # language, on the assumption that patterns are language-specific
        # (e.g. country-specific formats).
        # We analyze structured table data rather than free text, so that
        # language-based filtering does not always make sense. To work around
        # it, the supported language of each recognizer is forced to the one
        # we analyze with before registering it.
        pattern_recognizer.supported_language = SUPPORTED_LANG
        engine.registry.add_recognizer(pattern_recognizer)

    return engine
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def set_presidio_logger_level(log_level: Optional[int] = None) -> None:
    """
    Make the Presidio logger less chatty about internal entities unless we are debugging.

    When ``log_level`` is given it is applied as is. Otherwise the level is
    INFO if the metadata logger is set to DEBUG, and ERROR in any other case.
    """
    if log_level is None:
        metadata_in_debug = (
            logging.getLogger(METADATA_LOGGER).level == logging.DEBUG
        )
        if metadata_in_debug:
            log_level = logging.INFO
        else:
            log_level = logging.ERROR

    logging.getLogger(PRESIDIO_LOGGER).setLevel(log_level)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _load_spacy_model(model_name: str) -> None:
    """
    Check that the spaCy model ``model_name`` can be loaded.

    If loading raises ``OSError`` (handled here as "model not available
    locally"), the model is downloaded and loaded once more. The loaded
    model itself is discarded.
    """
    try:
        spacy.load(model_name)
    except OSError:
        logger.warning(f"Downloading {model_name} language model for the spaCy")
        download(model_name)
        # Load again so a failed download surfaces here.
        spacy.load(model_name)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _get_all_entity_recognizer_classes() -> Iterable[Type[EntityRecognizer]]:
    """
    Yield every `EntityRecognizer` subclass exported by the
    predefined_recognizers module.

    Only the names listed in the module's ``__all__`` are inspected; nothing
    is yielded when the module does not define it.
    """
    exported_names = getattr(predefined_recognizers, "__all__", [])
    for exported_name in exported_names:
        candidate = getattr(predefined_recognizers, exported_name, None)
        # Skip exported names that are missing or are not classes at all.
        if not inspect.isclass(candidate):
            continue
        if issubclass(candidate, EntityRecognizer):
            yield candidate
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def _get_all_pattern_recognizers() -> Iterable[EntityRecognizer]:
    """
    Yield an instance of every predefined pattern-based recognizer.

    `PatternRecognizer` subclasses are instantiated for the supported
    language; a class that fails to instantiate is logged and skipped.
    `PhoneRecognizer` is not a pattern recognizer but is yielded as well,
    built with its default arguments.
    """
    for recognizer_cls in _get_all_entity_recognizer_classes():
        if not issubclass(recognizer_cls, PatternRecognizer):
            if recognizer_cls == predefined_recognizers.PhoneRecognizer:
                # Not a pattern recognizer, but pretty much the same
                yield predefined_recognizers.PhoneRecognizer()
            continue

        try:
            # Try to instantiate the recognizer
            yield recognizer_cls(
                supported_language=SUPPORTED_LANG
            )  # pyright: ignore[reportCallIssue]
        except Exception as e:
            logger.warning(e)
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
7
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
9
|
+
# See the License for the specific language governing permissions and
|
|
10
|
+
# limitations under the License.
|
|
11
|
+
"""
|
|
12
|
+
Definition of tags for the PII algorithms.
|
|
13
|
+
These tags currently belong to the layer logic of the algorithms.
|
|
14
|
+
"""
|
|
15
|
+
import enum
|
|
16
|
+
from typing import List
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class PIISensitivityTag(enum.Enum):
    """
    Sensitivity level attached to a PII tag.
    """

    SENSITIVE = "Sensitive"
    NONSENSITIVE = "NonSensitive"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@enum.unique
class PIITag(enum.Enum):
    """
    PII Tags (borrowed from Presidio https://microsoft.github.io/presidio/supported_entities/).

    The value of every member is equal to its name.
    """

    # Global
    CREDIT_CARD = "CREDIT_CARD"
    CRYPTO = "CRYPTO"  # Crypto Wallet Address
    DATE_TIME = "DATE_TIME"
    EMAIL_ADDRESS = "EMAIL_ADDRESS"
    IBAN_CODE = "IBAN_CODE"
    IP_ADDRESS = "IP_ADDRESS"
    NRP = "NRP"
    LOCATION = "LOCATION"
    PERSON = "PERSON"
    PHONE_NUMBER = "PHONE_NUMBER"
    MEDICAL_LICENSE = "MEDICAL_LICENSE"
    URL = "URL"

    # USA
    US_BANK_NUMBER = "US_BANK_NUMBER"
    US_DRIVER_LICENSE = "US_DRIVER_LICENSE"
    US_ITIN = "US_ITIN"
    US_PASSPORT = "US_PASSPORT"
    US_SSN = "US_SSN"

    # UK
    UK_NHS = "UK_NHS"

    # Spain
    ES_NIF = "ES_NIF"
    ES_NIE = "ES_NIE"

    # Italy
    IT_FISCAL_CODE = "IT_FISCAL_CODE"
    IT_DRIVER_LICENSE = "IT_DRIVER_LICENSE"
    IT_VAT_CODE = "IT_VAT_CODE"
    IT_PASSPORT = "IT_PASSPORT"
    IT_IDENTITY_CARD = "IT_IDENTITY_CARD"

    # Poland
    PL_PESEL = "PL_PESEL"

    # Singapore
    SG_NRIC_FIN = "SG_NRIC_FIN"
    SG_UEN = "SG_UEN"

    # Australia
    AU_ABN = "AU_ABN"
    AU_ACN = "AU_ACN"
    AU_TFN = "AU_TFN"
    AU_MEDICARE = "AU_MEDICARE"

    # India
    IN_PAN = "IN_PAN"
    IN_AADHAAR = "IN_AADHAAR"
    IN_VEHICLE_REGISTRATION = "IN_VEHICLE_REGISTRATION"
    IN_VOTER = "IN_VOTER"
    IN_PASSPORT = "IN_PASSPORT"

    # Finland
    FI_PERSONAL_IDENTITY_CODE = "FI_PERSONAL_IDENTITY_CODE"

    @classmethod
    def values(cls) -> List[str]:
        """
        Get the values of all the tags as a list of strings, in definition order.
        """
        return [member.value for member in cls]

    def sensitivity(self) -> PIISensitivityTag:
        """
        Get the sensitivity level of the PII tag.

        Tags listed in DEFAULT_NON_PII_SENSITIVE are non-sensitive, all the
        others are sensitive. This map is opinionated and can be changed in
        the future according to users' needs.
        """
        listed_as_non_sensitive = self in DEFAULT_NON_PII_SENSITIVE
        return (
            PIISensitivityTag.NONSENSITIVE
            if listed_as_non_sensitive
            else PIISensitivityTag.SENSITIVE
        )
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# Tags that PIITag.sensitivity() reports as PIISensitivityTag.NONSENSITIVE;
# every tag not listed here is reported as PIISensitivityTag.SENSITIVE.
DEFAULT_NON_PII_SENSITIVE = (
    PIITag.DATE_TIME,
    PIITag.NRP,
    PIITag.LOCATION,
    PIITag.PHONE_NUMBER,
    PIITag.URL,
)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
7
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
9
|
+
# See the License for the specific language governing permissions and
|
|
10
|
+
# limitations under the License.
|
|
11
|
+
"""
|
|
12
|
+
Utility functions for PII algorithms
|
|
13
|
+
"""
|
|
14
|
+
from typing import Mapping, Sequence, TypeVar
|
|
15
|
+
|
|
16
|
+
T = TypeVar("T")


def normalize_scores(scores: Mapping[T, float], tol: float = 0.01) -> Mapping[T, float]:
    """
    Rescale the scores so that they add up to 1.

    Entries whose score is not strictly greater than ``tol`` are dropped
    before rescaling. If the remaining scores add up to 0 (for instance when
    nothing is left), they are returned without rescaling.
    Scores must be positive.
    """
    kept = {label: value for label, value in scores.items() if value > tol}
    denominator = sum(kept.values())
    if denominator == 0:
        # Nothing to rescale, and dividing would fail.
        return kept

    rescaled = {}
    for label, value in kept.items():
        rescaled[label] = value / denominator
    return rescaled
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def get_top_classes(scores: Mapping[T, float], n: int, threshold: float) -> Sequence[T]:
    """
    Pick at most ``n`` classes whose score is at least ``threshold``.

    The returned classes are ordered by descending score.
    """
    ranked = list(scores.items())
    ranked.sort(key=lambda entry: entry[1], reverse=True)

    selected = []
    for label, value in ranked:
        if value >= threshold:
            selected.append(label)
    return selected[:n]
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# Copyright 2025 Collate
|
|
2
|
+
# Licensed under the Collate Community License, Version 1.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
# https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
|
|
6
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
7
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
8
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
9
|
+
# See the License for the specific language governing permissions and
|
|
10
|
+
# limitations under the License.
|
|
11
|
+
"""
|
|
12
|
+
Base class for the Auto Classification Processor.
|
|
13
|
+
"""
|
|
14
|
+
import traceback
|
|
15
|
+
from abc import ABC, abstractmethod
|
|
16
|
+
from typing import Any, Optional, Sequence, Type, TypeVar, cast, final
|
|
17
|
+
|
|
18
|
+
from metadata.generated.schema.entity.data.table import Column
|
|
19
|
+
from metadata.generated.schema.entity.services.ingestionPipelines.status import (
|
|
20
|
+
StackTraceError,
|
|
21
|
+
)
|
|
22
|
+
from metadata.generated.schema.metadataIngestion.databaseServiceAutoClassificationPipeline import (
|
|
23
|
+
DatabaseServiceAutoClassificationPipeline,
|
|
24
|
+
)
|
|
25
|
+
from metadata.generated.schema.metadataIngestion.workflow import (
|
|
26
|
+
OpenMetadataWorkflowConfig,
|
|
27
|
+
)
|
|
28
|
+
from metadata.generated.schema.type.tagLabel import TagLabel
|
|
29
|
+
from metadata.ingestion.api.models import Either
|
|
30
|
+
from metadata.ingestion.api.parser import parse_workflow_config_gracefully
|
|
31
|
+
from metadata.ingestion.api.steps import Processor
|
|
32
|
+
from metadata.ingestion.models.table_metadata import ColumnTag
|
|
33
|
+
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
|
34
|
+
from metadata.sampler.models import SamplerResponse
|
|
35
|
+
|
|
36
|
+
C = TypeVar("C", bound="AutoClassificationProcessor")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class AutoClassificationProcessor(Processor, ABC):
    """
    Abstract base for the Auto Classification Processor.

    Subclasses only provide the logic that creates tags from the sample data
    of a column (``create_column_tag_labels``); running the workflow step is
    handled by this base class.
    """

    # Some methods are marked as final to prevent overriding in subclasses,
    # which ensures the workflow always runs in the same way and leaves the
    # implementer with the responsibility of *only* writing the tagging logic.
    def __init__(
        self,
        config: OpenMetadataWorkflowConfig,
        metadata: OpenMetadata,
    ):
        super().__init__()
        self.config = config
        self.metadata = metadata

        # Init and type the source config.
        # The cast is only there to satisfy the type checker.
        self.source_config: DatabaseServiceAutoClassificationPipeline = cast(
            DatabaseServiceAutoClassificationPipeline,
            self.config.source.sourceConfig.config,
        )

    @abstractmethod
    def create_column_tag_labels(
        self, column: Column, sample_data: Sequence[Any]
    ) -> Sequence[TagLabel]:
        """
        Build the tag labels for ``column`` out of its sample data.
        """

    @property
    def name(self) -> str:
        return "Auto Classification Processor"

    def close(self) -> None:
        """Nothing to close"""

    @classmethod
    @final
    def create(
        cls: Type[C],
        config_dict: dict,
        metadata: OpenMetadata,
        pipeline_name: Optional[str] = None,
    ) -> C:
        """
        Build the processor from a raw workflow configuration dictionary.

        ``pipeline_name`` is accepted but not used here.
        """
        return cls(
            config=parse_workflow_config_gracefully(config_dict), metadata=metadata
        )

    @final
    def _run(self, record: SamplerResponse) -> Either[SamplerResponse]:
        """
        Main entrypoint for the processor.

        Computes the tag labels of every column of the sampled table and
        stores them in ``record.column_tags``. A failure while handling a
        column is reported to the step status and does not stop the
        remaining columns. The record is always returned as the right side.
        """

        # We don't always need to process
        if not self.source_config.enableAutoClassification:
            return Either(right=record, left=None)

        collected_tags = []

        for position, column in enumerate(record.table.columns):
            try:
                # Values of this column, taken from every sampled row.
                column_values = [
                    row[position] for row in record.sample_data.data.rows
                ]
                labels = self.create_column_tag_labels(
                    column=column,
                    sample_data=column_values,
                )
                for label in labels:
                    collected_tags.append(
                        ColumnTag(
                            column_fqn=column.fullyQualifiedName.root,
                            tag_label=label,
                        )
                    )
            except Exception as err:
                # TODO: Shouldn't we return a Left here?
                self.status.failed(
                    StackTraceError(
                        name=record.table.fullyQualifiedName.root,
                        error=f"Error in Processor {self.name} computing tags for [{column}] - [{err}]",
                        stackTrace=traceback.format_exc(),
                    )
                )

        record.column_tags = collected_tags
        return Either(right=record, left=None)
|
metadata/pii/constants.py
CHANGED
|
@@ -13,4 +13,12 @@ PII constants
|
|
|
13
13
|
"""
|
|
14
14
|
|
|
15
15
|
PII = "PII"
|
|
16
|
+
|
|
17
|
+
# Constants for Presidio
# Name of the logger whose level is adjusted by set_presidio_logger_level.
PRESIDIO_LOGGER = "presidio-analyzer"
# Default spaCy model name used by build_analyzer_engine.
SPACY_EN_MODEL = "en_core_web_md"

# Supported language for Presidio.
# Don't change this unless you know what you are doing.
# We are doing some tricks to make Presidio work for our use case:
# every registered recognizer has its language forced to this value.
SUPPORTED_LANG = "en"
|