openmetadata-ingestion 1.7.0.1__py3-none-any.whl → 1.7.1.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of openmetadata-ingestion might be problematic. Click here for more details.

Files changed (780)
  1. metadata/applications/example.py +74 -0
  2. metadata/cli/classify.py +2 -4
  3. metadata/cli/common.py +26 -0
  4. metadata/cli/dataquality.py +2 -4
  5. metadata/cli/ingest.py +2 -4
  6. metadata/cli/profile.py +2 -4
  7. metadata/cli/usage.py +2 -4
  8. metadata/data_quality/source/test_suite.py +11 -1
  9. metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeUnique.py +14 -2
  10. metadata/data_quality/validations/column/sqlalchemy/columnValuesToMatchRegex.py +1 -1
  11. metadata/data_quality/validations/models.py +3 -0
  12. metadata/data_quality/validations/runtime_param_setter/base_diff_params_setter.py +120 -0
  13. metadata/data_quality/validations/runtime_param_setter/table_diff_params_setter.py +61 -47
  14. metadata/data_quality/validations/table/sqlalchemy/tableDiff.py +24 -0
  15. metadata/examples/workflows/tableau.yaml +0 -3
  16. metadata/generated/schema/analytics/__init__.py +1 -1
  17. metadata/generated/schema/analytics/basic.py +1 -1
  18. metadata/generated/schema/analytics/reportData.py +1 -1
  19. metadata/generated/schema/analytics/reportDataType/__init__.py +1 -1
  20. metadata/generated/schema/analytics/reportDataType/aggregatedCostAnalysisReportData.py +1 -1
  21. metadata/generated/schema/analytics/reportDataType/entityReportData.py +1 -1
  22. metadata/generated/schema/analytics/reportDataType/rawCostAnalysisReportData.py +1 -1
  23. metadata/generated/schema/analytics/reportDataType/webAnalyticEntityViewReportData.py +1 -1
  24. metadata/generated/schema/analytics/reportDataType/webAnalyticUserActivityReportData.py +1 -1
  25. metadata/generated/schema/analytics/webAnalyticEvent.py +1 -1
  26. metadata/generated/schema/analytics/webAnalyticEventData.py +1 -1
  27. metadata/generated/schema/analytics/webAnalyticEventType/__init__.py +1 -1
  28. metadata/generated/schema/analytics/webAnalyticEventType/customEvent.py +1 -1
  29. metadata/generated/schema/analytics/webAnalyticEventType/pageViewEvent.py +1 -1
  30. metadata/generated/schema/api/__init__.py +1 -1
  31. metadata/generated/schema/api/addGlossaryToAssetsRequest.py +1 -1
  32. metadata/generated/schema/api/addTagToAssetsRequest.py +1 -1
  33. metadata/generated/schema/api/analytics/__init__.py +1 -1
  34. metadata/generated/schema/api/analytics/createWebAnalyticEvent.py +1 -1
  35. metadata/generated/schema/api/automations/__init__.py +1 -1
  36. metadata/generated/schema/api/automations/createWorkflow.py +1 -1
  37. metadata/generated/schema/api/bulkAssets.py +1 -1
  38. metadata/generated/schema/api/classification/__init__.py +1 -1
  39. metadata/generated/schema/api/classification/createClassification.py +1 -1
  40. metadata/generated/schema/api/classification/createTag.py +1 -1
  41. metadata/generated/schema/api/classification/loadTags.py +1 -1
  42. metadata/generated/schema/api/createBot.py +1 -1
  43. metadata/generated/schema/api/createEventPublisherJob.py +1 -1
  44. metadata/generated/schema/api/createType.py +1 -1
  45. metadata/generated/schema/api/data/__init__.py +1 -1
  46. metadata/generated/schema/api/data/createAPICollection.py +1 -1
  47. metadata/generated/schema/api/data/createAPIEndpoint.py +1 -1
  48. metadata/generated/schema/api/data/createChart.py +1 -1
  49. metadata/generated/schema/api/data/createContainer.py +1 -1
  50. metadata/generated/schema/api/data/createCustomProperty.py +1 -1
  51. metadata/generated/schema/api/data/createDashboard.py +1 -1
  52. metadata/generated/schema/api/data/createDashboardDataModel.py +1 -1
  53. metadata/generated/schema/api/data/createDatabase.py +1 -1
  54. metadata/generated/schema/api/data/createDatabaseSchema.py +1 -1
  55. metadata/generated/schema/api/data/createGlossary.py +1 -1
  56. metadata/generated/schema/api/data/createGlossaryTerm.py +1 -1
  57. metadata/generated/schema/api/data/createMetric.py +1 -1
  58. metadata/generated/schema/api/data/createMlModel.py +1 -1
  59. metadata/generated/schema/api/data/createPipeline.py +1 -1
  60. metadata/generated/schema/api/data/createQuery.py +1 -1
  61. metadata/generated/schema/api/data/createQueryCostRecord.py +1 -1
  62. metadata/generated/schema/api/data/createSearchIndex.py +1 -1
  63. metadata/generated/schema/api/data/createStoredProcedure.py +1 -1
  64. metadata/generated/schema/api/data/createTable.py +1 -1
  65. metadata/generated/schema/api/data/createTableProfile.py +1 -1
  66. metadata/generated/schema/api/data/createTopic.py +1 -1
  67. metadata/generated/schema/api/data/loadGlossary.py +1 -1
  68. metadata/generated/schema/api/data/restoreEntity.py +1 -1
  69. metadata/generated/schema/api/dataInsight/__init__.py +1 -1
  70. metadata/generated/schema/api/dataInsight/createDataInsightChart.py +1 -1
  71. metadata/generated/schema/api/dataInsight/custom/__init__.py +1 -1
  72. metadata/generated/schema/api/dataInsight/custom/createDataInsightCustomChart.py +1 -1
  73. metadata/generated/schema/api/dataInsight/kpi/__init__.py +1 -1
  74. metadata/generated/schema/api/dataInsight/kpi/createKpiRequest.py +1 -1
  75. metadata/generated/schema/api/docStore/__init__.py +1 -1
  76. metadata/generated/schema/api/docStore/createDocument.py +1 -1
  77. metadata/generated/schema/api/domains/__init__.py +1 -1
  78. metadata/generated/schema/api/domains/createDataProduct.py +1 -1
  79. metadata/generated/schema/api/domains/createDomain.py +1 -1
  80. metadata/generated/schema/api/feed/__init__.py +1 -1
  81. metadata/generated/schema/api/feed/closeTask.py +1 -1
  82. metadata/generated/schema/api/feed/createPost.py +1 -1
  83. metadata/generated/schema/api/feed/createSuggestion.py +1 -1
  84. metadata/generated/schema/api/feed/createThread.py +1 -1
  85. metadata/generated/schema/api/feed/resolveTask.py +1 -1
  86. metadata/generated/schema/api/feed/threadCount.py +1 -1
  87. metadata/generated/schema/api/governance/__init__.py +1 -1
  88. metadata/generated/schema/api/governance/createWorkflowDefinition.py +1 -1
  89. metadata/generated/schema/api/governance/createWorkflowInstanceState.py +1 -1
  90. metadata/generated/schema/api/lineage/__init__.py +1 -1
  91. metadata/generated/schema/api/lineage/addLineage.py +1 -1
  92. metadata/generated/schema/api/lineage/esLineageData.py +1 -1
  93. metadata/generated/schema/api/lineage/lineageDirection.py +1 -1
  94. metadata/generated/schema/api/lineage/nodeInformation.py +1 -1
  95. metadata/generated/schema/api/lineage/searchLineageRequest.py +1 -1
  96. metadata/generated/schema/api/lineage/searchLineageResult.py +1 -1
  97. metadata/generated/schema/api/openMetadataServerVersion.py +1 -1
  98. metadata/generated/schema/api/policies/__init__.py +1 -1
  99. metadata/generated/schema/api/policies/createPolicy.py +1 -1
  100. metadata/generated/schema/api/search/__init__.py +1 -1
  101. metadata/generated/schema/api/search/previewSearchRequest.py +1 -1
  102. metadata/generated/schema/api/services/__init__.py +1 -1
  103. metadata/generated/schema/api/services/createApiService.py +1 -1
  104. metadata/generated/schema/api/services/createDashboardService.py +1 -1
  105. metadata/generated/schema/api/services/createDatabaseService.py +1 -1
  106. metadata/generated/schema/api/services/createMessagingService.py +1 -1
  107. metadata/generated/schema/api/services/createMetadataService.py +1 -1
  108. metadata/generated/schema/api/services/createMlModelService.py +1 -1
  109. metadata/generated/schema/api/services/createPipelineService.py +1 -1
  110. metadata/generated/schema/api/services/createSearchService.py +1 -1
  111. metadata/generated/schema/api/services/createStorageService.py +1 -1
  112. metadata/generated/schema/api/services/ingestionPipelines/__init__.py +1 -1
  113. metadata/generated/schema/api/services/ingestionPipelines/createIngestionPipeline.py +8 -1
  114. metadata/generated/schema/api/setOwner.py +1 -1
  115. metadata/generated/schema/api/teams/__init__.py +1 -1
  116. metadata/generated/schema/api/teams/createPersona.py +1 -1
  117. metadata/generated/schema/api/teams/createRole.py +1 -1
  118. metadata/generated/schema/api/teams/createTeam.py +1 -1
  119. metadata/generated/schema/api/teams/createUser.py +1 -1
  120. metadata/generated/schema/api/tests/__init__.py +1 -1
  121. metadata/generated/schema/api/tests/createCustomMetric.py +1 -1
  122. metadata/generated/schema/api/tests/createLogicalTestCases.py +1 -1
  123. metadata/generated/schema/api/tests/createTestCase.py +1 -1
  124. metadata/generated/schema/api/tests/createTestCaseResolutionStatus.py +1 -1
  125. metadata/generated/schema/api/tests/createTestCaseResult.py +1 -1
  126. metadata/generated/schema/api/tests/createTestDefinition.py +1 -1
  127. metadata/generated/schema/api/tests/createTestSuite.py +1 -1
  128. metadata/generated/schema/api/voteRequest.py +1 -1
  129. metadata/generated/schema/auth/__init__.py +1 -1
  130. metadata/generated/schema/auth/basicAuth.py +1 -1
  131. metadata/generated/schema/auth/basicLoginRequest.py +1 -1
  132. metadata/generated/schema/auth/changePasswordRequest.py +1 -1
  133. metadata/generated/schema/auth/createPersonalToken.py +1 -1
  134. metadata/generated/schema/auth/emailRequest.py +1 -1
  135. metadata/generated/schema/auth/emailVerificationToken.py +1 -1
  136. metadata/generated/schema/auth/generateToken.py +1 -1
  137. metadata/generated/schema/auth/jwtAuth.py +1 -1
  138. metadata/generated/schema/auth/loginRequest.py +1 -1
  139. metadata/generated/schema/auth/logoutRequest.py +1 -1
  140. metadata/generated/schema/auth/passwordResetRequest.py +1 -1
  141. metadata/generated/schema/auth/passwordResetToken.py +1 -1
  142. metadata/generated/schema/auth/personalAccessToken.py +1 -1
  143. metadata/generated/schema/auth/refreshToken.py +1 -1
  144. metadata/generated/schema/auth/registrationRequest.py +1 -1
  145. metadata/generated/schema/auth/revokePersonalToken.py +1 -1
  146. metadata/generated/schema/auth/revokeToken.py +1 -1
  147. metadata/generated/schema/auth/serviceTokenEnum.py +1 -1
  148. metadata/generated/schema/auth/ssoAuth.py +1 -1
  149. metadata/generated/schema/auth/tokenRefreshRequest.py +1 -1
  150. metadata/generated/schema/configuration/__init__.py +1 -1
  151. metadata/generated/schema/configuration/appsPrivateConfiguration.py +1 -1
  152. metadata/generated/schema/configuration/assetCertificationSettings.py +1 -1
  153. metadata/generated/schema/configuration/authConfig.py +1 -1
  154. metadata/generated/schema/configuration/authenticationConfiguration.py +1 -1
  155. metadata/generated/schema/configuration/authorizerConfiguration.py +1 -1
  156. metadata/generated/schema/configuration/changeEventConfiguration.py +1 -1
  157. metadata/generated/schema/configuration/dataQualityConfiguration.py +1 -1
  158. metadata/generated/schema/configuration/elasticSearchConfiguration.py +1 -1
  159. metadata/generated/schema/configuration/eventHandlerConfiguration.py +1 -1
  160. metadata/generated/schema/configuration/fernetConfiguration.py +1 -1
  161. metadata/generated/schema/configuration/jwtTokenConfiguration.py +1 -1
  162. metadata/generated/schema/configuration/kafkaEventConfiguration.py +1 -1
  163. metadata/generated/schema/configuration/ldapConfiguration.py +1 -1
  164. metadata/generated/schema/configuration/ldapTrustStoreConfig/__init__.py +1 -1
  165. metadata/generated/schema/configuration/ldapTrustStoreConfig/customTrustManagerConfig.py +1 -1
  166. metadata/generated/schema/configuration/ldapTrustStoreConfig/hostNameConfig.py +1 -1
  167. metadata/generated/schema/configuration/ldapTrustStoreConfig/jvmDefaultConfig.py +1 -1
  168. metadata/generated/schema/configuration/ldapTrustStoreConfig/trustAllConfig.py +1 -1
  169. metadata/generated/schema/configuration/ldapTrustStoreConfig/truststoreConfig.py +1 -1
  170. metadata/generated/schema/configuration/limitsConfiguration.py +1 -1
  171. metadata/generated/schema/configuration/lineageSettings.py +1 -1
  172. metadata/generated/schema/configuration/loginConfiguration.py +1 -1
  173. metadata/generated/schema/configuration/logoConfiguration.py +1 -1
  174. metadata/generated/schema/configuration/openMetadataBaseUrlConfiguration.py +3 -2
  175. metadata/generated/schema/configuration/opertionalConfiguration.py +1 -1
  176. metadata/generated/schema/configuration/opsConfig.py +1 -1
  177. metadata/generated/schema/configuration/pipelineServiceClientConfiguration.py +1 -1
  178. metadata/generated/schema/configuration/profilerConfiguration.py +1 -1
  179. metadata/generated/schema/configuration/searchSettings.py +1 -1
  180. metadata/generated/schema/configuration/slackAppConfiguration.py +1 -1
  181. metadata/generated/schema/configuration/taskNotificationConfiguration.py +1 -1
  182. metadata/generated/schema/configuration/testResultNotificationConfiguration.py +1 -1
  183. metadata/generated/schema/configuration/themeConfiguration.py +1 -1
  184. metadata/generated/schema/configuration/uiThemePreference.py +1 -1
  185. metadata/generated/schema/configuration/workflowSettings.py +3 -3
  186. metadata/generated/schema/dataInsight/__init__.py +1 -1
  187. metadata/generated/schema/dataInsight/custom/__init__.py +1 -1
  188. metadata/generated/schema/dataInsight/custom/dataInsightCustomChart.py +1 -1
  189. metadata/generated/schema/dataInsight/custom/dataInsightCustomChartResult.py +1 -1
  190. metadata/generated/schema/dataInsight/custom/dataInsightCustomChartResultList.py +1 -1
  191. metadata/generated/schema/dataInsight/custom/formulaHolder.py +1 -1
  192. metadata/generated/schema/dataInsight/custom/lineChart.py +1 -1
  193. metadata/generated/schema/dataInsight/custom/summaryCard.py +1 -1
  194. metadata/generated/schema/dataInsight/dataInsightChart.py +1 -1
  195. metadata/generated/schema/dataInsight/dataInsightChartResult.py +1 -1
  196. metadata/generated/schema/dataInsight/kpi/__init__.py +1 -1
  197. metadata/generated/schema/dataInsight/kpi/basic.py +1 -1
  198. metadata/generated/schema/dataInsight/kpi/kpi.py +1 -1
  199. metadata/generated/schema/dataInsight/type/__init__.py +1 -1
  200. metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsCount.py +1 -1
  201. metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsSize.py +1 -1
  202. metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsCount.py +1 -1
  203. metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsSize.py +1 -1
  204. metadata/generated/schema/dataInsight/type/dailyActiveUsers.py +1 -1
  205. metadata/generated/schema/dataInsight/type/mostActiveUsers.py +1 -1
  206. metadata/generated/schema/dataInsight/type/mostViewedEntities.py +1 -1
  207. metadata/generated/schema/dataInsight/type/pageViewsByEntities.py +1 -1
  208. metadata/generated/schema/dataInsight/type/unusedAssets.py +1 -1
  209. metadata/generated/schema/email/__init__.py +1 -1
  210. metadata/generated/schema/email/emailRequest.py +1 -1
  211. metadata/generated/schema/email/emailTemplate.py +1 -1
  212. metadata/generated/schema/email/emailTemplatePlaceholder.py +1 -1
  213. metadata/generated/schema/email/smtpSettings.py +1 -1
  214. metadata/generated/schema/email/templateValidationReponse.py +1 -1
  215. metadata/generated/schema/entity/__init__.py +1 -1
  216. metadata/generated/schema/entity/applications/__init__.py +1 -1
  217. metadata/generated/schema/entity/applications/app.py +4 -3
  218. metadata/generated/schema/entity/applications/appExtension.py +1 -1
  219. metadata/generated/schema/entity/applications/appRunRecord.py +1 -1
  220. metadata/generated/schema/entity/applications/configuration/__init__.py +1 -1
  221. metadata/generated/schema/entity/applications/configuration/applicationConfig.py +10 -4
  222. metadata/generated/schema/entity/applications/configuration/external/__init__.py +1 -1
  223. metadata/generated/schema/entity/applications/configuration/external/automator/__init__.py +1 -1
  224. metadata/generated/schema/entity/applications/configuration/external/automator/addCustomProperties.py +1 -1
  225. metadata/generated/schema/entity/applications/configuration/external/automator/addDataProductAction.py +1 -1
  226. metadata/generated/schema/entity/applications/configuration/external/automator/addDescriptionAction.py +1 -1
  227. metadata/generated/schema/entity/applications/configuration/external/automator/addDomainAction.py +1 -1
  228. metadata/generated/schema/entity/applications/configuration/external/automator/addOwnerAction.py +1 -1
  229. metadata/generated/schema/entity/applications/configuration/external/automator/addTagsAction.py +1 -1
  230. metadata/generated/schema/entity/applications/configuration/external/automator/addTestCaseAction.py +1 -1
  231. metadata/generated/schema/entity/applications/configuration/external/automator/addTierAction.py +1 -1
  232. metadata/generated/schema/entity/applications/configuration/external/automator/lineagePropagationAction.py +1 -1
  233. metadata/generated/schema/entity/applications/configuration/external/automator/mlTaggingAction.py +1 -1
  234. metadata/generated/schema/entity/applications/configuration/external/automator/removeCustomPropertiesAction.py +1 -1
  235. metadata/generated/schema/entity/applications/configuration/external/automator/removeDataProductAction.py +1 -1
  236. metadata/generated/schema/entity/applications/configuration/external/automator/removeDescriptionAction.py +1 -1
  237. metadata/generated/schema/entity/applications/configuration/external/automator/removeDomainAction.py +1 -1
  238. metadata/generated/schema/entity/applications/configuration/external/automator/removeOwnerAction.py +1 -1
  239. metadata/generated/schema/entity/applications/configuration/external/automator/removeTagsAction.py +1 -1
  240. metadata/generated/schema/entity/applications/configuration/external/automator/removeTestCaseAction.py +1 -1
  241. metadata/generated/schema/entity/applications/configuration/external/automator/removeTierAction.py +1 -1
  242. metadata/generated/schema/entity/applications/configuration/external/automatorAppConfig.py +1 -1
  243. metadata/generated/schema/entity/applications/configuration/external/collateAIAppConfig.py +1 -1
  244. metadata/generated/schema/entity/applications/configuration/external/slackAppTokenConfiguration.py +1 -1
  245. metadata/generated/schema/entity/applications/configuration/internal/__init__.py +1 -1
  246. metadata/generated/schema/entity/applications/configuration/internal/autoPilotAppConfig.py +1 -1
  247. metadata/generated/schema/entity/applications/configuration/internal/collateAIQualityAgentAppConfig.py +1 -1
  248. metadata/generated/schema/entity/applications/configuration/internal/collateAITierAgentAppConfig.py +1 -1
  249. metadata/generated/schema/entity/applications/configuration/internal/dataInsightsAppConfig.py +3 -1
  250. metadata/generated/schema/entity/applications/configuration/internal/dataInsightsReportAppConfig.py +1 -1
  251. metadata/generated/schema/entity/applications/configuration/internal/dataRetentionConfiguration.py +1 -1
  252. metadata/generated/schema/entity/applications/configuration/internal/helloPipelinesConfiguration.py +18 -0
  253. metadata/generated/schema/entity/applications/configuration/internal/searchIndexingAppConfig.py +1 -1
  254. metadata/generated/schema/entity/applications/configuration/private/__init__.py +1 -1
  255. metadata/generated/schema/entity/applications/configuration/private/external/__init__.py +1 -1
  256. metadata/generated/schema/entity/applications/configuration/private/external/collateAIAppPrivateConfig.py +1 -1
  257. metadata/generated/schema/entity/applications/configuration/private/internal/__init__.py +1 -1
  258. metadata/generated/schema/entity/applications/configuration/private/internal/collateAITierAgentAppPrivateConfig.py +1 -1
  259. metadata/generated/schema/entity/applications/configuration/private/limits.py +1 -1
  260. metadata/generated/schema/entity/applications/createAppRequest.py +1 -1
  261. metadata/generated/schema/entity/applications/jobStatus.py +1 -1
  262. metadata/generated/schema/entity/applications/liveExecutionContext.py +1 -1
  263. metadata/generated/schema/entity/applications/marketplace/__init__.py +1 -1
  264. metadata/generated/schema/entity/applications/marketplace/appMarketPlaceDefinition.py +5 -2
  265. metadata/generated/schema/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.py +12 -2
  266. metadata/generated/schema/entity/applications/scheduledExecutionContext.py +1 -1
  267. metadata/generated/schema/entity/automations/__init__.py +1 -1
  268. metadata/generated/schema/entity/automations/testServiceConnection.py +1 -1
  269. metadata/generated/schema/entity/automations/workflow.py +1 -1
  270. metadata/generated/schema/entity/bot.py +1 -1
  271. metadata/generated/schema/entity/classification/__init__.py +1 -1
  272. metadata/generated/schema/entity/classification/classification.py +1 -1
  273. metadata/generated/schema/entity/classification/tag.py +1 -1
  274. metadata/generated/schema/entity/data/__init__.py +1 -1
  275. metadata/generated/schema/entity/data/apiCollection.py +1 -1
  276. metadata/generated/schema/entity/data/apiEndpoint.py +1 -1
  277. metadata/generated/schema/entity/data/chart.py +1 -1
  278. metadata/generated/schema/entity/data/container.py +1 -1
  279. metadata/generated/schema/entity/data/dashboard.py +1 -1
  280. metadata/generated/schema/entity/data/dashboardDataModel.py +2 -1
  281. metadata/generated/schema/entity/data/database.py +5 -1
  282. metadata/generated/schema/entity/data/databaseSchema.py +5 -1
  283. metadata/generated/schema/entity/data/glossary.py +1 -1
  284. metadata/generated/schema/entity/data/glossaryTerm.py +1 -1
  285. metadata/generated/schema/entity/data/metric.py +1 -1
  286. metadata/generated/schema/entity/data/mlmodel.py +1 -1
  287. metadata/generated/schema/entity/data/pipeline.py +1 -1
  288. metadata/generated/schema/entity/data/query.py +1 -1
  289. metadata/generated/schema/entity/data/queryCostRecord.py +1 -1
  290. metadata/generated/schema/entity/data/queryCostSearchResult.py +1 -1
  291. metadata/generated/schema/entity/data/report.py +1 -1
  292. metadata/generated/schema/entity/data/searchIndex.py +1 -1
  293. metadata/generated/schema/entity/data/storedProcedure.py +1 -1
  294. metadata/generated/schema/entity/data/table.py +1 -1
  295. metadata/generated/schema/entity/data/topic.py +1 -1
  296. metadata/generated/schema/entity/docStore/__init__.py +1 -1
  297. metadata/generated/schema/entity/docStore/document.py +1 -1
  298. metadata/generated/schema/entity/domains/__init__.py +1 -1
  299. metadata/generated/schema/entity/domains/dataProduct.py +1 -1
  300. metadata/generated/schema/entity/domains/domain.py +1 -1
  301. metadata/generated/schema/entity/events/__init__.py +1 -1
  302. metadata/generated/schema/entity/events/webhook.py +1 -1
  303. metadata/generated/schema/entity/feed/__init__.py +1 -1
  304. metadata/generated/schema/entity/feed/assets.py +1 -1
  305. metadata/generated/schema/entity/feed/customProperty.py +1 -1
  306. metadata/generated/schema/entity/feed/description.py +1 -1
  307. metadata/generated/schema/entity/feed/domain.py +1 -1
  308. metadata/generated/schema/entity/feed/entityInfo.py +1 -1
  309. metadata/generated/schema/entity/feed/owner.py +1 -1
  310. metadata/generated/schema/entity/feed/suggestion.py +1 -1
  311. metadata/generated/schema/entity/feed/tag.py +1 -1
  312. metadata/generated/schema/entity/feed/testCaseResult.py +1 -1
  313. metadata/generated/schema/entity/feed/thread.py +1 -1
  314. metadata/generated/schema/entity/policies/__init__.py +1 -1
  315. metadata/generated/schema/entity/policies/accessControl/__init__.py +1 -1
  316. metadata/generated/schema/entity/policies/accessControl/resourceDescriptor.py +1 -1
  317. metadata/generated/schema/entity/policies/accessControl/resourcePermission.py +1 -1
  318. metadata/generated/schema/entity/policies/accessControl/rule.py +1 -1
  319. metadata/generated/schema/entity/policies/filters.py +1 -1
  320. metadata/generated/schema/entity/policies/policy.py +1 -1
  321. metadata/generated/schema/entity/services/__init__.py +1 -1
  322. metadata/generated/schema/entity/services/apiService.py +5 -1
  323. metadata/generated/schema/entity/services/connections/__init__.py +1 -1
  324. metadata/generated/schema/entity/services/connections/api/__init__.py +1 -1
  325. metadata/generated/schema/entity/services/connections/api/restConnection.py +1 -1
  326. metadata/generated/schema/entity/services/connections/common/__init__.py +1 -1
  327. metadata/generated/schema/entity/services/connections/common/sslCertPaths.py +1 -1
  328. metadata/generated/schema/entity/services/connections/common/sslCertValues.py +1 -1
  329. metadata/generated/schema/entity/services/connections/common/sslConfig.py +1 -1
  330. metadata/generated/schema/entity/services/connections/connectionBasicType.py +1 -1
  331. metadata/generated/schema/entity/services/connections/dashboard/__init__.py +1 -1
  332. metadata/generated/schema/entity/services/connections/dashboard/customDashboardConnection.py +1 -1
  333. metadata/generated/schema/entity/services/connections/dashboard/domoDashboardConnection.py +1 -1
  334. metadata/generated/schema/entity/services/connections/dashboard/lightdashConnection.py +1 -1
  335. metadata/generated/schema/entity/services/connections/dashboard/lookerConnection.py +1 -1
  336. metadata/generated/schema/entity/services/connections/dashboard/metabaseConnection.py +1 -1
  337. metadata/generated/schema/entity/services/connections/dashboard/microStrategyConnection.py +1 -1
  338. metadata/generated/schema/entity/services/connections/dashboard/modeConnection.py +1 -1
  339. metadata/generated/schema/entity/services/connections/dashboard/powerBIConnection.py +1 -1
  340. metadata/generated/schema/entity/services/connections/dashboard/powerBIReportServerConnection.py +1 -1
  341. metadata/generated/schema/entity/services/connections/dashboard/powerbi/__init__.py +1 -1
  342. metadata/generated/schema/entity/services/connections/dashboard/powerbi/azureConfig.py +1 -1
  343. metadata/generated/schema/entity/services/connections/dashboard/powerbi/bucketDetails.py +1 -1
  344. metadata/generated/schema/entity/services/connections/dashboard/powerbi/gcsConfig.py +1 -1
  345. metadata/generated/schema/entity/services/connections/dashboard/powerbi/s3Config.py +1 -1
  346. metadata/generated/schema/entity/services/connections/dashboard/qlikCloudConnection.py +1 -1
  347. metadata/generated/schema/entity/services/connections/dashboard/qlikSenseConnection.py +1 -1
  348. metadata/generated/schema/entity/services/connections/dashboard/quickSightConnection.py +1 -1
  349. metadata/generated/schema/entity/services/connections/dashboard/redashConnection.py +1 -1
  350. metadata/generated/schema/entity/services/connections/dashboard/sigmaConnection.py +1 -1
  351. metadata/generated/schema/entity/services/connections/dashboard/supersetConnection.py +1 -1
  352. metadata/generated/schema/entity/services/connections/dashboard/tableauConnection.py +2 -11
  353. metadata/generated/schema/entity/services/connections/database/__init__.py +1 -1
  354. metadata/generated/schema/entity/services/connections/database/athenaConnection.py +1 -1
  355. metadata/generated/schema/entity/services/connections/database/azureSQLConnection.py +1 -1
  356. metadata/generated/schema/entity/services/connections/database/bigQueryConnection.py +1 -1
  357. metadata/generated/schema/entity/services/connections/database/bigTableConnection.py +1 -1
  358. metadata/generated/schema/entity/services/connections/database/cassandra/__init__.py +1 -1
  359. metadata/generated/schema/entity/services/connections/database/cassandra/cloudConfig.py +1 -1
  360. metadata/generated/schema/entity/services/connections/database/cassandraConnection.py +1 -1
  361. metadata/generated/schema/entity/services/connections/database/clickhouseConnection.py +1 -1
  362. metadata/generated/schema/entity/services/connections/database/cockroachConnection.py +1 -1
  363. metadata/generated/schema/entity/services/connections/database/common/__init__.py +1 -1
  364. metadata/generated/schema/entity/services/connections/database/common/azureConfig.py +1 -1
  365. metadata/generated/schema/entity/services/connections/database/common/basicAuth.py +1 -1
  366. metadata/generated/schema/entity/services/connections/database/common/iamAuthConfig.py +1 -1
  367. metadata/generated/schema/entity/services/connections/database/common/jwtAuth.py +1 -1
  368. metadata/generated/schema/entity/services/connections/database/common/noConfigAuthenticationTypes.py +1 -1
  369. metadata/generated/schema/entity/services/connections/database/couchbaseConnection.py +1 -1
  370. metadata/generated/schema/entity/services/connections/database/customDatabaseConnection.py +1 -1
  371. metadata/generated/schema/entity/services/connections/database/databricksConnection.py +1 -1
  372. metadata/generated/schema/entity/services/connections/database/datalake/__init__.py +1 -1
  373. metadata/generated/schema/entity/services/connections/database/datalake/azureConfig.py +1 -1
  374. metadata/generated/schema/entity/services/connections/database/datalake/gcsConfig.py +1 -1
  375. metadata/generated/schema/entity/services/connections/database/datalake/s3Config.py +1 -1
  376. metadata/generated/schema/entity/services/connections/database/datalakeConnection.py +1 -1
  377. metadata/generated/schema/entity/services/connections/database/db2Connection.py +1 -1
  378. metadata/generated/schema/entity/services/connections/database/deltaLakeConnection.py +1 -1
  379. metadata/generated/schema/entity/services/connections/database/deltalake/__init__.py +1 -1
  380. metadata/generated/schema/entity/services/connections/database/deltalake/metastoreConfig.py +1 -1
  381. metadata/generated/schema/entity/services/connections/database/deltalake/storageConfig.py +1 -1
  382. metadata/generated/schema/entity/services/connections/database/domoDatabaseConnection.py +1 -1
  383. metadata/generated/schema/entity/services/connections/database/dorisConnection.py +1 -1
  384. metadata/generated/schema/entity/services/connections/database/druidConnection.py +1 -1
  385. metadata/generated/schema/entity/services/connections/database/dynamoDBConnection.py +1 -1
  386. metadata/generated/schema/entity/services/connections/database/exasolConnection.py +1 -1
  387. metadata/generated/schema/entity/services/connections/database/glueConnection.py +1 -1
  388. metadata/generated/schema/entity/services/connections/database/greenplumConnection.py +1 -1
  389. metadata/generated/schema/entity/services/connections/database/hiveConnection.py +1 -1
  390. metadata/generated/schema/entity/services/connections/database/iceberg/__init__.py +1 -1
  391. metadata/generated/schema/entity/services/connections/database/iceberg/dynamoDbCatalogConnection.py +1 -1
  392. metadata/generated/schema/entity/services/connections/database/iceberg/glueCatalogConnection.py +1 -1
  393. metadata/generated/schema/entity/services/connections/database/iceberg/hiveCatalogConnection.py +1 -1
  394. metadata/generated/schema/entity/services/connections/database/iceberg/icebergCatalog.py +1 -1
  395. metadata/generated/schema/entity/services/connections/database/iceberg/icebergFileSystem.py +1 -1
  396. metadata/generated/schema/entity/services/connections/database/iceberg/restCatalogConnection.py +1 -1
  397. metadata/generated/schema/entity/services/connections/database/icebergConnection.py +1 -1
  398. metadata/generated/schema/entity/services/connections/database/impalaConnection.py +1 -1
  399. metadata/generated/schema/entity/services/connections/database/mariaDBConnection.py +1 -1
  400. metadata/generated/schema/entity/services/connections/database/mongoDBConnection.py +1 -1
  401. metadata/generated/schema/entity/services/connections/database/mssqlConnection.py +1 -1
  402. metadata/generated/schema/entity/services/connections/database/mysqlConnection.py +9 -1
  403. metadata/generated/schema/entity/services/connections/database/oracleConnection.py +1 -1
  404. metadata/generated/schema/entity/services/connections/database/pinotDBConnection.py +1 -1
  405. metadata/generated/schema/entity/services/connections/database/postgresConnection.py +1 -1
  406. metadata/generated/schema/entity/services/connections/database/prestoConnection.py +1 -1
  407. metadata/generated/schema/entity/services/connections/database/redshiftConnection.py +1 -1
  408. metadata/generated/schema/entity/services/connections/database/salesforceConnection.py +1 -1
  409. metadata/generated/schema/entity/services/connections/database/sapErpConnection.py +1 -1
  410. metadata/generated/schema/entity/services/connections/database/sapHana/__init__.py +1 -1
  411. metadata/generated/schema/entity/services/connections/database/sapHana/sapHanaHDBConnection.py +1 -1
  412. metadata/generated/schema/entity/services/connections/database/sapHana/sapHanaSQLConnection.py +1 -1
  413. metadata/generated/schema/entity/services/connections/database/sapHanaConnection.py +1 -1
  414. metadata/generated/schema/entity/services/connections/database/sasConnection.py +1 -1
  415. metadata/generated/schema/entity/services/connections/database/singleStoreConnection.py +1 -1
  416. metadata/generated/schema/entity/services/connections/database/snowflakeConnection.py +1 -1
  417. metadata/generated/schema/entity/services/connections/database/sqliteConnection.py +1 -1
  418. metadata/generated/schema/entity/services/connections/database/synapseConnection.py +1 -1
  419. metadata/generated/schema/entity/services/connections/database/teradataConnection.py +1 -1
  420. metadata/generated/schema/entity/services/connections/database/trinoConnection.py +1 -1
  421. metadata/generated/schema/entity/services/connections/database/unityCatalogConnection.py +1 -1
  422. metadata/generated/schema/entity/services/connections/database/verticaConnection.py +1 -1
  423. metadata/generated/schema/entity/services/connections/messaging/__init__.py +1 -1
  424. metadata/generated/schema/entity/services/connections/messaging/customMessagingConnection.py +1 -1
  425. metadata/generated/schema/entity/services/connections/messaging/kafkaConnection.py +1 -1
  426. metadata/generated/schema/entity/services/connections/messaging/kinesisConnection.py +1 -1
  427. metadata/generated/schema/entity/services/connections/messaging/pulsarConnection.py +1 -1
  428. metadata/generated/schema/entity/services/connections/messaging/redpandaConnection.py +1 -1
  429. metadata/generated/schema/entity/services/connections/messaging/saslMechanismType.py +1 -1
  430. metadata/generated/schema/entity/services/connections/metadata/__init__.py +1 -1
  431. metadata/generated/schema/entity/services/connections/metadata/alationConnection.py +1 -1
  432. metadata/generated/schema/entity/services/connections/metadata/alationSinkConnection.py +1 -1
  433. metadata/generated/schema/entity/services/connections/metadata/amundsenConnection.py +1 -1
  434. metadata/generated/schema/entity/services/connections/metadata/atlasConnection.py +1 -1
  435. metadata/generated/schema/entity/services/connections/metadata/metadataESConnection.py +1 -1
  436. metadata/generated/schema/entity/services/connections/metadata/openMetadataConnection.py +1 -1
  437. metadata/generated/schema/entity/services/connections/mlmodel/__init__.py +1 -1
  438. metadata/generated/schema/entity/services/connections/mlmodel/customMlModelConnection.py +1 -1
  439. metadata/generated/schema/entity/services/connections/mlmodel/mlflowConnection.py +1 -1
  440. metadata/generated/schema/entity/services/connections/mlmodel/sageMakerConnection.py +1 -1
  441. metadata/generated/schema/entity/services/connections/mlmodel/sklearnConnection.py +1 -1
  442. metadata/generated/schema/entity/services/connections/mlmodel/vertexaiConnection.py +1 -1
  443. metadata/generated/schema/entity/services/connections/pipeline/__init__.py +1 -1
  444. metadata/generated/schema/entity/services/connections/pipeline/airbyteConnection.py +1 -1
  445. metadata/generated/schema/entity/services/connections/pipeline/airflowConnection.py +1 -1
  446. metadata/generated/schema/entity/services/connections/pipeline/backendConnection.py +1 -1
  447. metadata/generated/schema/entity/services/connections/pipeline/customPipelineConnection.py +1 -1
  448. metadata/generated/schema/entity/services/connections/pipeline/dagsterConnection.py +1 -1
  449. metadata/generated/schema/entity/services/connections/pipeline/databricksPipelineConnection.py +1 -1
  450. metadata/generated/schema/entity/services/connections/pipeline/datafactoryConnection.py +1 -1
  451. metadata/generated/schema/entity/services/connections/pipeline/dbtCloudConnection.py +1 -1
  452. metadata/generated/schema/entity/services/connections/pipeline/domoPipelineConnection.py +1 -1
  453. metadata/generated/schema/entity/services/connections/pipeline/fivetranConnection.py +1 -1
  454. metadata/generated/schema/entity/services/connections/pipeline/flinkConnection.py +1 -1
  455. metadata/generated/schema/entity/services/connections/pipeline/gluePipelineConnection.py +1 -1
  456. metadata/generated/schema/entity/services/connections/pipeline/kafkaConnectConnection.py +1 -1
  457. metadata/generated/schema/entity/services/connections/pipeline/matillion/__init__.py +1 -1
  458. metadata/generated/schema/entity/services/connections/pipeline/matillion/matillionETL.py +1 -1
  459. metadata/generated/schema/entity/services/connections/pipeline/matillionConnection.py +1 -1
  460. metadata/generated/schema/entity/services/connections/pipeline/nifi/__init__.py +1 -1
  461. metadata/generated/schema/entity/services/connections/pipeline/nifi/basicAuth.py +1 -1
  462. metadata/generated/schema/entity/services/connections/pipeline/nifi/clientCertificateAuth.py +1 -1
  463. metadata/generated/schema/entity/services/connections/pipeline/nifiConnection.py +1 -1
  464. metadata/generated/schema/entity/services/connections/pipeline/openLineageConnection.py +1 -1
  465. metadata/generated/schema/entity/services/connections/pipeline/sparkConnection.py +1 -1
  466. metadata/generated/schema/entity/services/connections/pipeline/splineConnection.py +1 -1
  467. metadata/generated/schema/entity/services/connections/pipeline/stitchConnection.py +1 -1
  468. metadata/generated/schema/entity/services/connections/pipeline/wherescapeConnection.py +1 -1
  469. metadata/generated/schema/entity/services/connections/search/__init__.py +1 -1
  470. metadata/generated/schema/entity/services/connections/search/customSearchConnection.py +1 -1
  471. metadata/generated/schema/entity/services/connections/search/elasticSearch/__init__.py +1 -1
  472. metadata/generated/schema/entity/services/connections/search/elasticSearch/apiAuth.py +1 -1
  473. metadata/generated/schema/entity/services/connections/search/elasticSearch/basicAuth.py +1 -1
  474. metadata/generated/schema/entity/services/connections/search/elasticSearchConnection.py +1 -1
  475. metadata/generated/schema/entity/services/connections/search/openSearchConnection.py +1 -1
  476. metadata/generated/schema/entity/services/connections/serviceConnection.py +1 -1
  477. metadata/generated/schema/entity/services/connections/storage/__init__.py +1 -1
  478. metadata/generated/schema/entity/services/connections/storage/adlsConnection.py +1 -1
  479. metadata/generated/schema/entity/services/connections/storage/customStorageConnection.py +1 -1
  480. metadata/generated/schema/entity/services/connections/storage/gcsConnection.py +1 -1
  481. metadata/generated/schema/entity/services/connections/storage/s3Connection.py +1 -1
  482. metadata/generated/schema/entity/services/connections/testConnectionDefinition.py +1 -1
  483. metadata/generated/schema/entity/services/connections/testConnectionResult.py +1 -1
  484. metadata/generated/schema/entity/services/dashboardService.py +5 -1
  485. metadata/generated/schema/entity/services/databaseService.py +5 -1
  486. metadata/generated/schema/entity/services/ingestionPipelines/__init__.py +1 -1
  487. metadata/generated/schema/entity/services/ingestionPipelines/ingestionPipeline.py +12 -1
  488. metadata/generated/schema/entity/services/ingestionPipelines/pipelineServiceClientResponse.py +1 -1
  489. metadata/generated/schema/entity/services/ingestionPipelines/reverseIngestionResponse.py +1 -1
  490. metadata/generated/schema/entity/services/ingestionPipelines/status.py +1 -1
  491. metadata/generated/schema/entity/services/messagingService.py +5 -1
  492. metadata/generated/schema/entity/services/metadataService.py +5 -1
  493. metadata/generated/schema/entity/services/mlmodelService.py +5 -1
  494. metadata/generated/schema/entity/services/pipelineService.py +5 -1
  495. metadata/generated/schema/entity/services/searchService.py +5 -1
  496. metadata/generated/schema/entity/services/serviceType.py +1 -1
  497. metadata/generated/schema/entity/services/storageService.py +5 -1
  498. metadata/generated/schema/entity/teams/__init__.py +1 -1
  499. metadata/generated/schema/entity/teams/persona.py +1 -1
  500. metadata/generated/schema/entity/teams/role.py +1 -1
  501. metadata/generated/schema/entity/teams/team.py +1 -1
  502. metadata/generated/schema/entity/teams/teamHierarchy.py +1 -1
  503. metadata/generated/schema/entity/teams/user.py +1 -1
  504. metadata/generated/schema/entity/type.py +1 -1
  505. metadata/generated/schema/entity/utils/__init__.py +1 -1
  506. metadata/generated/schema/entity/utils/entitiesCount.py +1 -1
  507. metadata/generated/schema/entity/utils/servicesCount.py +1 -1
  508. metadata/generated/schema/entity/utils/supersetApiConnection.py +1 -1
  509. metadata/generated/schema/events/__init__.py +1 -1
  510. metadata/generated/schema/events/alertMetrics.py +1 -1
  511. metadata/generated/schema/events/api/__init__.py +1 -1
  512. metadata/generated/schema/events/api/createEventSubscription.py +1 -1
  513. metadata/generated/schema/events/api/eventSubscriptionDiagnosticInfo.py +1 -1
  514. metadata/generated/schema/events/api/eventsRecord.py +1 -1
  515. metadata/generated/schema/events/api/testEventSubscriptionDestination.py +1 -1
  516. metadata/generated/schema/events/api/typedEvent.py +1 -1
  517. metadata/generated/schema/events/emailAlertConfig.py +1 -1
  518. metadata/generated/schema/events/eventFilterRule.py +1 -1
  519. metadata/generated/schema/events/eventSubscription.py +1 -1
  520. metadata/generated/schema/events/eventSubscriptionOffset.py +1 -1
  521. metadata/generated/schema/events/failedEvent.py +1 -1
  522. metadata/generated/schema/events/failedEventResponse.py +1 -1
  523. metadata/generated/schema/events/filterResourceDescriptor.py +1 -1
  524. metadata/generated/schema/events/statusContext.py +1 -1
  525. metadata/generated/schema/events/subscriptionResourceDescriptor.py +1 -1
  526. metadata/generated/schema/events/subscriptionStatus.py +1 -1
  527. metadata/generated/schema/events/testDestinationStatus.py +1 -1
  528. metadata/generated/schema/governance/workflows/__init__.py +1 -1
  529. metadata/generated/schema/governance/workflows/elements/__init__.py +1 -1
  530. metadata/generated/schema/governance/workflows/elements/edge.py +1 -1
  531. metadata/generated/schema/governance/workflows/elements/nodeSubType.py +1 -1
  532. metadata/generated/schema/governance/workflows/elements/nodeType.py +1 -1
  533. metadata/generated/schema/governance/workflows/elements/nodes/__init__.py +1 -1
  534. metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/__init__.py +1 -1
  535. metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/checkEntityAttributesTask.py +1 -1
  536. metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/createAndRunIngestionPipelineTask.py +1 -1
  537. metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/runAppTask.py +1 -1
  538. metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/setEntityCertificationTask.py +1 -1
  539. metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/setGlossaryTermStatusTask.py +1 -1
  540. metadata/generated/schema/governance/workflows/elements/nodes/endEvent/__init__.py +1 -1
  541. metadata/generated/schema/governance/workflows/elements/nodes/endEvent/endEvent.py +1 -1
  542. metadata/generated/schema/governance/workflows/elements/nodes/gateway/__init__.py +1 -1
  543. metadata/generated/schema/governance/workflows/elements/nodes/gateway/parallelGateway.py +1 -1
  544. metadata/generated/schema/governance/workflows/elements/nodes/startEvent/__init__.py +1 -1
  545. metadata/generated/schema/governance/workflows/elements/nodes/startEvent/startEvent.py +1 -1
  546. metadata/generated/schema/governance/workflows/elements/nodes/userTask/__init__.py +1 -1
  547. metadata/generated/schema/governance/workflows/elements/nodes/userTask/userApprovalTask.py +1 -1
  548. metadata/generated/schema/governance/workflows/elements/triggers/__init__.py +1 -1
  549. metadata/generated/schema/governance/workflows/elements/triggers/eventBasedEntityTrigger.py +1 -1
  550. metadata/generated/schema/governance/workflows/elements/triggers/noOpTrigger.py +1 -1
  551. metadata/generated/schema/governance/workflows/elements/triggers/periodicBatchEntityTrigger.py +1 -1
  552. metadata/generated/schema/governance/workflows/workflowDefinition.py +1 -1
  553. metadata/generated/schema/governance/workflows/workflowInstance.py +1 -1
  554. metadata/generated/schema/governance/workflows/workflowInstanceState.py +1 -1
  555. metadata/generated/schema/jobs/__init__.py +1 -1
  556. metadata/generated/schema/jobs/backgroundJob.py +1 -1
  557. metadata/generated/schema/jobs/enumCleanupArgs.py +1 -1
  558. metadata/generated/schema/metadataIngestion/__init__.py +1 -1
  559. metadata/generated/schema/metadataIngestion/apiServiceMetadataPipeline.py +1 -1
  560. metadata/generated/schema/metadataIngestion/application.py +1 -1
  561. metadata/generated/schema/metadataIngestion/applicationPipeline.py +1 -1
  562. metadata/generated/schema/metadataIngestion/dashboardServiceMetadataPipeline.py +1 -1
  563. metadata/generated/schema/metadataIngestion/dataInsightPipeline.py +1 -1
  564. metadata/generated/schema/metadataIngestion/databaseServiceAutoClassificationPipeline.py +1 -1
  565. metadata/generated/schema/metadataIngestion/databaseServiceMetadataPipeline.py +1 -1
  566. metadata/generated/schema/metadataIngestion/databaseServiceProfilerPipeline.py +1 -1
  567. metadata/generated/schema/metadataIngestion/databaseServiceQueryLineagePipeline.py +1 -1
  568. metadata/generated/schema/metadataIngestion/databaseServiceQueryUsagePipeline.py +1 -1
  569. metadata/generated/schema/metadataIngestion/dbtPipeline.py +1 -1
  570. metadata/generated/schema/metadataIngestion/dbtconfig/__init__.py +1 -1
  571. metadata/generated/schema/metadataIngestion/dbtconfig/dbtAzureConfig.py +1 -1
  572. metadata/generated/schema/metadataIngestion/dbtconfig/dbtBucketDetails.py +1 -1
  573. metadata/generated/schema/metadataIngestion/dbtconfig/dbtCloudConfig.py +1 -1
  574. metadata/generated/schema/metadataIngestion/dbtconfig/dbtGCSConfig.py +1 -1
  575. metadata/generated/schema/metadataIngestion/dbtconfig/dbtHttpConfig.py +1 -1
  576. metadata/generated/schema/metadataIngestion/dbtconfig/dbtLocalConfig.py +1 -1
  577. metadata/generated/schema/metadataIngestion/dbtconfig/dbtS3Config.py +1 -1
  578. metadata/generated/schema/metadataIngestion/messagingServiceMetadataPipeline.py +1 -1
  579. metadata/generated/schema/metadataIngestion/metadataToElasticSearchPipeline.py +1 -1
  580. metadata/generated/schema/metadataIngestion/mlmodelServiceMetadataPipeline.py +1 -1
  581. metadata/generated/schema/metadataIngestion/pipelineServiceMetadataPipeline.py +1 -1
  582. metadata/generated/schema/metadataIngestion/reverseIngestionPipeline.py +1 -1
  583. metadata/generated/schema/metadataIngestion/reverseingestionconfig/__init__.py +1 -1
  584. metadata/generated/schema/metadataIngestion/reverseingestionconfig/descriptionConfig.py +1 -1
  585. metadata/generated/schema/metadataIngestion/reverseingestionconfig/ownerConfig.py +1 -1
  586. metadata/generated/schema/metadataIngestion/reverseingestionconfig/tagsConfig.py +1 -1
  587. metadata/generated/schema/metadataIngestion/searchServiceMetadataPipeline.py +1 -1
  588. metadata/generated/schema/metadataIngestion/storage/__init__.py +1 -1
  589. metadata/generated/schema/metadataIngestion/storage/containerMetadataConfig.py +1 -1
  590. metadata/generated/schema/metadataIngestion/storage/manifestMetadataConfig.py +1 -1
  591. metadata/generated/schema/metadataIngestion/storage/storageBucketDetails.py +1 -1
  592. metadata/generated/schema/metadataIngestion/storage/storageMetadataADLSConfig.py +1 -1
  593. metadata/generated/schema/metadataIngestion/storage/storageMetadataGCSConfig.py +1 -1
  594. metadata/generated/schema/metadataIngestion/storage/storageMetadataHttpConfig.py +1 -1
  595. metadata/generated/schema/metadataIngestion/storage/storageMetadataLocalConfig.py +1 -1
  596. metadata/generated/schema/metadataIngestion/storage/storageMetadataS3Config.py +1 -1
  597. metadata/generated/schema/metadataIngestion/storageServiceMetadataPipeline.py +1 -1
  598. metadata/generated/schema/metadataIngestion/testSuitePipeline.py +1 -1
  599. metadata/generated/schema/metadataIngestion/workflow.py +8 -1
  600. metadata/generated/schema/monitoring/__init__.py +1 -1
  601. metadata/generated/schema/monitoring/eventMonitorProvider.py +1 -1
  602. metadata/generated/schema/search/__init__.py +1 -1
  603. metadata/generated/schema/search/aggregationRequest.py +1 -1
  604. metadata/generated/schema/search/searchRequest.py +1 -1
  605. metadata/generated/schema/security/__init__.py +1 -1
  606. metadata/generated/schema/security/client/__init__.py +1 -1
  607. metadata/generated/schema/security/client/auth0SSOClientConfig.py +1 -1
  608. metadata/generated/schema/security/client/azureSSOClientConfig.py +1 -1
  609. metadata/generated/schema/security/client/customOidcSSOClientConfig.py +1 -1
  610. metadata/generated/schema/security/client/googleSSOClientConfig.py +1 -1
  611. metadata/generated/schema/security/client/oidcClientConfig.py +4 -1
  612. metadata/generated/schema/security/client/oktaSSOClientConfig.py +1 -1
  613. metadata/generated/schema/security/client/openMetadataJWTClientConfig.py +1 -1
  614. metadata/generated/schema/security/client/samlSSOClientConfig.py +1 -1
  615. metadata/generated/schema/security/credentials/__init__.py +1 -1
  616. metadata/generated/schema/security/credentials/accessTokenAuth.py +1 -1
  617. metadata/generated/schema/security/credentials/apiAccessTokenAuth.py +1 -1
  618. metadata/generated/schema/security/credentials/awsCredentials.py +1 -1
  619. metadata/generated/schema/security/credentials/azureCredentials.py +1 -1
  620. metadata/generated/schema/security/credentials/basicAuth.py +1 -1
  621. metadata/generated/schema/security/credentials/bitbucketCredentials.py +1 -1
  622. metadata/generated/schema/security/credentials/gcpCredentials.py +1 -1
  623. metadata/generated/schema/security/credentials/gcpExternalAccount.py +1 -1
  624. metadata/generated/schema/security/credentials/gcpValues.py +1 -1
  625. metadata/generated/schema/security/credentials/gitCredentials.py +1 -1
  626. metadata/generated/schema/security/credentials/githubCredentials.py +1 -1
  627. metadata/generated/schema/security/credentials/gitlabCredentials.py +1 -1
  628. metadata/generated/schema/security/sasl/__init__.py +1 -1
  629. metadata/generated/schema/security/sasl/saslClientConfig.py +1 -1
  630. metadata/generated/schema/security/secrets/__init__.py +1 -1
  631. metadata/generated/schema/security/secrets/secretsManagerClientLoader.py +1 -1
  632. metadata/generated/schema/security/secrets/secretsManagerConfiguration.py +1 -1
  633. metadata/generated/schema/security/secrets/secretsManagerProvider.py +1 -1
  634. metadata/generated/schema/security/securityConfiguration.py +1 -1
  635. metadata/generated/schema/security/ssl/__init__.py +1 -1
  636. metadata/generated/schema/security/ssl/validateSSLClientConfig.py +1 -1
  637. metadata/generated/schema/security/ssl/verifySSLConfig.py +1 -1
  638. metadata/generated/schema/settings/__init__.py +1 -1
  639. metadata/generated/schema/settings/settings.py +1 -1
  640. metadata/generated/schema/system/__init__.py +1 -1
  641. metadata/generated/schema/system/entityError.py +1 -1
  642. metadata/generated/schema/system/eventPublisherJob.py +17 -5
  643. metadata/generated/schema/system/indexingError.py +1 -1
  644. metadata/generated/schema/system/limitsResponse.py +1 -1
  645. metadata/generated/schema/system/ui/__init__.py +1 -1
  646. metadata/generated/schema/system/ui/knowledgePanel.py +1 -1
  647. metadata/generated/schema/system/ui/navigationItem.py +1 -1
  648. metadata/generated/schema/system/ui/page.py +1 -1
  649. metadata/generated/schema/system/ui/tab.py +1 -1
  650. metadata/generated/schema/system/ui/uiCustomization.py +1 -1
  651. metadata/generated/schema/system/validationResponse.py +1 -1
  652. metadata/generated/schema/tests/__init__.py +1 -1
  653. metadata/generated/schema/tests/assigned.py +1 -1
  654. metadata/generated/schema/tests/basic.py +20 -20
  655. metadata/generated/schema/tests/customMetric.py +1 -1
  656. metadata/generated/schema/tests/dataQualityReport.py +1 -1
  657. metadata/generated/schema/tests/resolved.py +1 -1
  658. metadata/generated/schema/tests/testCase.py +1 -1
  659. metadata/generated/schema/tests/testCaseResolutionStatus.py +1 -1
  660. metadata/generated/schema/tests/testDefinition.py +1 -1
  661. metadata/generated/schema/tests/testSuite.py +1 -1
  662. metadata/generated/schema/type/__init__.py +1 -1
  663. metadata/generated/schema/type/apiSchema.py +1 -1
  664. metadata/generated/schema/type/assetCertification.py +1 -1
  665. metadata/generated/schema/type/auditLog.py +1 -1
  666. metadata/generated/schema/type/basic.py +1 -1
  667. metadata/generated/schema/type/bulkOperationResult.py +1 -1
  668. metadata/generated/schema/type/changeEvent.py +1 -1
  669. metadata/generated/schema/type/changeEventType.py +1 -1
  670. metadata/generated/schema/type/changeSummaryMap.py +1 -1
  671. metadata/generated/schema/type/collectionDescriptor.py +1 -1
  672. metadata/generated/schema/type/csvDocumentation.py +1 -1
  673. metadata/generated/schema/type/csvErrorType.py +1 -1
  674. metadata/generated/schema/type/csvFile.py +1 -1
  675. metadata/generated/schema/type/csvImportResult.py +1 -1
  676. metadata/generated/schema/type/customProperties/__init__.py +1 -1
  677. metadata/generated/schema/type/customProperties/complexTypes.py +1 -1
  678. metadata/generated/schema/type/customProperties/enumConfig.py +1 -1
  679. metadata/generated/schema/type/customProperties/tableConfig.py +1 -1
  680. metadata/generated/schema/type/customProperty.py +1 -1
  681. metadata/generated/schema/type/dailyCount.py +1 -1
  682. metadata/generated/schema/type/databaseConnectionConfig.py +1 -1
  683. metadata/generated/schema/type/entityHierarchy.py +1 -1
  684. metadata/generated/schema/type/entityHistory.py +1 -1
  685. metadata/generated/schema/type/entityLineage.py +1 -1
  686. metadata/generated/schema/type/entityReference.py +1 -1
  687. metadata/generated/schema/type/entityReferenceList.py +1 -1
  688. metadata/generated/schema/type/entityRelationship.py +1 -1
  689. metadata/generated/schema/type/entityUsage.py +1 -1
  690. metadata/generated/schema/type/filterPattern.py +1 -1
  691. metadata/generated/schema/type/function.py +1 -1
  692. metadata/generated/schema/type/include.py +1 -1
  693. metadata/generated/schema/type/jdbcConnection.py +1 -1
  694. metadata/generated/schema/type/lifeCycle.py +1 -1
  695. metadata/generated/schema/type/paging.py +1 -1
  696. metadata/generated/schema/type/profile.py +1 -1
  697. metadata/generated/schema/type/queryParserData.py +1 -1
  698. metadata/generated/schema/type/reaction.py +1 -1
  699. metadata/generated/schema/type/schedule.py +1 -1
  700. metadata/generated/schema/type/schema.py +1 -1
  701. metadata/generated/schema/type/tableQuery.py +1 -1
  702. metadata/generated/schema/type/tableUsageCount.py +1 -1
  703. metadata/generated/schema/type/tagLabel.py +2 -1
  704. metadata/generated/schema/type/usageDetails.py +1 -1
  705. metadata/generated/schema/type/usageRequest.py +1 -1
  706. metadata/generated/schema/type/votes.py +1 -1
  707. metadata/ingestion/api/topology_runner.py +5 -1
  708. metadata/ingestion/models/patch_request.py +71 -3
  709. metadata/ingestion/ometa/mixins/es_mixin.py +11 -5
  710. metadata/ingestion/source/api/rest/metadata.py +15 -2
  711. metadata/ingestion/source/dashboard/powerbi/metadata.py +122 -38
  712. metadata/ingestion/source/dashboard/powerbi/models.py +22 -0
  713. metadata/ingestion/source/dashboard/tableau/client.py +152 -171
  714. metadata/ingestion/source/dashboard/tableau/connection.py +23 -48
  715. metadata/ingestion/source/dashboard/tableau/metadata.py +73 -99
  716. metadata/ingestion/source/dashboard/tableau/models.py +8 -18
  717. metadata/ingestion/source/dashboard/tableau/queries.py +2 -2
  718. metadata/ingestion/source/database/athena/metadata.py +26 -0
  719. metadata/ingestion/source/database/bigquery/connection.py +8 -3
  720. metadata/ingestion/source/database/bigquery/helper.py +8 -6
  721. metadata/ingestion/source/database/bigquery/metadata.py +13 -5
  722. metadata/ingestion/source/database/dbt/metadata.py +30 -17
  723. metadata/ingestion/source/database/life_cycle_query_mixin.py +9 -0
  724. metadata/ingestion/source/database/mysql/connection.py +11 -3
  725. metadata/ingestion/source/database/mysql/lineage.py +4 -4
  726. metadata/ingestion/source/database/mysql/queries.py +29 -0
  727. metadata/ingestion/source/database/mysql/query_parser.py +31 -0
  728. metadata/ingestion/source/database/oracle/queries.py +2 -2
  729. metadata/ingestion/source/database/postgres/metadata.py +3 -1
  730. metadata/ingestion/source/database/postgres/queries.py +7 -0
  731. metadata/ingestion/source/database/postgres/utils.py +28 -19
  732. metadata/ingestion/source/database/snowflake/data_diff/__init__.py +0 -0
  733. metadata/ingestion/source/database/snowflake/data_diff/data_diff.py +37 -0
  734. metadata/ingestion/source/database/snowflake/metadata.py +14 -0
  735. metadata/ingestion/source/database/snowflake/queries.py +11 -7
  736. metadata/ingestion/source/database/snowflake/service_spec.py +4 -0
  737. metadata/ingestion/source/database/snowflake/utils.py +32 -4
  738. metadata/ingestion/source/database/vertica/queries.py +5 -20
  739. metadata/ingestion/source/pipeline/airbyte/constants.py +29 -0
  740. metadata/ingestion/source/pipeline/airbyte/metadata.py +67 -26
  741. metadata/ingestion/source/pipeline/airbyte/utils.py +99 -0
  742. metadata/ingestion/source/pipeline/openlineage/models.py +3 -2
  743. metadata/ingestion/source/pipeline/pipeline_service.py +2 -3
  744. metadata/ingestion/source/storage/s3/metadata.py +7 -8
  745. metadata/pii/algorithms/classifiers.py +180 -0
  746. metadata/pii/algorithms/column_patterns.py +61 -0
  747. metadata/pii/algorithms/feature_extraction.py +154 -0
  748. metadata/pii/algorithms/preprocessing.py +62 -0
  749. metadata/pii/algorithms/presidio_patches.py +45 -0
  750. metadata/pii/algorithms/presidio_utils.py +119 -0
  751. metadata/pii/algorithms/tags.py +111 -0
  752. metadata/pii/algorithms/utils.py +38 -0
  753. metadata/pii/base_processor.py +125 -0
  754. metadata/pii/constants.py +8 -0
  755. metadata/pii/processor.py +42 -138
  756. metadata/profiler/interface/sqlalchemy/profiler_interface.py +66 -36
  757. metadata/profiler/processor/runner.py +29 -6
  758. metadata/profiler/source/database/mssql/profiler_source.py +86 -0
  759. metadata/profiler/source/fetcher/profiler_source_factory.py +13 -0
  760. metadata/readers/dataframe/json.py +5 -1
  761. metadata/readers/dataframe/parquet.py +10 -2
  762. metadata/readers/dataframe/reader_factory.py +8 -0
  763. metadata/sampler/processor.py +1 -1
  764. metadata/sampler/sampler_interface.py +3 -0
  765. metadata/sampler/sqlalchemy/bigquery/sampler.py +2 -63
  766. metadata/sampler/sqlalchemy/sampler.py +32 -40
  767. metadata/utils/datalake/datalake_utils.py +9 -3
  768. metadata/utils/fqn.py +4 -4
  769. metadata/utils/service_spec/default.py +4 -0
  770. metadata/utils/service_spec/service_spec.py +1 -0
  771. metadata/utils/sqa_utils.py +15 -0
  772. metadata/workflow/base.py +8 -1
  773. metadata/workflow/profiler.py +9 -9
  774. metadata/workflow/workflow_status_mixin.py +1 -7
  775. {openmetadata_ingestion-1.7.0.1.dist-info → openmetadata_ingestion-1.7.1.0.dist-info}/METADATA +420 -414
  776. {openmetadata_ingestion-1.7.0.1.dist-info → openmetadata_ingestion-1.7.1.0.dist-info}/RECORD +780 -762
  777. {openmetadata_ingestion-1.7.0.1.dist-info → openmetadata_ingestion-1.7.1.0.dist-info}/LICENSE +0 -0
  778. {openmetadata_ingestion-1.7.0.1.dist-info → openmetadata_ingestion-1.7.1.0.dist-info}/WHEEL +0 -0
  779. {openmetadata_ingestion-1.7.0.1.dist-info → openmetadata_ingestion-1.7.1.0.dist-info}/entry_points.txt +0 -0
  780. {openmetadata_ingestion-1.7.0.1.dist-info → openmetadata_ingestion-1.7.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,154 @@
1
+ # Copyright 2025 Collate
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
+ # Unless required by applicable law or agreed to in writing, software
7
+ # distributed under the License is distributed on an "AS IS" BASIS,
8
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
+ # See the License for the specific language governing permissions and
10
+ # limitations under the License.
11
+ """
12
+ Extraction of PII features (from text, column names, and data types) to be used
13
+ for the PII classification model.
14
+ """
15
+ import logging
16
+ import re
17
+ from collections import defaultdict
18
+ from typing import DefaultDict, Dict, Iterable, List, Mapping, Optional, Sequence, Set
19
+
20
+ from presidio_analyzer import AnalyzerEngine
21
+
22
+ from metadata.generated.schema.entity.data.table import DataType
23
+ from metadata.pii.algorithms.presidio_patches import PresidioRecognizerResultPatcher
24
+ from metadata.pii.algorithms.tags import PIITag
25
+ from metadata.pii.scanners.ner_scanner import SUPPORTED_LANG
26
+ from metadata.utils.logger import pii_logger
27
+
28
+ logger = pii_logger()
29
+
30
+
31
def extract_pii_tags(
    analyzer: AnalyzerEngine,
    texts: Sequence[str],
    context: Optional[List[str]] = None,
    recognizer_result_patcher: Optional[PresidioRecognizerResultPatcher] = None,
) -> Dict[PIITag, float]:
    """
    Extract PII entities from a batch of texts.

    The results are averaged over the batch. In general, the larger the batch,
    the better the results, as some single texts might be noisy or contain
    false positives.

    Args:
        analyzer (AnalyzerEngine): The analyzer engine to use for PII detection.
        texts (Sequence[str]): A sequence of texts to analyze.
        context (Optional[List[str]]): Optional context to provide to the analyzer.
            This can be used to improve the accuracy of the PII detection.
            For example, keywords extracted from column names.
        recognizer_result_patcher (Optional[PresidioRecognizerResultPatcher]): A callable
            that receives the recognizer results of one text together with that text
            and returns the (possibly modified) results to be scored.
    Returns:
        Dict[PIITag, float]: A mapping of PII entity types to their average scores.
            Each entity's score is summed over all its results and divided by the
            number of texts. Entities that were never detected are absent.
    Raises:
        ValueError: If the analyzer does not support SUPPORTED_LANG.
    """
    entity_scores: DefaultDict[PIITag, float] = defaultdict(float)

    if SUPPORTED_LANG not in analyzer.supported_languages:
        raise ValueError(
            f"The analyzer does not support {SUPPORTED_LANG}, which is required for this function."
        )

    for text in texts:
        results = analyzer.analyze(
            text, language=SUPPORTED_LANG, context=context, entities=PIITag.values()
        )
        if recognizer_result_patcher is not None:
            results = recognizer_result_patcher(results, text)

        for result in results:
            try:
                # This should be safe because the analyzer only considers the entities that we passed
                pii_entity = PIITag[result.entity_type]
            except KeyError:
                # Report through the module-level PII logger rather than the root
                # logger, so the message follows the ingestion logging configuration.
                logger.error(f"Unrecognized PII entity type: {result.entity_type}.")
            else:
                entity_scores[pii_entity] += result.score

    # normalize the scores if the batch is not empty
    batch_size = len(texts)
    if batch_size:
        for entity in entity_scores:
            entity_scores[entity] /= batch_size

    return entity_scores
83
+
84
+
85
def split_column_name(column_name: str) -> List[str]:
    """
    Break a column name into lowercase tokens.

    The name is lowercased and then cut at every occurrence of one of the
    separators "_", "-", " ", "." or "/". Consecutive separators produce
    empty-string tokens. The tokens are meant to be handed to the analyzer
    as context.
    """
    separators = ("_", "-", " ", ".", "/")
    # Escape each separator so that "." is matched literally
    splitter = "|".join(re.escape(separator) for separator in separators)
    lowered_name = column_name.lower()
    return re.split(splitter, lowered_name)
94
+
95
+
96
def extract_pii_from_column_names(
    column_name: str, patterns: Mapping[PIITag, Iterable[re.Pattern[str]]]
) -> Set[PIITag]:
    """
    Collect the PII tags whose regex patterns match the given column name.

    Every PII type comes with a collection of compiled patterns; a type is
    reported as soon as one of its patterns matches. Patterns are applied with
    `match`, i.e. anchored at the beginning of the column name.

    Example: "user_email" might match the EMAIL_ADDRESS pattern, returning
    a set containing the PII tag PIITag.EMAIL_ADDRESS.
    """
    return {
        pii_type
        for pii_type, pii_type_patterns in patterns.items()
        # any() stops at the first matching pattern for this PII type
        if any(
            candidate.match(column_name) is not None
            for candidate in pii_type_patterns
        )
    }
116
+
117
+
118
def is_non_pii_datatype(dtype: DataType) -> bool:
    """
    Tell whether columns of the given data type are unlikely to hold PII,
    so that they can be safely left out of PII detection or scanning.
    """
    excluded_data_types = {
        # Types considered non-PII
        DataType.BOOLEAN,
        DataType.BIT,
        DataType.NULL,
        DataType.ERROR,
        DataType.FIXED,
        DataType.AGGREGATEFUNCTION,
        DataType.HLLSKETCH,
        DataType.QUANTILE_STATE,
        DataType.AGG_STATE,
        DataType.BITMAP,
        DataType.PG_LSN,
        DataType.PG_SNAPSHOT,
        DataType.TXID_SNAPSHOT,
        DataType.TSQUERY,
        DataType.TSVECTOR,
        DataType.UNKNOWN,
        DataType.LOWCARDINALITY,
        DataType.MEASURE_HIDDEN,
        DataType.MEASURE_VISIBLE,
        # Geo data types
        DataType.GEOGRAPHY,
        DataType.GEOMETRY,
        DataType.SPATIAL,
        DataType.POINT,
        DataType.POLYGON,
    }
    return dtype in excluded_data_types
@@ -0,0 +1,62 @@
1
+ # Copyright 2025 Collate
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
+ # Unless required by applicable law or agreed to in writing, software
7
+ # distributed under the License is distributed on an "AS IS" BASIS,
8
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
+ # See the License for the specific language governing permissions and
10
+ # limitations under the License.
11
+ """
12
+ Preprocessing functions for the classification tasks.
13
+ """
14
+ import datetime
15
+ import json
16
+ from typing import Any, List, Mapping, Optional, Sequence
17
+
18
+ from metadata.utils.logger import pii_logger
19
+
20
+ logger = pii_logger()
21
+
22
+
23
+ # pylint: disable=too-many-return-statements
24
def convert_to_str(value: Any) -> Optional[str]:
    """
    Turn a sampled value into a string, or None when it should be skipped.

    This conversion is tailored to our use case, it is not a generic one:
    strings pass through untouched; ints, floats, datetimes and dates use
    their `str()` form; bytes are decoded as UTF-8 dropping invalid bytes;
    other sequences and mappings are serialized to JSON. Everything else,
    including None, yields None.
    """
    if isinstance(value, str):
        return value

    # Values we want to convert to string out of the box
    directly_stringified = (int, float, datetime.datetime, datetime.date)
    if isinstance(value, directly_stringified):
        return str(value)

    if isinstance(value, bytes):
        return value.decode("utf-8", errors="ignore")

    if not isinstance(value, (Sequence, Mapping)):
        # None must be skipped rather than rendered as "None";
        # any other unsupported type is skipped as well
        return None

    try:
        return json.dumps(value, default=str)
    except (TypeError, ValueError, OverflowError) as err:
        # If the value cannot be serialized to JSON, return None
        logger.warning(f"Failed to convert value to JSON: {err}")
        return None
47
+
48
+
49
def preprocess_values(values: Sequence[Any]) -> List[str]:
    """
    Convert the values to strings, dropping the ones that cannot be used.

    A value is dropped when `convert_to_str` returns None for it or when its
    converted form is empty or whitespace-only. The remaining strings are kept
    as they are, without any further processing, in their original order.
    """
    converted_values = (convert_to_str(value) for value in values)
    return [
        converted_value
        for converted_value in converted_values
        if converted_value is not None and converted_value.strip()
    ]
@@ -0,0 +1,45 @@
1
+ # Copyright 2025 Collate
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
+ # Unless required by applicable law or agreed to in writing, software
7
+ # distributed under the License is distributed on an "AS IS" BASIS,
8
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
+ # See the License for the specific language governing permissions and
10
+ # limitations under the License.
11
+ """
12
+ Patch the Presidio recognizer results to adapt them to specific use cases.
13
+ """
14
+ from typing import List, Protocol, Sequence
15
+
16
+ from presidio_analyzer import RecognizerResult
17
+
18
+
19
class PresidioRecognizerResultPatcher(Protocol):
    """
    A protocol for a callable that takes the recognizer results produced for a
    text, together with that text, and returns a (possibly modified) sequence
    of recognizer results.

    Sometimes we need to patch the recognizer results to make them compatible
    with our use case. For instance, Presidio yields URL false positives on
    email addresses.
    """

    def __call__(
        self, recognizer_results: Sequence[RecognizerResult], text: str
    ) -> Sequence[RecognizerResult]:
        ...
30
+
31
+
32
def url_patcher(
    recognizer_results: Sequence[RecognizerResult], text: str
) -> Sequence[RecognizerResult]:
    """
    Drop the URL results that are false positives caused by an email address.

    A URL result is discarded when the character right before its start
    position in the text is "@". All other results are returned in their
    original order.
    """

    def is_email_domain(candidate: RecognizerResult) -> bool:
        # A "URL" preceded by "@" is probably the domain of an email address
        return candidate.entity_type == "URL" and text[: candidate.start].endswith(
            "@"
        )

    return [
        candidate
        for candidate in recognizer_results
        if not is_email_domain(candidate)
    ]
@@ -0,0 +1,119 @@
1
+ # Copyright 2025 Collate
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
+ # Unless required by applicable law or agreed to in writing, software
7
+ # distributed under the License is distributed on an "AS IS" BASIS,
8
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
+ # See the License for the specific language governing permissions and
10
+ # limitations under the License.
11
+ """
12
+ Utilities for working with the Presidio Library.
13
+ """
14
+ import inspect
15
+ import logging
16
+ from typing import Iterable, Optional, Type
17
+
18
+ import spacy
19
+ from presidio_analyzer import (
20
+ AnalyzerEngine,
21
+ EntityRecognizer,
22
+ PatternRecognizer,
23
+ predefined_recognizers,
24
+ )
25
+ from presidio_analyzer.nlp_engine import SpacyNlpEngine
26
+ from spacy.cli.download import download # pyright: ignore[reportUnknownVariableType]
27
+
28
+ from metadata.pii.constants import PRESIDIO_LOGGER, SPACY_EN_MODEL, SUPPORTED_LANG
29
+ from metadata.utils.logger import METADATA_LOGGER, pii_logger
30
+
31
+ logger = pii_logger()
32
+
33
+
34
def build_analyzer_engine(
    model_name: str = SPACY_EN_MODEL,
) -> AnalyzerEngine:
    """
    Create a Presidio analyzer engine backed by the spaCy model `model_name`
    and tailored to our use case.

    The model is downloaded first if it cannot be found locally.
    """
    _load_spacy_model(model_name)

    spacy_nlp_engine = SpacyNlpEngine(
        models=[
            {
                "lang_code": SUPPORTED_LANG,
                "model_name": model_name,
            }
        ]
    )
    engine = AnalyzerEngine(
        nlp_engine=spacy_nlp_engine, supported_languages=[SUPPORTED_LANG]
    )

    # Presidio recognizers are language-dependent: when analyzing text,
    # Presidio filters recognizers based on the specified language, assuming
    # language-specific patterns (e.g., for country-specific formats).
    # However, our use case involves analyzing structured table data rather than free text,
    # so this language-based approach doesn't always make sense.
    # To fix this, every recognizer is registered with its supported language
    # manually set to the one we want.
    for pattern_recognizer in _get_all_pattern_recognizers():
        pattern_recognizer.supported_language = SUPPORTED_LANG
        engine.registry.add_recognizer(pattern_recognizer)

    return engine
65
+
66
+
67
def set_presidio_logger_level(log_level: Optional[int] = None) -> None:
    """
    Make the presidio logger less chatty about internal entities unless we are debugging.

    When no level is given, INFO is used if the metadata logger is set to
    DEBUG, and ERROR otherwise.
    """
    if log_level is None:
        metadata_is_debugging = (
            logging.getLogger(METADATA_LOGGER).level == logging.DEBUG
        )
        log_level = logging.INFO if metadata_is_debugging else logging.ERROR

    presidio_logger = logging.getLogger(PRESIDIO_LOGGER)
    presidio_logger.setLevel(log_level)
79
+
80
+
81
def _load_spacy_model(model_name: str) -> None:
    """
    Make sure the spaCy model `model_name` can be loaded.

    A first load is attempted; if it fails with OSError the model is
    downloaded and loaded again. The loaded model itself is discarded.
    """
    try:
        spacy.load(model_name)
    except OSError:
        # First load failed: fetch the model, then retry the load
        logger.warning(f"Downloading {model_name} language model for the spaCy")
        download(model_name)
        spacy.load(model_name)
94
+
95
+
96
def _get_all_entity_recognizer_classes() -> Iterable[Type[EntityRecognizer]]:
    """
    Yield every `EntityRecognizer` subclass listed in the `__all__`
    of the predefined_recognizers module.
    """
    exported_names = getattr(predefined_recognizers, "__all__", [])
    for exported_name in exported_names:
        candidate = getattr(predefined_recognizers, exported_name, None)
        if not inspect.isclass(candidate):
            # issubclass() below only accepts classes
            continue
        if issubclass(candidate, EntityRecognizer):
            yield candidate
105
+
106
+
107
def _get_all_pattern_recognizers() -> Iterable[EntityRecognizer]:
    """
    Yield an instance of every predefined pattern recognizer, plus the phone recognizer.

    Pattern recognizers are instantiated for SUPPORTED_LANG; the ones that
    fail to instantiate are logged as a warning and skipped.
    """
    for recognizer_cls in _get_all_entity_recognizer_classes():
        if not issubclass(recognizer_cls, PatternRecognizer):
            if recognizer_cls == predefined_recognizers.PhoneRecognizer:
                # Not a pattern recognizer, but pretty much the same
                yield predefined_recognizers.PhoneRecognizer()
            continue

        try:
            # Try to instantiate the recognizer
            yield recognizer_cls(
                supported_language=SUPPORTED_LANG
            )  # pyright: ignore[reportCallIssue]
        except Exception as err:
            logger.warning(err)
@@ -0,0 +1,111 @@
1
+ # Copyright 2025 Collate
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
+ # Unless required by applicable law or agreed to in writing, software
7
+ # distributed under the License is distributed on an "AS IS" BASIS,
8
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
+ # See the License for the specific language governing permissions and
10
+ # limitations under the License.
11
+ """
12
+ Definition of tags for the PII algorithms.
13
+ These tags currently belong to the layer logic of the algorithms.
14
+ """
15
+ import enum
16
+ from typing import List
17
+
18
+
19
class PIISensitivityTag(enum.Enum):
    """
    Sensitivity levels that a PII tag can be mapped to.
    """

    SENSITIVE = "Sensitive"
    NONSENSITIVE = "NonSensitive"
22
+
23
+
24
@enum.unique
class PIITag(enum.Enum):
    """
    PII Tags (borrowed from Presidio https://microsoft.github.io/presidio/supported_entities/).
    """

    # Global
    CREDIT_CARD = "CREDIT_CARD"
    CRYPTO = "CRYPTO"  # Crypto Wallet Address
    DATE_TIME = "DATE_TIME"
    EMAIL_ADDRESS = "EMAIL_ADDRESS"
    IBAN_CODE = "IBAN_CODE"
    IP_ADDRESS = "IP_ADDRESS"
    NRP = "NRP"
    LOCATION = "LOCATION"
    PERSON = "PERSON"
    PHONE_NUMBER = "PHONE_NUMBER"
    MEDICAL_LICENSE = "MEDICAL_LICENSE"
    URL = "URL"

    # USA
    US_BANK_NUMBER = "US_BANK_NUMBER"
    US_DRIVER_LICENSE = "US_DRIVER_LICENSE"
    US_ITIN = "US_ITIN"
    US_PASSPORT = "US_PASSPORT"
    US_SSN = "US_SSN"

    # UK
    UK_NHS = "UK_NHS"

    # Spain
    ES_NIF = "ES_NIF"
    ES_NIE = "ES_NIE"

    # Italy
    IT_FISCAL_CODE = "IT_FISCAL_CODE"
    IT_DRIVER_LICENSE = "IT_DRIVER_LICENSE"
    IT_VAT_CODE = "IT_VAT_CODE"
    IT_PASSPORT = "IT_PASSPORT"
    IT_IDENTITY_CARD = "IT_IDENTITY_CARD"

    # Poland
    PL_PESEL = "PL_PESEL"

    # Singapore
    SG_NRIC_FIN = "SG_NRIC_FIN"
    SG_UEN = "SG_UEN"

    # Australia
    AU_ABN = "AU_ABN"
    AU_ACN = "AU_ACN"
    AU_TFN = "AU_TFN"
    AU_MEDICARE = "AU_MEDICARE"

    # India
    IN_PAN = "IN_PAN"
    IN_AADHAAR = "IN_AADHAAR"
    IN_VEHICLE_REGISTRATION = "IN_VEHICLE_REGISTRATION"
    IN_VOTER = "IN_VOTER"
    IN_PASSPORT = "IN_PASSPORT"

    # Finland
    FI_PERSONAL_IDENTITY_CODE = "FI_PERSONAL_IDENTITY_CODE"

    @classmethod
    def values(cls) -> List[str]:
        """
        Get all the values of the enum as a list of strings,
        in the order the members are defined.
        """
        return [tag.value for tag in cls]

    def sensitivity(self) -> PIISensitivityTag:
        """
        Get the sensitivity level of the PII tag.
        This map is opinionated and can be changed in the future according to users' needs.

        Tags listed in DEFAULT_NON_PII_SENSITIVE are NonSensitive,
        every other tag is Sensitive.
        """
        # DEFAULT_NON_PII_SENSITIVE is defined after this class at module level;
        # it is looked up when the method is called, not when the class is created.
        if self in DEFAULT_NON_PII_SENSITIVE:
            return PIISensitivityTag.NONSENSITIVE
        return PIISensitivityTag.SENSITIVE
103
+
104
+
105
# Tags that PIITag.sensitivity() reports as NonSensitive;
# any tag not listed here is reported as Sensitive.
DEFAULT_NON_PII_SENSITIVE = (
    PIITag.DATE_TIME,
    PIITag.NRP,
    PIITag.LOCATION,
    PIITag.PHONE_NUMBER,
    PIITag.URL,
)
@@ -0,0 +1,38 @@
1
+ # Copyright 2025 Collate
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
+ # Unless required by applicable law or agreed to in writing, software
7
+ # distributed under the License is distributed on an "AS IS" BASIS,
8
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
+ # See the License for the specific language governing permissions and
10
+ # limitations under the License.
11
+ """
12
+ Utility functions for PII algorithms
13
+ """
14
+ from typing import Mapping, Sequence, TypeVar
15
+
16
T = TypeVar("T")


def normalize_scores(scores: Mapping[T, float], tol: float = 0.01) -> Mapping[T, float]:
    """
    Rescale the scores so that they sum to 1.

    Scores that are not strictly greater than the tolerance are dropped
    before rescaling. When the remaining scores sum to 0 they are returned
    without rescaling. Scores must be positive.
    """
    retained = {label: value for label, value in scores.items() if value > tol}
    denominator = sum(retained.values())
    if denominator != 0:
        retained = {label: value / denominator for label, value in retained.items()}
    return retained
29
+
30
+
31
def get_top_classes(scores: Mapping[T, float], n: int, threshold: float) -> Sequence[T]:
    """
    Return at most n classes whose score reaches the threshold.

    The classes come out in descending order of score; only scores greater
    than or equal to the threshold are considered.
    """
    ranked = list(scores.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)

    qualifying = []
    for label, value in ranked:
        if value >= threshold:
            qualifying.append(label)

    return qualifying[:n]
@@ -0,0 +1,125 @@
1
+ # Copyright 2025 Collate
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
+ # Unless required by applicable law or agreed to in writing, software
7
+ # distributed under the License is distributed on an "AS IS" BASIS,
8
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
+ # See the License for the specific language governing permissions and
10
+ # limitations under the License.
11
+ """
12
+ Base class for the Auto Classification Processor.
13
+ """
14
+ import traceback
15
+ from abc import ABC, abstractmethod
16
+ from typing import Any, Optional, Sequence, Type, TypeVar, cast, final
17
+
18
+ from metadata.generated.schema.entity.data.table import Column
19
+ from metadata.generated.schema.entity.services.ingestionPipelines.status import (
20
+ StackTraceError,
21
+ )
22
+ from metadata.generated.schema.metadataIngestion.databaseServiceAutoClassificationPipeline import (
23
+ DatabaseServiceAutoClassificationPipeline,
24
+ )
25
+ from metadata.generated.schema.metadataIngestion.workflow import (
26
+ OpenMetadataWorkflowConfig,
27
+ )
28
+ from metadata.generated.schema.type.tagLabel import TagLabel
29
+ from metadata.ingestion.api.models import Either
30
+ from metadata.ingestion.api.parser import parse_workflow_config_gracefully
31
+ from metadata.ingestion.api.steps import Processor
32
+ from metadata.ingestion.models.table_metadata import ColumnTag
33
+ from metadata.ingestion.ometa.ometa_api import OpenMetadata
34
+ from metadata.sampler.models import SamplerResponse
35
+
36
+ C = TypeVar("C", bound="AutoClassificationProcessor")
37
+
38
+
39
class AutoClassificationProcessor(Processor, ABC):
    """
    Abstract base for Auto Classification processors.

    Subclasses only supply the logic that turns a column's sample data into
    tag labels; running over the records is handled by this base class.
    """

    # Some methods are marked as final to prevent overriding in subclasses thus
    # ensuring that the workflow is always run in the same way, leaving implementers
    # with the responsibility of *only* implementing the logic for creating tags.
    def __init__(
        self,
        config: OpenMetadataWorkflowConfig,
        metadata: OpenMetadata,
    ):
        super().__init__()
        self.config = config
        self.metadata = metadata

        # Init and type the source config.
        # The cast is only there to satisfy the type checker.
        self.source_config: DatabaseServiceAutoClassificationPipeline = cast(
            DatabaseServiceAutoClassificationPipeline,
            self.config.source.sourceConfig.config,
        )

    @abstractmethod
    def create_column_tag_labels(
        self, column: Column, sample_data: Sequence[Any]
    ) -> Sequence[TagLabel]:
        """
        Build the tag labels of a column out of its sample data.
        """

    @property
    def name(self) -> str:
        return "Auto Classification Processor"

    def close(self) -> None:
        """Nothing to close"""

    @classmethod
    @final
    def create(
        cls: Type[C],
        config_dict: dict,
        metadata: OpenMetadata,
        pipeline_name: Optional[str] = None,
    ) -> C:
        """
        Build the processor from a raw workflow configuration dictionary.
        `pipeline_name` is accepted but not used.
        """
        parsed_config = parse_workflow_config_gracefully(config_dict)
        return cls(config=parsed_config, metadata=metadata)

    @final
    def _run(self, record: SamplerResponse) -> Either[SamplerResponse]:
        """
        Main entrypoint for the processor.

        When auto classification is enabled, the tags computed for every
        column of the record's table are stored in `record.column_tags`.
        A column whose tagging fails is reported as a failure in the status
        and does not prevent the remaining columns from being processed.
        The record is always returned on the right side.
        """
        # We don't always need to process
        if self.source_config.enableAutoClassification:
            collected_tags = []

            for position, column in enumerate(record.table.columns):
                try:
                    # The sample rows are positional: take this column's slot from each row
                    column_values = [
                        row[position] for row in record.sample_data.data.rows
                    ]
                    labels = self.create_column_tag_labels(
                        column=column,
                        sample_data=column_values,
                    )
                    for label in labels:
                        collected_tags.append(
                            ColumnTag(
                                column_fqn=column.fullyQualifiedName.root,
                                tag_label=label,
                            )
                        )
                except Exception as err:
                    # TODO: Shouldn't we return a Left here?
                    failure = StackTraceError(
                        name=record.table.fullyQualifiedName.root,
                        error=f"Error in Processor {self.name} computing tags for [{column}] - [{err}]",
                        stackTrace=traceback.format_exc(),
                    )
                    self.status.failed(failure)

            record.column_tags = collected_tags

        return Either(right=record, left=None)
metadata/pii/constants.py CHANGED
@@ -13,4 +13,12 @@ PII constants
13
13
  """
14
14
 
15
15
  PII = "PII"
16
+
17
+ # Constants for Presidio
18
+ PRESIDIO_LOGGER = "presidio-analyzer"
16
19
  SPACY_EN_MODEL = "en_core_web_md"
20
+
21
+ # Supported language for Presidio.
22
+ # Don't change this unless you know what you are doing.
23
+ # We are doing some tricks to make Presidio work for our use case.
24
+ SUPPORTED_LANG = "en"