openmetadata-ingestion 1.6.0.0rc1__py3-none-any.whl → 1.6.0.0rc2__py3-none-any.whl

This diff shows the changes between publicly available package versions, as released to one of the supported registries and as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.


This version of openmetadata-ingestion might be problematic. Click here for more details.

Files changed (791)
  1. metadata/cli/classify.py +52 -0
  2. metadata/cmd.py +9 -0
  3. metadata/data_quality/builders/{i_validator_builder.py → validator_builder.py} +40 -29
  4. metadata/data_quality/interface/pandas/pandas_test_suite_interface.py +25 -25
  5. metadata/data_quality/interface/sqlalchemy/sqa_test_suite_interface.py +23 -50
  6. metadata/data_quality/interface/test_suite_interface.py +45 -56
  7. metadata/data_quality/processor/test_case_runner.py +9 -9
  8. metadata/data_quality/runner/base_test_suite_source.py +52 -26
  9. metadata/data_quality/validations/base_test_handler.py +10 -5
  10. metadata/data_quality/validations/column/sqlalchemy/columnValueLengthsToBeBetween.py +1 -1
  11. metadata/data_quality/validations/column/sqlalchemy/columnValueMaxToBeBetween.py +1 -1
  12. metadata/data_quality/validations/column/sqlalchemy/columnValueMeanToBeBetween.py +1 -1
  13. metadata/data_quality/validations/column/sqlalchemy/columnValueMedianToBeBetween.py +1 -1
  14. metadata/data_quality/validations/column/sqlalchemy/columnValueMinToBeBetween.py +1 -1
  15. metadata/data_quality/validations/column/sqlalchemy/columnValueStdDevToBeBetween.py +1 -1
  16. metadata/data_quality/validations/column/sqlalchemy/columnValuesMissingCount.py +1 -1
  17. metadata/data_quality/validations/column/sqlalchemy/columnValuesSumToBeBetween.py +1 -1
  18. metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeAtExpectedLocation.py +1 -1
  19. metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeBetween.py +1 -1
  20. metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeInSet.py +1 -1
  21. metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotInSet.py +1 -1
  22. metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotNull.py +1 -1
  23. metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeUnique.py +2 -8
  24. metadata/data_quality/validations/column/sqlalchemy/columnValuesToMatchRegex.py +1 -1
  25. metadata/data_quality/validations/column/sqlalchemy/columnValuesToNotMatchRegex.py +1 -1
  26. metadata/data_quality/validations/runtime_param_setter/param_setter.py +2 -3
  27. metadata/data_quality/validations/runtime_param_setter/param_setter_factory.py +45 -17
  28. metadata/data_quality/validations/runtime_param_setter/table_diff_params_setter.py +21 -16
  29. metadata/data_quality/validations/table/sqlalchemy/tableDiff.py +21 -18
  30. metadata/data_quality/validations/table/sqlalchemy/tableRowInsertedCountToBeBetween.py +2 -2
  31. metadata/examples/workflows/bigquery_classifier.yaml +56 -0
  32. metadata/examples/workflows/bigquery_profiler.yaml +1 -2
  33. metadata/examples/workflows/db2_profiler.yaml +1 -2
  34. metadata/examples/workflows/dbtcloud.yaml +2 -1
  35. metadata/examples/workflows/{mstr.yaml → microstrategy.yaml} +3 -2
  36. metadata/examples/workflows/mongodb.yaml +4 -0
  37. metadata/examples/workflows/mysql_profiler.yaml +0 -1
  38. metadata/examples/workflows/redshift_classifier.yaml +38 -0
  39. metadata/examples/workflows/redshift_profiler.yaml +2 -3
  40. metadata/generated/antlr/FqnLexer.py +15 -15
  41. metadata/generated/schema/analytics/__init__.py +1 -1
  42. metadata/generated/schema/analytics/basic.py +1 -1
  43. metadata/generated/schema/analytics/reportData.py +1 -1
  44. metadata/generated/schema/analytics/reportDataType/__init__.py +1 -1
  45. metadata/generated/schema/analytics/reportDataType/aggregatedCostAnalysisReportData.py +1 -1
  46. metadata/generated/schema/analytics/reportDataType/entityReportData.py +1 -1
  47. metadata/generated/schema/analytics/reportDataType/rawCostAnalysisReportData.py +1 -1
  48. metadata/generated/schema/analytics/reportDataType/webAnalyticEntityViewReportData.py +1 -1
  49. metadata/generated/schema/analytics/reportDataType/webAnalyticUserActivityReportData.py +1 -1
  50. metadata/generated/schema/analytics/webAnalyticEvent.py +1 -1
  51. metadata/generated/schema/analytics/webAnalyticEventData.py +1 -1
  52. metadata/generated/schema/analytics/webAnalyticEventType/__init__.py +1 -1
  53. metadata/generated/schema/analytics/webAnalyticEventType/customEvent.py +1 -1
  54. metadata/generated/schema/analytics/webAnalyticEventType/pageViewEvent.py +1 -1
  55. metadata/generated/schema/api/__init__.py +1 -1
  56. metadata/generated/schema/api/addGlossaryToAssetsRequest.py +1 -1
  57. metadata/generated/schema/api/addTagToAssetsRequest.py +1 -1
  58. metadata/generated/schema/api/analytics/__init__.py +1 -1
  59. metadata/generated/schema/api/analytics/createWebAnalyticEvent.py +1 -1
  60. metadata/generated/schema/api/automations/__init__.py +1 -1
  61. metadata/generated/schema/api/automations/createWorkflow.py +1 -1
  62. metadata/generated/schema/api/bulkAssets.py +1 -1
  63. metadata/generated/schema/api/classification/__init__.py +1 -1
  64. metadata/generated/schema/api/classification/createClassification.py +1 -1
  65. metadata/generated/schema/api/classification/createTag.py +1 -1
  66. metadata/generated/schema/api/classification/loadTags.py +1 -1
  67. metadata/generated/schema/api/createBot.py +1 -1
  68. metadata/generated/schema/api/createEventPublisherJob.py +1 -1
  69. metadata/generated/schema/api/createType.py +1 -1
  70. metadata/generated/schema/api/data/__init__.py +1 -1
  71. metadata/generated/schema/api/data/createAPICollection.py +1 -1
  72. metadata/generated/schema/api/data/createAPIEndpoint.py +1 -1
  73. metadata/generated/schema/api/data/createChart.py +1 -1
  74. metadata/generated/schema/api/data/createContainer.py +1 -1
  75. metadata/generated/schema/api/data/createCustomProperty.py +1 -1
  76. metadata/generated/schema/api/data/createDashboard.py +1 -1
  77. metadata/generated/schema/api/data/createDashboardDataModel.py +1 -1
  78. metadata/generated/schema/api/data/createDatabase.py +1 -1
  79. metadata/generated/schema/api/data/createDatabaseSchema.py +1 -1
  80. metadata/generated/schema/api/data/createGlossary.py +1 -1
  81. metadata/generated/schema/api/data/createGlossaryTerm.py +1 -1
  82. metadata/generated/schema/api/data/createMetric.py +1 -1
  83. metadata/generated/schema/api/data/createMlModel.py +1 -1
  84. metadata/generated/schema/api/data/createPipeline.py +1 -1
  85. metadata/generated/schema/api/data/createQuery.py +1 -1
  86. metadata/generated/schema/api/data/createSearchIndex.py +8 -1
  87. metadata/generated/schema/api/data/createStoredProcedure.py +8 -1
  88. metadata/generated/schema/api/data/createTable.py +1 -1
  89. metadata/generated/schema/api/data/createTableProfile.py +1 -1
  90. metadata/generated/schema/api/data/createTopic.py +1 -1
  91. metadata/generated/schema/api/data/loadGlossary.py +1 -1
  92. metadata/generated/schema/api/data/restoreEntity.py +1 -1
  93. metadata/generated/schema/api/dataInsight/__init__.py +1 -1
  94. metadata/generated/schema/api/dataInsight/createDataInsightChart.py +1 -1
  95. metadata/generated/schema/api/dataInsight/custom/__init__.py +1 -1
  96. metadata/generated/schema/api/dataInsight/custom/createDataInsightCustomChart.py +1 -1
  97. metadata/generated/schema/api/dataInsight/kpi/__init__.py +1 -1
  98. metadata/generated/schema/api/dataInsight/kpi/createKpiRequest.py +1 -1
  99. metadata/generated/schema/api/docStore/__init__.py +1 -1
  100. metadata/generated/schema/api/docStore/createDocument.py +1 -1
  101. metadata/generated/schema/api/domains/__init__.py +1 -1
  102. metadata/generated/schema/api/domains/createDataProduct.py +1 -1
  103. metadata/generated/schema/api/domains/createDomain.py +1 -1
  104. metadata/generated/schema/api/feed/__init__.py +1 -1
  105. metadata/generated/schema/api/feed/closeTask.py +1 -1
  106. metadata/generated/schema/api/feed/createPost.py +1 -1
  107. metadata/generated/schema/api/feed/createSuggestion.py +1 -1
  108. metadata/generated/schema/api/feed/createThread.py +1 -1
  109. metadata/generated/schema/api/feed/resolveTask.py +1 -1
  110. metadata/generated/schema/api/feed/threadCount.py +1 -1
  111. metadata/generated/schema/api/governance/__init__.py +1 -1
  112. metadata/generated/schema/api/governance/createWorkflowDefinition.py +1 -1
  113. metadata/generated/schema/api/governance/createWorkflowInstanceState.py +1 -1
  114. metadata/generated/schema/api/lineage/__init__.py +1 -1
  115. metadata/generated/schema/api/lineage/addLineage.py +1 -1
  116. metadata/generated/schema/api/openMetadataServerVersion.py +1 -1
  117. metadata/generated/schema/api/policies/__init__.py +1 -1
  118. metadata/generated/schema/api/policies/createPolicy.py +1 -1
  119. metadata/generated/schema/api/services/__init__.py +1 -1
  120. metadata/generated/schema/api/services/createApiService.py +1 -1
  121. metadata/generated/schema/api/services/createDashboardService.py +1 -1
  122. metadata/generated/schema/api/services/createDatabaseService.py +1 -1
  123. metadata/generated/schema/api/services/createMessagingService.py +1 -1
  124. metadata/generated/schema/api/services/createMetadataService.py +1 -1
  125. metadata/generated/schema/api/services/createMlModelService.py +1 -1
  126. metadata/generated/schema/api/services/createPipelineService.py +1 -1
  127. metadata/generated/schema/api/services/createSearchService.py +1 -1
  128. metadata/generated/schema/api/services/createStorageService.py +1 -1
  129. metadata/generated/schema/api/services/ingestionPipelines/__init__.py +1 -1
  130. metadata/generated/schema/api/services/ingestionPipelines/createIngestionPipeline.py +1 -1
  131. metadata/generated/schema/api/setOwner.py +1 -1
  132. metadata/generated/schema/api/teams/__init__.py +1 -1
  133. metadata/generated/schema/api/teams/createPersona.py +1 -1
  134. metadata/generated/schema/api/teams/createRole.py +1 -1
  135. metadata/generated/schema/api/teams/createTeam.py +1 -1
  136. metadata/generated/schema/api/teams/createUser.py +1 -1
  137. metadata/generated/schema/api/tests/__init__.py +1 -1
  138. metadata/generated/schema/api/tests/createCustomMetric.py +1 -1
  139. metadata/generated/schema/api/tests/createLogicalTestCases.py +1 -1
  140. metadata/generated/schema/api/tests/createTestCase.py +1 -1
  141. metadata/generated/schema/api/tests/createTestCaseResolutionStatus.py +1 -1
  142. metadata/generated/schema/api/tests/createTestCaseResult.py +1 -1
  143. metadata/generated/schema/api/tests/createTestDefinition.py +1 -1
  144. metadata/generated/schema/api/tests/createTestSuite.py +1 -1
  145. metadata/generated/schema/api/voteRequest.py +1 -1
  146. metadata/generated/schema/auth/__init__.py +1 -1
  147. metadata/generated/schema/auth/basicAuth.py +1 -1
  148. metadata/generated/schema/auth/basicLoginRequest.py +1 -1
  149. metadata/generated/schema/auth/changePasswordRequest.py +1 -1
  150. metadata/generated/schema/auth/createPersonalToken.py +1 -1
  151. metadata/generated/schema/auth/emailRequest.py +1 -1
  152. metadata/generated/schema/auth/emailVerificationToken.py +1 -1
  153. metadata/generated/schema/auth/generateToken.py +1 -1
  154. metadata/generated/schema/auth/jwtAuth.py +1 -1
  155. metadata/generated/schema/auth/loginRequest.py +1 -1
  156. metadata/generated/schema/auth/logoutRequest.py +1 -1
  157. metadata/generated/schema/auth/passwordResetRequest.py +1 -1
  158. metadata/generated/schema/auth/passwordResetToken.py +1 -1
  159. metadata/generated/schema/auth/personalAccessToken.py +1 -1
  160. metadata/generated/schema/auth/refreshToken.py +1 -1
  161. metadata/generated/schema/auth/registrationRequest.py +1 -1
  162. metadata/generated/schema/auth/revokePersonalToken.py +1 -1
  163. metadata/generated/schema/auth/revokeToken.py +1 -1
  164. metadata/generated/schema/auth/serviceTokenEnum.py +1 -1
  165. metadata/generated/schema/auth/ssoAuth.py +1 -1
  166. metadata/generated/schema/auth/tokenRefreshRequest.py +1 -1
  167. metadata/generated/schema/configuration/__init__.py +1 -1
  168. metadata/generated/schema/configuration/appsPrivateConfiguration.py +1 -1
  169. metadata/generated/schema/configuration/assetCertificationSettings.py +1 -1
  170. metadata/generated/schema/configuration/authConfig.py +1 -1
  171. metadata/generated/schema/configuration/authenticationConfiguration.py +1 -1
  172. metadata/generated/schema/configuration/authorizerConfiguration.py +1 -1
  173. metadata/generated/schema/configuration/changeEventConfiguration.py +1 -1
  174. metadata/generated/schema/configuration/dataQualityConfiguration.py +1 -1
  175. metadata/generated/schema/configuration/elasticSearchConfiguration.py +1 -1
  176. metadata/generated/schema/configuration/eventHandlerConfiguration.py +1 -1
  177. metadata/generated/schema/configuration/fernetConfiguration.py +1 -1
  178. metadata/generated/schema/configuration/jwtTokenConfiguration.py +1 -1
  179. metadata/generated/schema/configuration/kafkaEventConfiguration.py +1 -1
  180. metadata/generated/schema/configuration/ldapConfiguration.py +1 -1
  181. metadata/generated/schema/configuration/ldapTrustStoreConfig/__init__.py +1 -1
  182. metadata/generated/schema/configuration/ldapTrustStoreConfig/customTrustManagerConfig.py +1 -1
  183. metadata/generated/schema/configuration/ldapTrustStoreConfig/hostNameConfig.py +1 -1
  184. metadata/generated/schema/configuration/ldapTrustStoreConfig/jvmDefaultConfig.py +1 -1
  185. metadata/generated/schema/configuration/ldapTrustStoreConfig/trustAllConfig.py +1 -1
  186. metadata/generated/schema/configuration/ldapTrustStoreConfig/truststoreConfig.py +1 -1
  187. metadata/generated/schema/configuration/limitsConfiguration.py +1 -1
  188. metadata/generated/schema/configuration/lineageSettings.py +1 -1
  189. metadata/generated/schema/configuration/loginConfiguration.py +1 -1
  190. metadata/generated/schema/configuration/logoConfiguration.py +1 -1
  191. metadata/generated/schema/configuration/pipelineServiceClientConfiguration.py +1 -1
  192. metadata/generated/schema/configuration/profilerConfiguration.py +1 -1
  193. metadata/generated/schema/configuration/searchSettings.py +1 -1
  194. metadata/generated/schema/configuration/slackAppConfiguration.py +1 -14
  195. metadata/generated/schema/configuration/taskNotificationConfiguration.py +1 -1
  196. metadata/generated/schema/configuration/testResultNotificationConfiguration.py +1 -1
  197. metadata/generated/schema/configuration/themeConfiguration.py +1 -1
  198. metadata/generated/schema/configuration/uiThemePreference.py +1 -1
  199. metadata/generated/schema/dataInsight/__init__.py +1 -1
  200. metadata/generated/schema/dataInsight/custom/__init__.py +1 -1
  201. metadata/generated/schema/dataInsight/custom/dataInsightCustomChart.py +1 -1
  202. metadata/generated/schema/dataInsight/custom/dataInsightCustomChartResult.py +1 -1
  203. metadata/generated/schema/dataInsight/custom/dataInsightCustomChartResultList.py +1 -1
  204. metadata/generated/schema/dataInsight/custom/formulaHolder.py +1 -1
  205. metadata/generated/schema/dataInsight/custom/lineChart.py +1 -1
  206. metadata/generated/schema/dataInsight/custom/summaryCard.py +1 -1
  207. metadata/generated/schema/dataInsight/dataInsightChart.py +1 -1
  208. metadata/generated/schema/dataInsight/dataInsightChartResult.py +1 -1
  209. metadata/generated/schema/dataInsight/kpi/__init__.py +1 -1
  210. metadata/generated/schema/dataInsight/kpi/basic.py +1 -1
  211. metadata/generated/schema/dataInsight/kpi/kpi.py +1 -1
  212. metadata/generated/schema/dataInsight/type/__init__.py +1 -1
  213. metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsCount.py +1 -1
  214. metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsSize.py +1 -1
  215. metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsCount.py +1 -1
  216. metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsSize.py +1 -1
  217. metadata/generated/schema/dataInsight/type/dailyActiveUsers.py +1 -1
  218. metadata/generated/schema/dataInsight/type/mostActiveUsers.py +1 -1
  219. metadata/generated/schema/dataInsight/type/mostViewedEntities.py +1 -1
  220. metadata/generated/schema/dataInsight/type/pageViewsByEntities.py +1 -1
  221. metadata/generated/schema/dataInsight/type/unusedAssets.py +1 -1
  222. metadata/generated/schema/email/__init__.py +1 -1
  223. metadata/generated/schema/email/emailRequest.py +1 -1
  224. metadata/generated/schema/email/emailTemplate.py +1 -1
  225. metadata/generated/schema/email/emailTemplatePlaceholder.py +1 -1
  226. metadata/generated/schema/email/smtpSettings.py +1 -1
  227. metadata/generated/schema/email/templateValidationReponse.py +1 -1
  228. metadata/generated/schema/entity/__init__.py +1 -1
  229. metadata/generated/schema/entity/applications/__init__.py +1 -1
  230. metadata/generated/schema/entity/applications/app.py +2 -1
  231. metadata/generated/schema/entity/applications/appExtension.py +1 -1
  232. metadata/generated/schema/entity/applications/appRunRecord.py +1 -1
  233. metadata/generated/schema/entity/applications/configuration/__init__.py +1 -1
  234. metadata/generated/schema/entity/applications/configuration/applicationConfig.py +4 -2
  235. metadata/generated/schema/entity/applications/configuration/external/__init__.py +1 -1
  236. metadata/generated/schema/entity/applications/configuration/external/automator/__init__.py +1 -1
  237. metadata/generated/schema/entity/applications/configuration/external/automator/addDescriptionAction.py +1 -1
  238. metadata/generated/schema/entity/applications/configuration/external/automator/addDomainAction.py +1 -1
  239. metadata/generated/schema/entity/applications/configuration/external/automator/addOwnerAction.py +1 -1
  240. metadata/generated/schema/entity/applications/configuration/external/automator/addTagsAction.py +1 -1
  241. metadata/generated/schema/entity/applications/configuration/external/automator/addTierAction.py +1 -1
  242. metadata/generated/schema/entity/applications/configuration/external/automator/lineagePropagationAction.py +1 -1
  243. metadata/generated/schema/entity/applications/configuration/external/automator/mlTaggingAction.py +1 -1
  244. metadata/generated/schema/entity/applications/configuration/external/automator/removeDescriptionAction.py +1 -1
  245. metadata/generated/schema/entity/applications/configuration/external/automator/removeDomainAction.py +1 -1
  246. metadata/generated/schema/entity/applications/configuration/external/automator/removeOwnerAction.py +1 -1
  247. metadata/generated/schema/entity/applications/configuration/external/automator/removeTagsAction.py +1 -1
  248. metadata/generated/schema/entity/applications/configuration/external/automator/removeTierAction.py +1 -1
  249. metadata/generated/schema/entity/applications/configuration/external/automatorAppConfig.py +1 -1
  250. metadata/generated/schema/entity/applications/configuration/external/collateAIAppConfig.py +1 -1
  251. metadata/generated/schema/entity/applications/configuration/external/slackAppTokenConfiguration.py +22 -0
  252. metadata/generated/schema/entity/applications/configuration/internal/__init__.py +1 -1
  253. metadata/generated/schema/entity/applications/configuration/internal/dataInsightsAppConfig.py +1 -1
  254. metadata/generated/schema/entity/applications/configuration/internal/dataInsightsReportAppConfig.py +1 -1
  255. metadata/generated/schema/entity/applications/configuration/internal/searchIndexingAppConfig.py +1 -1
  256. metadata/generated/schema/entity/applications/configuration/private/external/__init__.py +1 -1
  257. metadata/generated/schema/entity/applications/configuration/private/external/collateAIAppPrivateConfig.py +1 -1
  258. metadata/generated/schema/entity/applications/createAppRequest.py +1 -1
  259. metadata/generated/schema/entity/applications/jobStatus.py +1 -1
  260. metadata/generated/schema/entity/applications/liveExecutionContext.py +1 -1
  261. metadata/generated/schema/entity/applications/marketplace/__init__.py +1 -1
  262. metadata/generated/schema/entity/applications/marketplace/appMarketPlaceDefinition.py +1 -1
  263. metadata/generated/schema/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.py +1 -1
  264. metadata/generated/schema/entity/applications/scheduledExecutionContext.py +1 -1
  265. metadata/generated/schema/entity/automations/__init__.py +1 -1
  266. metadata/generated/schema/entity/automations/testServiceConnection.py +1 -1
  267. metadata/generated/schema/entity/automations/workflow.py +1 -1
  268. metadata/generated/schema/entity/bot.py +1 -1
  269. metadata/generated/schema/entity/classification/__init__.py +1 -1
  270. metadata/generated/schema/entity/classification/classification.py +1 -1
  271. metadata/generated/schema/entity/classification/tag.py +1 -1
  272. metadata/generated/schema/entity/data/__init__.py +1 -1
  273. metadata/generated/schema/entity/data/apiCollection.py +1 -1
  274. metadata/generated/schema/entity/data/apiEndpoint.py +1 -1
  275. metadata/generated/schema/entity/data/chart.py +1 -1
  276. metadata/generated/schema/entity/data/container.py +1 -1
  277. metadata/generated/schema/entity/data/dashboard.py +1 -1
  278. metadata/generated/schema/entity/data/dashboardDataModel.py +1 -1
  279. metadata/generated/schema/entity/data/database.py +1 -1
  280. metadata/generated/schema/entity/data/databaseSchema.py +1 -1
  281. metadata/generated/schema/entity/data/glossary.py +1 -1
  282. metadata/generated/schema/entity/data/glossaryTerm.py +1 -1
  283. metadata/generated/schema/entity/data/metric.py +1 -1
  284. metadata/generated/schema/entity/data/mlmodel.py +1 -1
  285. metadata/generated/schema/entity/data/pipeline.py +1 -1
  286. metadata/generated/schema/entity/data/query.py +1 -1
  287. metadata/generated/schema/entity/data/report.py +1 -1
  288. metadata/generated/schema/entity/data/searchIndex.py +13 -1
  289. metadata/generated/schema/entity/data/storedProcedure.py +13 -1
  290. metadata/generated/schema/entity/data/table.py +1 -1
  291. metadata/generated/schema/entity/data/topic.py +1 -1
  292. metadata/generated/schema/entity/docStore/__init__.py +1 -1
  293. metadata/generated/schema/entity/docStore/document.py +1 -1
  294. metadata/generated/schema/entity/domains/__init__.py +1 -1
  295. metadata/generated/schema/entity/domains/dataProduct.py +1 -1
  296. metadata/generated/schema/entity/domains/domain.py +1 -1
  297. metadata/generated/schema/entity/events/__init__.py +1 -1
  298. metadata/generated/schema/entity/events/webhook.py +1 -1
  299. metadata/generated/schema/entity/feed/__init__.py +1 -1
  300. metadata/generated/schema/entity/feed/assets.py +1 -1
  301. metadata/generated/schema/entity/feed/customProperty.py +1 -1
  302. metadata/generated/schema/entity/feed/description.py +1 -1
  303. metadata/generated/schema/entity/feed/domain.py +1 -1
  304. metadata/generated/schema/entity/feed/entityInfo.py +1 -1
  305. metadata/generated/schema/entity/feed/owner.py +1 -1
  306. metadata/generated/schema/entity/feed/suggestion.py +1 -1
  307. metadata/generated/schema/entity/feed/tag.py +1 -1
  308. metadata/generated/schema/entity/feed/testCaseResult.py +1 -1
  309. metadata/generated/schema/entity/feed/thread.py +1 -1
  310. metadata/generated/schema/entity/policies/__init__.py +1 -1
  311. metadata/generated/schema/entity/policies/accessControl/__init__.py +1 -1
  312. metadata/generated/schema/entity/policies/accessControl/resourceDescriptor.py +1 -1
  313. metadata/generated/schema/entity/policies/accessControl/resourcePermission.py +1 -1
  314. metadata/generated/schema/entity/policies/accessControl/rule.py +1 -1
  315. metadata/generated/schema/entity/policies/filters.py +1 -1
  316. metadata/generated/schema/entity/policies/policy.py +1 -1
  317. metadata/generated/schema/entity/services/__init__.py +1 -1
  318. metadata/generated/schema/entity/services/apiService.py +1 -1
  319. metadata/generated/schema/entity/services/connections/__init__.py +1 -1
  320. metadata/generated/schema/entity/services/connections/api/__init__.py +1 -1
  321. metadata/generated/schema/entity/services/connections/api/restConnection.py +1 -1
  322. metadata/generated/schema/entity/services/connections/common/__init__.py +1 -1
  323. metadata/generated/schema/entity/services/connections/common/sslCertPaths.py +1 -1
  324. metadata/generated/schema/entity/services/connections/common/sslCertValues.py +1 -1
  325. metadata/generated/schema/entity/services/connections/common/sslConfig.py +1 -1
  326. metadata/generated/schema/entity/services/connections/connectionBasicType.py +1 -1
  327. metadata/generated/schema/entity/services/connections/dashboard/__init__.py +1 -1
  328. metadata/generated/schema/entity/services/connections/dashboard/customDashboardConnection.py +1 -1
  329. metadata/generated/schema/entity/services/connections/dashboard/domoDashboardConnection.py +1 -1
  330. metadata/generated/schema/entity/services/connections/dashboard/lightdashConnection.py +1 -1
  331. metadata/generated/schema/entity/services/connections/dashboard/lookerConnection.py +1 -1
  332. metadata/generated/schema/entity/services/connections/dashboard/metabaseConnection.py +1 -1
  333. metadata/generated/schema/entity/services/connections/dashboard/microStrategyConnection.py +67 -0
  334. metadata/generated/schema/entity/services/connections/dashboard/modeConnection.py +1 -1
  335. metadata/generated/schema/entity/services/connections/dashboard/powerBIConnection.py +1 -1
  336. metadata/generated/schema/entity/services/connections/dashboard/powerBIReportServerConnection.py +1 -1
  337. metadata/generated/schema/entity/services/connections/dashboard/powerbi/__init__.py +1 -1
  338. metadata/generated/schema/entity/services/connections/dashboard/powerbi/azureConfig.py +1 -1
  339. metadata/generated/schema/entity/services/connections/dashboard/powerbi/bucketDetails.py +1 -1
  340. metadata/generated/schema/entity/services/connections/dashboard/powerbi/gcsConfig.py +1 -1
  341. metadata/generated/schema/entity/services/connections/dashboard/powerbi/s3Config.py +1 -1
  342. metadata/generated/schema/entity/services/connections/dashboard/qlikCloudConnection.py +1 -1
  343. metadata/generated/schema/entity/services/connections/dashboard/qlikSenseConnection.py +1 -1
  344. metadata/generated/schema/entity/services/connections/dashboard/quickSightConnection.py +1 -1
  345. metadata/generated/schema/entity/services/connections/dashboard/redashConnection.py +1 -1
  346. metadata/generated/schema/entity/services/connections/dashboard/sigmaConnection.py +1 -1
  347. metadata/generated/schema/entity/services/connections/dashboard/supersetConnection.py +1 -1
  348. metadata/generated/schema/entity/services/connections/dashboard/tableauConnection.py +1 -1
  349. metadata/generated/schema/entity/services/connections/database/__init__.py +1 -1
  350. metadata/generated/schema/entity/services/connections/database/athenaConnection.py +1 -1
  351. metadata/generated/schema/entity/services/connections/database/azureSQLConnection.py +1 -1
  352. metadata/generated/schema/entity/services/connections/database/bigQueryConnection.py +1 -1
  353. metadata/generated/schema/entity/services/connections/database/bigTableConnection.py +1 -1
  354. metadata/generated/schema/entity/services/connections/database/clickhouseConnection.py +1 -1
  355. metadata/generated/schema/entity/services/connections/database/common/__init__.py +1 -1
  356. metadata/generated/schema/entity/services/connections/database/common/azureConfig.py +1 -1
  357. metadata/generated/schema/entity/services/connections/database/common/basicAuth.py +1 -1
  358. metadata/generated/schema/entity/services/connections/database/common/iamAuthConfig.py +1 -1
  359. metadata/generated/schema/entity/services/connections/database/common/jwtAuth.py +1 -1
  360. metadata/generated/schema/entity/services/connections/database/common/noConfigAuthenticationTypes.py +1 -1
  361. metadata/generated/schema/entity/services/connections/database/couchbaseConnection.py +1 -1
  362. metadata/generated/schema/entity/services/connections/database/customDatabaseConnection.py +1 -1
  363. metadata/generated/schema/entity/services/connections/database/databricksConnection.py +1 -1
  364. metadata/generated/schema/entity/services/connections/database/datalake/__init__.py +1 -1
  365. metadata/generated/schema/entity/services/connections/database/datalake/azureConfig.py +1 -1
  366. metadata/generated/schema/entity/services/connections/database/datalake/gcsConfig.py +1 -1
  367. metadata/generated/schema/entity/services/connections/database/datalake/s3Config.py +1 -1
  368. metadata/generated/schema/entity/services/connections/database/datalakeConnection.py +1 -1
  369. metadata/generated/schema/entity/services/connections/database/db2Connection.py +1 -1
  370. metadata/generated/schema/entity/services/connections/database/deltaLakeConnection.py +1 -1
  371. metadata/generated/schema/entity/services/connections/database/deltalake/__init__.py +1 -1
  372. metadata/generated/schema/entity/services/connections/database/deltalake/metastoreConfig.py +1 -1
  373. metadata/generated/schema/entity/services/connections/database/deltalake/storageConfig.py +1 -1
  374. metadata/generated/schema/entity/services/connections/database/domoDatabaseConnection.py +1 -1
  375. metadata/generated/schema/entity/services/connections/database/dorisConnection.py +1 -1
  376. metadata/generated/schema/entity/services/connections/database/druidConnection.py +1 -1
  377. metadata/generated/schema/entity/services/connections/database/dynamoDBConnection.py +1 -1
  378. metadata/generated/schema/entity/services/connections/database/exasolConnection.py +1 -1
  379. metadata/generated/schema/entity/services/connections/database/glueConnection.py +1 -1
  380. metadata/generated/schema/entity/services/connections/database/greenplumConnection.py +1 -1
  381. metadata/generated/schema/entity/services/connections/database/hiveConnection.py +1 -1
  382. metadata/generated/schema/entity/services/connections/database/iceberg/__init__.py +1 -1
  383. metadata/generated/schema/entity/services/connections/database/iceberg/dynamoDbCatalogConnection.py +1 -1
  384. metadata/generated/schema/entity/services/connections/database/iceberg/glueCatalogConnection.py +1 -1
  385. metadata/generated/schema/entity/services/connections/database/iceberg/hiveCatalogConnection.py +1 -1
  386. metadata/generated/schema/entity/services/connections/database/iceberg/icebergCatalog.py +1 -1
  387. metadata/generated/schema/entity/services/connections/database/iceberg/icebergFileSystem.py +1 -1
  388. metadata/generated/schema/entity/services/connections/database/iceberg/restCatalogConnection.py +1 -1
  389. metadata/generated/schema/entity/services/connections/database/icebergConnection.py +1 -1
  390. metadata/generated/schema/entity/services/connections/database/impalaConnection.py +1 -1
  391. metadata/generated/schema/entity/services/connections/database/mariaDBConnection.py +1 -1
  392. metadata/generated/schema/entity/services/connections/database/mongoDBConnection.py +6 -2
  393. metadata/generated/schema/entity/services/connections/database/mssqlConnection.py +1 -1
  394. metadata/generated/schema/entity/services/connections/database/mysqlConnection.py +6 -3
  395. metadata/generated/schema/entity/services/connections/database/oracleConnection.py +1 -1
  396. metadata/generated/schema/entity/services/connections/database/pinotDBConnection.py +1 -1
  397. metadata/generated/schema/entity/services/connections/database/postgresConnection.py +1 -1
  398. metadata/generated/schema/entity/services/connections/database/prestoConnection.py +1 -1
  399. metadata/generated/schema/entity/services/connections/database/redshiftConnection.py +1 -1
  400. metadata/generated/schema/entity/services/connections/database/salesforceConnection.py +1 -1
  401. metadata/generated/schema/entity/services/connections/database/sapErpConnection.py +1 -1
  402. metadata/generated/schema/entity/services/connections/database/sapHana/__init__.py +1 -1
  403. metadata/generated/schema/entity/services/connections/database/sapHana/sapHanaHDBConnection.py +1 -1
  404. metadata/generated/schema/entity/services/connections/database/sapHana/sapHanaSQLConnection.py +1 -1
  405. metadata/generated/schema/entity/services/connections/database/sapHanaConnection.py +1 -1
  406. metadata/generated/schema/entity/services/connections/database/sasConnection.py +1 -1
  407. metadata/generated/schema/entity/services/connections/database/singleStoreConnection.py +1 -1
  408. metadata/generated/schema/entity/services/connections/database/snowflakeConnection.py +1 -1
  409. metadata/generated/schema/entity/services/connections/database/sqliteConnection.py +1 -1
  410. metadata/generated/schema/entity/services/connections/database/synapseConnection.py +1 -1
  411. metadata/generated/schema/entity/services/connections/database/teradataConnection.py +1 -1
  412. metadata/generated/schema/entity/services/connections/database/trinoConnection.py +1 -1
  413. metadata/generated/schema/entity/services/connections/database/unityCatalogConnection.py +1 -1
  414. metadata/generated/schema/entity/services/connections/database/verticaConnection.py +1 -1
  415. metadata/generated/schema/entity/services/connections/messaging/__init__.py +1 -1
  416. metadata/generated/schema/entity/services/connections/messaging/customMessagingConnection.py +1 -1
  417. metadata/generated/schema/entity/services/connections/messaging/kafkaConnection.py +1 -1
  418. metadata/generated/schema/entity/services/connections/messaging/kinesisConnection.py +1 -1
  419. metadata/generated/schema/entity/services/connections/messaging/pulsarConnection.py +1 -1
  420. metadata/generated/schema/entity/services/connections/messaging/redpandaConnection.py +1 -1
  421. metadata/generated/schema/entity/services/connections/messaging/saslMechanismType.py +1 -1
  422. metadata/generated/schema/entity/services/connections/metadata/__init__.py +1 -1
  423. metadata/generated/schema/entity/services/connections/metadata/alationConnection.py +1 -1
  424. metadata/generated/schema/entity/services/connections/metadata/alationSinkConnection.py +1 -1
  425. metadata/generated/schema/entity/services/connections/metadata/amundsenConnection.py +1 -1
  426. metadata/generated/schema/entity/services/connections/metadata/atlasConnection.py +1 -1
  427. metadata/generated/schema/entity/services/connections/metadata/metadataESConnection.py +1 -1
  428. metadata/generated/schema/entity/services/connections/metadata/openMetadataConnection.py +1 -1
  429. metadata/generated/schema/entity/services/connections/mlmodel/__init__.py +1 -1
  430. metadata/generated/schema/entity/services/connections/mlmodel/customMlModelConnection.py +1 -1
  431. metadata/generated/schema/entity/services/connections/mlmodel/mlflowConnection.py +1 -1
  432. metadata/generated/schema/entity/services/connections/mlmodel/sageMakerConnection.py +1 -1
  433. metadata/generated/schema/entity/services/connections/mlmodel/sklearnConnection.py +1 -1
  434. metadata/generated/schema/entity/services/connections/mlmodel/vertexaiConnection.py +1 -1
  435. metadata/generated/schema/entity/services/connections/pipeline/__init__.py +1 -1
  436. metadata/generated/schema/entity/services/connections/pipeline/airbyteConnection.py +1 -1
  437. metadata/generated/schema/entity/services/connections/pipeline/airflowConnection.py +1 -1
  438. metadata/generated/schema/entity/services/connections/pipeline/backendConnection.py +1 -1
  439. metadata/generated/schema/entity/services/connections/pipeline/customPipelineConnection.py +1 -1
  440. metadata/generated/schema/entity/services/connections/pipeline/dagsterConnection.py +1 -1
  441. metadata/generated/schema/entity/services/connections/pipeline/databricksPipelineConnection.py +1 -1
  442. metadata/generated/schema/entity/services/connections/pipeline/datafactoryConnection.py +1 -1
  443. metadata/generated/schema/entity/services/connections/pipeline/dbtCloudConnection.py +17 -5
  444. metadata/generated/schema/entity/services/connections/pipeline/domoPipelineConnection.py +1 -1
  445. metadata/generated/schema/entity/services/connections/pipeline/fivetranConnection.py +1 -1
  446. metadata/generated/schema/entity/services/connections/pipeline/flinkConnection.py +1 -1
  447. metadata/generated/schema/entity/services/connections/pipeline/gluePipelineConnection.py +1 -1
  448. metadata/generated/schema/entity/services/connections/pipeline/kafkaConnectConnection.py +1 -1
  449. metadata/generated/schema/entity/services/connections/pipeline/matillionConnection.py +1 -1
  450. metadata/generated/schema/entity/services/connections/pipeline/nifiConnection.py +1 -1
  451. metadata/generated/schema/entity/services/connections/pipeline/openLineageConnection.py +1 -1
  452. metadata/generated/schema/entity/services/connections/pipeline/sparkConnection.py +1 -1
  453. metadata/generated/schema/entity/services/connections/pipeline/splineConnection.py +1 -1
  454. metadata/generated/schema/entity/services/connections/pipeline/stitchConnection.py +1 -1
  455. metadata/generated/schema/entity/services/connections/search/__init__.py +1 -1
  456. metadata/generated/schema/entity/services/connections/search/customSearchConnection.py +1 -1
  457. metadata/generated/schema/entity/services/connections/search/elasticSearch/__init__.py +1 -1
  458. metadata/generated/schema/entity/services/connections/search/elasticSearch/apiAuth.py +1 -1
  459. metadata/generated/schema/entity/services/connections/search/elasticSearch/basicAuth.py +1 -1
  460. metadata/generated/schema/entity/services/connections/search/elasticSearchConnection.py +1 -1
  461. metadata/generated/schema/entity/services/connections/search/openSearchConnection.py +1 -1
  462. metadata/generated/schema/entity/services/connections/serviceConnection.py +1 -1
  463. metadata/generated/schema/entity/services/connections/storage/__init__.py +1 -1
  464. metadata/generated/schema/entity/services/connections/storage/adlsConnection.py +1 -1
  465. metadata/generated/schema/entity/services/connections/storage/customStorageConnection.py +1 -1
  466. metadata/generated/schema/entity/services/connections/storage/gcsConnection.py +1 -1
  467. metadata/generated/schema/entity/services/connections/storage/s3Connection.py +1 -1
  468. metadata/generated/schema/entity/services/connections/testConnectionDefinition.py +1 -1
  469. metadata/generated/schema/entity/services/connections/testConnectionResult.py +1 -1
  470. metadata/generated/schema/entity/services/dashboardService.py +4 -4
  471. metadata/generated/schema/entity/services/databaseService.py +1 -1
  472. metadata/generated/schema/entity/services/ingestionPipelines/__init__.py +1 -1
  473. metadata/generated/schema/entity/services/ingestionPipelines/ingestionPipeline.py +2 -1
  474. metadata/generated/schema/entity/services/ingestionPipelines/pipelineServiceClientResponse.py +1 -1
  475. metadata/generated/schema/entity/services/ingestionPipelines/status.py +1 -1
  476. metadata/generated/schema/entity/services/messagingService.py +1 -1
  477. metadata/generated/schema/entity/services/metadataService.py +1 -1
  478. metadata/generated/schema/entity/services/mlmodelService.py +1 -1
  479. metadata/generated/schema/entity/services/pipelineService.py +1 -1
  480. metadata/generated/schema/entity/services/searchService.py +1 -1
  481. metadata/generated/schema/entity/services/serviceType.py +1 -1
  482. metadata/generated/schema/entity/services/storageService.py +1 -1
  483. metadata/generated/schema/entity/teams/__init__.py +1 -1
  484. metadata/generated/schema/entity/teams/persona.py +1 -1
  485. metadata/generated/schema/entity/teams/role.py +1 -1
  486. metadata/generated/schema/entity/teams/team.py +1 -1
  487. metadata/generated/schema/entity/teams/teamHierarchy.py +1 -1
  488. metadata/generated/schema/entity/teams/user.py +1 -1
  489. metadata/generated/schema/entity/type.py +1 -1
  490. metadata/generated/schema/entity/utils/__init__.py +1 -1
  491. metadata/generated/schema/entity/utils/entitiesCount.py +1 -1
  492. metadata/generated/schema/entity/utils/servicesCount.py +1 -1
  493. metadata/generated/schema/entity/utils/supersetApiConnection.py +1 -1
  494. metadata/generated/schema/events/__init__.py +1 -1
  495. metadata/generated/schema/events/alertMetrics.py +1 -1
  496. metadata/generated/schema/events/api/__init__.py +1 -1
  497. metadata/generated/schema/events/api/createEventSubscription.py +1 -1
  498. metadata/generated/schema/events/api/eventSubscriptionDiagnosticInfo.py +1 -1
  499. metadata/generated/schema/events/api/eventsRecord.py +1 -1
  500. metadata/generated/schema/events/api/testEventSubscriptionDestination.py +1 -1
  501. metadata/generated/schema/events/api/typedEvent.py +1 -1
  502. metadata/generated/schema/events/emailAlertConfig.py +1 -1
  503. metadata/generated/schema/events/eventFilterRule.py +1 -1
  504. metadata/generated/schema/events/eventSubscription.py +1 -1
  505. metadata/generated/schema/events/eventSubscriptionOffset.py +1 -1
  506. metadata/generated/schema/events/failedEvent.py +1 -1
  507. metadata/generated/schema/events/failedEventResponse.py +1 -1
  508. metadata/generated/schema/events/filterResourceDescriptor.py +1 -1
  509. metadata/generated/schema/events/statusContext.py +1 -1
  510. metadata/generated/schema/events/subscriptionResourceDescriptor.py +1 -1
  511. metadata/generated/schema/events/subscriptionStatus.py +1 -1
  512. metadata/generated/schema/events/testDestinationStatus.py +1 -1
  513. metadata/generated/schema/governance/workflows/__init__.py +1 -1
  514. metadata/generated/schema/governance/workflows/elements/__init__.py +1 -1
  515. metadata/generated/schema/governance/workflows/elements/edge.py +1 -1
  516. metadata/generated/schema/governance/workflows/elements/nodeSubType.py +1 -1
  517. metadata/generated/schema/governance/workflows/elements/nodeType.py +1 -1
  518. metadata/generated/schema/governance/workflows/elements/nodes/__init__.py +1 -1
  519. metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/__init__.py +1 -1
  520. metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/checkEntityAttributesTask.py +1 -1
  521. metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/setEntityCertificationTask.py +1 -1
  522. metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/setGlossaryTermStatusTask.py +1 -1
  523. metadata/generated/schema/governance/workflows/elements/nodes/endEvent/__init__.py +1 -1
  524. metadata/generated/schema/governance/workflows/elements/nodes/endEvent/endEvent.py +1 -1
  525. metadata/generated/schema/governance/workflows/elements/nodes/startEvent/__init__.py +1 -1
  526. metadata/generated/schema/governance/workflows/elements/nodes/startEvent/startEvent.py +1 -1
  527. metadata/generated/schema/governance/workflows/elements/nodes/userTask/__init__.py +1 -1
  528. metadata/generated/schema/governance/workflows/elements/nodes/userTask/userApprovalTask.py +1 -1
  529. metadata/generated/schema/governance/workflows/elements/triggers/__init__.py +1 -1
  530. metadata/generated/schema/governance/workflows/elements/triggers/eventBasedEntityTrigger.py +1 -1
  531. metadata/generated/schema/governance/workflows/elements/triggers/periodicBatchEntityTrigger.py +2 -2
  532. metadata/generated/schema/governance/workflows/workflowDefinition.py +1 -1
  533. metadata/generated/schema/governance/workflows/workflowInstance.py +8 -1
  534. metadata/generated/schema/governance/workflows/workflowInstanceState.py +8 -1
  535. metadata/generated/schema/metadataIngestion/__init__.py +1 -1
  536. metadata/generated/schema/metadataIngestion/apiServiceMetadataPipeline.py +1 -1
  537. metadata/generated/schema/metadataIngestion/application.py +1 -1
  538. metadata/generated/schema/metadataIngestion/applicationPipeline.py +1 -1
  539. metadata/generated/schema/metadataIngestion/dashboardServiceMetadataPipeline.py +1 -1
  540. metadata/generated/schema/metadataIngestion/dataInsightPipeline.py +1 -1
  541. metadata/generated/schema/metadataIngestion/databaseServiceAutoClassificationPipeline.py +127 -0
  542. metadata/generated/schema/metadataIngestion/databaseServiceMetadataPipeline.py +1 -1
  543. metadata/generated/schema/metadataIngestion/databaseServiceProfilerPipeline.py +1 -25
  544. metadata/generated/schema/metadataIngestion/databaseServiceQueryLineagePipeline.py +1 -1
  545. metadata/generated/schema/metadataIngestion/databaseServiceQueryUsagePipeline.py +1 -1
  546. metadata/generated/schema/metadataIngestion/dbtPipeline.py +1 -1
  547. metadata/generated/schema/metadataIngestion/dbtconfig/__init__.py +1 -1
  548. metadata/generated/schema/metadataIngestion/dbtconfig/dbtAzureConfig.py +1 -1
  549. metadata/generated/schema/metadataIngestion/dbtconfig/dbtBucketDetails.py +1 -1
  550. metadata/generated/schema/metadataIngestion/dbtconfig/dbtCloudConfig.py +1 -1
  551. metadata/generated/schema/metadataIngestion/dbtconfig/dbtGCSConfig.py +1 -1
  552. metadata/generated/schema/metadataIngestion/dbtconfig/dbtHttpConfig.py +9 -1
  553. metadata/generated/schema/metadataIngestion/dbtconfig/dbtLocalConfig.py +9 -1
  554. metadata/generated/schema/metadataIngestion/dbtconfig/dbtS3Config.py +1 -1
  555. metadata/generated/schema/metadataIngestion/messagingServiceMetadataPipeline.py +1 -1
  556. metadata/generated/schema/metadataIngestion/metadataToElasticSearchPipeline.py +1 -1
  557. metadata/generated/schema/metadataIngestion/mlmodelServiceMetadataPipeline.py +1 -1
  558. metadata/generated/schema/metadataIngestion/pipelineServiceMetadataPipeline.py +1 -1
  559. metadata/generated/schema/metadataIngestion/searchServiceMetadataPipeline.py +9 -1
  560. metadata/generated/schema/metadataIngestion/storage/__init__.py +1 -1
  561. metadata/generated/schema/metadataIngestion/storage/containerMetadataConfig.py +1 -1
  562. metadata/generated/schema/metadataIngestion/storage/manifestMetadataConfig.py +1 -1
  563. metadata/generated/schema/metadataIngestion/storage/storageBucketDetails.py +1 -1
  564. metadata/generated/schema/metadataIngestion/storage/storageMetadataADLSConfig.py +1 -1
  565. metadata/generated/schema/metadataIngestion/storage/storageMetadataGCSConfig.py +1 -1
  566. metadata/generated/schema/metadataIngestion/storage/storageMetadataHttpConfig.py +1 -1
  567. metadata/generated/schema/metadataIngestion/storage/storageMetadataLocalConfig.py +1 -1
  568. metadata/generated/schema/metadataIngestion/storage/storageMetadataS3Config.py +1 -1
  569. metadata/generated/schema/metadataIngestion/storageServiceMetadataPipeline.py +1 -1
  570. metadata/generated/schema/metadataIngestion/testSuitePipeline.py +1 -1
  571. metadata/generated/schema/metadataIngestion/workflow.py +3 -1
  572. metadata/generated/schema/monitoring/__init__.py +1 -1
  573. metadata/generated/schema/monitoring/eventMonitorProvider.py +1 -1
  574. metadata/generated/schema/security/__init__.py +1 -1
  575. metadata/generated/schema/security/client/__init__.py +1 -1
  576. metadata/generated/schema/security/client/auth0SSOClientConfig.py +1 -1
  577. metadata/generated/schema/security/client/azureSSOClientConfig.py +1 -1
  578. metadata/generated/schema/security/client/customOidcSSOClientConfig.py +1 -1
  579. metadata/generated/schema/security/client/googleSSOClientConfig.py +1 -1
  580. metadata/generated/schema/security/client/oidcClientConfig.py +1 -1
  581. metadata/generated/schema/security/client/oktaSSOClientConfig.py +1 -1
  582. metadata/generated/schema/security/client/openMetadataJWTClientConfig.py +1 -1
  583. metadata/generated/schema/security/client/samlSSOClientConfig.py +1 -1
  584. metadata/generated/schema/security/credentials/__init__.py +1 -1
  585. metadata/generated/schema/security/credentials/accessTokenAuth.py +1 -1
  586. metadata/generated/schema/security/credentials/apiAccessTokenAuth.py +1 -1
  587. metadata/generated/schema/security/credentials/awsCredentials.py +1 -1
  588. metadata/generated/schema/security/credentials/azureCredentials.py +1 -1
  589. metadata/generated/schema/security/credentials/basicAuth.py +1 -1
  590. metadata/generated/schema/security/credentials/bitbucketCredentials.py +1 -1
  591. metadata/generated/schema/security/credentials/gcpCredentials.py +1 -1
  592. metadata/generated/schema/security/credentials/gcpExternalAccount.py +1 -1
  593. metadata/generated/schema/security/credentials/gcpValues.py +1 -1
  594. metadata/generated/schema/security/credentials/gitCredentials.py +1 -1
  595. metadata/generated/schema/security/credentials/githubCredentials.py +1 -1
  596. metadata/generated/schema/security/credentials/gitlabCredentials.py +1 -1
  597. metadata/generated/schema/security/sasl/__init__.py +1 -1
  598. metadata/generated/schema/security/sasl/saslClientConfig.py +1 -1
  599. metadata/generated/schema/security/secrets/__init__.py +1 -1
  600. metadata/generated/schema/security/secrets/secretsManagerClientLoader.py +1 -1
  601. metadata/generated/schema/security/secrets/secretsManagerConfiguration.py +1 -1
  602. metadata/generated/schema/security/secrets/secretsManagerProvider.py +1 -1
  603. metadata/generated/schema/security/securityConfiguration.py +1 -1
  604. metadata/generated/schema/security/ssl/__init__.py +1 -1
  605. metadata/generated/schema/security/ssl/validateSSLClientConfig.py +1 -1
  606. metadata/generated/schema/security/ssl/verifySSLConfig.py +1 -1
  607. metadata/generated/schema/settings/__init__.py +1 -1
  608. metadata/generated/schema/settings/settings.py +1 -1
  609. metadata/generated/schema/system/__init__.py +1 -1
  610. metadata/generated/schema/system/entityError.py +1 -1
  611. metadata/generated/schema/system/eventPublisherJob.py +2 -1
  612. metadata/generated/schema/system/indexingError.py +1 -1
  613. metadata/generated/schema/system/limitsResponse.py +1 -1
  614. metadata/generated/schema/system/ui/__init__.py +1 -1
  615. metadata/generated/schema/system/ui/knowledgePanel.py +1 -1
  616. metadata/generated/schema/system/ui/page.py +1 -1
  617. metadata/generated/schema/system/validationResponse.py +1 -1
  618. metadata/generated/schema/tests/__init__.py +1 -1
  619. metadata/generated/schema/tests/assigned.py +1 -1
  620. metadata/generated/schema/tests/basic.py +1 -1
  621. metadata/generated/schema/tests/customMetric.py +1 -1
  622. metadata/generated/schema/tests/dataQualityReport.py +1 -1
  623. metadata/generated/schema/tests/resolved.py +1 -1
  624. metadata/generated/schema/tests/testCase.py +1 -1
  625. metadata/generated/schema/tests/testCaseResolutionStatus.py +1 -1
  626. metadata/generated/schema/tests/testDefinition.py +1 -1
  627. metadata/generated/schema/tests/testSuite.py +1 -1
  628. metadata/generated/schema/type/__init__.py +1 -1
  629. metadata/generated/schema/type/apiSchema.py +1 -1
  630. metadata/generated/schema/type/assetCertification.py +1 -1
  631. metadata/generated/schema/type/auditLog.py +1 -1
  632. metadata/generated/schema/type/basic.py +1 -1
  633. metadata/generated/schema/type/bulkOperationResult.py +1 -1
  634. metadata/generated/schema/type/changeEvent.py +1 -1
  635. metadata/generated/schema/type/changeEventType.py +1 -1
  636. metadata/generated/schema/type/collectionDescriptor.py +1 -1
  637. metadata/generated/schema/type/csvDocumentation.py +1 -1
  638. metadata/generated/schema/type/csvErrorType.py +1 -1
  639. metadata/generated/schema/type/csvFile.py +1 -1
  640. metadata/generated/schema/type/csvImportResult.py +1 -1
  641. metadata/generated/schema/type/customProperties/__init__.py +1 -1
  642. metadata/generated/schema/type/customProperties/complexTypes.py +1 -1
  643. metadata/generated/schema/type/customProperties/enumConfig.py +1 -1
  644. metadata/generated/schema/type/customProperties/tableConfig.py +1 -1
  645. metadata/generated/schema/type/customProperty.py +1 -1
  646. metadata/generated/schema/type/dailyCount.py +1 -1
  647. metadata/generated/schema/type/databaseConnectionConfig.py +1 -1
  648. metadata/generated/schema/type/entityHierarchy.py +1 -1
  649. metadata/generated/schema/type/entityHistory.py +1 -1
  650. metadata/generated/schema/type/entityLineage.py +1 -1
  651. metadata/generated/schema/type/entityReference.py +1 -1
  652. metadata/generated/schema/type/entityReferenceList.py +1 -1
  653. metadata/generated/schema/type/entityRelationship.py +1 -1
  654. metadata/generated/schema/type/entityUsage.py +1 -1
  655. metadata/generated/schema/type/filterPattern.py +1 -1
  656. metadata/generated/schema/type/function.py +1 -1
  657. metadata/generated/schema/type/include.py +1 -1
  658. metadata/generated/schema/type/jdbcConnection.py +1 -1
  659. metadata/generated/schema/type/lifeCycle.py +1 -1
  660. metadata/generated/schema/type/paging.py +1 -1
  661. metadata/generated/schema/type/profile.py +1 -1
  662. metadata/generated/schema/type/queryParserData.py +1 -1
  663. metadata/generated/schema/type/reaction.py +1 -1
  664. metadata/generated/schema/type/schedule.py +1 -1
  665. metadata/generated/schema/type/schema.py +1 -1
  666. metadata/generated/schema/type/tableQuery.py +1 -1
  667. metadata/generated/schema/type/tableUsageCount.py +1 -1
  668. metadata/generated/schema/type/tagLabel.py +1 -1
  669. metadata/generated/schema/type/usageDetails.py +1 -1
  670. metadata/generated/schema/type/usageRequest.py +1 -1
  671. metadata/generated/schema/type/votes.py +1 -1
  672. metadata/ingestion/api/models.py +9 -5
  673. metadata/ingestion/models/patch_request.py +1 -0
  674. metadata/ingestion/ometa/client.py +13 -8
  675. metadata/ingestion/ometa/models.py +1 -0
  676. metadata/ingestion/ometa/ometa_api.py +6 -3
  677. metadata/ingestion/sink/metadata_rest.py +23 -18
  678. metadata/ingestion/source/dashboard/microstrategy/client.py +252 -0
  679. metadata/ingestion/source/dashboard/{mstr → microstrategy}/connection.py +7 -10
  680. metadata/ingestion/source/dashboard/{mstr → microstrategy}/metadata.py +74 -51
  681. metadata/ingestion/source/dashboard/{mstr → microstrategy}/models.py +9 -3
  682. metadata/ingestion/source/dashboard/microstrategy/service_spec.py +6 -0
  683. metadata/ingestion/source/dashboard/powerbi/client.py +1 -2
  684. metadata/ingestion/source/dashboard/superset/db_source.py +1 -1
  685. metadata/ingestion/source/database/athena/metadata.py +8 -0
  686. metadata/ingestion/source/database/azuresql/service_spec.py +2 -0
  687. metadata/ingestion/source/database/bigquery/profiler/profiler.py +1 -1
  688. metadata/ingestion/source/database/bigquery/service_spec.py +2 -0
  689. metadata/ingestion/source/database/common_nosql_source.py +7 -0
  690. metadata/ingestion/source/database/databricks/metadata.py +1 -3
  691. metadata/ingestion/source/database/databricks/service_spec.py +4 -0
  692. metadata/ingestion/source/database/datalake/clients/azure_blob.py +0 -3
  693. metadata/ingestion/source/database/datalake/clients/base.py +10 -1
  694. metadata/ingestion/source/database/datalake/clients/gcs.py +4 -2
  695. metadata/ingestion/source/database/datalake/clients/s3.py +0 -3
  696. metadata/ingestion/source/database/datalake/service_spec.py +8 -1
  697. metadata/ingestion/source/database/dbt/constants.py +4 -0
  698. metadata/ingestion/source/database/dbt/dbt_config.py +19 -1
  699. metadata/ingestion/source/database/dbt/dbt_service.py +8 -1
  700. metadata/ingestion/source/database/dbt/dbt_utils.py +29 -0
  701. metadata/ingestion/source/database/dbt/metadata.py +70 -1
  702. metadata/ingestion/source/database/dbt/models.py +2 -0
  703. metadata/ingestion/source/database/dynamodb/service_spec.py +4 -1
  704. metadata/ingestion/source/database/mongodb/service_spec.py +4 -1
  705. metadata/ingestion/source/database/mysql/connection.py +5 -0
  706. metadata/ingestion/source/database/mysql/lineage.py +12 -30
  707. metadata/ingestion/source/database/mysql/queries.py +43 -0
  708. metadata/ingestion/source/database/mysql/query_parser.py +46 -0
  709. metadata/ingestion/source/database/mysql/service_spec.py +7 -1
  710. metadata/{data_quality/builders/pandas_validator_builder.py → ingestion/source/database/mysql/usage.py} +9 -14
  711. metadata/ingestion/source/database/sample_data.py +4 -12
  712. metadata/ingestion/source/database/snowflake/connection.py +1 -1
  713. metadata/ingestion/source/database/snowflake/service_spec.py +6 -0
  714. metadata/ingestion/source/database/trino/service_spec.py +2 -0
  715. metadata/ingestion/source/database/unitycatalog/service_spec.py +4 -0
  716. metadata/ingestion/source/pipeline/airflow/connection.py +45 -2
  717. metadata/ingestion/source/pipeline/airflow/metadata.py +35 -25
  718. metadata/ingestion/source/pipeline/dbtcloud/client.py +67 -28
  719. metadata/ingestion/source/pipeline/dbtcloud/connection.py +1 -3
  720. metadata/ingestion/source/pipeline/dbtcloud/models.py +1 -1
  721. metadata/ingestion/source/pipeline/kafkaconnect/client.py +1 -1
  722. metadata/ingestion/source/search/elasticsearch/metadata.py +53 -0
  723. metadata/ingestion/source/search/search_service.py +44 -1
  724. metadata/mixins/pandas/pandas_mixin.py +2 -31
  725. metadata/mixins/sqalchemy/sqa_mixin.py +16 -16
  726. metadata/pii/processor.py +10 -9
  727. metadata/profiler/api/models.py +3 -79
  728. metadata/profiler/config.py +39 -0
  729. metadata/profiler/interface/nosql/profiler_interface.py +1 -26
  730. metadata/profiler/interface/pandas/profiler_interface.py +37 -77
  731. metadata/profiler/interface/profiler_interface.py +10 -282
  732. metadata/profiler/interface/sqlalchemy/bigquery/profiler_interface.py +0 -19
  733. metadata/profiler/interface/sqlalchemy/databricks/profiler_interface.py +2 -17
  734. metadata/profiler/interface/sqlalchemy/db2/profiler_interface.py +1 -1
  735. metadata/profiler/interface/sqlalchemy/mariadb/profiler_interface.py +2 -2
  736. metadata/profiler/interface/sqlalchemy/profiler_interface.py +46 -109
  737. metadata/profiler/interface/sqlalchemy/single_store/profiler_interface.py +2 -2
  738. metadata/profiler/interface/sqlalchemy/snowflake/profiler_interface.py +1 -1
  739. metadata/profiler/interface/sqlalchemy/stored_statistics_profiler.py +6 -8
  740. metadata/profiler/interface/sqlalchemy/trino/profiler_interface.py +2 -2
  741. metadata/profiler/orm/converter/base.py +21 -12
  742. metadata/profiler/orm/functions/table_metric_computer.py +5 -4
  743. metadata/profiler/processor/core.py +5 -58
  744. metadata/profiler/processor/handle_partition.py +0 -48
  745. metadata/profiler/processor/runner.py +111 -35
  746. metadata/profiler/processor/sample_data_handler.py +7 -4
  747. metadata/profiler/source/database/base/profiler_source.py +57 -138
  748. metadata/profiler/source/database/bigquery/profiler_source.py +3 -3
  749. metadata/profiler/source/database/databricks/profiler_source.py +2 -3
  750. metadata/profiler/source/fetcher/config.py +44 -0
  751. metadata/profiler/source/fetcher/fetcher_strategy.py +2 -4
  752. metadata/sampler/config.py +237 -0
  753. metadata/sampler/models.py +106 -0
  754. metadata/{profiler/processor/sampler → sampler}/nosql/sampler.py +44 -12
  755. metadata/sampler/pandas/sampler.py +239 -0
  756. metadata/{utils → sampler}/partition.py +66 -51
  757. metadata/sampler/processor.py +158 -0
  758. metadata/sampler/sampler_interface.py +251 -0
  759. metadata/{profiler/processor/sampler → sampler}/sqlalchemy/azuresql/sampler.py +1 -1
  760. metadata/{profiler/processor/sampler → sampler}/sqlalchemy/bigquery/sampler.py +55 -26
  761. metadata/sampler/sqlalchemy/postgres/sampler.py +91 -0
  762. metadata/{profiler/processor/sampler → sampler}/sqlalchemy/sampler.py +90 -98
  763. metadata/sampler/sqlalchemy/snowflake/sampler.py +95 -0
  764. metadata/{profiler/processor/sampler → sampler}/sqlalchemy/trino/sampler.py +5 -3
  765. metadata/utils/constants.py +63 -0
  766. metadata/utils/helpers.py +1 -2
  767. metadata/utils/logger.py +9 -0
  768. metadata/utils/profiler_utils.py +42 -0
  769. metadata/utils/service_spec/default.py +6 -0
  770. metadata/utils/service_spec/service_spec.py +47 -0
  771. metadata/utils/sqlalchemy_utils.py +1 -1
  772. metadata/utils/ssl_manager.py +39 -1
  773. metadata/workflow/classification.py +53 -0
  774. metadata/workflow/profiler.py +1 -17
  775. {openmetadata_ingestion-1.6.0.0rc1.dist-info → openmetadata_ingestion-1.6.0.0rc2.dist-info}/METADATA +366 -366
  776. {openmetadata_ingestion-1.6.0.0rc1.dist-info → openmetadata_ingestion-1.6.0.0rc2.dist-info}/RECORD +781 -771
  777. metadata/data_quality/builders/sqa_validator_builder.py +0 -25
  778. metadata/data_quality/interface/test_suite_interface_factory.py +0 -158
  779. metadata/data_quality/runner/test_suite_source_factory.py +0 -38
  780. metadata/generated/schema/entity/services/connections/dashboard/mstrConnection.py +0 -54
  781. metadata/ingestion/source/dashboard/mstr/client.py +0 -209
  782. metadata/ingestion/source/dashboard/mstr/service_spec.py +0 -4
  783. metadata/profiler/processor/sampler/pandas/sampler.py +0 -170
  784. metadata/profiler/processor/sampler/sampler_factory.py +0 -100
  785. metadata/profiler/processor/sampler/sampler_interface.py +0 -74
  786. metadata/profiler/processor/sampler/sqlalchemy/snowflake/sampler.py +0 -83
  787. /metadata/ingestion/source/dashboard/{mstr → microstrategy}/__init__.py +0 -0
  788. {openmetadata_ingestion-1.6.0.0rc1.dist-info → openmetadata_ingestion-1.6.0.0rc2.dist-info}/LICENSE +0 -0
  789. {openmetadata_ingestion-1.6.0.0rc1.dist-info → openmetadata_ingestion-1.6.0.0rc2.dist-info}/WHEEL +0 -0
  790. {openmetadata_ingestion-1.6.0.0rc1.dist-info → openmetadata_ingestion-1.6.0.0rc2.dist-info}/entry_points.txt +0 -0
  791. {openmetadata_ingestion-1.6.0.0rc1.dist-info → openmetadata_ingestion-1.6.0.0rc2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,158 @@
1
+ # Copyright 2021 Collate
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ # Unless required by applicable law or agreed to in writing, software
7
+ # distributed under the License is distributed on an "AS IS" BASIS,
8
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
+ # See the License for the specific language governing permissions and
10
+ # limitations under the License.
11
+ """
12
+ Data Sampler for the PII Workflow
13
+ """
14
+ import traceback
15
+ from copy import deepcopy
16
+ from typing import Optional, cast
17
+
18
+ from metadata.generated.schema.entity.data.database import Database
19
+ from metadata.generated.schema.entity.data.table import Table
20
+ from metadata.generated.schema.entity.services.databaseService import DatabaseConnection
21
+ from metadata.generated.schema.entity.services.ingestionPipelines.status import (
22
+ StackTraceError,
23
+ )
24
+ from metadata.generated.schema.entity.services.serviceType import ServiceType
25
+ from metadata.generated.schema.metadataIngestion.databaseServiceAutoClassificationPipeline import (
26
+ DatabaseServiceAutoClassificationPipeline,
27
+ )
28
+ from metadata.generated.schema.metadataIngestion.workflow import (
29
+ OpenMetadataWorkflowConfig,
30
+ )
31
+ from metadata.ingestion.api.models import Either
32
+ from metadata.ingestion.api.parser import parse_workflow_config_gracefully
33
+ from metadata.ingestion.api.step import Step
34
+ from metadata.ingestion.api.steps import Processor
35
+ from metadata.ingestion.ometa.ometa_api import OpenMetadata
36
+ from metadata.profiler.api.models import ProfilerProcessorConfig
37
+ from metadata.profiler.source.metadata import ProfilerSourceAndEntity
38
+ from metadata.sampler.config import get_config_for_table
39
+ from metadata.sampler.models import SampleConfig, SampleData, SamplerResponse
40
+ from metadata.sampler.sampler_interface import SamplerInterface
41
+ from metadata.utils.profiler_utils import get_context_entities
42
+ from metadata.utils.service_spec.service_spec import import_sampler_class
43
+
44
+
45
class SamplerProcessor(Processor):
    """Processor step that builds a sampler for each incoming table,
    fetches its sample data and hands it to the next step of the workflow."""

    def __init__(self, config: OpenMetadataWorkflowConfig, metadata: OpenMetadata):
        super().__init__()

        self.config = config
        self.metadata = metadata

        # The cast is a no-op at runtime; it is only used to satisfy the type checker
        self.source_config: DatabaseServiceAutoClassificationPipeline = cast(
            DatabaseServiceAutoClassificationPipeline,
            self.config.source.sourceConfig.config,
        )

        # We still rely on the orm-processor. We should decouple this in the future
        processor_settings = self.config.processor.model_dump().get("config")
        self.profiler_config = ProfilerProcessorConfig.model_validate(
            processor_settings
        )

        # The sampler implementation is resolved from the lower-cased source type
        self._interface_type: str = config.source.type.lower()
        self.sampler_class = import_sampler_class(
            ServiceType.Database, source_type=self._interface_type
        )

    @property
    def name(self) -> str:
        """Name of this step"""
        return "Sampler"

    def _run(self, record: ProfilerSourceAndEntity) -> Either[SamplerResponse]:
        """Sample the table carried by `record`.

        Returns an `Either` whose `right` is the `SamplerResponse` on success,
        or whose `left` is a `StackTraceError` if anything raised on the way.
        """

        try:
            entity = cast(Table, record.entity)
            schema_entity, database_entity, _ = get_context_entities(
                entity=entity, metadata=self.metadata
            )
            service_conn_config = self._copy_service_config(
                self.config, database_entity
            )

            # Workflow-level sampling settings, passed as defaults to the sampler
            table_config = get_config_for_table(entity, self.profiler_config)
            workflow_sample_config = SampleConfig(
                profile_sample=self.source_config.profileSample,
                profile_sample_type=self.source_config.profileSampleType,
                sampling_method_type=self.source_config.samplingMethodType,
            )

            sampler_interface: SamplerInterface = self.sampler_class.create(
                service_connection_config=service_conn_config,
                ometa_client=self.metadata,
                entity=entity,
                schema_entity=schema_entity,
                database_entity=database_entity,
                table_config=table_config,
                default_sample_config=workflow_sample_config,
                default_sample_data_count=self.source_config.sampleDataCount,
            )

            fetched_rows = sampler_interface.generate_sample_data()
            sample_data = SampleData(
                data=fetched_rows,
                store=self.source_config.storeSampleData,
            )
            response = SamplerResponse(table=entity, sample_data=sample_data)
            return Either(right=response)

        except Exception as exc:
            failure = StackTraceError(
                name=record.entity.fullyQualifiedName.root,
                error=f"Unexpected exception processing entity {record.entity.fullyQualifiedName.root}: {exc}",
                stackTrace=traceback.format_exc(),
            )
            return Either(left=failure)

    @classmethod
    def create(
        cls,
        config_dict: dict,
        metadata: OpenMetadata,
        pipeline_name: Optional[str] = None,
    ) -> "Step":
        """Parse the raw workflow dict and build the processor.

        `pipeline_name` is accepted but not used by this step.
        """
        parsed_config = parse_workflow_config_gracefully(config_dict)
        return cls(config=parsed_config, metadata=metadata)

    def _copy_service_config(
        self, config: OpenMetadataWorkflowConfig, database: Database
    ) -> DatabaseConnection:
        """Make a copy of the service config and update the database name

        Args:
            config: workflow configuration holding the service connection
            database: database entity whose name is written into the copy

        Returns:
            a deep copy of the service connection config; the original is
            left untouched
        """
        connection_copy = deepcopy(
            config.source.serviceConnection.root.config  # type: ignore
        )

        # Only connections exposing `supportsDatabase` get retargeted, and only
        # through the attributes they actually define
        if hasattr(connection_copy, "supportsDatabase"):  # type: ignore
            for attribute in ("database", "catalog"):
                if hasattr(connection_copy, attribute):
                    setattr(connection_copy, attribute, database.name.root)

        # we know we'll only be working with DatabaseConnection, we cast the type to satisfy type checker
        return cast(DatabaseConnection, connection_copy)

    def close(self) -> None:
        """Nothing to close"""
@@ -0,0 +1,251 @@
1
+ # Copyright 2021 Collate
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ # Unless required by applicable law or agreed to in writing, software
7
+ # distributed under the License is distributed on an "AS IS" BASIS,
8
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
+ # See the License for the specific language governing permissions and
10
+ # limitations under the License.
11
+ """
12
+ Interface for sampler
13
+ """
14
+ import traceback
15
+ from abc import ABC, abstractmethod
16
+ from typing import Dict, List, Optional, Set, Union
17
+
18
+ from metadata.generated.schema.entity.data.database import Database
19
+ from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
20
+ from metadata.generated.schema.entity.data.table import (
21
+ ColumnProfilerConfig,
22
+ Table,
23
+ TableData,
24
+ )
25
+ from metadata.generated.schema.entity.services.connections.connectionBasicType import (
26
+ DataStorageConfig,
27
+ )
28
+ from metadata.generated.schema.entity.services.connections.database.datalakeConnection import (
29
+ DatalakeConnection,
30
+ )
31
+ from metadata.generated.schema.entity.services.databaseService import DatabaseConnection
32
+ from metadata.ingestion.ometa.ometa_api import OpenMetadata
33
+ from metadata.profiler.api.models import TableConfig
34
+ from metadata.profiler.processor.sample_data_handler import upload_sample_data
35
+ from metadata.sampler.config import (
36
+ get_exclude_columns,
37
+ get_include_columns,
38
+ get_profile_sample_config,
39
+ get_sample_data_count_config,
40
+ get_sample_query,
41
+ )
42
+ from metadata.sampler.models import SampleConfig
43
+ from metadata.sampler.partition import get_partition_details
44
+ from metadata.utils.constants import SAMPLE_DATA_DEFAULT_COUNT
45
+ from metadata.utils.execution_time_tracker import calculate_execution_time
46
+ from metadata.utils.logger import sampler_logger
47
+ from metadata.utils.sqa_like_column import SQALikeColumn
48
+ from metadata.utils.ssl_manager import get_ssl_connection
49
+
50
+ logger = sampler_logger()
51
+
52
+
53
class SamplerInterface(ABC):
    """Sampler interface
    This should be the entrypoint for computing any metrics that are required downstream for
    data quality, profiling, etc.

    Concrete samplers implement the abstract methods below; this base class
    stores the sampling settings, opens the connection/client and provides
    the column filtering and the sample data generation entrypoint.
    """

    # pylint: disable=too-many-instance-attributes, too-many-arguments
    def __init__(
        self,
        service_connection_config: Union[DatabaseConnection, DatalakeConnection],
        ometa_client: OpenMetadata,
        entity: Table,
        include_columns: Optional[List[ColumnProfilerConfig]] = None,
        exclude_columns: Optional[List[str]] = None,
        sample_config: Optional[SampleConfig] = None,
        partition_details: Optional[Dict] = None,
        sample_query: Optional[str] = None,
        storage_config: Optional[DataStorageConfig] = None,
        sample_data_count: Optional[int] = SAMPLE_DATA_DEFAULT_COUNT,
        **__,
    ):
        """Store the sampling settings and open the connection and client.

        Args:
            service_connection_config: connection config of the service to sample
            ometa_client: OpenMetadata client
            entity: table entity to sample
            include_columns: column configs to restrict the sampling to
            exclude_columns: column names to leave out of the sampling
            sample_config: sampling settings; a fresh `SampleConfig` is
                created when omitted or None
            partition_details: partitioning information for the table
            sample_query: user-provided query to sample from
            storage_config: where to upload the sample data, if anywhere
            sample_data_count: maximum number of sample rows to keep
        """
        self.ometa_client = ometa_client
        self._sample = None
        self._columns: Optional[List[SQALikeColumn]] = None

        # Build the fallback config per instance: a `SampleConfig()` default in
        # the signature would be a single object shared (and mutated below) by
        # every sampler, and subclasses may explicitly forward None.
        self.sample_config = (
            sample_config if sample_config is not None else SampleConfig()
        )

        # A missing or zero profile sample means "use the whole table"
        if not self.sample_config.profile_sample:
            self.sample_config.profile_sample = 100

        self.entity = entity
        self.include_columns = include_columns
        self.exclude_columns = exclude_columns
        self.sample_query = sample_query
        self.sample_limit = sample_data_count
        self.partition_details = partition_details
        self.storage_config = storage_config

        self.service_connection_config = service_connection_config
        self.connection = get_ssl_connection(self.service_connection_config)
        self.client = self.get_client()

    # pylint: disable=too-many-arguments, too-many-locals
    @classmethod
    def create(
        cls,
        service_connection_config: Union[DatabaseConnection, DatalakeConnection],
        ometa_client: OpenMetadata,
        entity: Table,
        schema_entity: DatabaseSchema,
        database_entity: Database,
        table_config: Optional[TableConfig] = None,
        storage_config: Optional[DataStorageConfig] = None,
        default_sample_config: Optional[SampleConfig] = None,
        default_sample_data_count: Optional[int] = SAMPLE_DATA_DEFAULT_COUNT,
        **kwargs,
    ) -> "SamplerInterface":
        """Create sampler

        Resolves the sample count, sample config, sample query, partition
        details and include/exclude columns through the `metadata.sampler`
        config helpers and instantiates the sampler with the results. Extra
        keyword arguments are forwarded to the constructor untouched.
        """

        sample_data_count = get_sample_data_count_config(
            entity=entity,
            schema_entity=schema_entity,
            database_entity=database_entity,
            entity_config=table_config,
            default_sample_data_count=default_sample_data_count,
        )
        sample_config = get_profile_sample_config(
            entity=entity,
            schema_entity=schema_entity,
            database_entity=database_entity,
            entity_config=table_config,
            default_sample_config=default_sample_config,
        )
        sample_query = get_sample_query(entity=entity, entity_config=table_config)
        partition_details = get_partition_details(
            entity=entity, entity_config=table_config
        )
        include_columns = get_include_columns(entity, entity_config=table_config)
        exclude_columns = get_exclude_columns(entity, entity_config=table_config)

        return cls(
            service_connection_config=service_connection_config,
            ometa_client=ometa_client,
            entity=entity,
            include_columns=include_columns,
            exclude_columns=exclude_columns,
            sample_config=sample_config,
            partition_details=partition_details,
            sample_query=sample_query,
            storage_config=storage_config,
            sample_data_count=sample_data_count,
            **kwargs,
        )

    @property
    def columns(self) -> List[SQALikeColumn]:
        """
        Return the list of columns to profile
        by skipping the columns to ignore.

        When include columns are configured only those are kept and the
        exclude list is not consulted; otherwise every column not in the
        exclude list is kept. A non-empty result is cached.
        """

        if self._columns:
            return self._columns

        # Build each name set once instead of once per column
        included = self._get_included_columns()
        if included:
            self._columns = [
                column for column in self.get_columns() if column.name in included
            ]
        else:
            excluded = self._get_excluded_columns()
            self._columns = [
                column
                for column in self.get_columns()
                if column.name not in excluded
            ]

        return self._columns

    def _get_excluded_columns(self) -> Set[str]:
        """Get excluded columns for table being profiled (empty set if none)"""
        if self.exclude_columns:
            return set(self.exclude_columns)
        return set()

    def _get_included_columns(self) -> Set[str]:
        """Get include columns for table being profiled (empty set if none)"""
        if self.include_columns:
            return {include_col.columnName for include_col in self.include_columns}
        return set()

    @property
    @abstractmethod
    def raw_dataset(self):
        """Table object to run the sampling"""
        raise NotImplementedError

    @abstractmethod
    def get_client(self):
        """Get client"""
        raise NotImplementedError

    @abstractmethod
    def _rdn_sample_from_user_query(self):
        """Get random sample from user query"""
        raise NotImplementedError

    @abstractmethod
    def _fetch_sample_data_from_user_query(self) -> TableData:
        """Fetch sample data from user query"""
        raise NotImplementedError

    @abstractmethod
    def get_dataset(self, **kwargs):
        """Get random sample"""
        raise NotImplementedError

    @abstractmethod
    def fetch_sample_data(self, columns: Optional[List[SQALikeColumn]]) -> TableData:
        """Fetch sample data

        Args:
            columns (Optional[List]): List of columns to fetch
        """
        raise NotImplementedError

    @abstractmethod
    def get_columns(self) -> List[SQALikeColumn]:
        """get columns"""
        raise NotImplementedError

    @calculate_execution_time(store=False)
    def generate_sample_data(self) -> Optional[TableData]:
        """Fetch and ingest sample data

        The full fetched data is uploaded when a storage config is set; the
        returned rows are then capped at the smaller of the configured sample
        limit and `SAMPLE_DATA_DEFAULT_COUNT`.

        Returns:
            TableData: sample data

        Raises:
            Exception: any error raised while fetching or uploading is logged
                and re-raised
        """
        try:
            logger.debug(
                f"Fetching sample data for {self.entity.fullyQualifiedName.root}..."
            )
            table_data = self.fetch_sample_data(self.columns)
            # Only store the data if configured to do so
            if self.storage_config:
                upload_sample_data(
                    data=table_data,
                    entity=self.entity,
                    sample_storage_config=self.storage_config,
                )

            # `sample_limit` is Optional: min() against None would raise a
            # TypeError, so fall back to the default cap in that case
            row_limit = (
                SAMPLE_DATA_DEFAULT_COUNT
                if self.sample_limit is None
                else min(SAMPLE_DATA_DEFAULT_COUNT, self.sample_limit)
            )
            table_data.rows = table_data.rows[:row_limit]
            return table_data

        except Exception as err:
            logger.debug(traceback.format_exc())
            logger.warning(f"Error fetching sample data: {err}")
            raise
@@ -17,7 +17,7 @@ from typing import List, Optional
17
17
  from sqlalchemy import Column
18
18
 
19
19
  from metadata.generated.schema.entity.data.table import TableData
20
- from metadata.profiler.processor.sampler.sqlalchemy.sampler import SQASampler
20
+ from metadata.sampler.sqlalchemy.sampler import SQASampler
21
21
 
22
22
 
23
23
  class AzureSQLSampler(SQASampler):
@@ -12,15 +12,28 @@
12
12
  Helper module to handle data sampling
13
13
  for the profiler
14
14
  """
15
- from typing import Dict, Optional
15
+ from typing import Dict, Optional, Union
16
16
 
17
17
  from sqlalchemy import Column
18
+ from sqlalchemy import Table as SqaTable
19
+ from sqlalchemy import text
18
20
  from sqlalchemy.orm import Query
19
21
 
20
- from metadata.generated.schema.entity.data.table import ProfileSampleType, TableType
21
- from metadata.profiler.api.models import ProfileSampleConfig
22
- from metadata.profiler.processor.handle_partition import partition_filter_handler
23
- from metadata.profiler.processor.sampler.sqlalchemy.sampler import SQASampler
22
+ from metadata.generated.schema.entity.data.table import (
23
+ ProfileSampleType,
24
+ Table,
25
+ TableType,
26
+ )
27
+ from metadata.generated.schema.entity.services.connections.connectionBasicType import (
28
+ DataStorageConfig,
29
+ )
30
+ from metadata.generated.schema.entity.services.connections.database.datalakeConnection import (
31
+ DatalakeConnection,
32
+ )
33
+ from metadata.generated.schema.entity.services.databaseService import DatabaseConnection
34
+ from metadata.ingestion.ometa.ometa_api import OpenMetadata
35
+ from metadata.sampler.models import SampleConfig
36
+ from metadata.sampler.sqlalchemy.sampler import SQASampler
24
37
  from metadata.utils.constants import SAMPLE_DATA_DEFAULT_COUNT
25
38
 
26
39
 
@@ -33,23 +46,44 @@ class BigQuerySampler(SQASampler):
33
46
  # pylint: disable=too-many-arguments
34
47
  def __init__(
35
48
  self,
36
- client,
37
- table,
38
- profile_sample_config: Optional[ProfileSampleConfig] = None,
49
+ service_connection_config: Union[DatabaseConnection, DatalakeConnection],
50
+ ometa_client: OpenMetadata,
51
+ entity: Table,
52
+ sample_config: Optional[SampleConfig] = None,
39
53
  partition_details: Optional[Dict] = None,
40
- profile_sample_query: Optional[str] = None,
54
+ sample_query: Optional[str] = None,
55
+ storage_config: DataStorageConfig = None,
41
56
  sample_data_count: Optional[int] = SAMPLE_DATA_DEFAULT_COUNT,
42
57
  table_type: TableType = None,
58
+ **kwargs,
43
59
  ):
44
60
  super().__init__(
45
- client,
46
- table,
47
- profile_sample_config,
48
- partition_details,
49
- profile_sample_query,
50
- sample_data_count,
61
+ service_connection_config=service_connection_config,
62
+ ometa_client=ometa_client,
63
+ entity=entity,
64
+ sample_config=sample_config,
65
+ partition_details=partition_details,
66
+ sample_query=sample_query,
67
+ storage_config=storage_config,
68
+ sample_data_count=sample_data_count,
69
+ **kwargs,
51
70
  )
52
- self.table_type: TableType = table_type
71
+ self.raw_dataset_type: TableType = table_type
72
+
73
+ def set_tablesample(self, selectable: SqaTable):
74
+ """Set the TABLESAMPLE clause for BigQuery
75
+ Args:
76
+ selectable (Table): Table object
77
+ """
78
+ if (
79
+ self.sample_config.profile_sample_type == ProfileSampleType.PERCENTAGE
80
+ and self.raw_dataset_type != TableType.View
81
+ ):
82
+ return selectable.tablesample(
83
+ text(f"{self.sample_config.profile_sample or 100} PERCENT")
84
+ )
85
+
86
+ return selectable
53
87
 
54
88
  def _base_sample_query(self, column: Optional[Column], label=None):
55
89
  """Base query for sampling
@@ -73,25 +107,20 @@ class BigQuerySampler(SQASampler):
73
107
  # FROM sample TABLESAMPLE SYSTEM (n PERCENT)
74
108
  column = Column(column_parts[0], STRUCT)
75
109
  # pylint: disable=protected-access
76
- column._set_parent(self.table.__table__)
110
+ column._set_parent(self.raw_dataset.__table__)
77
111
  # pylint: enable=protected-access
78
112
 
79
113
  return super()._base_sample_query(column, label=label)
80
114
 
81
- @partition_filter_handler(build_sample=True)
82
115
  def get_sample_query(self, *, column=None) -> Query:
83
116
  """get query for sample data"""
84
117
  # TABLESAMPLE SYSTEM is not supported for views
85
118
  if (
86
- self.profile_sample_type == ProfileSampleType.PERCENTAGE
87
- and self.table_type != TableType.View
119
+ self.sample_config.profile_sample_type == ProfileSampleType.PERCENTAGE
120
+ and self.raw_dataset_type != TableType.View
88
121
  ):
89
- return (
90
- self._base_sample_query(column)
91
- .suffix_with(
92
- f"TABLESAMPLE SYSTEM ({self.profile_sample or 100} PERCENT)",
93
- )
94
- .cte(f"{self.table.__tablename__}_sample")
122
+ return self._base_sample_query(column).cte(
123
+ f"{self.raw_dataset.__tablename__}_sample"
95
124
  )
96
125
 
97
126
  return super().get_sample_query(column=column)
@@ -0,0 +1,91 @@
1
+ # Copyright 2021 Collate
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ # Unless required by applicable law or agreed to in writing, software
7
+ # distributed under the License is distributed on an "AS IS" BASIS,
8
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
+ # See the License for the specific language governing permissions and
10
+ # limitations under the License.
11
+ """
12
+ Helper module to handle data sampling for the profiler
13
+ """
14
+ from typing import Dict, Optional, Union
15
+
16
+ from sqlalchemy import Table as SqaTable
17
+ from sqlalchemy import func
18
+ from sqlalchemy.orm import Query
19
+
20
+ from metadata.generated.schema.entity.data.table import ProfileSampleType, Table
21
+ from metadata.generated.schema.entity.services.connections.connectionBasicType import (
22
+ DataStorageConfig,
23
+ )
24
+ from metadata.generated.schema.entity.services.connections.database.datalakeConnection import (
25
+ DatalakeConnection,
26
+ )
27
+ from metadata.generated.schema.entity.services.databaseService import DatabaseConnection
28
+ from metadata.ingestion.ometa.ometa_api import OpenMetadata
29
+ from metadata.sampler.models import SampleConfig
30
+ from metadata.sampler.sqlalchemy.sampler import SQASampler
31
+ from metadata.sampler.sqlalchemy.snowflake.sampler import SamplingMethodType
32
+ from metadata.utils.constants import SAMPLE_DATA_DEFAULT_COUNT
33
+
34
+
35
class PostgresSampler(SQASampler):
    """
    Generates a sample of the data to not
    run the query in the whole table.

    Percentage-based samples are pushed down to Postgres through a
    TABLESAMPLE clause, using BERNOULLI by default or SYSTEM when the
    sample config asks for it.
    """

    # pylint: disable=too-many-arguments
    def __init__(
        self,
        service_connection_config: Union[DatabaseConnection, DatalakeConnection],
        ometa_client: OpenMetadata,
        entity: Table,
        sample_config: Optional[SampleConfig] = None,
        partition_details: Optional[Dict] = None,
        sample_query: Optional[str] = None,
        storage_config: Optional[DataStorageConfig] = None,
        sample_data_count: Optional[int] = SAMPLE_DATA_DEFAULT_COUNT,
        **kwargs,
    ):
        """Initialise the base sampler and pick the TABLESAMPLE method.

        All arguments are forwarded unchanged to `SQASampler`.
        """
        super().__init__(
            service_connection_config=service_connection_config,
            ometa_client=ometa_client,
            entity=entity,
            sample_config=sample_config,
            partition_details=partition_details,
            sample_query=sample_query,
            storage_config=storage_config,
            sample_data_count=sample_data_count,
            **kwargs,
        )
        # BERNOULLI is the default. Keep the recorded method type and the SQL
        # function in sync so they never disagree when SYSTEM is requested.
        self.sampling_method_type = SamplingMethodType.BERNOULLI
        self.sampling_fn = func.bernoulli
        if (
            sample_config
            and sample_config.sampling_method_type == SamplingMethodType.SYSTEM
        ):
            self.sampling_method_type = SamplingMethodType.SYSTEM
            self.sampling_fn = func.system

    def set_tablesample(self, selectable: SqaTable):
        """Set the TABLESAMPLE clause for postgres

        Args:
            selectable (Table): table to sample from

        Returns:
            the selectable with TABLESAMPLE applied when the profile sample
            type is PERCENTAGE (100 is used if no percentage is set),
            otherwise the selectable unchanged
        """
        if self.sample_config.profile_sample_type == ProfileSampleType.PERCENTAGE:
            return selectable.tablesample(
                self.sampling_fn(self.sample_config.profile_sample or 100)
            )

        return selectable

    def get_sample_query(self, *, column=None) -> Query:
        """Get the query for sample data.

        For PERCENTAGE samples the base sample query is wrapped in a CTE named
        `<tablename>_rnd`; any other sample type is delegated to the parent
        implementation.
        """
        if self.sample_config.profile_sample_type == ProfileSampleType.PERCENTAGE:
            return self._base_sample_query(column).cte(
                f"{self.raw_dataset.__tablename__}_rnd"
            )

        return super().get_sample_query(column=column)