openmetadata-ingestion 1.6.0.0rc1__py3-none-any.whl → 1.6.0.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of openmetadata-ingestion might be problematic. Click here for more details.

Files changed (791) hide show
  1. metadata/cli/classify.py +52 -0
  2. metadata/cmd.py +9 -0
  3. metadata/data_quality/builders/{i_validator_builder.py → validator_builder.py} +40 -29
  4. metadata/data_quality/interface/pandas/pandas_test_suite_interface.py +25 -25
  5. metadata/data_quality/interface/sqlalchemy/sqa_test_suite_interface.py +23 -50
  6. metadata/data_quality/interface/test_suite_interface.py +45 -56
  7. metadata/data_quality/processor/test_case_runner.py +9 -9
  8. metadata/data_quality/runner/base_test_suite_source.py +52 -26
  9. metadata/data_quality/validations/base_test_handler.py +10 -5
  10. metadata/data_quality/validations/column/sqlalchemy/columnValueLengthsToBeBetween.py +1 -1
  11. metadata/data_quality/validations/column/sqlalchemy/columnValueMaxToBeBetween.py +1 -1
  12. metadata/data_quality/validations/column/sqlalchemy/columnValueMeanToBeBetween.py +1 -1
  13. metadata/data_quality/validations/column/sqlalchemy/columnValueMedianToBeBetween.py +1 -1
  14. metadata/data_quality/validations/column/sqlalchemy/columnValueMinToBeBetween.py +1 -1
  15. metadata/data_quality/validations/column/sqlalchemy/columnValueStdDevToBeBetween.py +1 -1
  16. metadata/data_quality/validations/column/sqlalchemy/columnValuesMissingCount.py +1 -1
  17. metadata/data_quality/validations/column/sqlalchemy/columnValuesSumToBeBetween.py +1 -1
  18. metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeAtExpectedLocation.py +1 -1
  19. metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeBetween.py +1 -1
  20. metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeInSet.py +1 -1
  21. metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotInSet.py +1 -1
  22. metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotNull.py +1 -1
  23. metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeUnique.py +2 -8
  24. metadata/data_quality/validations/column/sqlalchemy/columnValuesToMatchRegex.py +1 -1
  25. metadata/data_quality/validations/column/sqlalchemy/columnValuesToNotMatchRegex.py +1 -1
  26. metadata/data_quality/validations/runtime_param_setter/param_setter.py +2 -3
  27. metadata/data_quality/validations/runtime_param_setter/param_setter_factory.py +45 -17
  28. metadata/data_quality/validations/runtime_param_setter/table_diff_params_setter.py +21 -16
  29. metadata/data_quality/validations/table/sqlalchemy/tableDiff.py +21 -18
  30. metadata/data_quality/validations/table/sqlalchemy/tableRowInsertedCountToBeBetween.py +2 -2
  31. metadata/examples/workflows/bigquery_classifier.yaml +56 -0
  32. metadata/examples/workflows/bigquery_profiler.yaml +1 -2
  33. metadata/examples/workflows/db2_profiler.yaml +1 -2
  34. metadata/examples/workflows/dbtcloud.yaml +2 -1
  35. metadata/examples/workflows/{mstr.yaml → microstrategy.yaml} +3 -2
  36. metadata/examples/workflows/mongodb.yaml +4 -0
  37. metadata/examples/workflows/mysql_profiler.yaml +0 -1
  38. metadata/examples/workflows/redshift_classifier.yaml +38 -0
  39. metadata/examples/workflows/redshift_profiler.yaml +2 -3
  40. metadata/generated/antlr/FqnLexer.py +15 -15
  41. metadata/generated/schema/analytics/__init__.py +1 -1
  42. metadata/generated/schema/analytics/basic.py +1 -1
  43. metadata/generated/schema/analytics/reportData.py +1 -1
  44. metadata/generated/schema/analytics/reportDataType/__init__.py +1 -1
  45. metadata/generated/schema/analytics/reportDataType/aggregatedCostAnalysisReportData.py +1 -1
  46. metadata/generated/schema/analytics/reportDataType/entityReportData.py +1 -1
  47. metadata/generated/schema/analytics/reportDataType/rawCostAnalysisReportData.py +1 -1
  48. metadata/generated/schema/analytics/reportDataType/webAnalyticEntityViewReportData.py +1 -1
  49. metadata/generated/schema/analytics/reportDataType/webAnalyticUserActivityReportData.py +1 -1
  50. metadata/generated/schema/analytics/webAnalyticEvent.py +1 -1
  51. metadata/generated/schema/analytics/webAnalyticEventData.py +1 -1
  52. metadata/generated/schema/analytics/webAnalyticEventType/__init__.py +1 -1
  53. metadata/generated/schema/analytics/webAnalyticEventType/customEvent.py +1 -1
  54. metadata/generated/schema/analytics/webAnalyticEventType/pageViewEvent.py +1 -1
  55. metadata/generated/schema/api/__init__.py +1 -1
  56. metadata/generated/schema/api/addGlossaryToAssetsRequest.py +1 -1
  57. metadata/generated/schema/api/addTagToAssetsRequest.py +1 -1
  58. metadata/generated/schema/api/analytics/__init__.py +1 -1
  59. metadata/generated/schema/api/analytics/createWebAnalyticEvent.py +1 -1
  60. metadata/generated/schema/api/automations/__init__.py +1 -1
  61. metadata/generated/schema/api/automations/createWorkflow.py +1 -1
  62. metadata/generated/schema/api/bulkAssets.py +1 -1
  63. metadata/generated/schema/api/classification/__init__.py +1 -1
  64. metadata/generated/schema/api/classification/createClassification.py +1 -1
  65. metadata/generated/schema/api/classification/createTag.py +1 -1
  66. metadata/generated/schema/api/classification/loadTags.py +1 -1
  67. metadata/generated/schema/api/createBot.py +1 -1
  68. metadata/generated/schema/api/createEventPublisherJob.py +1 -1
  69. metadata/generated/schema/api/createType.py +1 -1
  70. metadata/generated/schema/api/data/__init__.py +1 -1
  71. metadata/generated/schema/api/data/createAPICollection.py +1 -1
  72. metadata/generated/schema/api/data/createAPIEndpoint.py +1 -1
  73. metadata/generated/schema/api/data/createChart.py +1 -1
  74. metadata/generated/schema/api/data/createContainer.py +1 -1
  75. metadata/generated/schema/api/data/createCustomProperty.py +1 -1
  76. metadata/generated/schema/api/data/createDashboard.py +1 -1
  77. metadata/generated/schema/api/data/createDashboardDataModel.py +1 -1
  78. metadata/generated/schema/api/data/createDatabase.py +1 -1
  79. metadata/generated/schema/api/data/createDatabaseSchema.py +1 -1
  80. metadata/generated/schema/api/data/createGlossary.py +1 -1
  81. metadata/generated/schema/api/data/createGlossaryTerm.py +1 -1
  82. metadata/generated/schema/api/data/createMetric.py +1 -1
  83. metadata/generated/schema/api/data/createMlModel.py +1 -1
  84. metadata/generated/schema/api/data/createPipeline.py +1 -1
  85. metadata/generated/schema/api/data/createQuery.py +1 -1
  86. metadata/generated/schema/api/data/createSearchIndex.py +8 -1
  87. metadata/generated/schema/api/data/createStoredProcedure.py +8 -1
  88. metadata/generated/schema/api/data/createTable.py +1 -1
  89. metadata/generated/schema/api/data/createTableProfile.py +1 -1
  90. metadata/generated/schema/api/data/createTopic.py +1 -1
  91. metadata/generated/schema/api/data/loadGlossary.py +1 -1
  92. metadata/generated/schema/api/data/restoreEntity.py +1 -1
  93. metadata/generated/schema/api/dataInsight/__init__.py +1 -1
  94. metadata/generated/schema/api/dataInsight/createDataInsightChart.py +1 -1
  95. metadata/generated/schema/api/dataInsight/custom/__init__.py +1 -1
  96. metadata/generated/schema/api/dataInsight/custom/createDataInsightCustomChart.py +1 -1
  97. metadata/generated/schema/api/dataInsight/kpi/__init__.py +1 -1
  98. metadata/generated/schema/api/dataInsight/kpi/createKpiRequest.py +1 -1
  99. metadata/generated/schema/api/docStore/__init__.py +1 -1
  100. metadata/generated/schema/api/docStore/createDocument.py +1 -1
  101. metadata/generated/schema/api/domains/__init__.py +1 -1
  102. metadata/generated/schema/api/domains/createDataProduct.py +1 -1
  103. metadata/generated/schema/api/domains/createDomain.py +1 -1
  104. metadata/generated/schema/api/feed/__init__.py +1 -1
  105. metadata/generated/schema/api/feed/closeTask.py +1 -1
  106. metadata/generated/schema/api/feed/createPost.py +1 -1
  107. metadata/generated/schema/api/feed/createSuggestion.py +1 -1
  108. metadata/generated/schema/api/feed/createThread.py +1 -1
  109. metadata/generated/schema/api/feed/resolveTask.py +1 -1
  110. metadata/generated/schema/api/feed/threadCount.py +1 -1
  111. metadata/generated/schema/api/governance/__init__.py +1 -1
  112. metadata/generated/schema/api/governance/createWorkflowDefinition.py +1 -1
  113. metadata/generated/schema/api/governance/createWorkflowInstanceState.py +1 -1
  114. metadata/generated/schema/api/lineage/__init__.py +1 -1
  115. metadata/generated/schema/api/lineage/addLineage.py +1 -1
  116. metadata/generated/schema/api/openMetadataServerVersion.py +1 -1
  117. metadata/generated/schema/api/policies/__init__.py +1 -1
  118. metadata/generated/schema/api/policies/createPolicy.py +1 -1
  119. metadata/generated/schema/api/services/__init__.py +1 -1
  120. metadata/generated/schema/api/services/createApiService.py +1 -1
  121. metadata/generated/schema/api/services/createDashboardService.py +1 -1
  122. metadata/generated/schema/api/services/createDatabaseService.py +1 -1
  123. metadata/generated/schema/api/services/createMessagingService.py +1 -1
  124. metadata/generated/schema/api/services/createMetadataService.py +1 -1
  125. metadata/generated/schema/api/services/createMlModelService.py +1 -1
  126. metadata/generated/schema/api/services/createPipelineService.py +1 -1
  127. metadata/generated/schema/api/services/createSearchService.py +1 -1
  128. metadata/generated/schema/api/services/createStorageService.py +1 -1
  129. metadata/generated/schema/api/services/ingestionPipelines/__init__.py +1 -1
  130. metadata/generated/schema/api/services/ingestionPipelines/createIngestionPipeline.py +1 -1
  131. metadata/generated/schema/api/setOwner.py +1 -1
  132. metadata/generated/schema/api/teams/__init__.py +1 -1
  133. metadata/generated/schema/api/teams/createPersona.py +1 -1
  134. metadata/generated/schema/api/teams/createRole.py +1 -1
  135. metadata/generated/schema/api/teams/createTeam.py +1 -1
  136. metadata/generated/schema/api/teams/createUser.py +1 -1
  137. metadata/generated/schema/api/tests/__init__.py +1 -1
  138. metadata/generated/schema/api/tests/createCustomMetric.py +1 -1
  139. metadata/generated/schema/api/tests/createLogicalTestCases.py +1 -1
  140. metadata/generated/schema/api/tests/createTestCase.py +1 -1
  141. metadata/generated/schema/api/tests/createTestCaseResolutionStatus.py +1 -1
  142. metadata/generated/schema/api/tests/createTestCaseResult.py +1 -1
  143. metadata/generated/schema/api/tests/createTestDefinition.py +1 -1
  144. metadata/generated/schema/api/tests/createTestSuite.py +1 -1
  145. metadata/generated/schema/api/voteRequest.py +1 -1
  146. metadata/generated/schema/auth/__init__.py +1 -1
  147. metadata/generated/schema/auth/basicAuth.py +1 -1
  148. metadata/generated/schema/auth/basicLoginRequest.py +1 -1
  149. metadata/generated/schema/auth/changePasswordRequest.py +1 -1
  150. metadata/generated/schema/auth/createPersonalToken.py +1 -1
  151. metadata/generated/schema/auth/emailRequest.py +1 -1
  152. metadata/generated/schema/auth/emailVerificationToken.py +1 -1
  153. metadata/generated/schema/auth/generateToken.py +1 -1
  154. metadata/generated/schema/auth/jwtAuth.py +1 -1
  155. metadata/generated/schema/auth/loginRequest.py +1 -1
  156. metadata/generated/schema/auth/logoutRequest.py +1 -1
  157. metadata/generated/schema/auth/passwordResetRequest.py +1 -1
  158. metadata/generated/schema/auth/passwordResetToken.py +1 -1
  159. metadata/generated/schema/auth/personalAccessToken.py +1 -1
  160. metadata/generated/schema/auth/refreshToken.py +1 -1
  161. metadata/generated/schema/auth/registrationRequest.py +1 -1
  162. metadata/generated/schema/auth/revokePersonalToken.py +1 -1
  163. metadata/generated/schema/auth/revokeToken.py +1 -1
  164. metadata/generated/schema/auth/serviceTokenEnum.py +1 -1
  165. metadata/generated/schema/auth/ssoAuth.py +1 -1
  166. metadata/generated/schema/auth/tokenRefreshRequest.py +1 -1
  167. metadata/generated/schema/configuration/__init__.py +1 -1
  168. metadata/generated/schema/configuration/appsPrivateConfiguration.py +1 -1
  169. metadata/generated/schema/configuration/assetCertificationSettings.py +1 -1
  170. metadata/generated/schema/configuration/authConfig.py +1 -1
  171. metadata/generated/schema/configuration/authenticationConfiguration.py +1 -1
  172. metadata/generated/schema/configuration/authorizerConfiguration.py +1 -1
  173. metadata/generated/schema/configuration/changeEventConfiguration.py +1 -1
  174. metadata/generated/schema/configuration/dataQualityConfiguration.py +1 -1
  175. metadata/generated/schema/configuration/elasticSearchConfiguration.py +1 -1
  176. metadata/generated/schema/configuration/eventHandlerConfiguration.py +1 -1
  177. metadata/generated/schema/configuration/fernetConfiguration.py +1 -1
  178. metadata/generated/schema/configuration/jwtTokenConfiguration.py +1 -1
  179. metadata/generated/schema/configuration/kafkaEventConfiguration.py +1 -1
  180. metadata/generated/schema/configuration/ldapConfiguration.py +1 -1
  181. metadata/generated/schema/configuration/ldapTrustStoreConfig/__init__.py +1 -1
  182. metadata/generated/schema/configuration/ldapTrustStoreConfig/customTrustManagerConfig.py +1 -1
  183. metadata/generated/schema/configuration/ldapTrustStoreConfig/hostNameConfig.py +1 -1
  184. metadata/generated/schema/configuration/ldapTrustStoreConfig/jvmDefaultConfig.py +1 -1
  185. metadata/generated/schema/configuration/ldapTrustStoreConfig/trustAllConfig.py +1 -1
  186. metadata/generated/schema/configuration/ldapTrustStoreConfig/truststoreConfig.py +1 -1
  187. metadata/generated/schema/configuration/limitsConfiguration.py +1 -1
  188. metadata/generated/schema/configuration/lineageSettings.py +1 -1
  189. metadata/generated/schema/configuration/loginConfiguration.py +1 -1
  190. metadata/generated/schema/configuration/logoConfiguration.py +1 -1
  191. metadata/generated/schema/configuration/pipelineServiceClientConfiguration.py +1 -1
  192. metadata/generated/schema/configuration/profilerConfiguration.py +1 -1
  193. metadata/generated/schema/configuration/searchSettings.py +1 -1
  194. metadata/generated/schema/configuration/slackAppConfiguration.py +1 -14
  195. metadata/generated/schema/configuration/taskNotificationConfiguration.py +1 -1
  196. metadata/generated/schema/configuration/testResultNotificationConfiguration.py +1 -1
  197. metadata/generated/schema/configuration/themeConfiguration.py +1 -1
  198. metadata/generated/schema/configuration/uiThemePreference.py +1 -1
  199. metadata/generated/schema/dataInsight/__init__.py +1 -1
  200. metadata/generated/schema/dataInsight/custom/__init__.py +1 -1
  201. metadata/generated/schema/dataInsight/custom/dataInsightCustomChart.py +1 -1
  202. metadata/generated/schema/dataInsight/custom/dataInsightCustomChartResult.py +1 -1
  203. metadata/generated/schema/dataInsight/custom/dataInsightCustomChartResultList.py +1 -1
  204. metadata/generated/schema/dataInsight/custom/formulaHolder.py +1 -1
  205. metadata/generated/schema/dataInsight/custom/lineChart.py +1 -1
  206. metadata/generated/schema/dataInsight/custom/summaryCard.py +1 -1
  207. metadata/generated/schema/dataInsight/dataInsightChart.py +1 -1
  208. metadata/generated/schema/dataInsight/dataInsightChartResult.py +1 -1
  209. metadata/generated/schema/dataInsight/kpi/__init__.py +1 -1
  210. metadata/generated/schema/dataInsight/kpi/basic.py +1 -1
  211. metadata/generated/schema/dataInsight/kpi/kpi.py +1 -1
  212. metadata/generated/schema/dataInsight/type/__init__.py +1 -1
  213. metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsCount.py +1 -1
  214. metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsSize.py +1 -1
  215. metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsCount.py +1 -1
  216. metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsSize.py +1 -1
  217. metadata/generated/schema/dataInsight/type/dailyActiveUsers.py +1 -1
  218. metadata/generated/schema/dataInsight/type/mostActiveUsers.py +1 -1
  219. metadata/generated/schema/dataInsight/type/mostViewedEntities.py +1 -1
  220. metadata/generated/schema/dataInsight/type/pageViewsByEntities.py +1 -1
  221. metadata/generated/schema/dataInsight/type/unusedAssets.py +1 -1
  222. metadata/generated/schema/email/__init__.py +1 -1
  223. metadata/generated/schema/email/emailRequest.py +1 -1
  224. metadata/generated/schema/email/emailTemplate.py +1 -1
  225. metadata/generated/schema/email/emailTemplatePlaceholder.py +1 -1
  226. metadata/generated/schema/email/smtpSettings.py +1 -1
  227. metadata/generated/schema/email/templateValidationReponse.py +1 -1
  228. metadata/generated/schema/entity/__init__.py +1 -1
  229. metadata/generated/schema/entity/applications/__init__.py +1 -1
  230. metadata/generated/schema/entity/applications/app.py +2 -1
  231. metadata/generated/schema/entity/applications/appExtension.py +1 -1
  232. metadata/generated/schema/entity/applications/appRunRecord.py +1 -1
  233. metadata/generated/schema/entity/applications/configuration/__init__.py +1 -1
  234. metadata/generated/schema/entity/applications/configuration/applicationConfig.py +4 -2
  235. metadata/generated/schema/entity/applications/configuration/external/__init__.py +1 -1
  236. metadata/generated/schema/entity/applications/configuration/external/automator/__init__.py +1 -1
  237. metadata/generated/schema/entity/applications/configuration/external/automator/addDescriptionAction.py +1 -1
  238. metadata/generated/schema/entity/applications/configuration/external/automator/addDomainAction.py +1 -1
  239. metadata/generated/schema/entity/applications/configuration/external/automator/addOwnerAction.py +1 -1
  240. metadata/generated/schema/entity/applications/configuration/external/automator/addTagsAction.py +1 -1
  241. metadata/generated/schema/entity/applications/configuration/external/automator/addTierAction.py +1 -1
  242. metadata/generated/schema/entity/applications/configuration/external/automator/lineagePropagationAction.py +1 -1
  243. metadata/generated/schema/entity/applications/configuration/external/automator/mlTaggingAction.py +1 -1
  244. metadata/generated/schema/entity/applications/configuration/external/automator/removeDescriptionAction.py +1 -1
  245. metadata/generated/schema/entity/applications/configuration/external/automator/removeDomainAction.py +1 -1
  246. metadata/generated/schema/entity/applications/configuration/external/automator/removeOwnerAction.py +1 -1
  247. metadata/generated/schema/entity/applications/configuration/external/automator/removeTagsAction.py +1 -1
  248. metadata/generated/schema/entity/applications/configuration/external/automator/removeTierAction.py +1 -1
  249. metadata/generated/schema/entity/applications/configuration/external/automatorAppConfig.py +1 -1
  250. metadata/generated/schema/entity/applications/configuration/external/collateAIAppConfig.py +1 -1
  251. metadata/generated/schema/entity/applications/configuration/external/slackAppTokenConfiguration.py +22 -0
  252. metadata/generated/schema/entity/applications/configuration/internal/__init__.py +1 -1
  253. metadata/generated/schema/entity/applications/configuration/internal/dataInsightsAppConfig.py +1 -1
  254. metadata/generated/schema/entity/applications/configuration/internal/dataInsightsReportAppConfig.py +1 -1
  255. metadata/generated/schema/entity/applications/configuration/internal/searchIndexingAppConfig.py +1 -1
  256. metadata/generated/schema/entity/applications/configuration/private/external/__init__.py +1 -1
  257. metadata/generated/schema/entity/applications/configuration/private/external/collateAIAppPrivateConfig.py +1 -1
  258. metadata/generated/schema/entity/applications/createAppRequest.py +1 -1
  259. metadata/generated/schema/entity/applications/jobStatus.py +1 -1
  260. metadata/generated/schema/entity/applications/liveExecutionContext.py +1 -1
  261. metadata/generated/schema/entity/applications/marketplace/__init__.py +1 -1
  262. metadata/generated/schema/entity/applications/marketplace/appMarketPlaceDefinition.py +1 -1
  263. metadata/generated/schema/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.py +1 -1
  264. metadata/generated/schema/entity/applications/scheduledExecutionContext.py +1 -1
  265. metadata/generated/schema/entity/automations/__init__.py +1 -1
  266. metadata/generated/schema/entity/automations/testServiceConnection.py +1 -1
  267. metadata/generated/schema/entity/automations/workflow.py +1 -1
  268. metadata/generated/schema/entity/bot.py +1 -1
  269. metadata/generated/schema/entity/classification/__init__.py +1 -1
  270. metadata/generated/schema/entity/classification/classification.py +1 -1
  271. metadata/generated/schema/entity/classification/tag.py +1 -1
  272. metadata/generated/schema/entity/data/__init__.py +1 -1
  273. metadata/generated/schema/entity/data/apiCollection.py +1 -1
  274. metadata/generated/schema/entity/data/apiEndpoint.py +1 -1
  275. metadata/generated/schema/entity/data/chart.py +1 -1
  276. metadata/generated/schema/entity/data/container.py +1 -1
  277. metadata/generated/schema/entity/data/dashboard.py +1 -1
  278. metadata/generated/schema/entity/data/dashboardDataModel.py +1 -1
  279. metadata/generated/schema/entity/data/database.py +1 -1
  280. metadata/generated/schema/entity/data/databaseSchema.py +1 -1
  281. metadata/generated/schema/entity/data/glossary.py +1 -1
  282. metadata/generated/schema/entity/data/glossaryTerm.py +1 -1
  283. metadata/generated/schema/entity/data/metric.py +1 -1
  284. metadata/generated/schema/entity/data/mlmodel.py +1 -1
  285. metadata/generated/schema/entity/data/pipeline.py +1 -1
  286. metadata/generated/schema/entity/data/query.py +1 -1
  287. metadata/generated/schema/entity/data/report.py +1 -1
  288. metadata/generated/schema/entity/data/searchIndex.py +13 -1
  289. metadata/generated/schema/entity/data/storedProcedure.py +13 -1
  290. metadata/generated/schema/entity/data/table.py +1 -1
  291. metadata/generated/schema/entity/data/topic.py +1 -1
  292. metadata/generated/schema/entity/docStore/__init__.py +1 -1
  293. metadata/generated/schema/entity/docStore/document.py +1 -1
  294. metadata/generated/schema/entity/domains/__init__.py +1 -1
  295. metadata/generated/schema/entity/domains/dataProduct.py +1 -1
  296. metadata/generated/schema/entity/domains/domain.py +1 -1
  297. metadata/generated/schema/entity/events/__init__.py +1 -1
  298. metadata/generated/schema/entity/events/webhook.py +1 -1
  299. metadata/generated/schema/entity/feed/__init__.py +1 -1
  300. metadata/generated/schema/entity/feed/assets.py +1 -1
  301. metadata/generated/schema/entity/feed/customProperty.py +1 -1
  302. metadata/generated/schema/entity/feed/description.py +1 -1
  303. metadata/generated/schema/entity/feed/domain.py +1 -1
  304. metadata/generated/schema/entity/feed/entityInfo.py +1 -1
  305. metadata/generated/schema/entity/feed/owner.py +1 -1
  306. metadata/generated/schema/entity/feed/suggestion.py +1 -1
  307. metadata/generated/schema/entity/feed/tag.py +1 -1
  308. metadata/generated/schema/entity/feed/testCaseResult.py +1 -1
  309. metadata/generated/schema/entity/feed/thread.py +1 -1
  310. metadata/generated/schema/entity/policies/__init__.py +1 -1
  311. metadata/generated/schema/entity/policies/accessControl/__init__.py +1 -1
  312. metadata/generated/schema/entity/policies/accessControl/resourceDescriptor.py +1 -1
  313. metadata/generated/schema/entity/policies/accessControl/resourcePermission.py +1 -1
  314. metadata/generated/schema/entity/policies/accessControl/rule.py +1 -1
  315. metadata/generated/schema/entity/policies/filters.py +1 -1
  316. metadata/generated/schema/entity/policies/policy.py +1 -1
  317. metadata/generated/schema/entity/services/__init__.py +1 -1
  318. metadata/generated/schema/entity/services/apiService.py +1 -1
  319. metadata/generated/schema/entity/services/connections/__init__.py +1 -1
  320. metadata/generated/schema/entity/services/connections/api/__init__.py +1 -1
  321. metadata/generated/schema/entity/services/connections/api/restConnection.py +1 -1
  322. metadata/generated/schema/entity/services/connections/common/__init__.py +1 -1
  323. metadata/generated/schema/entity/services/connections/common/sslCertPaths.py +1 -1
  324. metadata/generated/schema/entity/services/connections/common/sslCertValues.py +1 -1
  325. metadata/generated/schema/entity/services/connections/common/sslConfig.py +1 -1
  326. metadata/generated/schema/entity/services/connections/connectionBasicType.py +1 -1
  327. metadata/generated/schema/entity/services/connections/dashboard/__init__.py +1 -1
  328. metadata/generated/schema/entity/services/connections/dashboard/customDashboardConnection.py +1 -1
  329. metadata/generated/schema/entity/services/connections/dashboard/domoDashboardConnection.py +1 -1
  330. metadata/generated/schema/entity/services/connections/dashboard/lightdashConnection.py +1 -1
  331. metadata/generated/schema/entity/services/connections/dashboard/lookerConnection.py +1 -1
  332. metadata/generated/schema/entity/services/connections/dashboard/metabaseConnection.py +1 -1
  333. metadata/generated/schema/entity/services/connections/dashboard/microStrategyConnection.py +67 -0
  334. metadata/generated/schema/entity/services/connections/dashboard/modeConnection.py +1 -1
  335. metadata/generated/schema/entity/services/connections/dashboard/powerBIConnection.py +1 -1
  336. metadata/generated/schema/entity/services/connections/dashboard/powerBIReportServerConnection.py +1 -1
  337. metadata/generated/schema/entity/services/connections/dashboard/powerbi/__init__.py +1 -1
  338. metadata/generated/schema/entity/services/connections/dashboard/powerbi/azureConfig.py +1 -1
  339. metadata/generated/schema/entity/services/connections/dashboard/powerbi/bucketDetails.py +1 -1
  340. metadata/generated/schema/entity/services/connections/dashboard/powerbi/gcsConfig.py +1 -1
  341. metadata/generated/schema/entity/services/connections/dashboard/powerbi/s3Config.py +1 -1
  342. metadata/generated/schema/entity/services/connections/dashboard/qlikCloudConnection.py +1 -1
  343. metadata/generated/schema/entity/services/connections/dashboard/qlikSenseConnection.py +1 -1
  344. metadata/generated/schema/entity/services/connections/dashboard/quickSightConnection.py +1 -1
  345. metadata/generated/schema/entity/services/connections/dashboard/redashConnection.py +1 -1
  346. metadata/generated/schema/entity/services/connections/dashboard/sigmaConnection.py +1 -1
  347. metadata/generated/schema/entity/services/connections/dashboard/supersetConnection.py +1 -1
  348. metadata/generated/schema/entity/services/connections/dashboard/tableauConnection.py +1 -1
  349. metadata/generated/schema/entity/services/connections/database/__init__.py +1 -1
  350. metadata/generated/schema/entity/services/connections/database/athenaConnection.py +1 -1
  351. metadata/generated/schema/entity/services/connections/database/azureSQLConnection.py +1 -1
  352. metadata/generated/schema/entity/services/connections/database/bigQueryConnection.py +1 -1
  353. metadata/generated/schema/entity/services/connections/database/bigTableConnection.py +1 -1
  354. metadata/generated/schema/entity/services/connections/database/clickhouseConnection.py +1 -1
  355. metadata/generated/schema/entity/services/connections/database/common/__init__.py +1 -1
  356. metadata/generated/schema/entity/services/connections/database/common/azureConfig.py +1 -1
  357. metadata/generated/schema/entity/services/connections/database/common/basicAuth.py +1 -1
  358. metadata/generated/schema/entity/services/connections/database/common/iamAuthConfig.py +1 -1
  359. metadata/generated/schema/entity/services/connections/database/common/jwtAuth.py +1 -1
  360. metadata/generated/schema/entity/services/connections/database/common/noConfigAuthenticationTypes.py +1 -1
  361. metadata/generated/schema/entity/services/connections/database/couchbaseConnection.py +1 -1
  362. metadata/generated/schema/entity/services/connections/database/customDatabaseConnection.py +1 -1
  363. metadata/generated/schema/entity/services/connections/database/databricksConnection.py +1 -1
  364. metadata/generated/schema/entity/services/connections/database/datalake/__init__.py +1 -1
  365. metadata/generated/schema/entity/services/connections/database/datalake/azureConfig.py +1 -1
  366. metadata/generated/schema/entity/services/connections/database/datalake/gcsConfig.py +1 -1
  367. metadata/generated/schema/entity/services/connections/database/datalake/s3Config.py +1 -1
  368. metadata/generated/schema/entity/services/connections/database/datalakeConnection.py +1 -1
  369. metadata/generated/schema/entity/services/connections/database/db2Connection.py +1 -1
  370. metadata/generated/schema/entity/services/connections/database/deltaLakeConnection.py +1 -1
  371. metadata/generated/schema/entity/services/connections/database/deltalake/__init__.py +1 -1
  372. metadata/generated/schema/entity/services/connections/database/deltalake/metastoreConfig.py +1 -1
  373. metadata/generated/schema/entity/services/connections/database/deltalake/storageConfig.py +1 -1
  374. metadata/generated/schema/entity/services/connections/database/domoDatabaseConnection.py +1 -1
  375. metadata/generated/schema/entity/services/connections/database/dorisConnection.py +1 -1
  376. metadata/generated/schema/entity/services/connections/database/druidConnection.py +1 -1
  377. metadata/generated/schema/entity/services/connections/database/dynamoDBConnection.py +1 -1
  378. metadata/generated/schema/entity/services/connections/database/exasolConnection.py +1 -1
  379. metadata/generated/schema/entity/services/connections/database/glueConnection.py +1 -1
  380. metadata/generated/schema/entity/services/connections/database/greenplumConnection.py +1 -1
  381. metadata/generated/schema/entity/services/connections/database/hiveConnection.py +1 -1
  382. metadata/generated/schema/entity/services/connections/database/iceberg/__init__.py +1 -1
  383. metadata/generated/schema/entity/services/connections/database/iceberg/dynamoDbCatalogConnection.py +1 -1
  384. metadata/generated/schema/entity/services/connections/database/iceberg/glueCatalogConnection.py +1 -1
  385. metadata/generated/schema/entity/services/connections/database/iceberg/hiveCatalogConnection.py +1 -1
  386. metadata/generated/schema/entity/services/connections/database/iceberg/icebergCatalog.py +1 -1
  387. metadata/generated/schema/entity/services/connections/database/iceberg/icebergFileSystem.py +1 -1
  388. metadata/generated/schema/entity/services/connections/database/iceberg/restCatalogConnection.py +1 -1
  389. metadata/generated/schema/entity/services/connections/database/icebergConnection.py +1 -1
  390. metadata/generated/schema/entity/services/connections/database/impalaConnection.py +1 -1
  391. metadata/generated/schema/entity/services/connections/database/mariaDBConnection.py +1 -1
  392. metadata/generated/schema/entity/services/connections/database/mongoDBConnection.py +6 -2
  393. metadata/generated/schema/entity/services/connections/database/mssqlConnection.py +1 -1
  394. metadata/generated/schema/entity/services/connections/database/mysqlConnection.py +6 -3
  395. metadata/generated/schema/entity/services/connections/database/oracleConnection.py +1 -1
  396. metadata/generated/schema/entity/services/connections/database/pinotDBConnection.py +1 -1
  397. metadata/generated/schema/entity/services/connections/database/postgresConnection.py +1 -1
  398. metadata/generated/schema/entity/services/connections/database/prestoConnection.py +1 -1
  399. metadata/generated/schema/entity/services/connections/database/redshiftConnection.py +1 -1
  400. metadata/generated/schema/entity/services/connections/database/salesforceConnection.py +1 -1
  401. metadata/generated/schema/entity/services/connections/database/sapErpConnection.py +1 -1
  402. metadata/generated/schema/entity/services/connections/database/sapHana/__init__.py +1 -1
  403. metadata/generated/schema/entity/services/connections/database/sapHana/sapHanaHDBConnection.py +1 -1
  404. metadata/generated/schema/entity/services/connections/database/sapHana/sapHanaSQLConnection.py +1 -1
  405. metadata/generated/schema/entity/services/connections/database/sapHanaConnection.py +1 -1
  406. metadata/generated/schema/entity/services/connections/database/sasConnection.py +1 -1
  407. metadata/generated/schema/entity/services/connections/database/singleStoreConnection.py +1 -1
  408. metadata/generated/schema/entity/services/connections/database/snowflakeConnection.py +1 -1
  409. metadata/generated/schema/entity/services/connections/database/sqliteConnection.py +1 -1
  410. metadata/generated/schema/entity/services/connections/database/synapseConnection.py +1 -1
  411. metadata/generated/schema/entity/services/connections/database/teradataConnection.py +1 -1
  412. metadata/generated/schema/entity/services/connections/database/trinoConnection.py +1 -1
  413. metadata/generated/schema/entity/services/connections/database/unityCatalogConnection.py +1 -1
  414. metadata/generated/schema/entity/services/connections/database/verticaConnection.py +1 -1
  415. metadata/generated/schema/entity/services/connections/messaging/__init__.py +1 -1
  416. metadata/generated/schema/entity/services/connections/messaging/customMessagingConnection.py +1 -1
  417. metadata/generated/schema/entity/services/connections/messaging/kafkaConnection.py +1 -1
  418. metadata/generated/schema/entity/services/connections/messaging/kinesisConnection.py +1 -1
  419. metadata/generated/schema/entity/services/connections/messaging/pulsarConnection.py +1 -1
  420. metadata/generated/schema/entity/services/connections/messaging/redpandaConnection.py +1 -1
  421. metadata/generated/schema/entity/services/connections/messaging/saslMechanismType.py +1 -1
  422. metadata/generated/schema/entity/services/connections/metadata/__init__.py +1 -1
  423. metadata/generated/schema/entity/services/connections/metadata/alationConnection.py +1 -1
  424. metadata/generated/schema/entity/services/connections/metadata/alationSinkConnection.py +1 -1
  425. metadata/generated/schema/entity/services/connections/metadata/amundsenConnection.py +1 -1
  426. metadata/generated/schema/entity/services/connections/metadata/atlasConnection.py +1 -1
  427. metadata/generated/schema/entity/services/connections/metadata/metadataESConnection.py +1 -1
  428. metadata/generated/schema/entity/services/connections/metadata/openMetadataConnection.py +1 -1
  429. metadata/generated/schema/entity/services/connections/mlmodel/__init__.py +1 -1
  430. metadata/generated/schema/entity/services/connections/mlmodel/customMlModelConnection.py +1 -1
  431. metadata/generated/schema/entity/services/connections/mlmodel/mlflowConnection.py +1 -1
  432. metadata/generated/schema/entity/services/connections/mlmodel/sageMakerConnection.py +1 -1
  433. metadata/generated/schema/entity/services/connections/mlmodel/sklearnConnection.py +1 -1
  434. metadata/generated/schema/entity/services/connections/mlmodel/vertexaiConnection.py +1 -1
  435. metadata/generated/schema/entity/services/connections/pipeline/__init__.py +1 -1
  436. metadata/generated/schema/entity/services/connections/pipeline/airbyteConnection.py +1 -1
  437. metadata/generated/schema/entity/services/connections/pipeline/airflowConnection.py +1 -1
  438. metadata/generated/schema/entity/services/connections/pipeline/backendConnection.py +1 -1
  439. metadata/generated/schema/entity/services/connections/pipeline/customPipelineConnection.py +1 -1
  440. metadata/generated/schema/entity/services/connections/pipeline/dagsterConnection.py +1 -1
  441. metadata/generated/schema/entity/services/connections/pipeline/databricksPipelineConnection.py +1 -1
  442. metadata/generated/schema/entity/services/connections/pipeline/datafactoryConnection.py +1 -1
  443. metadata/generated/schema/entity/services/connections/pipeline/dbtCloudConnection.py +17 -5
  444. metadata/generated/schema/entity/services/connections/pipeline/domoPipelineConnection.py +1 -1
  445. metadata/generated/schema/entity/services/connections/pipeline/fivetranConnection.py +1 -1
  446. metadata/generated/schema/entity/services/connections/pipeline/flinkConnection.py +1 -1
  447. metadata/generated/schema/entity/services/connections/pipeline/gluePipelineConnection.py +1 -1
  448. metadata/generated/schema/entity/services/connections/pipeline/kafkaConnectConnection.py +1 -1
  449. metadata/generated/schema/entity/services/connections/pipeline/matillionConnection.py +1 -1
  450. metadata/generated/schema/entity/services/connections/pipeline/nifiConnection.py +1 -1
  451. metadata/generated/schema/entity/services/connections/pipeline/openLineageConnection.py +1 -1
  452. metadata/generated/schema/entity/services/connections/pipeline/sparkConnection.py +1 -1
  453. metadata/generated/schema/entity/services/connections/pipeline/splineConnection.py +1 -1
  454. metadata/generated/schema/entity/services/connections/pipeline/stitchConnection.py +1 -1
  455. metadata/generated/schema/entity/services/connections/search/__init__.py +1 -1
  456. metadata/generated/schema/entity/services/connections/search/customSearchConnection.py +1 -1
  457. metadata/generated/schema/entity/services/connections/search/elasticSearch/__init__.py +1 -1
  458. metadata/generated/schema/entity/services/connections/search/elasticSearch/apiAuth.py +1 -1
  459. metadata/generated/schema/entity/services/connections/search/elasticSearch/basicAuth.py +1 -1
  460. metadata/generated/schema/entity/services/connections/search/elasticSearchConnection.py +1 -1
  461. metadata/generated/schema/entity/services/connections/search/openSearchConnection.py +1 -1
  462. metadata/generated/schema/entity/services/connections/serviceConnection.py +1 -1
  463. metadata/generated/schema/entity/services/connections/storage/__init__.py +1 -1
  464. metadata/generated/schema/entity/services/connections/storage/adlsConnection.py +1 -1
  465. metadata/generated/schema/entity/services/connections/storage/customStorageConnection.py +1 -1
  466. metadata/generated/schema/entity/services/connections/storage/gcsConnection.py +1 -1
  467. metadata/generated/schema/entity/services/connections/storage/s3Connection.py +1 -1
  468. metadata/generated/schema/entity/services/connections/testConnectionDefinition.py +1 -1
  469. metadata/generated/schema/entity/services/connections/testConnectionResult.py +1 -1
  470. metadata/generated/schema/entity/services/dashboardService.py +4 -4
  471. metadata/generated/schema/entity/services/databaseService.py +1 -1
  472. metadata/generated/schema/entity/services/ingestionPipelines/__init__.py +1 -1
  473. metadata/generated/schema/entity/services/ingestionPipelines/ingestionPipeline.py +2 -1
  474. metadata/generated/schema/entity/services/ingestionPipelines/pipelineServiceClientResponse.py +1 -1
  475. metadata/generated/schema/entity/services/ingestionPipelines/status.py +1 -1
  476. metadata/generated/schema/entity/services/messagingService.py +1 -1
  477. metadata/generated/schema/entity/services/metadataService.py +1 -1
  478. metadata/generated/schema/entity/services/mlmodelService.py +1 -1
  479. metadata/generated/schema/entity/services/pipelineService.py +1 -1
  480. metadata/generated/schema/entity/services/searchService.py +1 -1
  481. metadata/generated/schema/entity/services/serviceType.py +1 -1
  482. metadata/generated/schema/entity/services/storageService.py +1 -1
  483. metadata/generated/schema/entity/teams/__init__.py +1 -1
  484. metadata/generated/schema/entity/teams/persona.py +1 -1
  485. metadata/generated/schema/entity/teams/role.py +1 -1
  486. metadata/generated/schema/entity/teams/team.py +1 -1
  487. metadata/generated/schema/entity/teams/teamHierarchy.py +1 -1
  488. metadata/generated/schema/entity/teams/user.py +1 -1
  489. metadata/generated/schema/entity/type.py +1 -1
  490. metadata/generated/schema/entity/utils/__init__.py +1 -1
  491. metadata/generated/schema/entity/utils/entitiesCount.py +1 -1
  492. metadata/generated/schema/entity/utils/servicesCount.py +1 -1
  493. metadata/generated/schema/entity/utils/supersetApiConnection.py +1 -1
  494. metadata/generated/schema/events/__init__.py +1 -1
  495. metadata/generated/schema/events/alertMetrics.py +1 -1
  496. metadata/generated/schema/events/api/__init__.py +1 -1
  497. metadata/generated/schema/events/api/createEventSubscription.py +1 -1
  498. metadata/generated/schema/events/api/eventSubscriptionDiagnosticInfo.py +1 -1
  499. metadata/generated/schema/events/api/eventsRecord.py +1 -1
  500. metadata/generated/schema/events/api/testEventSubscriptionDestination.py +1 -1
  501. metadata/generated/schema/events/api/typedEvent.py +1 -1
  502. metadata/generated/schema/events/emailAlertConfig.py +1 -1
  503. metadata/generated/schema/events/eventFilterRule.py +1 -1
  504. metadata/generated/schema/events/eventSubscription.py +1 -1
  505. metadata/generated/schema/events/eventSubscriptionOffset.py +1 -1
  506. metadata/generated/schema/events/failedEvent.py +1 -1
  507. metadata/generated/schema/events/failedEventResponse.py +1 -1
  508. metadata/generated/schema/events/filterResourceDescriptor.py +1 -1
  509. metadata/generated/schema/events/statusContext.py +1 -1
  510. metadata/generated/schema/events/subscriptionResourceDescriptor.py +1 -1
  511. metadata/generated/schema/events/subscriptionStatus.py +1 -1
  512. metadata/generated/schema/events/testDestinationStatus.py +1 -1
  513. metadata/generated/schema/governance/workflows/__init__.py +1 -1
  514. metadata/generated/schema/governance/workflows/elements/__init__.py +1 -1
  515. metadata/generated/schema/governance/workflows/elements/edge.py +1 -1
  516. metadata/generated/schema/governance/workflows/elements/nodeSubType.py +1 -1
  517. metadata/generated/schema/governance/workflows/elements/nodeType.py +1 -1
  518. metadata/generated/schema/governance/workflows/elements/nodes/__init__.py +1 -1
  519. metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/__init__.py +1 -1
  520. metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/checkEntityAttributesTask.py +1 -1
  521. metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/setEntityCertificationTask.py +1 -1
  522. metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/setGlossaryTermStatusTask.py +1 -1
  523. metadata/generated/schema/governance/workflows/elements/nodes/endEvent/__init__.py +1 -1
  524. metadata/generated/schema/governance/workflows/elements/nodes/endEvent/endEvent.py +1 -1
  525. metadata/generated/schema/governance/workflows/elements/nodes/startEvent/__init__.py +1 -1
  526. metadata/generated/schema/governance/workflows/elements/nodes/startEvent/startEvent.py +1 -1
  527. metadata/generated/schema/governance/workflows/elements/nodes/userTask/__init__.py +1 -1
  528. metadata/generated/schema/governance/workflows/elements/nodes/userTask/userApprovalTask.py +1 -1
  529. metadata/generated/schema/governance/workflows/elements/triggers/__init__.py +1 -1
  530. metadata/generated/schema/governance/workflows/elements/triggers/eventBasedEntityTrigger.py +1 -1
  531. metadata/generated/schema/governance/workflows/elements/triggers/periodicBatchEntityTrigger.py +2 -2
  532. metadata/generated/schema/governance/workflows/workflowDefinition.py +1 -1
  533. metadata/generated/schema/governance/workflows/workflowInstance.py +8 -1
  534. metadata/generated/schema/governance/workflows/workflowInstanceState.py +8 -1
  535. metadata/generated/schema/metadataIngestion/__init__.py +1 -1
  536. metadata/generated/schema/metadataIngestion/apiServiceMetadataPipeline.py +1 -1
  537. metadata/generated/schema/metadataIngestion/application.py +1 -1
  538. metadata/generated/schema/metadataIngestion/applicationPipeline.py +1 -1
  539. metadata/generated/schema/metadataIngestion/dashboardServiceMetadataPipeline.py +1 -1
  540. metadata/generated/schema/metadataIngestion/dataInsightPipeline.py +1 -1
  541. metadata/generated/schema/metadataIngestion/databaseServiceAutoClassificationPipeline.py +127 -0
  542. metadata/generated/schema/metadataIngestion/databaseServiceMetadataPipeline.py +1 -1
  543. metadata/generated/schema/metadataIngestion/databaseServiceProfilerPipeline.py +1 -25
  544. metadata/generated/schema/metadataIngestion/databaseServiceQueryLineagePipeline.py +1 -1
  545. metadata/generated/schema/metadataIngestion/databaseServiceQueryUsagePipeline.py +1 -1
  546. metadata/generated/schema/metadataIngestion/dbtPipeline.py +1 -1
  547. metadata/generated/schema/metadataIngestion/dbtconfig/__init__.py +1 -1
  548. metadata/generated/schema/metadataIngestion/dbtconfig/dbtAzureConfig.py +1 -1
  549. metadata/generated/schema/metadataIngestion/dbtconfig/dbtBucketDetails.py +1 -1
  550. metadata/generated/schema/metadataIngestion/dbtconfig/dbtCloudConfig.py +1 -1
  551. metadata/generated/schema/metadataIngestion/dbtconfig/dbtGCSConfig.py +1 -1
  552. metadata/generated/schema/metadataIngestion/dbtconfig/dbtHttpConfig.py +9 -1
  553. metadata/generated/schema/metadataIngestion/dbtconfig/dbtLocalConfig.py +9 -1
  554. metadata/generated/schema/metadataIngestion/dbtconfig/dbtS3Config.py +1 -1
  555. metadata/generated/schema/metadataIngestion/messagingServiceMetadataPipeline.py +1 -1
  556. metadata/generated/schema/metadataIngestion/metadataToElasticSearchPipeline.py +1 -1
  557. metadata/generated/schema/metadataIngestion/mlmodelServiceMetadataPipeline.py +1 -1
  558. metadata/generated/schema/metadataIngestion/pipelineServiceMetadataPipeline.py +1 -1
  559. metadata/generated/schema/metadataIngestion/searchServiceMetadataPipeline.py +9 -1
  560. metadata/generated/schema/metadataIngestion/storage/__init__.py +1 -1
  561. metadata/generated/schema/metadataIngestion/storage/containerMetadataConfig.py +1 -1
  562. metadata/generated/schema/metadataIngestion/storage/manifestMetadataConfig.py +1 -1
  563. metadata/generated/schema/metadataIngestion/storage/storageBucketDetails.py +1 -1
  564. metadata/generated/schema/metadataIngestion/storage/storageMetadataADLSConfig.py +1 -1
  565. metadata/generated/schema/metadataIngestion/storage/storageMetadataGCSConfig.py +1 -1
  566. metadata/generated/schema/metadataIngestion/storage/storageMetadataHttpConfig.py +1 -1
  567. metadata/generated/schema/metadataIngestion/storage/storageMetadataLocalConfig.py +1 -1
  568. metadata/generated/schema/metadataIngestion/storage/storageMetadataS3Config.py +1 -1
  569. metadata/generated/schema/metadataIngestion/storageServiceMetadataPipeline.py +1 -1
  570. metadata/generated/schema/metadataIngestion/testSuitePipeline.py +1 -1
  571. metadata/generated/schema/metadataIngestion/workflow.py +3 -1
  572. metadata/generated/schema/monitoring/__init__.py +1 -1
  573. metadata/generated/schema/monitoring/eventMonitorProvider.py +1 -1
  574. metadata/generated/schema/security/__init__.py +1 -1
  575. metadata/generated/schema/security/client/__init__.py +1 -1
  576. metadata/generated/schema/security/client/auth0SSOClientConfig.py +1 -1
  577. metadata/generated/schema/security/client/azureSSOClientConfig.py +1 -1
  578. metadata/generated/schema/security/client/customOidcSSOClientConfig.py +1 -1
  579. metadata/generated/schema/security/client/googleSSOClientConfig.py +1 -1
  580. metadata/generated/schema/security/client/oidcClientConfig.py +1 -1
  581. metadata/generated/schema/security/client/oktaSSOClientConfig.py +1 -1
  582. metadata/generated/schema/security/client/openMetadataJWTClientConfig.py +1 -1
  583. metadata/generated/schema/security/client/samlSSOClientConfig.py +1 -1
  584. metadata/generated/schema/security/credentials/__init__.py +1 -1
  585. metadata/generated/schema/security/credentials/accessTokenAuth.py +1 -1
  586. metadata/generated/schema/security/credentials/apiAccessTokenAuth.py +1 -1
  587. metadata/generated/schema/security/credentials/awsCredentials.py +1 -1
  588. metadata/generated/schema/security/credentials/azureCredentials.py +1 -1
  589. metadata/generated/schema/security/credentials/basicAuth.py +1 -1
  590. metadata/generated/schema/security/credentials/bitbucketCredentials.py +1 -1
  591. metadata/generated/schema/security/credentials/gcpCredentials.py +1 -1
  592. metadata/generated/schema/security/credentials/gcpExternalAccount.py +1 -1
  593. metadata/generated/schema/security/credentials/gcpValues.py +1 -1
  594. metadata/generated/schema/security/credentials/gitCredentials.py +1 -1
  595. metadata/generated/schema/security/credentials/githubCredentials.py +1 -1
  596. metadata/generated/schema/security/credentials/gitlabCredentials.py +1 -1
  597. metadata/generated/schema/security/sasl/__init__.py +1 -1
  598. metadata/generated/schema/security/sasl/saslClientConfig.py +1 -1
  599. metadata/generated/schema/security/secrets/__init__.py +1 -1
  600. metadata/generated/schema/security/secrets/secretsManagerClientLoader.py +1 -1
  601. metadata/generated/schema/security/secrets/secretsManagerConfiguration.py +1 -1
  602. metadata/generated/schema/security/secrets/secretsManagerProvider.py +1 -1
  603. metadata/generated/schema/security/securityConfiguration.py +1 -1
  604. metadata/generated/schema/security/ssl/__init__.py +1 -1
  605. metadata/generated/schema/security/ssl/validateSSLClientConfig.py +1 -1
  606. metadata/generated/schema/security/ssl/verifySSLConfig.py +1 -1
  607. metadata/generated/schema/settings/__init__.py +1 -1
  608. metadata/generated/schema/settings/settings.py +1 -1
  609. metadata/generated/schema/system/__init__.py +1 -1
  610. metadata/generated/schema/system/entityError.py +1 -1
  611. metadata/generated/schema/system/eventPublisherJob.py +2 -1
  612. metadata/generated/schema/system/indexingError.py +1 -1
  613. metadata/generated/schema/system/limitsResponse.py +1 -1
  614. metadata/generated/schema/system/ui/__init__.py +1 -1
  615. metadata/generated/schema/system/ui/knowledgePanel.py +1 -1
  616. metadata/generated/schema/system/ui/page.py +1 -1
  617. metadata/generated/schema/system/validationResponse.py +1 -1
  618. metadata/generated/schema/tests/__init__.py +1 -1
  619. metadata/generated/schema/tests/assigned.py +1 -1
  620. metadata/generated/schema/tests/basic.py +1 -1
  621. metadata/generated/schema/tests/customMetric.py +1 -1
  622. metadata/generated/schema/tests/dataQualityReport.py +1 -1
  623. metadata/generated/schema/tests/resolved.py +1 -1
  624. metadata/generated/schema/tests/testCase.py +1 -1
  625. metadata/generated/schema/tests/testCaseResolutionStatus.py +1 -1
  626. metadata/generated/schema/tests/testDefinition.py +1 -1
  627. metadata/generated/schema/tests/testSuite.py +1 -1
  628. metadata/generated/schema/type/__init__.py +1 -1
  629. metadata/generated/schema/type/apiSchema.py +1 -1
  630. metadata/generated/schema/type/assetCertification.py +1 -1
  631. metadata/generated/schema/type/auditLog.py +1 -1
  632. metadata/generated/schema/type/basic.py +1 -1
  633. metadata/generated/schema/type/bulkOperationResult.py +1 -1
  634. metadata/generated/schema/type/changeEvent.py +1 -1
  635. metadata/generated/schema/type/changeEventType.py +1 -1
  636. metadata/generated/schema/type/collectionDescriptor.py +1 -1
  637. metadata/generated/schema/type/csvDocumentation.py +1 -1
  638. metadata/generated/schema/type/csvErrorType.py +1 -1
  639. metadata/generated/schema/type/csvFile.py +1 -1
  640. metadata/generated/schema/type/csvImportResult.py +1 -1
  641. metadata/generated/schema/type/customProperties/__init__.py +1 -1
  642. metadata/generated/schema/type/customProperties/complexTypes.py +1 -1
  643. metadata/generated/schema/type/customProperties/enumConfig.py +1 -1
  644. metadata/generated/schema/type/customProperties/tableConfig.py +1 -1
  645. metadata/generated/schema/type/customProperty.py +1 -1
  646. metadata/generated/schema/type/dailyCount.py +1 -1
  647. metadata/generated/schema/type/databaseConnectionConfig.py +1 -1
  648. metadata/generated/schema/type/entityHierarchy.py +1 -1
  649. metadata/generated/schema/type/entityHistory.py +1 -1
  650. metadata/generated/schema/type/entityLineage.py +1 -1
  651. metadata/generated/schema/type/entityReference.py +1 -1
  652. metadata/generated/schema/type/entityReferenceList.py +1 -1
  653. metadata/generated/schema/type/entityRelationship.py +1 -1
  654. metadata/generated/schema/type/entityUsage.py +1 -1
  655. metadata/generated/schema/type/filterPattern.py +1 -1
  656. metadata/generated/schema/type/function.py +1 -1
  657. metadata/generated/schema/type/include.py +1 -1
  658. metadata/generated/schema/type/jdbcConnection.py +1 -1
  659. metadata/generated/schema/type/lifeCycle.py +1 -1
  660. metadata/generated/schema/type/paging.py +1 -1
  661. metadata/generated/schema/type/profile.py +1 -1
  662. metadata/generated/schema/type/queryParserData.py +1 -1
  663. metadata/generated/schema/type/reaction.py +1 -1
  664. metadata/generated/schema/type/schedule.py +1 -1
  665. metadata/generated/schema/type/schema.py +1 -1
  666. metadata/generated/schema/type/tableQuery.py +1 -1
  667. metadata/generated/schema/type/tableUsageCount.py +1 -1
  668. metadata/generated/schema/type/tagLabel.py +1 -1
  669. metadata/generated/schema/type/usageDetails.py +1 -1
  670. metadata/generated/schema/type/usageRequest.py +1 -1
  671. metadata/generated/schema/type/votes.py +1 -1
  672. metadata/ingestion/api/models.py +9 -5
  673. metadata/ingestion/models/patch_request.py +1 -0
  674. metadata/ingestion/ometa/client.py +13 -8
  675. metadata/ingestion/ometa/models.py +1 -0
  676. metadata/ingestion/ometa/ometa_api.py +6 -3
  677. metadata/ingestion/sink/metadata_rest.py +23 -18
  678. metadata/ingestion/source/dashboard/microstrategy/client.py +252 -0
  679. metadata/ingestion/source/dashboard/{mstr → microstrategy}/connection.py +7 -10
  680. metadata/ingestion/source/dashboard/{mstr → microstrategy}/metadata.py +74 -51
  681. metadata/ingestion/source/dashboard/{mstr → microstrategy}/models.py +9 -3
  682. metadata/ingestion/source/dashboard/microstrategy/service_spec.py +6 -0
  683. metadata/ingestion/source/dashboard/powerbi/client.py +1 -2
  684. metadata/ingestion/source/dashboard/superset/db_source.py +1 -1
  685. metadata/ingestion/source/database/athena/metadata.py +8 -0
  686. metadata/ingestion/source/database/azuresql/service_spec.py +2 -0
  687. metadata/ingestion/source/database/bigquery/profiler/profiler.py +1 -1
  688. metadata/ingestion/source/database/bigquery/service_spec.py +2 -0
  689. metadata/ingestion/source/database/common_nosql_source.py +7 -0
  690. metadata/ingestion/source/database/databricks/metadata.py +1 -3
  691. metadata/ingestion/source/database/databricks/service_spec.py +4 -0
  692. metadata/ingestion/source/database/datalake/clients/azure_blob.py +0 -3
  693. metadata/ingestion/source/database/datalake/clients/base.py +10 -1
  694. metadata/ingestion/source/database/datalake/clients/gcs.py +4 -2
  695. metadata/ingestion/source/database/datalake/clients/s3.py +0 -3
  696. metadata/ingestion/source/database/datalake/service_spec.py +8 -1
  697. metadata/ingestion/source/database/dbt/constants.py +4 -0
  698. metadata/ingestion/source/database/dbt/dbt_config.py +19 -1
  699. metadata/ingestion/source/database/dbt/dbt_service.py +8 -1
  700. metadata/ingestion/source/database/dbt/dbt_utils.py +29 -0
  701. metadata/ingestion/source/database/dbt/metadata.py +70 -1
  702. metadata/ingestion/source/database/dbt/models.py +2 -0
  703. metadata/ingestion/source/database/dynamodb/service_spec.py +4 -1
  704. metadata/ingestion/source/database/mongodb/service_spec.py +4 -1
  705. metadata/ingestion/source/database/mysql/connection.py +5 -0
  706. metadata/ingestion/source/database/mysql/lineage.py +12 -30
  707. metadata/ingestion/source/database/mysql/queries.py +43 -0
  708. metadata/ingestion/source/database/mysql/query_parser.py +46 -0
  709. metadata/ingestion/source/database/mysql/service_spec.py +7 -1
  710. metadata/{data_quality/builders/pandas_validator_builder.py → ingestion/source/database/mysql/usage.py} +9 -14
  711. metadata/ingestion/source/database/sample_data.py +4 -12
  712. metadata/ingestion/source/database/snowflake/connection.py +1 -1
  713. metadata/ingestion/source/database/snowflake/service_spec.py +6 -0
  714. metadata/ingestion/source/database/trino/service_spec.py +2 -0
  715. metadata/ingestion/source/database/unitycatalog/service_spec.py +4 -0
  716. metadata/ingestion/source/pipeline/airflow/connection.py +45 -2
  717. metadata/ingestion/source/pipeline/airflow/metadata.py +35 -25
  718. metadata/ingestion/source/pipeline/dbtcloud/client.py +67 -28
  719. metadata/ingestion/source/pipeline/dbtcloud/connection.py +1 -3
  720. metadata/ingestion/source/pipeline/dbtcloud/models.py +1 -1
  721. metadata/ingestion/source/pipeline/kafkaconnect/client.py +1 -1
  722. metadata/ingestion/source/search/elasticsearch/metadata.py +53 -0
  723. metadata/ingestion/source/search/search_service.py +44 -1
  724. metadata/mixins/pandas/pandas_mixin.py +2 -31
  725. metadata/mixins/sqalchemy/sqa_mixin.py +16 -16
  726. metadata/pii/processor.py +10 -9
  727. metadata/profiler/api/models.py +3 -79
  728. metadata/profiler/config.py +39 -0
  729. metadata/profiler/interface/nosql/profiler_interface.py +1 -26
  730. metadata/profiler/interface/pandas/profiler_interface.py +37 -77
  731. metadata/profiler/interface/profiler_interface.py +10 -282
  732. metadata/profiler/interface/sqlalchemy/bigquery/profiler_interface.py +0 -19
  733. metadata/profiler/interface/sqlalchemy/databricks/profiler_interface.py +2 -17
  734. metadata/profiler/interface/sqlalchemy/db2/profiler_interface.py +1 -1
  735. metadata/profiler/interface/sqlalchemy/mariadb/profiler_interface.py +2 -2
  736. metadata/profiler/interface/sqlalchemy/profiler_interface.py +46 -109
  737. metadata/profiler/interface/sqlalchemy/single_store/profiler_interface.py +2 -2
  738. metadata/profiler/interface/sqlalchemy/snowflake/profiler_interface.py +1 -1
  739. metadata/profiler/interface/sqlalchemy/stored_statistics_profiler.py +6 -8
  740. metadata/profiler/interface/sqlalchemy/trino/profiler_interface.py +2 -2
  741. metadata/profiler/orm/converter/base.py +21 -12
  742. metadata/profiler/orm/functions/table_metric_computer.py +5 -4
  743. metadata/profiler/processor/core.py +5 -58
  744. metadata/profiler/processor/handle_partition.py +0 -48
  745. metadata/profiler/processor/runner.py +111 -35
  746. metadata/profiler/processor/sample_data_handler.py +7 -4
  747. metadata/profiler/source/database/base/profiler_source.py +57 -138
  748. metadata/profiler/source/database/bigquery/profiler_source.py +3 -3
  749. metadata/profiler/source/database/databricks/profiler_source.py +2 -3
  750. metadata/profiler/source/fetcher/config.py +44 -0
  751. metadata/profiler/source/fetcher/fetcher_strategy.py +2 -4
  752. metadata/sampler/config.py +237 -0
  753. metadata/sampler/models.py +106 -0
  754. metadata/{profiler/processor/sampler → sampler}/nosql/sampler.py +44 -12
  755. metadata/sampler/pandas/sampler.py +239 -0
  756. metadata/{utils → sampler}/partition.py +66 -51
  757. metadata/sampler/processor.py +158 -0
  758. metadata/sampler/sampler_interface.py +251 -0
  759. metadata/{profiler/processor/sampler → sampler}/sqlalchemy/azuresql/sampler.py +1 -1
  760. metadata/{profiler/processor/sampler → sampler}/sqlalchemy/bigquery/sampler.py +55 -26
  761. metadata/sampler/sqlalchemy/postgres/sampler.py +91 -0
  762. metadata/{profiler/processor/sampler → sampler}/sqlalchemy/sampler.py +90 -98
  763. metadata/sampler/sqlalchemy/snowflake/sampler.py +95 -0
  764. metadata/{profiler/processor/sampler → sampler}/sqlalchemy/trino/sampler.py +5 -3
  765. metadata/utils/constants.py +63 -0
  766. metadata/utils/helpers.py +1 -2
  767. metadata/utils/logger.py +9 -0
  768. metadata/utils/profiler_utils.py +42 -0
  769. metadata/utils/service_spec/default.py +6 -0
  770. metadata/utils/service_spec/service_spec.py +47 -0
  771. metadata/utils/sqlalchemy_utils.py +1 -1
  772. metadata/utils/ssl_manager.py +39 -1
  773. metadata/workflow/classification.py +53 -0
  774. metadata/workflow/profiler.py +1 -17
  775. {openmetadata_ingestion-1.6.0.0rc1.dist-info → openmetadata_ingestion-1.6.0.0rc2.dist-info}/METADATA +366 -366
  776. {openmetadata_ingestion-1.6.0.0rc1.dist-info → openmetadata_ingestion-1.6.0.0rc2.dist-info}/RECORD +781 -771
  777. metadata/data_quality/builders/sqa_validator_builder.py +0 -25
  778. metadata/data_quality/interface/test_suite_interface_factory.py +0 -158
  779. metadata/data_quality/runner/test_suite_source_factory.py +0 -38
  780. metadata/generated/schema/entity/services/connections/dashboard/mstrConnection.py +0 -54
  781. metadata/ingestion/source/dashboard/mstr/client.py +0 -209
  782. metadata/ingestion/source/dashboard/mstr/service_spec.py +0 -4
  783. metadata/profiler/processor/sampler/pandas/sampler.py +0 -170
  784. metadata/profiler/processor/sampler/sampler_factory.py +0 -100
  785. metadata/profiler/processor/sampler/sampler_interface.py +0 -74
  786. metadata/profiler/processor/sampler/sqlalchemy/snowflake/sampler.py +0 -83
  787. /metadata/ingestion/source/dashboard/{mstr → microstrategy}/__init__.py +0 -0
  788. {openmetadata_ingestion-1.6.0.0rc1.dist-info → openmetadata_ingestion-1.6.0.0rc2.dist-info}/LICENSE +0 -0
  789. {openmetadata_ingestion-1.6.0.0rc1.dist-info → openmetadata_ingestion-1.6.0.0rc2.dist-info}/WHEEL +0 -0
  790. {openmetadata_ingestion-1.6.0.0rc1.dist-info → openmetadata_ingestion-1.6.0.0rc2.dist-info}/entry_points.txt +0 -0
  791. {openmetadata_ingestion-1.6.0.0rc1.dist-info → openmetadata_ingestion-1.6.0.0rc2.dist-info}/top_level.txt +0 -0
@@ -16,29 +16,24 @@ import traceback
16
16
  from typing import List, Optional, Union, cast
17
17
 
18
18
  from sqlalchemy import Column, inspect, text
19
- from sqlalchemy.orm import DeclarativeMeta, Query, aliased
19
+ from sqlalchemy.orm import DeclarativeMeta, Query
20
20
  from sqlalchemy.orm.util import AliasedClass
21
+ from sqlalchemy.schema import Table
21
22
  from sqlalchemy.sql.sqltypes import Enum
22
23
 
23
24
  from metadata.generated.schema.entity.data.table import (
24
- PartitionIntervalTypes,
25
25
  PartitionProfilerConfig,
26
26
  ProfileSampleType,
27
27
  TableData,
28
28
  )
29
+ from metadata.ingestion.connections.session import create_and_bind_thread_safe_session
30
+ from metadata.mixins.sqalchemy.sqa_mixin import SQAInterfaceMixin
29
31
  from metadata.profiler.orm.functions.modulo import ModuloFn
30
32
  from metadata.profiler.orm.functions.random_num import RandomNumFn
31
- from metadata.profiler.processor.handle_partition import partition_filter_handler
32
- from metadata.profiler.processor.sampler.sampler_interface import SamplerInterface
33
+ from metadata.profiler.processor.handle_partition import build_partition_predicate
34
+ from metadata.sampler.sampler_interface import SamplerInterface
33
35
  from metadata.utils.helpers import is_safe_sql_query
34
36
  from metadata.utils.logger import profiler_interface_registry_logger
35
- from metadata.utils.sqa_utils import (
36
- build_query_filter,
37
- dispatch_to_date_or_datetime,
38
- get_integer_range_filter,
39
- get_partition_col_type,
40
- get_value_filter,
41
- )
42
37
 
43
38
  logger = profiler_interface_registry_logger()
44
39
 
@@ -61,12 +56,37 @@ def _object_value_for_elem(self, elem):
61
56
  Enum._object_value_for_elem = _object_value_for_elem # pylint: disable=protected-access
62
57
 
63
58
 
64
- class SQASampler(SamplerInterface):
59
+ class SQASampler(SamplerInterface, SQAInterfaceMixin):
65
60
  """
66
61
  Generates a sample of the data to not
67
62
  run the query in the whole table.
63
+
64
+ Args:
65
+ orm_table (Optional[DeclarativeMeta]): ORM Table
68
66
  """
69
67
 
68
+ def __init__(self, *args, **kwargs):
69
+ super().__init__(*args, **kwargs)
70
+ self._table = self.build_table_orm(
71
+ self.entity, self.service_connection_config, self.ometa_client
72
+ )
73
+
74
+ @property
75
+ def raw_dataset(self):
76
+ return self._table
77
+
78
+ def get_client(self):
79
+ """Build the SQA Client"""
80
+ session_factory = create_and_bind_thread_safe_session(self.connection)
81
+ return session_factory()
82
+
83
+ def set_tablesample(self, selectable: Table):
84
+ """Set the tablesample for the table. To be implemented by the child SQA sampler class
85
+ Args:
86
+ selectable (Table): a selectable table
87
+ """
88
+ return selectable
89
+
70
90
  def _base_sample_query(self, column: Optional[Column], label=None):
71
91
  """Base query for sampling
72
92
 
@@ -77,54 +97,60 @@ class SQASampler(SamplerInterface):
77
97
  Returns:
78
98
  """
79
99
  # only sample the column if we are computing a column metric to limit the amount of data scaned
80
- entity = self.table if column is None else column
100
+ selectable = self.set_tablesample(self.raw_dataset.__table__)
101
+
102
+ entity = selectable if column is None else selectable.c.get(column.key)
81
103
  if label is not None:
82
- return self.client.query(entity, label)
83
- return self.client.query(entity)
104
+ query = self.client.query(entity, label)
105
+ else:
106
+ query = self.client.query(entity)
107
+
108
+ if self.partition_details:
109
+ query = self.get_partitioned_query(query)
110
+ return query
84
111
 
85
- @partition_filter_handler(build_sample=True)
86
112
  def get_sample_query(self, *, column=None) -> Query:
87
113
  """get query for sample data"""
88
- if self.profile_sample_type == ProfileSampleType.PERCENTAGE:
114
+ if self.sample_config.profile_sample_type == ProfileSampleType.PERCENTAGE:
89
115
  rnd = self._base_sample_query(
90
116
  column,
91
117
  (ModuloFn(RandomNumFn(), 100)).label(RANDOM_LABEL),
92
- ).cte(f"{self.table.__tablename__}_rnd")
118
+ ).cte(f"{self.raw_dataset.__tablename__}_rnd")
93
119
  session_query = self.client.query(rnd)
94
- return session_query.where(rnd.c.random <= self.profile_sample).cte(
95
- f"{self.table.__tablename__}_sample"
96
- )
120
+ return session_query.where(
121
+ rnd.c.random <= self.sample_config.profile_sample
122
+ ).cte(f"{self.raw_dataset.__tablename__}_sample")
97
123
 
98
- table_query = self.client.query(self.table)
124
+ table_query = self.client.query(self.raw_dataset)
99
125
  session_query = self._base_sample_query(
100
126
  column,
101
127
  (ModuloFn(RandomNumFn(), table_query.count())).label(RANDOM_LABEL),
102
128
  )
103
129
  return (
104
130
  session_query.order_by(RANDOM_LABEL)
105
- .limit(self.profile_sample)
106
- .cte(f"{self.table.__tablename__}_rnd")
131
+ .limit(self.sample_config.profile_sample)
132
+ .cte(f"{self.raw_dataset.__tablename__}_rnd")
107
133
  )
108
134
 
109
- def random_sample(self, ccolumn=None) -> Union[DeclarativeMeta, AliasedClass]:
135
+ def get_dataset(self, column=None, **__) -> Union[DeclarativeMeta, AliasedClass]:
110
136
  """
111
137
  Either return a sampled CTE of table, or
112
138
  the full table if no sampling is required.
113
139
  """
114
- if self._profile_sample_query:
140
+ if self.sample_query:
115
141
  return self._rdn_sample_from_user_query()
116
142
 
117
- if not self.profile_sample or int(self.profile_sample) == 100:
118
- if self._partition_details:
119
- return self._partitioned_table()
120
-
121
- return self.table
143
+ if not self.sample_config.profile_sample or (
144
+ int(self.sample_config.profile_sample) == 100
145
+ and self.sample_config.profile_sample_type == ProfileSampleType.PERCENTAGE
146
+ ):
147
+ if self.partition_details:
148
+ partitioned = self._partitioned_table()
149
+ return partitioned.cte(f"{self.raw_dataset.__tablename__}_partitioned")
122
150
 
123
- # Add new RandomNumFn column
124
- sampled = self.get_sample_query(column=ccolumn)
151
+ return self.raw_dataset
125
152
 
126
- # Assign as an alias
127
- return aliased(self.table, sampled)
153
+ return self.get_sample_query(column=column)
128
154
 
129
155
  def fetch_sample_data(self, columns: Optional[List[Column]] = None) -> TableData:
130
156
  """
@@ -132,10 +158,10 @@ class SQASampler(SamplerInterface):
132
158
 
133
159
  Args:
134
160
  columns (Optional[List]): List of columns to fetch
135
- Retunrs:
161
+ Returns:
136
162
  TableData to be added to the Table Entity
137
163
  """
138
- if self._profile_sample_query:
164
+ if self.sample_query:
139
165
  return self._fetch_sample_data_from_user_query()
140
166
 
141
167
  # Add new RandomNumFn column
@@ -143,8 +169,8 @@ class SQASampler(SamplerInterface):
143
169
  if not columns:
144
170
  sqa_columns = [col for col in inspect(rnd).c if col.name != RANDOM_LABEL]
145
171
  else:
146
- # we can't directly use columns as it is bound to self.table and not the rnd table.
147
- # If we use it, it will result in a cross join between self.table and rnd table
172
+ # we can't directly use columns as it is bound to self.raw_dataset and not the rnd table.
173
+ # If we use it, it will result in a cross join between self.raw_dataset and rnd table
148
174
  names = [col.name for col in columns]
149
175
  sqa_columns = [
150
176
  col
@@ -164,9 +190,12 @@ class SQASampler(SamplerInterface):
164
190
  "Cannot fetch sample data with random sampling. Falling back to 100 rows."
165
191
  )
166
192
  logger.debug(traceback.format_exc())
167
- sqa_columns = list(inspect(self.table).c)
193
+ sqa_columns = list(inspect(self.raw_dataset).c)
168
194
  sqa_sample = (
169
- self.client.query(*sqa_columns).select_from(self.table).limit(100).all()
195
+ self.client.query(*sqa_columns)
196
+ .select_from(self.raw_dataset)
197
+ .limit(100)
198
+ .all()
170
199
  )
171
200
 
172
201
  return TableData(
@@ -176,12 +205,10 @@ class SQASampler(SamplerInterface):
176
205
 
177
206
  def _fetch_sample_data_from_user_query(self) -> TableData:
178
207
  """Returns a table data object using results from query execution"""
179
- if not is_safe_sql_query(self._profile_sample_query):
180
- raise RuntimeError(
181
- f"SQL expression is not safe\n\n{self._profile_sample_query}"
182
- )
208
+ if not is_safe_sql_query(self.sample_query):
209
+ raise RuntimeError(f"SQL expression is not safe\n\n{self.sample_query}")
183
210
 
184
- rnd = self.client.execute(f"{self._profile_sample_query}")
211
+ rnd = self.client.execute(f"{self.sample_query}")
185
212
  try:
186
213
  columns = [col.name for col in rnd.cursor.description]
187
214
  except AttributeError:
@@ -193,65 +220,30 @@ class SQASampler(SamplerInterface):
193
220
 
194
221
  def _rdn_sample_from_user_query(self) -> Query:
195
222
  """Returns sql alchemy object to use when running profiling"""
196
- if not is_safe_sql_query(self._profile_sample_query):
197
- raise RuntimeError(
198
- f"SQL expression is not safe\n\n{self._profile_sample_query}"
199
- )
223
+ if not is_safe_sql_query(self.sample_query):
224
+ raise RuntimeError(f"SQL expression is not safe\n\n{self.sample_query}")
200
225
 
201
- return self.client.query(self.table).from_statement(
202
- text(f"{self._profile_sample_query}")
226
+ return self.client.query(self.raw_dataset).from_statement(
227
+ text(f"{self.sample_query}")
203
228
  )
204
229
 
205
230
  def _partitioned_table(self) -> Query:
206
231
  """Return the Query object for partitioned tables"""
207
- return aliased(self.get_partitioned_query().subquery())
232
+ return self.get_partitioned_query()
208
233
 
209
- def get_partitioned_query(self) -> Query:
234
+ def get_partitioned_query(self, query=None) -> Query:
210
235
  """Return the partitioned query"""
211
- self._partition_details = cast(
212
- PartitionProfilerConfig, self._partition_details
236
+ self.partition_details = cast(
237
+ PartitionProfilerConfig, self.partition_details
213
238
  ) # satisfying type checker
214
- partition_field = self._partition_details.partitionColumnName
215
-
216
- type_ = get_partition_col_type(
217
- partition_field,
218
- self.table.__table__.c,
239
+ partition_filter = build_partition_predicate(
240
+ self.partition_details,
241
+ self.raw_dataset.__table__.c,
219
242
  )
243
+ if query is not None:
244
+ return query.filter(partition_filter)
245
+ return self.client.query(self.raw_dataset).filter(partition_filter)
220
246
 
221
- if (
222
- self._partition_details.partitionIntervalType
223
- == PartitionIntervalTypes.COLUMN_VALUE
224
- ):
225
- return self.client.query(self.table).filter(
226
- get_value_filter(
227
- Column(partition_field),
228
- self._partition_details.partitionValues,
229
- )
230
- )
231
- if (
232
- self._partition_details.partitionIntervalType
233
- == PartitionIntervalTypes.INTEGER_RANGE
234
- ):
235
- return self.client.query(self.table).filter(
236
- get_integer_range_filter(
237
- Column(partition_field),
238
- self._partition_details.partitionIntegerRangeStart,
239
- self._partition_details.partitionIntegerRangeEnd,
240
- )
241
- )
242
- return self.client.query(self.table).filter(
243
- build_query_filter(
244
- [
245
- (
246
- Column(partition_field),
247
- "ge",
248
- dispatch_to_date_or_datetime(
249
- self._partition_details.partitionInterval,
250
- text(self._partition_details.partitionIntervalUnit.value),
251
- type_,
252
- ),
253
- )
254
- ],
255
- False,
256
- )
257
- )
247
+ def get_columns(self):
248
+ """get columns from entity"""
249
+ return list(inspect(self.raw_dataset).c)
@@ -0,0 +1,95 @@
1
+ # Copyright 2021 Collate
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ # http://www.apache.org/licenses/LICENSE-2.0
6
+ # Unless required by applicable law or agreed to in writing, software
7
+ # distributed under the License is distributed on an "AS IS" BASIS,
8
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
+ # See the License for the specific language governing permissions and
10
+ # limitations under the License.
11
+ """
12
+ Helper module to handle data sampling
13
+ for the profiler
14
+ """
15
+
16
+ from typing import Dict, Optional, Union
17
+
18
+ from sqlalchemy import Table, func, text
19
+ from sqlalchemy.sql.selectable import CTE
20
+
21
+ from metadata.generated.schema.entity.data.table import (
22
+ ProfileSampleType,
23
+ SamplingMethodType,
24
+ )
25
+ from metadata.generated.schema.entity.services.connections.connectionBasicType import (
26
+ DataStorageConfig,
27
+ )
28
+ from metadata.generated.schema.entity.services.connections.database.datalakeConnection import (
29
+ DatalakeConnection,
30
+ )
31
+ from metadata.generated.schema.entity.services.databaseService import DatabaseConnection
32
+ from metadata.ingestion.ometa.ometa_api import OpenMetadata
33
+ from metadata.sampler.models import SampleConfig
34
+ from metadata.sampler.sqlalchemy.sampler import SQASampler
35
+ from metadata.utils.constants import SAMPLE_DATA_DEFAULT_COUNT
36
+
37
+
38
+ class SnowflakeSampler(SQASampler):
39
+ """
40
+ Generates a sample of the data to not
41
+ run the query in the whole table.
42
+ """
43
+
44
+ # pylint: disable=too-many-arguments
45
+ def __init__(
46
+ self,
47
+ service_connection_config: Union[DatabaseConnection, DatalakeConnection],
48
+ ometa_client: OpenMetadata,
49
+ entity: Table,
50
+ sample_config: Optional[SampleConfig] = None,
51
+ partition_details: Optional[Dict] = None,
52
+ sample_query: Optional[str] = None,
53
+ storage_config: DataStorageConfig = None,
54
+ sample_data_count: Optional[int] = SAMPLE_DATA_DEFAULT_COUNT,
55
+ **kwargs,
56
+ ):
57
+ super().__init__(
58
+ service_connection_config=service_connection_config,
59
+ ometa_client=ometa_client,
60
+ entity=entity,
61
+ sample_config=sample_config,
62
+ partition_details=partition_details,
63
+ sample_query=sample_query,
64
+ storage_config=storage_config,
65
+ sample_data_count=sample_data_count,
66
+ **kwargs,
67
+ )
68
+ self.sampling_method_type = func.bernoulli
69
+ if (
70
+ sample_config
71
+ and sample_config.sampling_method_type == SamplingMethodType.SYSTEM
72
+ ):
73
+ self.sampling_method_type = func.system
74
+
75
+ def set_tablesample(self, selectable: Table):
76
+ """Set the TABLESAMPLE clause for Snowflake
77
+ Args:
78
+ selectable (Table): _description_
79
+ """
80
+ if self.sample_config.profile_sample_type == ProfileSampleType.PERCENTAGE:
81
+ return selectable.tablesample(
82
+ self.sampling_method_type(self.sample_config.profile_sample or 100)
83
+ )
84
+
85
+ return selectable.tablesample(
86
+ func.ROW(text(f"{self.sample_config.profile_sample or 100} ROWS"))
87
+ )
88
+
89
+ def get_sample_query(self, *, column=None) -> CTE:
90
+ """get query for sample data"""
91
+ rnd = self._base_sample_query(column).cte(
92
+ f"{self.raw_dataset.__tablename__}_rnd"
93
+ )
94
+ query = self.client.query(rnd)
95
+ return query.cte(f"{self.raw_dataset.__tablename__}_sample")
@@ -16,7 +16,7 @@ from sqlalchemy import inspect, or_, text
16
16
 
17
17
  from metadata.profiler.orm.registry import FLOAT_SET
18
18
  from metadata.profiler.processor.handle_partition import RANDOM_LABEL
19
- from metadata.profiler.processor.sampler.sqlalchemy.sampler import SQASampler
19
+ from metadata.sampler.sqlalchemy.sampler import SQASampler
20
20
 
21
21
 
22
22
  class TrinoSampler(SQASampler):
@@ -34,8 +34,10 @@ class TrinoSampler(SQASampler):
34
34
  super().__init__(*args, **kwargs)
35
35
 
36
36
  def _base_sample_query(self, column, label=None):
37
- sqa_columns = [col for col in inspect(self.table).c if col.name != RANDOM_LABEL]
38
- entity = self.table if column is None else column
37
+ sqa_columns = [
38
+ col for col in inspect(self.raw_dataset).c if col.name != RANDOM_LABEL
39
+ ]
40
+ entity = self.raw_dataset if column is None else column
39
41
  return self.client.query(entity, label).where(
40
42
  or_(
41
43
  *[
@@ -27,6 +27,51 @@ from metadata.generated.schema.entity.data.topic import Topic
27
27
  from metadata.generated.schema.entity.domains.dataProduct import DataProduct
28
28
  from metadata.generated.schema.entity.domains.domain import Domain
29
29
  from metadata.generated.schema.entity.services.apiService import ApiService
30
+ from metadata.generated.schema.entity.services.connections.database.bigTableConnection import (
31
+ BigtableType,
32
+ )
33
+ from metadata.generated.schema.entity.services.connections.database.couchbaseConnection import (
34
+ CouchbaseType,
35
+ )
36
+ from metadata.generated.schema.entity.services.connections.database.datalakeConnection import (
37
+ DatalakeType,
38
+ )
39
+ from metadata.generated.schema.entity.services.connections.database.deltaLakeConnection import (
40
+ DeltaLakeType,
41
+ )
42
+ from metadata.generated.schema.entity.services.connections.database.domoDatabaseConnection import (
43
+ DomoDatabaseType,
44
+ )
45
+ from metadata.generated.schema.entity.services.connections.database.dorisConnection import (
46
+ DorisType,
47
+ )
48
+ from metadata.generated.schema.entity.services.connections.database.druidConnection import (
49
+ DruidType,
50
+ )
51
+ from metadata.generated.schema.entity.services.connections.database.dynamoDBConnection import (
52
+ DynamoDBType,
53
+ )
54
+ from metadata.generated.schema.entity.services.connections.database.glueConnection import (
55
+ GlueType,
56
+ )
57
+ from metadata.generated.schema.entity.services.connections.database.icebergConnection import (
58
+ IcebergType,
59
+ )
60
+ from metadata.generated.schema.entity.services.connections.database.mongoDBConnection import (
61
+ MongoDBType,
62
+ )
63
+ from metadata.generated.schema.entity.services.connections.database.salesforceConnection import (
64
+ SalesforceType,
65
+ )
66
+ from metadata.generated.schema.entity.services.connections.database.sapErpConnection import (
67
+ SapErpType,
68
+ )
69
+ from metadata.generated.schema.entity.services.connections.database.sasConnection import (
70
+ SasType,
71
+ )
72
+ from metadata.generated.schema.entity.services.connections.database.unityCatalogConnection import (
73
+ DatabricksType,
74
+ )
30
75
  from metadata.generated.schema.entity.services.dashboardService import DashboardService
31
76
  from metadata.generated.schema.entity.services.databaseService import DatabaseService
32
77
  from metadata.generated.schema.entity.services.messagingService import MessagingService
@@ -111,3 +156,21 @@ ENTITY_REFERENCE_TYPE_MAP = {
111
156
  }
112
157
 
113
158
  CUSTOM_CONNECTOR_PREFIX = "custom"
159
+
160
+ NON_SQA_DATABASE_CONNECTIONS = (
161
+ DatalakeType.Datalake.value,
162
+ BigtableType.BigTable.value,
163
+ CouchbaseType.Couchbase.value,
164
+ DatabricksType.UnityCatalog.value,
165
+ DeltaLakeType.DeltaLake.value,
166
+ DomoDatabaseType.DomoDatabase.value,
167
+ DorisType.Doris.value,
168
+ DruidType.Druid.value,
169
+ DynamoDBType.DynamoDB.value,
170
+ GlueType.Glue.value,
171
+ IcebergType.Iceberg.value,
172
+ MongoDBType.MongoDB.value,
173
+ SalesforceType.Salesforce.value,
174
+ SapErpType.SapErp.value,
175
+ SasType.SAS.value,
176
+ )
metadata/utils/helpers.py CHANGED
@@ -360,8 +360,7 @@ def clean_uri(uri: Union[str, Url]) -> str:
360
360
  make it http://localhost:9000
361
361
  """
362
362
  # force a string of the given Uri if needed
363
- if isinstance(uri, Url):
364
- uri = str(uri)
363
+ uri = str(uri)
365
364
  return uri[:-1] if uri.endswith("/") else uri
366
365
 
367
366
 
metadata/utils/logger.py CHANGED
@@ -52,6 +52,7 @@ class Loggers(Enum):
52
52
  OMETA = "OMetaAPI"
53
53
  CLI = "Metadata"
54
54
  PROFILER = "Profiler"
55
+ SAMPLER = "Sampler"
55
56
  PII = "PII"
56
57
  INGESTION = "Ingestion"
57
58
  UTILS = "Utils"
@@ -103,6 +104,14 @@ def profiler_logger():
103
104
  return logging.getLogger(Loggers.PROFILER.value)
104
105
 
105
106
 
107
+ def sampler_logger():
108
+ """
109
+ Method to get the SAMPLER logger
110
+ """
111
+
112
+ return logging.getLogger(Loggers.SAMPLER.value)
113
+
114
+
106
115
  def pii_logger():
107
116
  """
108
117
  Method to get the PROFILER logger
@@ -20,6 +20,11 @@ from typing import Optional, Tuple
20
20
  import sqlparse
21
21
  from pydantic import BaseModel
22
22
 
23
+ from metadata.generated.schema.entity.data.database import Database
24
+ from metadata.generated.schema.entity.data.databaseSchema import DatabaseSchema
25
+ from metadata.generated.schema.entity.data.table import Table
26
+ from metadata.generated.schema.entity.services.databaseService import DatabaseService
27
+ from metadata.ingestion.ometa.ometa_api import OpenMetadata
23
28
  from metadata.utils.logger import profiler_logger
24
29
 
25
30
  logger = profiler_logger()
@@ -100,3 +105,40 @@ def set_cache(cache: defaultdict, key: str, value):
100
105
  cache[key_] = value
101
106
  break
102
107
  cache = cache[key_]
108
+
109
+
110
+ def get_context_entities(
111
+ entity: Table, metadata: OpenMetadata
112
+ ) -> Tuple[DatabaseSchema, Database, DatabaseService]:
113
+ """Based on the table, get all the parent entities"""
114
+ schema_entity = None
115
+ database_entity = None
116
+ db_service = None
117
+
118
+ if entity.databaseSchema:
119
+ schema_entity_list = metadata.es_search_from_fqn(
120
+ entity_type=DatabaseSchema,
121
+ fqn_search_string=entity.databaseSchema.fullyQualifiedName,
122
+ fields="databaseSchemaProfilerConfig",
123
+ )
124
+ if schema_entity_list:
125
+ schema_entity = schema_entity_list[0]
126
+
127
+ if entity.database:
128
+ database_entity_list = metadata.es_search_from_fqn(
129
+ entity_type=Database,
130
+ fqn_search_string=entity.database.fullyQualifiedName,
131
+ fields="databaseProfilerConfig",
132
+ )
133
+ if database_entity_list:
134
+ database_entity = database_entity_list[0]
135
+
136
+ if entity.service:
137
+ db_service_list = metadata.es_search_from_fqn(
138
+ entity_type=DatabaseService,
139
+ fqn_search_string=entity.service.fullyQualifiedName,
140
+ )
141
+ if db_service_list:
142
+ db_service = db_service_list[0]
143
+
144
+ return schema_entity, database_entity, db_service
@@ -4,12 +4,18 @@ Default service specs for services.
4
4
 
5
5
  from typing import Optional
6
6
 
7
+ from metadata.data_quality.interface.sqlalchemy.sqa_test_suite_interface import (
8
+ SQATestSuiteInterface,
9
+ )
7
10
  from metadata.profiler.interface.sqlalchemy.profiler_interface import (
8
11
  SQAProfilerInterface,
9
12
  )
13
+ from metadata.sampler.sqlalchemy.sampler import SQASampler
10
14
  from metadata.utils.importer import get_class_path
11
15
  from metadata.utils.service_spec.service_spec import BaseSpec
12
16
 
13
17
 
14
18
  class DefaultDatabaseSpec(BaseSpec):
15
19
  profiler_class: Optional[str] = get_class_path(SQAProfilerInterface)
20
+ sampler_class: Optional[str] = get_class_path(SQASampler)
21
+ test_suite_class: Optional[str] = get_class_path(SQATestSuiteInterface)
@@ -6,11 +6,15 @@ from typing import Optional, Type, cast
6
6
 
7
7
  from pydantic import model_validator
8
8
 
9
+ from metadata.data_quality.interface.test_suite_interface import TestSuiteInterface
9
10
  from metadata.generated.schema.entity.services.serviceType import ServiceType
10
11
  from metadata.ingestion.api.steps import Source
11
12
  from metadata.ingestion.models.custom_pydantic import BaseModel
13
+ from metadata.profiler.interface.profiler_interface import ProfilerInterface
14
+ from metadata.sampler.sampler_interface import SamplerInterface
12
15
  from metadata.utils.importer import (
13
16
  TYPE_SEPARATOR,
17
+ DynamicImportException,
14
18
  get_class_path,
15
19
  get_module_dir,
16
20
  import_from_module,
@@ -43,9 +47,11 @@ class BaseSpec(BaseModel):
43
47
  """
44
48
 
45
49
  profiler_class: Optional[str] = None
50
+ test_suite_class: Optional[str] = None
46
51
  metadata_source_class: str
47
52
  lineage_source_class: Optional[str] = None
48
53
  usage_source_class: Optional[str] = None
54
+ sampler_class: Optional[str] = None
49
55
 
50
56
  @model_validator(mode="before")
51
57
  @classmethod
@@ -97,3 +103,44 @@ def import_source_class(
97
103
  Type[Source],
98
104
  import_from_module(spec.model_dump()[field]),
99
105
  )
106
+
107
+
108
+ def import_profiler_class(
109
+ service_type: ServiceType, source_type: str
110
+ ) -> Type[ProfilerInterface]:
111
+ class_path = BaseSpec.get_for_source(service_type, source_type).profiler_class
112
+ return cast(Type[ProfilerInterface], import_from_module(class_path))
113
+
114
+
115
+ def import_test_suite_class(
116
+ service_type: ServiceType,
117
+ source_type: str,
118
+ source_config_type: Optional[str] = None,
119
+ ) -> Type[TestSuiteInterface]:
120
+ try:
121
+ class_path = BaseSpec.get_for_source(service_type, source_type).test_suite_class
122
+ except DynamicImportException:
123
+ if source_config_type:
124
+ class_path = BaseSpec.get_for_source(
125
+ service_type, source_config_type.lower()
126
+ ).test_suite_class
127
+ else:
128
+ raise
129
+ return cast(Type[TestSuiteInterface], import_from_module(class_path))
130
+
131
+
132
+ def import_sampler_class(
133
+ service_type: ServiceType,
134
+ source_type: str,
135
+ source_config_type: Optional[str] = None,
136
+ ) -> Type[SamplerInterface]:
137
+ try:
138
+ class_path = BaseSpec.get_for_source(service_type, source_type).sampler_class
139
+ except DynamicImportException:
140
+ if source_config_type:
141
+ class_path = BaseSpec.get_for_source(
142
+ service_type, source_config_type.lower()
143
+ ).sampler_class
144
+ else:
145
+ raise
146
+ return cast(Type[SamplerInterface], import_from_module(class_path))
@@ -180,7 +180,7 @@ def get_table_comment_results(
180
180
  """
181
181
  self.table_comment_result: Dict[Tuple[str, str], str] = {}
182
182
  self.current_db: str = database
183
- result = connection.execute(query)
183
+ result = connection.execute(query).fetchall()
184
184
  self.table_comment_result[(table_name, schema)] = result
185
185
 
186
186