openmetadata-ingestion 1.2.5.2__py3-none-any.whl → 1.3.0.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of openmetadata-ingestion might be problematic. Click here for more details.

Files changed (826)
  1. airflow_provider_openmetadata/lineage/runner.py +44 -5
  2. airflow_provider_openmetadata/lineage/status.py +1 -1
  3. metadata/antlr/split_listener.py +1 -4
  4. metadata/applications/auto_tagger.py +212 -0
  5. metadata/cli/app.py +47 -0
  6. metadata/cli/db_dump.py +6 -3
  7. metadata/clients/aws_client.py +4 -0
  8. metadata/clients/domo_client.py +0 -2
  9. metadata/cmd.py +24 -13
  10. metadata/data_insight/processor/reports/entity_report_data_processor.py +8 -2
  11. metadata/data_insight/processor/reports/web_analytic_report_data_processor.py +4 -0
  12. metadata/data_insight/source/metadata.py +11 -4
  13. metadata/data_quality/interface/sqlalchemy/databricks/test_suite_interface.py +29 -0
  14. metadata/data_quality/interface/sqlalchemy/snowflake/test_suite_interface.py +29 -0
  15. metadata/data_quality/interface/sqlalchemy/sqa_test_suite_interface.py +6 -6
  16. metadata/data_quality/interface/sqlalchemy/unity_catalog/test_suite_interface.py +35 -0
  17. metadata/data_quality/interface/test_suite_interface_factory.py +40 -0
  18. metadata/data_quality/processor/test_case_runner.py +17 -8
  19. metadata/data_quality/source/test_suite.py +8 -2
  20. metadata/data_quality/validations/base_test_handler.py +21 -17
  21. metadata/data_quality/validations/column/base/columnValueLengthsToBeBetween.py +38 -1
  22. metadata/data_quality/validations/column/base/columnValuesToBeBetween.py +36 -1
  23. metadata/data_quality/validations/column/base/columnValuesToBeInSet.py +27 -0
  24. metadata/data_quality/validations/column/base/columnValuesToBeNotInSet.py +27 -0
  25. metadata/data_quality/validations/column/base/columnValuesToBeNotNull.py +27 -0
  26. metadata/data_quality/validations/column/base/columnValuesToBeUnique.py +15 -0
  27. metadata/data_quality/validations/column/base/columnValuesToMatchRegex.py +27 -0
  28. metadata/data_quality/validations/column/base/columnValuesToNotMatchRegex.py +27 -0
  29. metadata/data_quality/validations/column/pandas/columnValueLengthsToBeBetween.py +23 -0
  30. metadata/data_quality/validations/column/pandas/columnValuesToBeBetween.py +23 -0
  31. metadata/data_quality/validations/column/pandas/columnValuesToBeInSet.py +11 -0
  32. metadata/data_quality/validations/column/pandas/columnValuesToBeNotInSet.py +11 -0
  33. metadata/data_quality/validations/column/pandas/columnValuesToBeNotNull.py +11 -0
  34. metadata/data_quality/validations/column/pandas/columnValuesToMatchRegex.py +11 -0
  35. metadata/data_quality/validations/column/pandas/columnValuesToNotMatchRegex.py +11 -0
  36. metadata/data_quality/validations/column/sqlalchemy/columnValueLengthsToBeBetween.py +27 -0
  37. metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeBetween.py +23 -0
  38. metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeInSet.py +11 -0
  39. metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotInSet.py +11 -0
  40. metadata/data_quality/validations/column/sqlalchemy/columnValuesToBeNotNull.py +11 -0
  41. metadata/data_quality/validations/column/sqlalchemy/columnValuesToMatchRegex.py +11 -0
  42. metadata/data_quality/validations/column/sqlalchemy/columnValuesToNotMatchRegex.py +11 -0
  43. metadata/data_quality/validations/mixins/pandas_validator_mixin.py +13 -2
  44. metadata/data_quality/validations/mixins/sqa_validator_mixin.py +40 -0
  45. metadata/examples/workflows/databricks.yaml +0 -1
  46. metadata/examples/workflows/mongodb.yaml +4 -2
  47. metadata/examples/workflows/mstr.yaml +24 -0
  48. metadata/examples/workflows/sas.yaml +28 -0
  49. metadata/examples/workflows/unity_catalog.yaml +27 -0
  50. metadata/examples/workflows/unity_catalog_lineage.yaml +18 -0
  51. metadata/examples/workflows/unity_catalog_usage.yaml +35 -0
  52. metadata/generated/antlr/EntityLinkLexer.py +391 -292
  53. metadata/generated/antlr/EntityLinkListener.py +12 -12
  54. metadata/generated/antlr/EntityLinkParser.py +228 -103
  55. metadata/generated/schema/analytics/__init__.py +1 -1
  56. metadata/generated/schema/analytics/basic.py +1 -1
  57. metadata/generated/schema/analytics/reportData.py +1 -1
  58. metadata/generated/schema/analytics/reportDataType/__init__.py +1 -1
  59. metadata/generated/schema/analytics/reportDataType/aggregatedCostAnalysisReportData.py +1 -1
  60. metadata/generated/schema/analytics/reportDataType/entityReportData.py +1 -1
  61. metadata/generated/schema/analytics/reportDataType/rawCostAnalysisReportData.py +1 -1
  62. metadata/generated/schema/analytics/reportDataType/webAnalyticEntityViewReportData.py +1 -1
  63. metadata/generated/schema/analytics/reportDataType/webAnalyticUserActivityReportData.py +1 -1
  64. metadata/generated/schema/analytics/webAnalyticEvent.py +1 -1
  65. metadata/generated/schema/analytics/webAnalyticEventData.py +1 -1
  66. metadata/generated/schema/analytics/webAnalyticEventType/__init__.py +1 -1
  67. metadata/generated/schema/analytics/webAnalyticEventType/customEvent.py +1 -1
  68. metadata/generated/schema/analytics/webAnalyticEventType/pageViewEvent.py +1 -1
  69. metadata/generated/schema/api/__init__.py +1 -1
  70. metadata/generated/schema/api/addGlossaryToAssetsRequest.py +1 -1
  71. metadata/generated/schema/api/analytics/__init__.py +1 -1
  72. metadata/generated/schema/api/analytics/createWebAnalyticEvent.py +1 -1
  73. metadata/generated/schema/api/automations/__init__.py +1 -1
  74. metadata/generated/schema/api/automations/createWorkflow.py +1 -1
  75. metadata/generated/schema/api/bulkAssets.py +1 -1
  76. metadata/generated/schema/api/classification/__init__.py +1 -1
  77. metadata/generated/schema/api/classification/createClassification.py +1 -1
  78. metadata/generated/schema/api/classification/createTag.py +1 -1
  79. metadata/generated/schema/api/classification/loadTags.py +1 -1
  80. metadata/generated/schema/api/createBot.py +2 -3
  81. metadata/generated/schema/api/createEventPublisherJob.py +1 -1
  82. metadata/generated/schema/api/createType.py +1 -1
  83. metadata/generated/schema/api/data/__init__.py +1 -1
  84. metadata/generated/schema/api/data/createChart.py +1 -1
  85. metadata/generated/schema/api/data/createContainer.py +2 -2
  86. metadata/generated/schema/api/data/createCustomProperty.py +2 -3
  87. metadata/generated/schema/api/data/createDashboard.py +1 -1
  88. metadata/generated/schema/api/data/createDashboardDataModel.py +1 -1
  89. metadata/generated/schema/api/data/createDatabase.py +2 -3
  90. metadata/generated/schema/api/data/createDatabaseSchema.py +2 -3
  91. metadata/generated/schema/api/data/createGlossary.py +2 -3
  92. metadata/generated/schema/api/data/createGlossaryTerm.py +2 -3
  93. metadata/generated/schema/api/data/createMlModel.py +1 -1
  94. metadata/generated/schema/api/data/createPipeline.py +2 -2
  95. metadata/generated/schema/api/data/createQuery.py +2 -2
  96. metadata/generated/schema/api/data/createSearchIndex.py +1 -1
  97. metadata/generated/schema/api/data/createStoredProcedure.py +2 -4
  98. metadata/generated/schema/api/data/createTable.py +2 -2
  99. metadata/generated/schema/api/data/createTableProfile.py +1 -1
  100. metadata/generated/schema/api/data/createTopic.py +1 -1
  101. metadata/generated/schema/api/data/loadGlossary.py +1 -1
  102. metadata/generated/schema/api/data/restoreEntity.py +1 -1
  103. metadata/generated/schema/api/dataInsight/__init__.py +1 -1
  104. metadata/generated/schema/api/dataInsight/createDataInsightChart.py +1 -1
  105. metadata/generated/schema/api/dataInsight/kpi/__init__.py +1 -1
  106. metadata/generated/schema/api/dataInsight/kpi/createKpiRequest.py +1 -1
  107. metadata/generated/schema/api/docStore/__init__.py +1 -1
  108. metadata/generated/schema/api/docStore/createDocument.py +1 -1
  109. metadata/generated/schema/api/domains/__init__.py +1 -1
  110. metadata/generated/schema/api/domains/createDataProduct.py +1 -1
  111. metadata/generated/schema/api/domains/createDomain.py +1 -1
  112. metadata/generated/schema/api/feed/__init__.py +1 -1
  113. metadata/generated/schema/api/feed/closeTask.py +8 -1
  114. metadata/generated/schema/api/feed/createPost.py +1 -1
  115. metadata/generated/schema/api/feed/createSuggestion.py +30 -0
  116. metadata/generated/schema/api/feed/createThread.py +1 -1
  117. metadata/generated/schema/api/feed/resolveTask.py +12 -1
  118. metadata/generated/schema/api/feed/threadCount.py +18 -14
  119. metadata/generated/schema/api/lineage/__init__.py +1 -1
  120. metadata/generated/schema/api/lineage/addLineage.py +1 -1
  121. metadata/generated/schema/api/openMetadataServerVersion.py +1 -1
  122. metadata/generated/schema/api/policies/__init__.py +1 -1
  123. metadata/generated/schema/api/policies/createPolicy.py +1 -1
  124. metadata/generated/schema/api/services/__init__.py +1 -1
  125. metadata/generated/schema/api/services/createDashboardService.py +1 -1
  126. metadata/generated/schema/api/services/createDatabaseService.py +1 -1
  127. metadata/generated/schema/api/services/createMessagingService.py +1 -1
  128. metadata/generated/schema/api/services/createMetadataService.py +1 -1
  129. metadata/generated/schema/api/services/createMlModelService.py +1 -1
  130. metadata/generated/schema/api/services/createPipelineService.py +1 -1
  131. metadata/generated/schema/api/services/createSearchService.py +1 -1
  132. metadata/generated/schema/api/services/createStorageService.py +1 -1
  133. metadata/generated/schema/api/services/ingestionPipelines/__init__.py +1 -1
  134. metadata/generated/schema/api/services/ingestionPipelines/createIngestionPipeline.py +1 -1
  135. metadata/generated/schema/api/setOwner.py +1 -1
  136. metadata/generated/schema/api/teams/__init__.py +1 -1
  137. metadata/generated/schema/api/teams/createPersona.py +1 -1
  138. metadata/generated/schema/api/teams/createRole.py +2 -3
  139. metadata/generated/schema/api/teams/createTeam.py +1 -1
  140. metadata/generated/schema/api/teams/createUser.py +2 -2
  141. metadata/generated/schema/api/tests/__init__.py +1 -1
  142. metadata/generated/schema/api/tests/createCustomMetric.py +4 -2
  143. metadata/generated/schema/api/tests/createLogicalTestCases.py +1 -1
  144. metadata/generated/schema/api/tests/createTestCase.py +4 -1
  145. metadata/generated/schema/api/tests/createTestCaseResolutionStatus.py +31 -0
  146. metadata/generated/schema/api/tests/createTestDefinition.py +1 -1
  147. metadata/generated/schema/api/tests/createTestSuite.py +1 -1
  148. metadata/generated/schema/api/voteRequest.py +1 -1
  149. metadata/generated/schema/auth/__init__.py +1 -1
  150. metadata/generated/schema/auth/basicAuth.py +1 -1
  151. metadata/generated/schema/auth/basicLoginRequest.py +1 -1
  152. metadata/generated/schema/auth/changePasswordRequest.py +1 -1
  153. metadata/generated/schema/auth/createPersonalToken.py +1 -1
  154. metadata/generated/schema/auth/emailRequest.py +1 -1
  155. metadata/generated/schema/auth/emailVerificationToken.py +1 -1
  156. metadata/generated/schema/auth/generateToken.py +1 -1
  157. metadata/generated/schema/auth/jwtAuth.py +1 -1
  158. metadata/generated/schema/auth/loginRequest.py +1 -1
  159. metadata/generated/schema/auth/logoutRequest.py +1 -1
  160. metadata/generated/schema/auth/passwordResetRequest.py +3 -3
  161. metadata/generated/schema/auth/passwordResetToken.py +1 -1
  162. metadata/generated/schema/auth/personalAccessToken.py +1 -1
  163. metadata/generated/schema/auth/refreshToken.py +1 -1
  164. metadata/generated/schema/auth/registrationRequest.py +2 -2
  165. metadata/generated/schema/auth/revokePersonalToken.py +1 -1
  166. metadata/generated/schema/auth/revokeToken.py +1 -1
  167. metadata/generated/schema/auth/serviceTokenEnum.py +1 -1
  168. metadata/generated/schema/auth/ssoAuth.py +1 -1
  169. metadata/generated/schema/auth/tokenRefreshRequest.py +1 -1
  170. metadata/generated/schema/configuration/__init__.py +1 -1
  171. metadata/generated/schema/configuration/appsPrivateConfiguration.py +28 -0
  172. metadata/generated/schema/configuration/authConfig.py +1 -1
  173. metadata/generated/schema/configuration/authenticationConfiguration.py +1 -1
  174. metadata/generated/schema/configuration/authorizerConfiguration.py +1 -1
  175. metadata/generated/schema/configuration/changeEventConfiguration.py +1 -1
  176. metadata/generated/schema/configuration/elasticSearchConfiguration.py +7 -1
  177. metadata/generated/schema/configuration/eventHandlerConfiguration.py +1 -1
  178. metadata/generated/schema/configuration/fernetConfiguration.py +1 -1
  179. metadata/generated/schema/configuration/jwtTokenConfiguration.py +1 -1
  180. metadata/generated/schema/configuration/kafkaEventConfiguration.py +1 -1
  181. metadata/generated/schema/configuration/ldapConfiguration.py +28 -2
  182. metadata/generated/schema/configuration/ldapTrustStoreConfig/__init__.py +1 -1
  183. metadata/generated/schema/configuration/ldapTrustStoreConfig/customTrustManagerConfig.py +1 -1
  184. metadata/generated/schema/configuration/ldapTrustStoreConfig/hostNameConfig.py +1 -1
  185. metadata/generated/schema/configuration/ldapTrustStoreConfig/jvmDefaultConfig.py +1 -1
  186. metadata/generated/schema/configuration/ldapTrustStoreConfig/trustAllConfig.py +1 -1
  187. metadata/generated/schema/configuration/ldapTrustStoreConfig/truststoreConfig.py +1 -1
  188. metadata/generated/schema/configuration/loginConfiguration.py +1 -1
  189. metadata/generated/schema/configuration/logoConfiguration.py +1 -1
  190. metadata/generated/schema/configuration/pipelineServiceClientConfiguration.py +1 -1
  191. metadata/generated/schema/configuration/slackAppConfiguration.py +1 -1
  192. metadata/generated/schema/configuration/taskNotificationConfiguration.py +1 -1
  193. metadata/generated/schema/configuration/testResultNotificationConfiguration.py +1 -1
  194. metadata/generated/schema/dataInsight/__init__.py +1 -1
  195. metadata/generated/schema/dataInsight/dataInsightChart.py +1 -1
  196. metadata/generated/schema/dataInsight/dataInsightChartResult.py +1 -1
  197. metadata/generated/schema/dataInsight/kpi/__init__.py +1 -1
  198. metadata/generated/schema/dataInsight/kpi/basic.py +1 -1
  199. metadata/generated/schema/dataInsight/kpi/kpi.py +1 -1
  200. metadata/generated/schema/dataInsight/type/__init__.py +1 -1
  201. metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsCount.py +1 -1
  202. metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsSize.py +1 -1
  203. metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsCount.py +1 -1
  204. metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsSize.py +1 -1
  205. metadata/generated/schema/dataInsight/type/dailyActiveUsers.py +1 -1
  206. metadata/generated/schema/dataInsight/type/mostActiveUsers.py +1 -1
  207. metadata/generated/schema/dataInsight/type/mostViewedEntities.py +1 -1
  208. metadata/generated/schema/dataInsight/type/pageViewsByEntities.py +1 -1
  209. metadata/generated/schema/dataInsight/type/percentageOfEntitiesWithDescriptionByType.py +1 -1
  210. metadata/generated/schema/dataInsight/type/percentageOfEntitiesWithOwnerByType.py +1 -1
  211. metadata/generated/schema/dataInsight/type/percentageOfServicesWithDescription.py +1 -1
  212. metadata/generated/schema/dataInsight/type/percentageOfServicesWithOwner.py +1 -1
  213. metadata/generated/schema/dataInsight/type/totalEntitiesByTier.py +1 -1
  214. metadata/generated/schema/dataInsight/type/totalEntitiesByType.py +1 -1
  215. metadata/generated/schema/dataInsight/type/unusedAssets.py +1 -1
  216. metadata/generated/schema/email/__init__.py +1 -1
  217. metadata/generated/schema/email/emailRequest.py +1 -1
  218. metadata/generated/schema/email/smtpSettings.py +1 -1
  219. metadata/generated/schema/entity/__init__.py +1 -1
  220. metadata/generated/schema/entity/applications/__init__.py +1 -1
  221. metadata/generated/schema/entity/applications/app.py +18 -4
  222. metadata/generated/schema/entity/applications/appRunRecord.py +1 -1
  223. metadata/generated/schema/entity/applications/configuration/__init__.py +1 -1
  224. metadata/generated/schema/entity/applications/configuration/applicationConfig.py +30 -0
  225. metadata/generated/schema/entity/{services/connections/database/mongoDB → applications/configuration/external}/__init__.py +1 -1
  226. metadata/generated/schema/entity/applications/configuration/external/autoTaggerAppConfig.py +28 -0
  227. metadata/generated/schema/entity/applications/configuration/external/metaPilotAppConfig.py +46 -0
  228. metadata/generated/schema/entity/applications/configuration/internal/__init__.py +3 -0
  229. metadata/generated/schema/entity/applications/configuration/internal/dataInsightsAppConfig.py +14 -0
  230. metadata/generated/schema/entity/applications/configuration/internal/dataInsightsReportAppConfig.py +27 -0
  231. metadata/generated/schema/entity/applications/configuration/{searchIndexingApp.py → internal/searchIndexingAppConfig.py} +13 -3
  232. metadata/generated/schema/entity/applications/configuration/private/external/__init__.py +3 -0
  233. metadata/generated/schema/entity/applications/configuration/private/external/metaPilotAppPrivateConfig.py +26 -0
  234. metadata/generated/schema/entity/applications/createAppRequest.py +1 -1
  235. metadata/generated/schema/entity/applications/jobStatus.py +3 -3
  236. metadata/generated/schema/entity/applications/liveExecutionContext.py +1 -1
  237. metadata/generated/schema/entity/applications/marketplace/__init__.py +1 -1
  238. metadata/generated/schema/entity/applications/marketplace/appMarketPlaceDefinition.py +12 -3
  239. metadata/generated/schema/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.py +12 -3
  240. metadata/generated/schema/entity/applications/scheduledExecutionContext.py +1 -1
  241. metadata/generated/schema/entity/automations/__init__.py +1 -1
  242. metadata/generated/schema/entity/automations/testServiceConnection.py +2 -2
  243. metadata/generated/schema/entity/automations/workflow.py +1 -1
  244. metadata/generated/schema/entity/bot.py +2 -3
  245. metadata/generated/schema/entity/classification/__init__.py +1 -1
  246. metadata/generated/schema/entity/classification/classification.py +1 -1
  247. metadata/generated/schema/entity/classification/tag.py +2 -2
  248. metadata/generated/schema/entity/data/__init__.py +1 -1
  249. metadata/generated/schema/entity/data/chart.py +1 -1
  250. metadata/generated/schema/entity/data/container.py +4 -9
  251. metadata/generated/schema/entity/data/dashboard.py +1 -1
  252. metadata/generated/schema/entity/data/dashboardDataModel.py +1 -1
  253. metadata/generated/schema/entity/data/database.py +4 -8
  254. metadata/generated/schema/entity/data/databaseSchema.py +2 -8
  255. metadata/generated/schema/entity/data/glossary.py +1 -1
  256. metadata/generated/schema/entity/data/glossaryTerm.py +1 -1
  257. metadata/generated/schema/entity/data/metrics.py +1 -1
  258. metadata/generated/schema/entity/data/mlmodel.py +1 -1
  259. metadata/generated/schema/entity/data/pipeline.py +6 -9
  260. metadata/generated/schema/entity/data/query.py +2 -2
  261. metadata/generated/schema/entity/data/report.py +1 -1
  262. metadata/generated/schema/entity/data/searchIndex.py +1 -1
  263. metadata/generated/schema/entity/data/storedProcedure.py +3 -9
  264. metadata/generated/schema/entity/data/table.py +18 -12
  265. metadata/generated/schema/entity/data/topic.py +1 -1
  266. metadata/generated/schema/entity/docStore/__init__.py +1 -1
  267. metadata/generated/schema/entity/docStore/document.py +1 -1
  268. metadata/generated/schema/entity/domains/__init__.py +1 -1
  269. metadata/generated/schema/entity/domains/dataProduct.py +1 -1
  270. metadata/generated/schema/entity/domains/domain.py +1 -1
  271. metadata/generated/schema/entity/events/__init__.py +1 -1
  272. metadata/generated/schema/entity/events/webhook.py +6 -3
  273. metadata/generated/schema/entity/feed/__init__.py +1 -1
  274. metadata/generated/schema/entity/feed/suggestion.py +61 -0
  275. metadata/generated/schema/entity/feed/thread.py +6 -1
  276. metadata/generated/schema/entity/policies/__init__.py +1 -1
  277. metadata/generated/schema/entity/policies/accessControl/__init__.py +1 -1
  278. metadata/generated/schema/entity/policies/accessControl/resourceDescriptor.py +1 -1
  279. metadata/generated/schema/entity/policies/accessControl/resourcePermission.py +1 -1
  280. metadata/generated/schema/entity/policies/accessControl/rule.py +1 -1
  281. metadata/generated/schema/entity/policies/filters.py +2 -5
  282. metadata/generated/schema/entity/policies/policy.py +1 -1
  283. metadata/generated/schema/entity/services/__init__.py +1 -1
  284. metadata/generated/schema/entity/services/connections/__init__.py +1 -1
  285. metadata/generated/schema/entity/services/connections/common/__init__.py +1 -1
  286. metadata/generated/schema/entity/services/connections/common/sslCertPaths.py +1 -1
  287. metadata/generated/schema/entity/services/connections/common/sslCertValues.py +1 -1
  288. metadata/generated/schema/entity/services/connections/common/sslConfig.py +1 -1
  289. metadata/generated/schema/entity/services/connections/connectionBasicType.py +1 -1
  290. metadata/generated/schema/entity/services/connections/dashboard/__init__.py +1 -1
  291. metadata/generated/schema/entity/services/connections/dashboard/customDashboardConnection.py +1 -1
  292. metadata/generated/schema/entity/services/connections/dashboard/domoDashboardConnection.py +1 -1
  293. metadata/generated/schema/entity/services/connections/dashboard/lightdashConnection.py +1 -1
  294. metadata/generated/schema/entity/services/connections/dashboard/lookerConnection.py +1 -1
  295. metadata/generated/schema/entity/services/connections/dashboard/metabaseConnection.py +1 -1
  296. metadata/generated/schema/entity/services/connections/dashboard/modeConnection.py +1 -1
  297. metadata/generated/schema/entity/services/connections/dashboard/mstrConnection.py +46 -0
  298. metadata/generated/schema/entity/services/connections/dashboard/powerBIConnection.py +1 -1
  299. metadata/generated/schema/entity/services/connections/dashboard/qlikSenseConnection.py +2 -2
  300. metadata/generated/schema/entity/services/connections/dashboard/quickSightConnection.py +1 -1
  301. metadata/generated/schema/entity/services/connections/dashboard/redashConnection.py +1 -1
  302. metadata/generated/schema/entity/services/connections/dashboard/supersetConnection.py +1 -1
  303. metadata/generated/schema/entity/services/connections/dashboard/tableauConnection.py +1 -1
  304. metadata/generated/schema/entity/services/connections/database/__init__.py +1 -1
  305. metadata/generated/schema/entity/services/connections/database/athenaConnection.py +2 -2
  306. metadata/generated/schema/entity/services/connections/database/azureSQLConnection.py +2 -2
  307. metadata/generated/schema/entity/services/connections/database/bigQueryConnection.py +2 -2
  308. metadata/generated/schema/entity/services/connections/database/clickhouseConnection.py +2 -2
  309. metadata/generated/schema/entity/services/connections/database/common/__init__.py +1 -1
  310. metadata/generated/schema/entity/services/connections/database/common/basicAuth.py +1 -1
  311. metadata/generated/schema/entity/services/connections/database/common/iamAuthConfig.py +1 -1
  312. metadata/generated/schema/entity/services/connections/database/common/jwtAuth.py +1 -1
  313. metadata/generated/schema/entity/services/connections/database/couchbaseConnection.py +6 -1
  314. metadata/generated/schema/entity/services/connections/database/customDatabaseConnection.py +1 -1
  315. metadata/generated/schema/entity/services/connections/database/databricksConnection.py +2 -7
  316. metadata/generated/schema/entity/services/connections/database/datalake/__init__.py +1 -1
  317. metadata/generated/schema/entity/services/connections/database/datalake/azureConfig.py +1 -1
  318. metadata/generated/schema/entity/services/connections/database/datalake/gcsConfig.py +1 -1
  319. metadata/generated/schema/entity/services/connections/database/datalake/s3Config.py +1 -1
  320. metadata/generated/schema/entity/services/connections/database/datalakeConnection.py +2 -2
  321. metadata/generated/schema/entity/services/connections/database/db2Connection.py +3 -2
  322. metadata/generated/schema/entity/services/connections/database/deltaLakeConnection.py +1 -1
  323. metadata/generated/schema/entity/services/connections/database/domoDatabaseConnection.py +1 -1
  324. metadata/generated/schema/entity/services/connections/database/dorisConnection.py +86 -0
  325. metadata/generated/schema/entity/services/connections/database/druidConnection.py +2 -2
  326. metadata/generated/schema/entity/services/connections/database/dynamoDBConnection.py +1 -1
  327. metadata/generated/schema/entity/services/connections/database/glueConnection.py +1 -1
  328. metadata/generated/schema/entity/services/connections/database/greenplumConnection.py +2 -2
  329. metadata/generated/schema/entity/services/connections/database/hiveConnection.py +2 -2
  330. metadata/generated/schema/entity/services/connections/database/iceberg/__init__.py +3 -0
  331. metadata/generated/schema/entity/services/connections/database/iceberg/dynamoDbCatalogConnection.py +17 -0
  332. metadata/generated/schema/entity/services/connections/database/iceberg/glueCatalogConnection.py +16 -0
  333. metadata/generated/schema/entity/services/connections/database/iceberg/hiveCatalogConnection.py +23 -0
  334. metadata/generated/schema/entity/services/connections/database/iceberg/icebergCatalog.py +43 -0
  335. metadata/generated/schema/entity/services/connections/database/iceberg/icebergFileSystem.py +20 -0
  336. metadata/generated/schema/entity/services/connections/database/iceberg/restCatalogConnection.py +69 -0
  337. metadata/generated/schema/entity/services/connections/database/icebergConnection.py +35 -0
  338. metadata/generated/schema/entity/services/connections/database/impalaConnection.py +2 -2
  339. metadata/generated/schema/entity/services/connections/database/mariaDBConnection.py +2 -2
  340. metadata/generated/schema/entity/services/connections/database/mongoDBConnection.py +24 -14
  341. metadata/generated/schema/entity/services/connections/database/mssqlConnection.py +5 -2
  342. metadata/generated/schema/entity/services/connections/database/mysqlConnection.py +2 -2
  343. metadata/generated/schema/entity/services/connections/database/oracleConnection.py +2 -2
  344. metadata/generated/schema/entity/services/connections/database/pinotDBConnection.py +2 -2
  345. metadata/generated/schema/entity/services/connections/database/postgresConnection.py +2 -2
  346. metadata/generated/schema/entity/services/connections/database/prestoConnection.py +2 -2
  347. metadata/generated/schema/entity/services/connections/database/redshiftConnection.py +2 -2
  348. metadata/generated/schema/entity/services/connections/database/salesforceConnection.py +1 -1
  349. metadata/generated/schema/entity/services/connections/database/sapHanaConnection.py +2 -2
  350. metadata/generated/schema/entity/services/connections/database/sasConnection.py +48 -0
  351. metadata/generated/schema/entity/services/connections/database/singleStoreConnection.py +2 -2
  352. metadata/generated/schema/entity/services/connections/database/snowflakeConnection.py +2 -2
  353. metadata/generated/schema/entity/services/connections/database/sqliteConnection.py +2 -2
  354. metadata/generated/schema/entity/services/connections/database/trinoConnection.py +2 -2
  355. metadata/generated/schema/entity/services/connections/database/unityCatalogConnection.py +90 -0
  356. metadata/generated/schema/entity/services/connections/database/verticaConnection.py +2 -2
  357. metadata/generated/schema/entity/services/connections/messaging/__init__.py +1 -1
  358. metadata/generated/schema/entity/services/connections/messaging/customMessagingConnection.py +1 -1
  359. metadata/generated/schema/entity/services/connections/messaging/kafkaConnection.py +1 -1
  360. metadata/generated/schema/entity/services/connections/messaging/kinesisConnection.py +1 -1
  361. metadata/generated/schema/entity/services/connections/messaging/pulsarConnection.py +1 -1
  362. metadata/generated/schema/entity/services/connections/messaging/redpandaConnection.py +1 -1
  363. metadata/generated/schema/entity/services/connections/messaging/saslMechanismType.py +1 -1
  364. metadata/generated/schema/entity/services/connections/metadata/__init__.py +1 -1
  365. metadata/generated/schema/entity/services/connections/metadata/alationConnection.py +1 -1
  366. metadata/generated/schema/entity/services/connections/metadata/amundsenConnection.py +1 -1
  367. metadata/generated/schema/entity/services/connections/metadata/atlasConnection.py +1 -1
  368. metadata/generated/schema/entity/services/connections/metadata/metadataESConnection.py +1 -1
  369. metadata/generated/schema/entity/services/connections/metadata/openMetadataConnection.py +5 -20
  370. metadata/generated/schema/entity/services/connections/mlmodel/__init__.py +1 -1
  371. metadata/generated/schema/entity/services/connections/mlmodel/customMlModelConnection.py +1 -1
  372. metadata/generated/schema/entity/services/connections/mlmodel/mlflowConnection.py +1 -1
  373. metadata/generated/schema/entity/services/connections/mlmodel/sageMakerConnection.py +1 -1
  374. metadata/generated/schema/entity/services/connections/mlmodel/sklearnConnection.py +1 -1
  375. metadata/generated/schema/entity/services/connections/pipeline/__init__.py +1 -1
  376. metadata/generated/schema/entity/services/connections/pipeline/airbyteConnection.py +1 -1
  377. metadata/generated/schema/entity/services/connections/pipeline/airflowConnection.py +2 -8
  378. metadata/generated/schema/entity/services/connections/pipeline/backendConnection.py +1 -1
  379. metadata/generated/schema/entity/services/connections/pipeline/customPipelineConnection.py +1 -1
  380. metadata/generated/schema/entity/services/connections/pipeline/dagsterConnection.py +1 -1
  381. metadata/generated/schema/entity/services/connections/pipeline/databricksPipelineConnection.py +1 -1
  382. metadata/generated/schema/entity/services/connections/pipeline/domoPipelineConnection.py +1 -1
  383. metadata/generated/schema/entity/services/connections/pipeline/fivetranConnection.py +1 -1
  384. metadata/generated/schema/entity/services/connections/pipeline/gluePipelineConnection.py +1 -1
  385. metadata/generated/schema/entity/services/connections/pipeline/nifiConnection.py +1 -1
  386. metadata/generated/schema/entity/services/connections/pipeline/sparkConnection.py +23 -0
  387. metadata/generated/schema/entity/services/connections/pipeline/splineConnection.py +1 -1
  388. metadata/generated/schema/entity/services/connections/search/__init__.py +1 -1
  389. metadata/generated/schema/entity/services/connections/search/customSearchConnection.py +1 -1
  390. metadata/generated/schema/entity/services/connections/search/elasticSearch/__init__.py +1 -1
  391. metadata/generated/schema/entity/services/connections/search/elasticSearch/apiAuth.py +1 -1
  392. metadata/generated/schema/entity/services/connections/search/elasticSearch/basicAuth.py +1 -1
  393. metadata/generated/schema/entity/services/connections/search/elasticSearchConnection.py +1 -1
  394. metadata/generated/schema/entity/services/connections/search/openSearchConnection.py +1 -1
  395. metadata/generated/schema/entity/services/connections/serviceConnection.py +1 -1
  396. metadata/generated/schema/entity/services/connections/storage/__init__.py +1 -1
  397. metadata/generated/schema/entity/services/connections/storage/{adlsConection.py → adlsConnection.py} +2 -2
  398. metadata/generated/schema/entity/services/connections/storage/customStorageConnection.py +1 -1
  399. metadata/generated/schema/entity/services/connections/storage/gcsConnection.py +1 -1
  400. metadata/generated/schema/entity/services/connections/storage/s3Connection.py +1 -1
  401. metadata/generated/schema/entity/services/connections/testConnectionDefinition.py +1 -1
  402. metadata/generated/schema/entity/services/connections/testConnectionResult.py +1 -1
  403. metadata/generated/schema/entity/services/dashboardService.py +4 -1
  404. metadata/generated/schema/entity/services/databaseService.py +13 -1
  405. metadata/generated/schema/entity/services/ingestionPipelines/__init__.py +1 -1
  406. metadata/generated/schema/entity/services/ingestionPipelines/ingestionPipeline.py +27 -22
  407. metadata/generated/schema/entity/services/ingestionPipelines/pipelineServiceClientResponse.py +1 -1
  408. metadata/generated/schema/entity/services/ingestionPipelines/status.py +56 -0
  409. metadata/generated/schema/entity/services/messagingService.py +1 -1
  410. metadata/generated/schema/entity/services/metadataService.py +1 -1
  411. metadata/generated/schema/entity/services/mlmodelService.py +1 -1
  412. metadata/generated/schema/entity/services/pipelineService.py +4 -1
  413. metadata/generated/schema/entity/services/searchService.py +1 -1
  414. metadata/generated/schema/entity/services/serviceType.py +1 -1
  415. metadata/generated/schema/entity/services/storageService.py +10 -3
  416. metadata/generated/schema/entity/teams/__init__.py +1 -1
  417. metadata/generated/schema/entity/teams/persona.py +1 -1
  418. metadata/generated/schema/entity/teams/role.py +2 -6
  419. metadata/generated/schema/entity/teams/team.py +1 -1
  420. metadata/generated/schema/entity/teams/teamHierarchy.py +1 -1
  421. metadata/generated/schema/entity/teams/user.py +3 -10
  422. metadata/generated/schema/entity/type.py +2 -2
  423. metadata/generated/schema/entity/utils/__init__.py +1 -1
  424. metadata/generated/schema/entity/utils/entitiesCount.py +1 -1
  425. metadata/generated/schema/entity/utils/servicesCount.py +1 -1
  426. metadata/generated/schema/entity/utils/supersetApiConnection.py +1 -1
  427. metadata/generated/schema/events/__init__.py +1 -1
  428. metadata/generated/schema/events/alertMetrics.py +27 -0
  429. metadata/generated/schema/events/api/__init__.py +1 -1
  430. metadata/generated/schema/events/api/createEventSubscription.py +14 -12
  431. metadata/generated/schema/events/emailAlertConfig.py +1 -1
  432. metadata/generated/schema/events/eventFilterRule.py +13 -2
  433. metadata/generated/schema/events/eventSubscription.py +89 -49
  434. metadata/generated/schema/events/eventSubscriptionOffset.py +21 -0
  435. metadata/generated/schema/events/failedEvent.py +26 -0
  436. metadata/generated/schema/events/filterResourceDescriptor.py +27 -0
  437. metadata/generated/schema/events/subscriptionResourceDescriptor.py +11 -10
  438. metadata/generated/schema/metadataIngestion/__init__.py +1 -1
  439. metadata/generated/schema/metadataIngestion/application.py +40 -0
  440. metadata/generated/schema/metadataIngestion/applicationPipeline.py +15 -6
  441. metadata/generated/schema/metadataIngestion/dashboardServiceMetadataPipeline.py +1 -1
  442. metadata/generated/schema/metadataIngestion/dataInsightPipeline.py +1 -1
  443. metadata/generated/schema/metadataIngestion/databaseServiceMetadataPipeline.py +6 -1
  444. metadata/generated/schema/metadataIngestion/databaseServiceProfilerPipeline.py +6 -1
  445. metadata/generated/schema/metadataIngestion/databaseServiceQueryLineagePipeline.py +1 -1
  446. metadata/generated/schema/metadataIngestion/databaseServiceQueryUsagePipeline.py +1 -1
  447. metadata/generated/schema/metadataIngestion/dbtPipeline.py +1 -1
  448. metadata/generated/schema/metadataIngestion/dbtconfig/__init__.py +1 -1
  449. metadata/generated/schema/metadataIngestion/dbtconfig/dbtAzureConfig.py +6 -3
  450. metadata/generated/schema/metadataIngestion/dbtconfig/dbtBucketDetails.py +1 -1
  451. metadata/generated/schema/metadataIngestion/dbtconfig/dbtCloudConfig.py +1 -1
  452. metadata/generated/schema/metadataIngestion/dbtconfig/dbtGCSConfig.py +6 -3
  453. metadata/generated/schema/metadataIngestion/dbtconfig/dbtHttpConfig.py +1 -1
  454. metadata/generated/schema/metadataIngestion/dbtconfig/dbtLocalConfig.py +1 -1
  455. metadata/generated/schema/metadataIngestion/dbtconfig/dbtS3Config.py +6 -3
  456. metadata/generated/schema/metadataIngestion/messagingServiceMetadataPipeline.py +1 -1
  457. metadata/generated/schema/metadataIngestion/metadataToElasticSearchPipeline.py +1 -1
  458. metadata/generated/schema/metadataIngestion/mlmodelServiceMetadataPipeline.py +1 -1
  459. metadata/generated/schema/metadataIngestion/pipelineServiceMetadataPipeline.py +1 -1
  460. metadata/generated/schema/metadataIngestion/searchServiceMetadataPipeline.py +1 -1
  461. metadata/generated/schema/metadataIngestion/storage/__init__.py +1 -1
  462. metadata/generated/schema/metadataIngestion/storage/containerMetadataConfig.py +1 -1
  463. metadata/generated/schema/metadataIngestion/storage/manifestMetadataConfig.py +1 -1
  464. metadata/generated/schema/metadataIngestion/storage/storageBucketDetails.py +1 -1
  465. metadata/generated/schema/metadataIngestion/storage/storageMetadataADLSConfig.py +2 -2
  466. metadata/generated/schema/metadataIngestion/storage/storageMetadataGCSConfig.py +24 -0
  467. metadata/generated/schema/metadataIngestion/storage/storageMetadataHttpConfig.py +1 -1
  468. metadata/generated/schema/metadataIngestion/storage/storageMetadataLocalConfig.py +1 -1
  469. metadata/generated/schema/metadataIngestion/storage/storageMetadataS3Config.py +1 -1
  470. metadata/generated/schema/metadataIngestion/storageServiceMetadataPipeline.py +8 -1
  471. metadata/generated/schema/metadataIngestion/testSuitePipeline.py +1 -1
  472. metadata/generated/schema/metadataIngestion/workflow.py +1 -1
  473. metadata/generated/schema/monitoring/__init__.py +1 -1
  474. metadata/generated/schema/monitoring/eventMonitorProvider.py +1 -1
  475. metadata/generated/schema/security/__init__.py +1 -1
  476. metadata/generated/schema/security/client/__init__.py +1 -1
  477. metadata/generated/schema/security/client/auth0SSOClientConfig.py +1 -1
  478. metadata/generated/schema/security/client/azureSSOClientConfig.py +1 -1
  479. metadata/generated/schema/security/client/customOidcSSOClientConfig.py +1 -1
  480. metadata/generated/schema/security/client/googleSSOClientConfig.py +1 -1
  481. metadata/generated/schema/security/client/oktaSSOClientConfig.py +1 -1
  482. metadata/generated/schema/security/client/openMetadataJWTClientConfig.py +1 -1
  483. metadata/generated/schema/security/client/samlSSOClientConfig.py +1 -1
  484. metadata/generated/schema/security/credentials/__init__.py +1 -1
  485. metadata/generated/schema/security/credentials/accessTokenAuth.py +1 -1
  486. metadata/generated/schema/security/credentials/apiAccessTokenAuth.py +1 -1
  487. metadata/generated/schema/security/credentials/awsCredentials.py +1 -1
  488. metadata/generated/schema/security/credentials/azureCredentials.py +1 -1
  489. metadata/generated/schema/security/credentials/basicAuth.py +1 -1
  490. metadata/generated/schema/security/credentials/bitbucketCredentials.py +1 -1
  491. metadata/generated/schema/security/credentials/gcpCredentials.py +1 -1
  492. metadata/generated/schema/security/credentials/gcpValues.py +1 -1
  493. metadata/generated/schema/security/credentials/gitCredentials.py +1 -1
  494. metadata/generated/schema/security/credentials/githubCredentials.py +1 -1
  495. metadata/generated/schema/security/secrets/__init__.py +1 -1
  496. metadata/generated/schema/security/secrets/secretsManagerClientLoader.py +1 -1
  497. metadata/generated/schema/security/secrets/secretsManagerConfiguration.py +14 -4
  498. metadata/generated/schema/security/secrets/secretsManagerProvider.py +2 -2
  499. metadata/generated/schema/security/securityConfiguration.py +1 -1
  500. metadata/generated/schema/security/ssl/__init__.py +1 -1
  501. metadata/generated/schema/security/ssl/validateSSLClientConfig.py +1 -1
  502. metadata/generated/schema/security/ssl/verifySSLConfig.py +4 -1
  503. metadata/generated/schema/settings/__init__.py +1 -1
  504. metadata/generated/schema/settings/settings.py +1 -1
  505. metadata/generated/schema/system/__init__.py +1 -1
  506. metadata/generated/schema/system/entityError.py +17 -0
  507. metadata/generated/schema/system/eventPublisherJob.py +5 -19
  508. metadata/generated/schema/system/indexingError.py +34 -0
  509. metadata/generated/schema/system/ui/__init__.py +1 -1
  510. metadata/generated/schema/system/ui/knowledgePanel.py +1 -1
  511. metadata/generated/schema/system/ui/page.py +1 -1
  512. metadata/generated/schema/tests/__init__.py +1 -1
  513. metadata/generated/schema/tests/assigned.py +18 -0
  514. metadata/generated/schema/tests/basic.py +17 -39
  515. metadata/generated/schema/tests/customMetric.py +4 -2
  516. metadata/generated/schema/tests/resolved.py +35 -0
  517. metadata/generated/schema/tests/testCase.py +11 -2
  518. metadata/generated/schema/tests/testCaseResolutionStatus.py +63 -0
  519. metadata/generated/schema/tests/testDefinition.py +5 -1
  520. metadata/generated/schema/tests/testSuite.py +1 -1
  521. metadata/generated/schema/type/__init__.py +1 -1
  522. metadata/generated/schema/type/auditLog.py +1 -1
  523. metadata/generated/schema/type/basic.py +5 -5
  524. metadata/generated/schema/type/bulkOperationResult.py +1 -1
  525. metadata/generated/schema/type/changeEvent.py +5 -75
  526. metadata/generated/schema/type/changeEventType.py +29 -0
  527. metadata/generated/schema/type/collectionDescriptor.py +1 -1
  528. metadata/generated/schema/type/csvDocumentation.py +1 -1
  529. metadata/generated/schema/type/csvErrorType.py +1 -1
  530. metadata/generated/schema/type/csvFile.py +1 -1
  531. metadata/generated/schema/type/csvImportResult.py +1 -1
  532. metadata/generated/schema/type/dailyCount.py +1 -1
  533. metadata/generated/schema/type/databaseConnectionConfig.py +1 -1
  534. metadata/generated/schema/type/entityHistory.py +1 -1
  535. metadata/generated/schema/type/entityLineage.py +2 -1
  536. metadata/generated/schema/type/entityReference.py +5 -1
  537. metadata/generated/schema/type/entityReferenceList.py +1 -1
  538. metadata/generated/schema/type/entityRelationship.py +1 -1
  539. metadata/generated/schema/type/entityUsage.py +1 -1
  540. metadata/generated/schema/type/filterPattern.py +1 -1
  541. metadata/generated/schema/type/function.py +7 -6
  542. metadata/generated/schema/type/include.py +1 -1
  543. metadata/generated/schema/type/jdbcConnection.py +1 -1
  544. metadata/generated/schema/type/lifeCycle.py +1 -1
  545. metadata/generated/schema/type/paging.py +1 -1
  546. metadata/generated/schema/type/profile.py +1 -1
  547. metadata/generated/schema/type/queryParserData.py +2 -2
  548. metadata/generated/schema/type/reaction.py +1 -1
  549. metadata/generated/schema/type/schedule.py +1 -1
  550. metadata/generated/schema/type/schema.py +1 -1
  551. metadata/generated/schema/type/tableQuery.py +2 -2
  552. metadata/generated/schema/type/tableUsageCount.py +1 -1
  553. metadata/generated/schema/type/tagLabel.py +1 -1
  554. metadata/generated/schema/type/usageDetails.py +1 -1
  555. metadata/generated/schema/type/usageRequest.py +1 -1
  556. metadata/generated/schema/type/votes.py +1 -1
  557. metadata/great_expectations/action.py +3 -2
  558. metadata/ingestion/api/delete.py +5 -2
  559. metadata/ingestion/api/models.py +4 -10
  560. metadata/ingestion/api/parser.py +3 -37
  561. metadata/ingestion/api/status.py +15 -4
  562. metadata/ingestion/api/step.py +39 -6
  563. metadata/ingestion/api/steps.py +20 -0
  564. metadata/ingestion/api/topology_runner.py +114 -168
  565. metadata/ingestion/bulksink/metadata_usage.py +11 -5
  566. metadata/ingestion/lineage/models.py +4 -0
  567. metadata/ingestion/lineage/parser.py +4 -5
  568. metadata/ingestion/lineage/sql_lineage.py +9 -17
  569. metadata/ingestion/models/custom_pydantic.py +4 -2
  570. metadata/ingestion/models/lf_tags_model.py +33 -0
  571. metadata/ingestion/models/patch_request.py +108 -2
  572. metadata/ingestion/models/tests_data.py +9 -0
  573. metadata/ingestion/models/topology.py +177 -60
  574. metadata/ingestion/ometa/auth_provider.py +0 -349
  575. metadata/ingestion/ometa/mixins/es_mixin.py +17 -0
  576. metadata/ingestion/ometa/mixins/patch_mixin.py +17 -68
  577. metadata/ingestion/ometa/mixins/suggestions_mixin.py +41 -0
  578. metadata/ingestion/ometa/mixins/table_mixin.py +18 -0
  579. metadata/ingestion/ometa/mixins/tests_mixin.py +24 -3
  580. metadata/ingestion/ometa/mixins/user_mixin.py +117 -22
  581. metadata/ingestion/ometa/ometa_api.py +18 -25
  582. metadata/ingestion/ometa/routes.py +7 -0
  583. metadata/ingestion/processor/query_parser.py +9 -2
  584. metadata/ingestion/sink/metadata_rest.py +25 -6
  585. metadata/ingestion/source/dashboard/dashboard_service.py +38 -61
  586. metadata/ingestion/source/dashboard/domodashboard/metadata.py +13 -14
  587. metadata/ingestion/source/dashboard/lightdash/metadata.py +2 -1
  588. metadata/ingestion/source/dashboard/looker/metadata.py +38 -25
  589. metadata/ingestion/source/dashboard/metabase/metadata.py +23 -6
  590. metadata/ingestion/source/dashboard/mode/client.py +10 -23
  591. metadata/ingestion/source/dashboard/mode/connection.py +6 -1
  592. metadata/ingestion/source/dashboard/mode/metadata.py +8 -4
  593. metadata/ingestion/source/dashboard/mstr/client.py +208 -0
  594. metadata/ingestion/source/dashboard/mstr/connection.py +53 -0
  595. metadata/ingestion/source/dashboard/mstr/metadata.py +182 -0
  596. metadata/ingestion/source/dashboard/mstr/models.py +144 -0
  597. metadata/ingestion/source/dashboard/powerbi/metadata.py +15 -10
  598. metadata/ingestion/source/dashboard/qliksense/metadata.py +11 -7
  599. metadata/ingestion/source/dashboard/quicksight/metadata.py +9 -5
  600. metadata/ingestion/source/dashboard/redash/metadata.py +23 -14
  601. metadata/ingestion/source/dashboard/superset/api_source.py +11 -8
  602. metadata/ingestion/source/dashboard/superset/client.py +16 -9
  603. metadata/ingestion/source/dashboard/superset/connection.py +3 -3
  604. metadata/ingestion/source/dashboard/superset/db_source.py +14 -10
  605. metadata/ingestion/source/dashboard/superset/mixin.py +22 -18
  606. metadata/ingestion/source/dashboard/superset/queries.py +1 -1
  607. metadata/ingestion/source/dashboard/tableau/client.py +91 -11
  608. metadata/ingestion/source/dashboard/tableau/connection.py +10 -1
  609. metadata/ingestion/source/dashboard/tableau/metadata.py +58 -70
  610. metadata/ingestion/source/dashboard/tableau/models.py +0 -8
  611. metadata/ingestion/source/dashboard/tableau/queries.py +5 -5
  612. metadata/ingestion/source/database/athena/client.py +80 -0
  613. metadata/ingestion/source/database/athena/connection.py +7 -0
  614. metadata/ingestion/source/database/athena/metadata.py +161 -19
  615. metadata/ingestion/source/database/azuresql/metadata.py +0 -1
  616. metadata/ingestion/source/database/bigquery/connection.py +24 -3
  617. metadata/ingestion/source/database/bigquery/metadata.py +39 -27
  618. metadata/ingestion/source/database/bigquery/queries.py +11 -4
  619. metadata/ingestion/source/database/bigquery/query_parser.py +13 -0
  620. metadata/ingestion/source/database/bigquery/usage.py +1 -3
  621. metadata/ingestion/source/database/column_helpers.py +0 -10
  622. metadata/ingestion/source/database/column_type_parser.py +11 -5
  623. metadata/ingestion/source/database/common_db_source.py +8 -2
  624. metadata/ingestion/source/database/common_nosql_source.py +8 -4
  625. metadata/ingestion/source/database/database_service.py +89 -7
  626. metadata/ingestion/source/database/databricks/client.py +5 -10
  627. metadata/ingestion/source/database/databricks/connection.py +16 -55
  628. metadata/ingestion/source/database/databricks/lineage.py +29 -26
  629. metadata/ingestion/source/database/databricks/metadata.py +448 -11
  630. metadata/ingestion/source/database/databricks/queries.py +28 -0
  631. metadata/ingestion/source/database/databricks/query_parser.py +5 -1
  632. metadata/ingestion/source/database/databricks/usage.py +2 -2
  633. metadata/ingestion/source/database/datalake/connection.py +31 -4
  634. metadata/ingestion/source/database/datalake/metadata.py +113 -27
  635. metadata/ingestion/source/database/dbt/dbt_config.py +5 -0
  636. metadata/ingestion/source/database/dbt/dbt_service.py +10 -14
  637. metadata/ingestion/source/database/dbt/dbt_utils.py +3 -1
  638. metadata/ingestion/source/database/dbt/metadata.py +11 -25
  639. metadata/ingestion/source/database/deltalake/metadata.py +6 -3
  640. metadata/ingestion/source/database/domodatabase/metadata.py +7 -6
  641. metadata/ingestion/source/database/doris/connection.py +72 -0
  642. metadata/ingestion/source/database/doris/metadata.py +315 -0
  643. metadata/ingestion/source/database/doris/queries.py +54 -0
  644. metadata/ingestion/source/database/doris/utils.py +64 -0
  645. metadata/ingestion/source/database/extended_sample_data.py +532 -0
  646. metadata/ingestion/source/database/glue/metadata.py +8 -5
  647. metadata/ingestion/source/database/hive/connection.py +0 -2
  648. metadata/ingestion/source/database/hive/utils.py +3 -0
  649. metadata/ingestion/source/database/iceberg/catalog/__init__.py +65 -0
  650. metadata/ingestion/source/database/iceberg/catalog/base.py +40 -0
  651. metadata/ingestion/source/database/iceberg/catalog/dynamodb.py +102 -0
  652. metadata/ingestion/source/database/iceberg/catalog/glue.py +88 -0
  653. metadata/ingestion/source/database/iceberg/catalog/hive.py +51 -0
  654. metadata/ingestion/source/database/iceberg/catalog/rest.py +84 -0
  655. metadata/ingestion/source/database/iceberg/connection.py +68 -0
  656. metadata/ingestion/source/database/iceberg/fs/__init__.py +52 -0
  657. metadata/ingestion/source/database/iceberg/fs/azure.py +44 -0
  658. metadata/ingestion/source/database/iceberg/fs/base.py +30 -0
  659. metadata/ingestion/source/database/iceberg/fs/s3.py +77 -0
  660. metadata/ingestion/source/database/iceberg/helper.py +124 -0
  661. metadata/ingestion/source/database/iceberg/metadata.py +311 -0
  662. metadata/ingestion/source/database/iceberg/models.py +66 -0
  663. metadata/ingestion/source/database/life_cycle_query_mixin.py +72 -3
  664. metadata/ingestion/source/database/mongodb/connection.py +1 -5
  665. metadata/ingestion/source/database/mssql/lineage.py +2 -0
  666. metadata/ingestion/source/database/mssql/metadata.py +108 -4
  667. metadata/ingestion/source/database/mssql/models.py +30 -0
  668. metadata/ingestion/source/database/mssql/queries.py +179 -1
  669. metadata/ingestion/source/database/mssql/utils.py +207 -4
  670. metadata/ingestion/source/database/mysql/metadata.py +0 -2
  671. metadata/ingestion/source/database/oracle/metadata.py +108 -2
  672. metadata/ingestion/source/database/oracle/models.py +30 -0
  673. metadata/ingestion/source/database/oracle/queries.py +98 -17
  674. metadata/ingestion/source/database/oracle/utils.py +0 -1
  675. metadata/ingestion/source/database/postgres/lineage.py +32 -14
  676. metadata/ingestion/source/database/postgres/metadata.py +15 -7
  677. metadata/ingestion/source/database/postgres/pgspider/lineage.py +0 -1
  678. metadata/ingestion/source/database/postgres/queries.py +4 -2
  679. metadata/ingestion/source/database/postgres/query_parser.py +4 -72
  680. metadata/ingestion/source/database/postgres/usage.py +41 -0
  681. metadata/ingestion/source/database/postgres/utils.py +34 -0
  682. metadata/ingestion/source/database/query_parser_source.py +8 -2
  683. metadata/ingestion/source/database/redshift/metadata.py +14 -4
  684. metadata/ingestion/source/database/redshift/queries.py +10 -4
  685. metadata/ingestion/source/database/redshift/query_parser.py +16 -0
  686. metadata/ingestion/source/database/redshift/usage.py +0 -2
  687. metadata/ingestion/source/database/salesforce/metadata.py +32 -3
  688. metadata/ingestion/source/database/sample_data.py +120 -6
  689. metadata/ingestion/source/database/sas/client.py +184 -0
  690. metadata/ingestion/source/database/sas/connection.py +47 -0
  691. metadata/ingestion/source/database/sas/extension_attr.py +103 -0
  692. metadata/ingestion/source/database/sas/metadata.py +914 -0
  693. metadata/ingestion/source/database/snowflake/metadata.py +8 -51
  694. metadata/ingestion/source/database/snowflake/models.py +6 -1
  695. metadata/ingestion/source/database/snowflake/queries.py +0 -4
  696. metadata/ingestion/source/database/snowflake/query_parser.py +5 -20
  697. metadata/ingestion/source/database/snowflake/utils.py +2 -6
  698. metadata/ingestion/source/database/stored_procedures_mixin.py +12 -8
  699. metadata/ingestion/source/database/unitycatalog/__init__.py +0 -0
  700. metadata/ingestion/source/database/unitycatalog/client.py +87 -0
  701. metadata/ingestion/source/database/unitycatalog/connection.py +97 -0
  702. metadata/ingestion/source/database/{databricks/unity_catalog → unitycatalog}/lineage.py +11 -11
  703. metadata/ingestion/source/database/{databricks/unity_catalog → unitycatalog}/metadata.py +42 -49
  704. metadata/ingestion/source/database/unitycatalog/query_parser.py +60 -0
  705. metadata/ingestion/source/database/unitycatalog/usage.py +31 -0
  706. metadata/ingestion/source/database/usage_source.py +3 -2
  707. metadata/ingestion/source/messaging/common_broker_source.py +6 -4
  708. metadata/ingestion/source/messaging/kinesis/metadata.py +6 -3
  709. metadata/ingestion/source/messaging/messaging_service.py +6 -2
  710. metadata/ingestion/source/metadata/amundsen/metadata.py +10 -7
  711. metadata/ingestion/source/metadata/atlas/metadata.py +5 -5
  712. metadata/ingestion/source/mlmodel/mlflow/metadata.py +5 -2
  713. metadata/ingestion/source/mlmodel/mlmodel_service.py +6 -2
  714. metadata/ingestion/source/pipeline/airflow/connection.py +0 -12
  715. metadata/ingestion/source/pipeline/airflow/lineage_parser.py +12 -6
  716. metadata/ingestion/source/pipeline/airflow/metadata.py +63 -34
  717. metadata/ingestion/source/pipeline/airflow/models.py +5 -4
  718. metadata/ingestion/source/pipeline/dagster/metadata.py +7 -4
  719. metadata/ingestion/source/pipeline/databrickspipeline/metadata.py +12 -9
  720. metadata/ingestion/source/pipeline/domopipeline/metadata.py +7 -4
  721. metadata/ingestion/source/pipeline/gluepipeline/metadata.py +5 -2
  722. metadata/ingestion/source/pipeline/pipeline_service.py +6 -2
  723. metadata/ingestion/source/pipeline/spline/metadata.py +0 -1
  724. metadata/ingestion/source/search/elasticsearch/connection.py +4 -1
  725. metadata/ingestion/source/search/elasticsearch/metadata.py +1 -2
  726. metadata/ingestion/source/search/search_service.py +6 -2
  727. metadata/ingestion/source/storage/s3/metadata.py +22 -17
  728. metadata/ingestion/source/storage/storage_service.py +53 -11
  729. metadata/ingestion/stage/table_usage.py +9 -2
  730. metadata/mixins/sqalchemy/sqa_mixin.py +14 -7
  731. metadata/parsers/protobuf_parser.py +29 -11
  732. metadata/pii/processor.py +9 -2
  733. metadata/profiler/api/models.py +19 -1
  734. metadata/profiler/interface/pandas/profiler_interface.py +59 -18
  735. metadata/profiler/interface/profiler_interface.py +13 -2
  736. metadata/profiler/interface/profiler_interface_factory.py +49 -14
  737. metadata/profiler/interface/sqlalchemy/bigquery/profiler_interface.py +3 -0
  738. metadata/profiler/interface/sqlalchemy/databricks/profiler_interface.py +26 -0
  739. metadata/profiler/interface/sqlalchemy/db2/__init__.py +0 -0
  740. metadata/profiler/interface/sqlalchemy/db2/profiler_interface.py +38 -0
  741. metadata/profiler/interface/sqlalchemy/mariadb/profiler_interface.py +85 -0
  742. metadata/profiler/interface/sqlalchemy/profiler_interface.py +77 -34
  743. metadata/profiler/interface/sqlalchemy/single_store/profiler_interface.py +2 -2
  744. metadata/profiler/interface/sqlalchemy/snowflake/profiler_interface.py +7 -0
  745. metadata/profiler/interface/sqlalchemy/trino/profiler_interface.py +2 -2
  746. metadata/profiler/interface/sqlalchemy/unity_catalog/profiler_interface.py +33 -0
  747. metadata/profiler/metrics/composed/null_ratio.py +1 -1
  748. metadata/profiler/metrics/hybrid/histogram.py +1 -0
  749. metadata/profiler/metrics/static/max.py +4 -1
  750. metadata/profiler/metrics/static/min.py +4 -1
  751. metadata/profiler/metrics/system/queries/snowflake.py +89 -17
  752. metadata/profiler/metrics/system/system.py +62 -20
  753. metadata/profiler/orm/functions/length.py +1 -0
  754. metadata/profiler/orm/functions/median.py +7 -0
  755. metadata/profiler/orm/functions/table_metric_computer.py +462 -0
  756. metadata/profiler/orm/registry.py +1 -0
  757. metadata/profiler/processor/core.py +116 -52
  758. metadata/profiler/processor/default.py +14 -3
  759. metadata/profiler/processor/handle_partition.py +2 -2
  760. metadata/profiler/processor/processor.py +9 -4
  761. metadata/profiler/processor/sampler/sqlalchemy/bigquery/sampler.py +31 -3
  762. metadata/profiler/processor/sampler/sqlalchemy/sampler.py +29 -6
  763. metadata/profiler/processor/sampler/sqlalchemy/trino/sampler.py +10 -4
  764. metadata/profiler/source/base/profiler_source.py +5 -2
  765. metadata/profiler/source/bigquery/type_mapper.py +0 -1
  766. metadata/profiler/source/mariadb/functions/median.py +20 -0
  767. metadata/profiler/source/mariadb/metrics/window/first_quartile.py +10 -0
  768. metadata/profiler/source/mariadb/metrics/window/median.py +10 -0
  769. metadata/profiler/source/mariadb/metrics/window/third_quartile.py +10 -0
  770. metadata/profiler/source/metadata.py +43 -10
  771. metadata/profiler/source/metadata_ext.py +16 -50
  772. metadata/py.typed +0 -0
  773. metadata/readers/dataframe/json.py +5 -4
  774. metadata/readers/file/api_reader.py +0 -1
  775. metadata/utils/constants.py +5 -0
  776. metadata/utils/datalake/datalake_utils.py +363 -129
  777. metadata/utils/entity_link.py +26 -6
  778. metadata/utils/fqn.py +20 -0
  779. metadata/utils/helpers.py +55 -9
  780. metadata/utils/importer.py +2 -3
  781. metadata/utils/life_cycle_utils.py +4 -4
  782. metadata/utils/logger.py +13 -2
  783. metadata/utils/partition.py +10 -5
  784. metadata/utils/secrets/client/loader.py +0 -1
  785. metadata/utils/secrets/noop_secrets_manager.py +4 -3
  786. metadata/utils/secrets/secrets_manager_factory.py +3 -4
  787. metadata/utils/{source_hash_utils.py → source_hash.py} +10 -1
  788. metadata/utils/sqlalchemy_utils.py +21 -0
  789. metadata/utils/storage_metadata_config.py +42 -1
  790. metadata/utils/tag_utils.py +5 -2
  791. metadata/workflow/application.py +154 -0
  792. metadata/workflow/application_output_handler.py +34 -0
  793. metadata/workflow/base.py +84 -153
  794. metadata/workflow/data_insight.py +8 -7
  795. metadata/workflow/data_quality.py +3 -2
  796. metadata/workflow/ingestion.py +203 -0
  797. metadata/workflow/metadata.py +2 -3
  798. metadata/workflow/output_handler.py +204 -0
  799. metadata/workflow/profiler.py +2 -2
  800. metadata/workflow/usage.py +3 -4
  801. metadata/workflow/workflow_output_handler.py +15 -255
  802. metadata/workflow/workflow_status_mixin.py +44 -52
  803. openmetadata_ingestion-1.3.0.0.dist-info/METADATA +749 -0
  804. {openmetadata_ingestion-1.2.5.2.dist-info → openmetadata_ingestion-1.3.0.0.dist-info}/RECORD +812 -725
  805. {openmetadata_ingestion-1.2.5.2.dist-info → openmetadata_ingestion-1.3.0.0.dist-info}/WHEEL +1 -1
  806. metadata/generated/schema/entity/applications/appConfig.py +0 -21
  807. metadata/generated/schema/entity/applications/configuration/dataInsightsApp.py +0 -17
  808. metadata/generated/schema/entity/applications/configuration/externalAppIngestionConfig.py +0 -38
  809. metadata/generated/schema/entity/services/connections/database/mongoDB/mongoDBValues.py +0 -44
  810. metadata/generated/schema/events/dataInsightAlertConfig.py +0 -17
  811. metadata/generated/schema/events/entitySpelFilters.py +0 -19
  812. metadata/ingestion/models/es_documents.py +0 -339
  813. metadata/ingestion/ometa/mixins/glossary_mixin.py +0 -501
  814. metadata/ingestion/ometa/provider_registry.py +0 -144
  815. metadata/ingestion/source/database/databricks/legacy/lineage.py +0 -51
  816. metadata/ingestion/source/database/databricks/legacy/metadata.py +0 -339
  817. metadata/ingestion/source/metadata/metadata_elasticsearch/metadata.py +0 -144
  818. metadata/profiler/orm/functions/table_metric_construct.py +0 -365
  819. openmetadata_ingestion-1.2.5.2.dist-info/METADATA +0 -426
  820. /metadata/ingestion/source/{database/databricks/legacy → dashboard/mstr}/__init__.py +0 -0
  821. /metadata/ingestion/source/database/{databricks/unity_catalog → doris}/__init__.py +0 -0
  822. /metadata/ingestion/source/{metadata/metadata_elasticsearch → database/sas}/__init__.py +0 -0
  823. /metadata/ingestion/source/database/{databricks → unitycatalog}/models.py +0 -0
  824. {openmetadata_ingestion-1.2.5.2.dist-info → openmetadata_ingestion-1.3.0.0.dist-info}/LICENSE +0 -0
  825. {openmetadata_ingestion-1.2.5.2.dist-info → openmetadata_ingestion-1.3.0.0.dist-info}/entry_points.txt +0 -0
  826. {openmetadata_ingestion-1.2.5.2.dist-info → openmetadata_ingestion-1.3.0.0.dist-info}/top_level.txt +0 -0
@@ -15,8 +15,9 @@ from different auths and different file systems.
15
15
  """
16
16
  import ast
17
17
  import json
18
+ import random
18
19
  import traceback
19
- from typing import Dict, List, Optional, cast
20
+ from typing import Dict, List, Optional, Union, cast
20
21
 
21
22
  from metadata.generated.schema.entity.data.table import Column, DataType
22
23
  from metadata.ingestion.source.database.column_helpers import truncate_column_name
@@ -29,18 +30,6 @@ from metadata.utils.logger import utils_logger
29
30
 
30
31
  logger = utils_logger()
31
32
 
32
- DATALAKE_DATA_TYPES = {
33
- **dict.fromkeys(["int64", "int", "int32"], DataType.INT),
34
- "dict": DataType.JSON,
35
- "list": DataType.ARRAY,
36
- **dict.fromkeys(["float64", "float32", "float"], DataType.FLOAT),
37
- "bool": DataType.BOOLEAN,
38
- **dict.fromkeys(
39
- ["datetime64", "timedelta[ns]", "datetime64[ns]"], DataType.DATETIME
40
- ),
41
- "str": DataType.STRING,
42
- }
43
-
44
33
 
45
34
  def fetch_dataframe(
46
35
  config_source,
@@ -100,135 +89,380 @@ def get_file_format_type(key_name, metadata_entry=None):
100
89
  return False
101
90
 
102
91
 
103
- def unique_json_structure(dicts: List[Dict]) -> Dict:
104
- """Given a sample of `n` json objects, return a json object that represents the unique structure of all `n` objects.
105
- Note that the type of the key will be that of the last object seen in the sample.
92
+ # pylint: disable=import-outside-toplevel
93
+ class DataFrameColumnParser:
94
+ """A column parser object. This serves as a Creator class for the appropriate column parser object
95
+ for datalake types. It allows us to implement different schema parsers for different datalake types without
96
+ implementing many conditional statements.
106
97
 
107
- Args:
108
- dicts: list of json objects
98
+ e.g. if we want to implement a column parser for parquet files, we can simply implement a
99
+ ParquetDataFrameColumnParser class and add it as part of the `create` method. The `create` method will then return
100
+ the appropriate parser based on the file type. The `ColumnParser` class has a single entry point `get_columns` which
101
+ will call the `get_columns` method of the appropriate parser.
109
102
  """
110
- result = {}
111
- for dict_ in dicts:
112
- for key, value in dict_.items():
113
- if isinstance(value, dict):
114
- nested_json = result.get(key, {})
115
- # `isinstance(nested_json, dict)` if for a key we first see a non dict value
116
- # but then see a dict value later, we will consider the key to be a dict.
117
- result[key] = unique_json_structure(
118
- [nested_json if isinstance(nested_json, dict) else {}, value]
119
- )
120
- else:
121
- result[key] = value
122
- return result
123
103
 
104
+ def __init__(self, parser):
105
+ """Initialize the column parser object"""
106
+ self.parser = parser
124
107
 
125
- def construct_json_column_children(json_column: Dict) -> List[Dict]:
126
- """Construct a dict representation of a Column object
108
+ @classmethod
109
+ def create(
110
+ cls,
111
+ data_frame: "DataFrame",
112
+ file_type: Optional[SupportedTypes] = None,
113
+ sample: bool = True,
114
+ shuffle: bool = False,
115
+ ):
116
+ """Instantiate a column parser object with the appropriate parser
127
117
 
128
- Args:
129
- json_column: unique json structure of a column
130
- """
131
- children = []
132
- for key, value in json_column.items():
133
- column = {}
134
- type_ = type(value).__name__.lower()
135
- column["dataTypeDisplay"] = DATALAKE_DATA_TYPES.get(
136
- type_, DataType.UNKNOWN
137
- ).value
138
- column["dataType"] = DATALAKE_DATA_TYPES.get(type_, DataType.UNKNOWN).value
139
- column["name"] = truncate_column_name(key)
140
- column["displayName"] = key
141
- if isinstance(value, dict):
142
- column["children"] = construct_json_column_children(value)
143
- children.append(column)
144
-
145
- return children
146
-
147
-
148
- def get_children(json_column) -> List[Dict]:
149
- """Get children of json column.
150
-
151
- Args:
152
- json_column (pandas.Series): column with 100 sample rows.
153
- Sample rows will be used to infer children.
154
- """
155
- from pandas import Series # pylint: disable=import-outside-toplevel
118
+ Args:
119
+ data_frame: the dataframe object
120
+ file_type: the file type of the dataframe. Will be used to determine the appropriate parser.
121
+ sample: whether to sample the dataframe or not if we have a list of dataframes.
122
+ If sample is False, we will concatenate the dataframes, which can cause OOM errors for large datasets.
123
+ (default: True)
124
+ shuffle: whether to shuffle the dataframe list or not if sample is True. (default: False)
125
+ """
126
+ data_frame = cls._get_data_frame(data_frame, sample, shuffle)
127
+ if file_type == SupportedTypes.PARQUET:
128
+ parser = ParquetDataFrameColumnParser(data_frame)
129
+ return cls(parser)
130
+ parser = GenericDataFrameColumnParser(data_frame)
131
+ return cls(parser)
156
132
 
157
- json_column = cast(Series, json_column)
158
- try:
159
- json_column = json_column.apply(json.loads)
160
- except TypeError:
161
- # if values are not strings, we will assume they are already json objects
162
- # based on the read class logic
163
- pass
164
- json_structure = unique_json_structure(json_column.values.tolist())
133
+ @staticmethod
134
+ def _get_data_frame(
135
+ data_frame: Union[List["DataFrame"], "DataFrame"], sample: bool, shuffle: bool
136
+ ):
137
+ """Return the dataframe to use for parsing"""
138
+ import pandas as pd
165
139
 
166
- return construct_json_column_children(json_structure)
140
+ if not isinstance(data_frame, list):
141
+ return data_frame
167
142
 
143
+ if sample:
144
+ if shuffle:
145
+ random.shuffle(data_frame)
146
+ return data_frame[0]
168
147
 
169
- def get_columns(data_frame: "DataFrame"):
170
- """
171
- method to process column details
148
+ return pd.concat(data_frame)
149
+
150
+ def get_columns(self):
151
+ """Get the columns from the parser"""
152
+ return self.parser.get_columns()
153
+
154
+
155
+ class GenericDataFrameColumnParser:
156
+ """Given a dataframe object, parse the columns and return a list of Column objects.
157
+
158
+ # TODO: We should consider making the function above part of the `GenericDataFrameColumnParser` class
159
+ # though we need to do a thorough overview of where they are used to ensure unnecessary coupling.
172
160
  """
173
- cols = []
174
- if hasattr(data_frame, "columns"):
175
- df_columns = list(data_frame.columns)
176
- for column in df_columns:
177
- # use String by default
178
- data_type = DataType.STRING
179
- try:
180
- if hasattr(data_frame[column], "dtypes"):
181
- data_type = fetch_col_types(data_frame, column_name=column)
182
-
183
- parsed_string = {
184
- "dataTypeDisplay": data_type.value,
185
- "dataType": data_type,
186
- "name": truncate_column_name(column),
187
- "displayName": column,
188
- }
189
- if data_type == DataType.ARRAY:
190
- parsed_string["arrayDataType"] = DataType.UNKNOWN
191
-
192
- if data_type == DataType.JSON:
193
- parsed_string["children"] = get_children(
194
- data_frame[column].dropna()[:100]
161
+
162
+ _data_formats = {
163
+ **dict.fromkeys(["int64", "int", "int32"], DataType.INT),
164
+ "dict": DataType.JSON,
165
+ "list": DataType.ARRAY,
166
+ **dict.fromkeys(["float64", "float32", "float"], DataType.FLOAT),
167
+ "bool": DataType.BOOLEAN,
168
+ **dict.fromkeys(
169
+ ["datetime64", "timedelta[ns]", "datetime64[ns]"], DataType.DATETIME
170
+ ),
171
+ "str": DataType.STRING,
172
+ }
173
+
174
+ def __init__(self, data_frame: "DataFrame"):
175
+ self.data_frame = data_frame
176
+
177
+ def get_columns(self):
178
+ """
179
+ method to process column details
180
+ """
181
+ return self._get_columns(self.data_frame)
182
+
183
+ @classmethod
184
+ def _get_columns(cls, data_frame: "DataFrame"):
185
+ """
186
+ method to process column details.
187
+
188
+ Note this was moved from a function to a class method to bring it closer to the
189
+ `GenericDataFrameColumnParser` class. Should be rethought as part of the TODO.
190
+ """
191
+ cols = []
192
+ if hasattr(data_frame, "columns"):
193
+ df_columns = list(data_frame.columns)
194
+ for column in df_columns:
195
+ # use String by default
196
+ data_type = DataType.STRING
197
+ try:
198
+ if hasattr(data_frame[column], "dtypes"):
199
+ data_type = cls.fetch_col_types(data_frame, column_name=column)
200
+
201
+ parsed_string = {
202
+ "dataTypeDisplay": data_type.value,
203
+ "dataType": data_type,
204
+ "name": truncate_column_name(column),
205
+ "displayName": column,
206
+ }
207
+ if data_type == DataType.ARRAY:
208
+ parsed_string["arrayDataType"] = DataType.UNKNOWN
209
+
210
+ if data_type == DataType.JSON:
211
+ parsed_string["children"] = cls.get_children(
212
+ data_frame[column].dropna()[:100]
213
+ )
214
+
215
+ cols.append(Column(**parsed_string))
216
+ except Exception as exc:
217
+ logger.debug(traceback.format_exc())
218
+ logger.warning(
219
+ f"Unexpected exception parsing column [{column}]: {exc}"
195
220
  )
221
+ return cols
196
222
 
197
- cols.append(Column(**parsed_string))
198
- except Exception as exc:
199
- logger.debug(traceback.format_exc())
200
- logger.warning(f"Unexpected exception parsing column [{column}]: {exc}")
201
- return cols
223
+ @classmethod
224
+ def fetch_col_types(cls, data_frame, column_name):
225
+ """fetch_col_types: Fetch Column Type for the column
202
226
 
227
+ Note this was moved from a function to a class method to bring it closer to the
228
+ `GenericDataFrameColumnParser` class. Should be rethought as part of the TODO.
203
229
 
204
- def fetch_col_types(data_frame, column_name):
205
- """fetch_col_types: Fetch Column Type for the column
230
+ Args:
231
+ data_frame (DataFrame)
232
+ column_name (string)
233
+ """
234
+ data_type = None
235
+ try:
236
+ if data_frame[column_name].dtypes.name == "object" and any(
237
+ data_frame[column_name].dropna().values
238
+ ):
239
+ try:
240
+ # Safely evaluate the input string
241
+ df_row_val = data_frame[column_name].dropna().values[0]
242
+ parsed_object = ast.literal_eval(str(df_row_val))
243
+ # Determine the data type of the parsed object
244
+ data_type = type(parsed_object).__name__.lower()
245
+ except (ValueError, SyntaxError):
246
+ # Handle any exceptions that may occur
247
+ data_type = "string"
206
248
 
207
- Args:
208
- data_frame (DataFrame)
209
- column_name (string)
210
- """
211
- data_type = None
212
- try:
213
- if data_frame[column_name].dtypes.name == "object" and any(
214
- data_frame[column_name].dropna().values
249
+ data_type = cls._data_formats.get(
250
+ data_type or data_frame[column_name].dtypes.name, DataType.STRING
251
+ )
252
+ except Exception as err:
253
+ logger.warning(
254
+ f"Failed to distinguish data type for column {column_name}, Falling back to {data_type}, exc: {err}"
255
+ )
256
+ logger.debug(traceback.format_exc())
257
+ return data_type
258
+
259
+ @classmethod
260
+ def unique_json_structure(cls, dicts: List[Dict]) -> Dict:
261
+ """Given a sample of `n` json objects, return a json object that represents the unique
262
+ structure of all `n` objects. Note that the type of the key will be that of
263
+ the last object seen in the sample.
264
+
265
+ Args:
266
+ dicts: list of json objects
267
+ """
268
+ result = {}
269
+ for dict_ in dicts:
270
+ for key, value in dict_.items():
271
+ if isinstance(value, dict):
272
+ nested_json = result.get(key, {})
273
+ # `isinstance(nested_json, dict)` if for a key we first see a non dict value
274
+ # but then see a dict value later, we will consider the key to be a dict.
275
+ result[key] = cls.unique_json_structure(
276
+ [nested_json if isinstance(nested_json, dict) else {}, value]
277
+ )
278
+ else:
279
+ result[key] = value
280
+ return result
281
+
282
+ @classmethod
283
+ def construct_json_column_children(cls, json_column: Dict) -> List[Dict]:
284
+ """Construct a dict representation of a Column object
285
+
286
+ Args:
287
+ json_column: unique json structure of a column
288
+ """
289
+ children = []
290
+ for key, value in json_column.items():
291
+ column = {}
292
+ type_ = type(value).__name__.lower()
293
+ column["dataTypeDisplay"] = cls._data_formats.get(
294
+ type_, DataType.UNKNOWN
295
+ ).value
296
+ column["dataType"] = cls._data_formats.get(type_, DataType.UNKNOWN).value
297
+ column["name"] = truncate_column_name(key)
298
+ column["displayName"] = key
299
+ if isinstance(value, dict):
300
+ column["children"] = cls.construct_json_column_children(value)
301
+ children.append(column)
302
+
303
+ return children
304
+
305
+ @classmethod
306
+ def get_children(cls, json_column) -> List[Dict]:
307
+ """Get children of json column.
308
+
309
+ Args:
310
+ json_column (pandas.Series): column with 100 sample rows.
311
+ Sample rows will be used to infer children.
312
+ """
313
+ from pandas import Series # pylint: disable=import-outside-toplevel
314
+
315
+ json_column = cast(Series, json_column)
316
+ try:
317
+ json_column = json_column.apply(json.loads)
318
+ except TypeError:
319
+ # if values are not strings, we will assume they are already json objects
320
+ # based on the read class logic
321
+ pass
322
+ json_structure = cls.unique_json_structure(json_column.values.tolist())
323
+
324
+ return cls.construct_json_column_children(json_structure)
325
+
326
+
327
+ # pylint: disable=import-outside-toplevel
328
+ class ParquetDataFrameColumnParser:
329
+ """Given a dataframe object generated from a parquet file, parse the columns and return a list of Column objects."""
330
+
331
+ def __init__(self, data_frame: "DataFrame"):
332
+ import pyarrow as pa
333
+
334
+ self._data_formats = {
335
+ **dict.fromkeys(
336
+ ["int8", "int16", "int32", "int64", "int", pa.DurationType],
337
+ DataType.INT,
338
+ ),
339
+ **dict.fromkeys(
340
+ ["uint8", "uint16", "uint32", "uint64", "uint"], DataType.UINT
341
+ ),
342
+ pa.StructType: DataType.STRUCT,
343
+ **dict.fromkeys([pa.ListType, pa.LargeListType], DataType.ARRAY),
344
+ **dict.fromkeys(
345
+ ["halffloat", "float32", "float64", "double", "float"], DataType.FLOAT
346
+ ),
347
+ "bool": DataType.BOOLEAN,
348
+ **dict.fromkeys(
349
+ [
350
+ "datetime64",
351
+ "timedelta[ns]",
352
+ "datetime64[ns]",
353
+ "time32[s]",
354
+ "time32[ms]",
355
+ "time64[ns]",
356
+ "time64[us]",
357
+ pa.TimestampType,
358
+ "date64",
359
+ ],
360
+ DataType.DATETIME,
361
+ ),
362
+ "date32[day]": DataType.DATE,
363
+ "string": DataType.STRING,
364
+ **dict.fromkeys(
365
+ ["binary", "large_binary", pa.FixedSizeBinaryType], DataType.BINARY
366
+ ),
367
+ **dict.fromkeys([pa.Decimal128Type, pa.Decimal256Type], DataType.DECIMAL),
368
+ }
369
+
370
+ self.data_frame = data_frame
371
+ self._arrow_table = pa.Table.from_pandas(self.data_frame)
372
+
373
+ def get_columns(self):
374
+ """
375
+ method to process column details for parquet files
376
+ """
377
+ import pyarrow as pa
378
+
379
+ schema: List[pa.Field] = self._arrow_table.schema
380
+ columns = []
381
+ for column in schema:
382
+ parsed_column = {
383
+ "dataTypeDisplay": str(column.type),
384
+ "dataType": self._get_pq_data_type(column),
385
+ "name": truncate_column_name(column.name),
386
+ "displayName": column.name,
387
+ }
388
+
389
+ if parsed_column["dataType"] == DataType.ARRAY:
390
+ try:
391
+ item_field = column.type.value_field
392
+ parsed_column["arrayDataType"] = self._get_pq_data_type(item_field)
393
+ except AttributeError:
394
+ # if the value field is not specified, we will set it to UNKNOWN
395
+ parsed_column["arrayDataType"] = DataType.UNKNOWN
396
+
397
+ if parsed_column["dataType"] == DataType.BINARY:
398
+ try:
399
+ data_length = type(column.type).byte_width
400
+ except AttributeError:
401
+ # if the byte width is not specified, we will set it to -1
402
+ # following pyarrow convention
403
+ data_length = -1
404
+ parsed_column["dataLength"] = data_length
405
+
406
+ if parsed_column["dataType"] == DataType.STRUCT:
407
+ parsed_column["children"] = self._get_children(column)
408
+ columns.append(Column(**parsed_column))
409
+
410
+ return columns
411
+
412
+ def _get_children(self, column):
413
+ """For struct types, get the children of the column
414
+
415
+ Args:
416
+ column (pa.Field): pa column
417
+ """
418
+ field_idx = column.type.num_fields
419
+
420
+ children = []
421
+ for idx in range(field_idx):
422
+ child = column.type.field(idx)
423
+ data_type = self._get_pq_data_type(child)
424
+
425
+ child_column = {
426
+ "dataTypeDisplay": str(child.type),
427
+ "dataType": data_type,
428
+ "name": truncate_column_name(child.name),
429
+ "displayName": child.name,
430
+ }
431
+ if data_type == DataType.STRUCT:
432
+ child_column["children"] = self._get_children(child)
433
+ children.append(child_column)
434
+
435
+ return children
436
+
437
+ def _get_pq_data_type(self, column):
438
+ """Given a column return the type of the column
439
+
440
+ Args:
441
+ column (pa.Field): pa column
442
+ """
443
+ import pyarrow as pa
444
+
445
+ if isinstance(
446
+ column.type,
447
+ (
448
+ pa.DurationType,
449
+ pa.StructType,
450
+ pa.ListType,
451
+ pa.LargeListType,
452
+ pa.TimestampType,
453
+ pa.Decimal128Type,
454
+ pa.Decimal256Type,
455
+ pa.FixedSizeBinaryType,
456
+ ),
215
457
  ):
216
- try:
217
- # Safely evaluate the input string
218
- df_row_val = data_frame[column_name].dropna().values[0]
219
- parsed_object = ast.literal_eval(str(df_row_val))
220
- # Determine the data type of the parsed object
221
- data_type = type(parsed_object).__name__.lower()
222
- except (ValueError, SyntaxError):
223
- # Handle any exceptions that may occur
224
- data_type = "string"
225
-
226
- data_type = DATALAKE_DATA_TYPES.get(
227
- data_type or data_frame[column_name].dtypes.name, DataType.STRING
228
- )
229
- except Exception as err:
230
- logger.warning(
231
- f"Failed to distinguish data type for column {column_name}, Falling back to {data_type}, exc: {err}"
232
- )
233
- logger.debug(traceback.format_exc())
234
- return data_type
458
+ # the above types can take many shapes
459
+ # (e.g. pa.ListType(pa.StructType([pa.column("a", pa.int64())])), etc.)
460
+ # so we'll use their type to determine the data type
461
+ data_type = self._data_formats.get(type(column.type), DataType.UNKNOWN)
462
+ else:
463
+ # for the other types we need to use their string representation
464
+ # to determine the data type as `type(column.type)` will return
465
+ # a generic `pyarrow.lib.DataType`
466
+ data_type = self._data_formats.get(str(column.type), DataType.UNKNOWN)
467
+
468
+ return data_type
@@ -13,17 +13,23 @@ Handle Entity Link building and splitting logic.
13
13
  Filter information has been taken from the
14
14
  ES indexes definitions
15
15
  """
16
- from typing import List, Optional
16
+ from typing import Any, List, Optional, TypeVar
17
17
 
18
18
  from antlr4.CommonTokenStream import CommonTokenStream
19
19
  from antlr4.error.ErrorStrategy import BailErrorStrategy
20
20
  from antlr4.InputStream import InputStream
21
21
  from antlr4.tree.Tree import ParseTreeWalker
22
+ from pydantic import BaseModel
22
23
  from requests.compat import unquote_plus
23
24
 
24
25
  from metadata.antlr.split_listener import EntityLinkSplitListener
25
26
  from metadata.generated.antlr.EntityLinkLexer import EntityLinkLexer
26
27
  from metadata.generated.antlr.EntityLinkParser import EntityLinkParser
28
+ from metadata.generated.schema.entity.data.table import Table
29
+ from metadata.utils.constants import ENTITY_REFERENCE_TYPE_MAP
30
+ from metadata.utils.dispatch import class_register
31
+
32
+ T = TypeVar("T", bound=BaseModel)
27
33
 
28
34
 
29
35
  class EntityLinkBuildingException(Exception):
@@ -86,16 +92,30 @@ def get_table_or_column_fqn(entity_link: str) -> str:
86
92
  )
87
93
 
88
94
 
89
- def get_entity_link(table_fqn: str, column_name: Optional[str]) -> str:
95
+ get_entity_link_registry = class_register()
96
+
97
+
98
+ def get_entity_link(entity_type: Any, fqn: str, **kwargs) -> str:
90
99
  """From table fqn and column name get the entity_link
91
100
 
92
101
  Args:
93
- table_fqn: table fqn
94
- column_name: Optional param to generate entity link with column name
102
+ entity_type: Entity being built
103
+ fqn: Entity fqn
95
104
  """
96
105
 
106
+ func = get_entity_link_registry.registry.get(entity_type.__name__)
107
+ if not func:
108
+ return f"<#E::{ENTITY_REFERENCE_TYPE_MAP[entity_type.__name__]}::{fqn}>"
109
+
110
+ return func(fqn, **kwargs)
111
+
112
+
113
+ @get_entity_link_registry.add(Table)
114
+ def _(fqn: str, column_name: Optional[str] = None) -> str:
115
+ """From table fqn and column name get the entity_link"""
116
+
97
117
  if column_name:
98
- entity_link = f"<#E::table::" f"{table_fqn}" f"::columns::" f"{column_name}>"
118
+ entity_link = f"<#E::{ENTITY_REFERENCE_TYPE_MAP[Table.__name__]}::{fqn}::columns::{column_name}>"
99
119
  else:
100
- entity_link = f"<#E::table::" f"{table_fqn}>"
120
+ entity_link = f"<#E::{ENTITY_REFERENCE_TYPE_MAP[Table.__name__]}::{fqn}>"
101
121
  return entity_link
metadata/utils/fqn.py CHANGED
@@ -28,6 +28,7 @@ from metadata.generated.antlr.FqnLexer import FqnLexer
28
28
  from metadata.generated.antlr.FqnParser import FqnParser
29
29
  from metadata.generated.schema.entity.classification.tag import Tag
30
30
  from metadata.generated.schema.entity.data.chart import Chart
31
+ from metadata.generated.schema.entity.data.container import Container
31
32
  from metadata.generated.schema.entity.data.dashboard import Dashboard
32
33
  from metadata.generated.schema.entity.data.dashboardDataModel import DashboardDataModel
33
34
  from metadata.generated.schema.entity.data.database import Database
@@ -284,6 +285,25 @@ def _(
284
285
  return _build(service_name, topic_name)
285
286
 
286
287
 
288
+ @fqn_build_registry.add(Container)
289
+ def _(
290
+ _: Optional[OpenMetadata], # ES Index not necessary for Container FQN building
291
+ *,
292
+ service_name: str,
293
+ parent_container: str,
294
+ container_name: str,
295
+ ) -> str:
296
+ if not service_name or not container_name:
297
+ raise FQNBuildingException(
298
+ f"Args should be informed, but got service=`{service_name}`, container=`{container_name}``"
299
+ )
300
+ return (
301
+ _build(parent_container, container_name, quote=False)
302
+ if parent_container
303
+ else (_build(service_name, container_name))
304
+ )
305
+
306
+
287
307
  @fqn_build_registry.add(SearchIndex)
288
308
  def _(
289
309
  _: Optional[OpenMetadata], # ES Index not necessary for Search Index FQN building