openmetadata-ingestion 1.8.7.1__py3-none-any.whl → 1.8.9.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of openmetadata-ingestion might be problematic. Click here for more details.

Files changed (745)
  1. airflow_provider_openmetadata/lineage/backend.py +7 -9
  2. metadata/generated/schema/analytics/__init__.py +1 -1
  3. metadata/generated/schema/analytics/basic.py +1 -1
  4. metadata/generated/schema/analytics/reportData.py +1 -1
  5. metadata/generated/schema/analytics/reportDataType/__init__.py +1 -1
  6. metadata/generated/schema/analytics/reportDataType/aggregatedCostAnalysisReportData.py +1 -1
  7. metadata/generated/schema/analytics/reportDataType/entityReportData.py +1 -1
  8. metadata/generated/schema/analytics/reportDataType/rawCostAnalysisReportData.py +1 -1
  9. metadata/generated/schema/analytics/reportDataType/webAnalyticEntityViewReportData.py +1 -1
  10. metadata/generated/schema/analytics/reportDataType/webAnalyticUserActivityReportData.py +1 -1
  11. metadata/generated/schema/analytics/webAnalyticEvent.py +1 -1
  12. metadata/generated/schema/analytics/webAnalyticEventData.py +1 -1
  13. metadata/generated/schema/analytics/webAnalyticEventType/__init__.py +1 -1
  14. metadata/generated/schema/analytics/webAnalyticEventType/customEvent.py +1 -1
  15. metadata/generated/schema/analytics/webAnalyticEventType/pageViewEvent.py +1 -1
  16. metadata/generated/schema/api/__init__.py +1 -1
  17. metadata/generated/schema/api/addGlossaryToAssetsRequest.py +1 -1
  18. metadata/generated/schema/api/addTagToAssetsRequest.py +1 -1
  19. metadata/generated/schema/api/analytics/__init__.py +1 -1
  20. metadata/generated/schema/api/analytics/createWebAnalyticEvent.py +1 -1
  21. metadata/generated/schema/api/automations/__init__.py +1 -1
  22. metadata/generated/schema/api/automations/createWorkflow.py +1 -1
  23. metadata/generated/schema/api/bulkAssets.py +1 -1
  24. metadata/generated/schema/api/classification/__init__.py +1 -1
  25. metadata/generated/schema/api/classification/createClassification.py +1 -1
  26. metadata/generated/schema/api/classification/createTag.py +1 -1
  27. metadata/generated/schema/api/classification/loadTags.py +1 -1
  28. metadata/generated/schema/api/createBot.py +1 -1
  29. metadata/generated/schema/api/createEventPublisherJob.py +1 -1
  30. metadata/generated/schema/api/createType.py +1 -1
  31. metadata/generated/schema/api/data/__init__.py +1 -1
  32. metadata/generated/schema/api/data/createAPICollection.py +1 -1
  33. metadata/generated/schema/api/data/createAPIEndpoint.py +1 -1
  34. metadata/generated/schema/api/data/createChart.py +1 -1
  35. metadata/generated/schema/api/data/createContainer.py +1 -1
  36. metadata/generated/schema/api/data/createCustomProperty.py +1 -1
  37. metadata/generated/schema/api/data/createDashboard.py +1 -1
  38. metadata/generated/schema/api/data/createDashboardDataModel.py +1 -1
  39. metadata/generated/schema/api/data/createDataContract.py +1 -1
  40. metadata/generated/schema/api/data/createDatabase.py +1 -1
  41. metadata/generated/schema/api/data/createDatabaseSchema.py +1 -1
  42. metadata/generated/schema/api/data/createGlossary.py +1 -1
  43. metadata/generated/schema/api/data/createGlossaryTerm.py +1 -1
  44. metadata/generated/schema/api/data/createMetric.py +1 -1
  45. metadata/generated/schema/api/data/createMlModel.py +1 -1
  46. metadata/generated/schema/api/data/createPipeline.py +1 -1
  47. metadata/generated/schema/api/data/createQuery.py +1 -1
  48. metadata/generated/schema/api/data/createQueryCostRecord.py +1 -1
  49. metadata/generated/schema/api/data/createSearchIndex.py +1 -1
  50. metadata/generated/schema/api/data/createStoredProcedure.py +1 -1
  51. metadata/generated/schema/api/data/createTable.py +1 -1
  52. metadata/generated/schema/api/data/createTableProfile.py +1 -1
  53. metadata/generated/schema/api/data/createTopic.py +1 -1
  54. metadata/generated/schema/api/data/loadGlossary.py +1 -1
  55. metadata/generated/schema/api/data/restoreEntity.py +1 -1
  56. metadata/generated/schema/api/data/updateColumn.py +1 -1
  57. metadata/generated/schema/api/dataInsight/__init__.py +1 -1
  58. metadata/generated/schema/api/dataInsight/createDataInsightChart.py +1 -1
  59. metadata/generated/schema/api/dataInsight/custom/__init__.py +1 -1
  60. metadata/generated/schema/api/dataInsight/custom/createDataInsightCustomChart.py +1 -1
  61. metadata/generated/schema/api/dataInsight/kpi/__init__.py +1 -1
  62. metadata/generated/schema/api/dataInsight/kpi/createKpiRequest.py +1 -1
  63. metadata/generated/schema/api/docStore/__init__.py +1 -1
  64. metadata/generated/schema/api/docStore/createDocument.py +1 -1
  65. metadata/generated/schema/api/domains/__init__.py +1 -1
  66. metadata/generated/schema/api/domains/createDataProduct.py +1 -1
  67. metadata/generated/schema/api/domains/createDomain.py +1 -1
  68. metadata/generated/schema/api/feed/__init__.py +1 -1
  69. metadata/generated/schema/api/feed/closeTask.py +1 -1
  70. metadata/generated/schema/api/feed/createPost.py +1 -1
  71. metadata/generated/schema/api/feed/createSuggestion.py +1 -1
  72. metadata/generated/schema/api/feed/createThread.py +1 -1
  73. metadata/generated/schema/api/feed/resolveTask.py +1 -1
  74. metadata/generated/schema/api/feed/threadCount.py +1 -1
  75. metadata/generated/schema/api/governance/__init__.py +1 -1
  76. metadata/generated/schema/api/governance/createWorkflowDefinition.py +1 -1
  77. metadata/generated/schema/api/governance/createWorkflowInstanceState.py +1 -1
  78. metadata/generated/schema/api/lineage/__init__.py +1 -1
  79. metadata/generated/schema/api/lineage/addLineage.py +1 -1
  80. metadata/generated/schema/api/lineage/esLineageData.py +1 -1
  81. metadata/generated/schema/api/lineage/lineageDirection.py +1 -1
  82. metadata/generated/schema/api/lineage/nodeInformation.py +1 -1
  83. metadata/generated/schema/api/lineage/searchLineageRequest.py +1 -1
  84. metadata/generated/schema/api/lineage/searchLineageResult.py +1 -1
  85. metadata/generated/schema/api/mcp/__init__.py +1 -1
  86. metadata/generated/schema/api/mcp/mcpToolDefinition.py +1 -1
  87. metadata/generated/schema/api/openMetadataServerVersion.py +1 -1
  88. metadata/generated/schema/api/policies/__init__.py +1 -1
  89. metadata/generated/schema/api/policies/createPolicy.py +1 -1
  90. metadata/generated/schema/api/scim/__init__.py +1 -1
  91. metadata/generated/schema/api/scim/scimGroup.py +1 -1
  92. metadata/generated/schema/api/scim/scimPatchOp.py +1 -1
  93. metadata/generated/schema/api/scim/scimUser.py +1 -1
  94. metadata/generated/schema/api/search/__init__.py +1 -1
  95. metadata/generated/schema/api/search/previewSearchRequest.py +1 -1
  96. metadata/generated/schema/api/services/__init__.py +1 -1
  97. metadata/generated/schema/api/services/createApiService.py +1 -1
  98. metadata/generated/schema/api/services/createDashboardService.py +1 -1
  99. metadata/generated/schema/api/services/createDatabaseService.py +1 -1
  100. metadata/generated/schema/api/services/createMessagingService.py +1 -1
  101. metadata/generated/schema/api/services/createMetadataService.py +1 -1
  102. metadata/generated/schema/api/services/createMlModelService.py +1 -1
  103. metadata/generated/schema/api/services/createPipelineService.py +1 -1
  104. metadata/generated/schema/api/services/createSearchService.py +1 -1
  105. metadata/generated/schema/api/services/createSecurityService.py +67 -0
  106. metadata/generated/schema/api/services/createStorageService.py +1 -1
  107. metadata/generated/schema/api/services/ingestionPipelines/__init__.py +1 -1
  108. metadata/generated/schema/api/services/ingestionPipelines/createIngestionPipeline.py +1 -1
  109. metadata/generated/schema/api/setOwner.py +1 -1
  110. metadata/generated/schema/api/teams/__init__.py +1 -1
  111. metadata/generated/schema/api/teams/createPersona.py +1 -1
  112. metadata/generated/schema/api/teams/createRole.py +1 -1
  113. metadata/generated/schema/api/teams/createTeam.py +1 -1
  114. metadata/generated/schema/api/teams/createUser.py +1 -1
  115. metadata/generated/schema/api/tests/__init__.py +1 -1
  116. metadata/generated/schema/api/tests/createCustomMetric.py +1 -1
  117. metadata/generated/schema/api/tests/createLogicalTestCases.py +1 -1
  118. metadata/generated/schema/api/tests/createTestCase.py +1 -1
  119. metadata/generated/schema/api/tests/createTestCaseResolutionStatus.py +1 -1
  120. metadata/generated/schema/api/tests/createTestCaseResult.py +1 -1
  121. metadata/generated/schema/api/tests/createTestDefinition.py +1 -1
  122. metadata/generated/schema/api/tests/createTestSuite.py +1 -1
  123. metadata/generated/schema/api/validateGlossaryTagsRequest.py +1 -1
  124. metadata/generated/schema/api/voteRequest.py +1 -1
  125. metadata/generated/schema/auth/__init__.py +1 -1
  126. metadata/generated/schema/auth/basicAuth.py +1 -1
  127. metadata/generated/schema/auth/basicLoginRequest.py +1 -1
  128. metadata/generated/schema/auth/changePasswordRequest.py +1 -1
  129. metadata/generated/schema/auth/createPersonalToken.py +1 -1
  130. metadata/generated/schema/auth/emailRequest.py +1 -1
  131. metadata/generated/schema/auth/emailVerificationToken.py +1 -1
  132. metadata/generated/schema/auth/generateToken.py +1 -1
  133. metadata/generated/schema/auth/jwtAuth.py +1 -1
  134. metadata/generated/schema/auth/loginRequest.py +1 -1
  135. metadata/generated/schema/auth/logoutRequest.py +1 -1
  136. metadata/generated/schema/auth/passwordResetRequest.py +1 -1
  137. metadata/generated/schema/auth/passwordResetToken.py +1 -1
  138. metadata/generated/schema/auth/personalAccessToken.py +1 -1
  139. metadata/generated/schema/auth/refreshToken.py +1 -1
  140. metadata/generated/schema/auth/registrationRequest.py +1 -1
  141. metadata/generated/schema/auth/revokePersonalToken.py +1 -1
  142. metadata/generated/schema/auth/revokeToken.py +1 -1
  143. metadata/generated/schema/auth/serviceTokenEnum.py +1 -1
  144. metadata/generated/schema/auth/ssoAuth.py +1 -1
  145. metadata/generated/schema/auth/supportToken.py +1 -1
  146. metadata/generated/schema/auth/tokenRefreshRequest.py +1 -1
  147. metadata/generated/schema/configuration/__init__.py +1 -1
  148. metadata/generated/schema/configuration/appsPrivateConfiguration.py +1 -1
  149. metadata/generated/schema/configuration/assetCertificationSettings.py +1 -1
  150. metadata/generated/schema/configuration/authConfig.py +1 -1
  151. metadata/generated/schema/configuration/authenticationConfiguration.py +1 -1
  152. metadata/generated/schema/configuration/authorizerConfiguration.py +1 -1
  153. metadata/generated/schema/configuration/changeEventConfiguration.py +1 -1
  154. metadata/generated/schema/configuration/dataQualityConfiguration.py +1 -1
  155. metadata/generated/schema/configuration/elasticSearchConfiguration.py +1 -1
  156. metadata/generated/schema/configuration/eventHandlerConfiguration.py +1 -1
  157. metadata/generated/schema/configuration/fernetConfiguration.py +1 -1
  158. metadata/generated/schema/configuration/jwtTokenConfiguration.py +1 -1
  159. metadata/generated/schema/configuration/kafkaEventConfiguration.py +1 -1
  160. metadata/generated/schema/configuration/ldapConfiguration.py +1 -1
  161. metadata/generated/schema/configuration/ldapTrustStoreConfig/__init__.py +1 -1
  162. metadata/generated/schema/configuration/ldapTrustStoreConfig/customTrustManagerConfig.py +1 -1
  163. metadata/generated/schema/configuration/ldapTrustStoreConfig/hostNameConfig.py +1 -1
  164. metadata/generated/schema/configuration/ldapTrustStoreConfig/jvmDefaultConfig.py +1 -1
  165. metadata/generated/schema/configuration/ldapTrustStoreConfig/trustAllConfig.py +1 -1
  166. metadata/generated/schema/configuration/ldapTrustStoreConfig/truststoreConfig.py +1 -1
  167. metadata/generated/schema/configuration/limitsConfiguration.py +1 -1
  168. metadata/generated/schema/configuration/lineageSettings.py +1 -1
  169. metadata/generated/schema/configuration/loginConfiguration.py +1 -1
  170. metadata/generated/schema/configuration/logoConfiguration.py +1 -1
  171. metadata/generated/schema/configuration/openMetadataBaseUrlConfiguration.py +1 -1
  172. metadata/generated/schema/configuration/opertionalConfiguration.py +1 -1
  173. metadata/generated/schema/configuration/opsConfig.py +1 -1
  174. metadata/generated/schema/configuration/pipelineServiceClientConfiguration.py +1 -1
  175. metadata/generated/schema/configuration/profilerConfiguration.py +1 -1
  176. metadata/generated/schema/configuration/searchSettings.py +1 -1
  177. metadata/generated/schema/configuration/slackAppConfiguration.py +1 -1
  178. metadata/generated/schema/configuration/taskNotificationConfiguration.py +1 -1
  179. metadata/generated/schema/configuration/testResultNotificationConfiguration.py +1 -1
  180. metadata/generated/schema/configuration/themeConfiguration.py +1 -1
  181. metadata/generated/schema/configuration/uiThemePreference.py +1 -1
  182. metadata/generated/schema/configuration/workflowSettings.py +1 -1
  183. metadata/generated/schema/dataInsight/__init__.py +1 -1
  184. metadata/generated/schema/dataInsight/custom/__init__.py +1 -1
  185. metadata/generated/schema/dataInsight/custom/dataInsightCustomChart.py +1 -1
  186. metadata/generated/schema/dataInsight/custom/dataInsightCustomChartResult.py +1 -1
  187. metadata/generated/schema/dataInsight/custom/dataInsightCustomChartResultList.py +1 -1
  188. metadata/generated/schema/dataInsight/custom/formulaHolder.py +1 -1
  189. metadata/generated/schema/dataInsight/custom/lineChart.py +1 -1
  190. metadata/generated/schema/dataInsight/custom/summaryCard.py +1 -1
  191. metadata/generated/schema/dataInsight/dataInsightChart.py +1 -1
  192. metadata/generated/schema/dataInsight/dataInsightChartResult.py +1 -1
  193. metadata/generated/schema/dataInsight/kpi/__init__.py +1 -1
  194. metadata/generated/schema/dataInsight/kpi/basic.py +1 -1
  195. metadata/generated/schema/dataInsight/kpi/kpi.py +1 -1
  196. metadata/generated/schema/dataInsight/type/__init__.py +1 -1
  197. metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsCount.py +1 -1
  198. metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsSize.py +1 -1
  199. metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsCount.py +1 -1
  200. metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsSize.py +1 -1
  201. metadata/generated/schema/dataInsight/type/dailyActiveUsers.py +1 -1
  202. metadata/generated/schema/dataInsight/type/mostActiveUsers.py +1 -1
  203. metadata/generated/schema/dataInsight/type/mostViewedEntities.py +1 -1
  204. metadata/generated/schema/dataInsight/type/pageViewsByEntities.py +1 -1
  205. metadata/generated/schema/dataInsight/type/unusedAssets.py +1 -1
  206. metadata/generated/schema/email/__init__.py +1 -1
  207. metadata/generated/schema/email/emailRequest.py +1 -1
  208. metadata/generated/schema/email/emailTemplate.py +1 -1
  209. metadata/generated/schema/email/emailTemplatePlaceholder.py +1 -1
  210. metadata/generated/schema/email/smtpSettings.py +1 -1
  211. metadata/generated/schema/email/templateValidationReponse.py +1 -1
  212. metadata/generated/schema/entity/__init__.py +1 -1
  213. metadata/generated/schema/entity/applications/__init__.py +1 -1
  214. metadata/generated/schema/entity/applications/app.py +1 -1
  215. metadata/generated/schema/entity/applications/appExtension.py +1 -1
  216. metadata/generated/schema/entity/applications/appRunRecord.py +1 -1
  217. metadata/generated/schema/entity/applications/configuration/__init__.py +1 -1
  218. metadata/generated/schema/entity/applications/configuration/applicationConfig.py +1 -1
  219. metadata/generated/schema/entity/applications/configuration/external/__init__.py +1 -1
  220. metadata/generated/schema/entity/applications/configuration/external/automator/__init__.py +1 -1
  221. metadata/generated/schema/entity/applications/configuration/external/automator/addCustomProperties.py +1 -1
  222. metadata/generated/schema/entity/applications/configuration/external/automator/addDataProductAction.py +1 -1
  223. metadata/generated/schema/entity/applications/configuration/external/automator/addDescriptionAction.py +1 -1
  224. metadata/generated/schema/entity/applications/configuration/external/automator/addDomainAction.py +1 -1
  225. metadata/generated/schema/entity/applications/configuration/external/automator/addOwnerAction.py +1 -1
  226. metadata/generated/schema/entity/applications/configuration/external/automator/addTagsAction.py +1 -1
  227. metadata/generated/schema/entity/applications/configuration/external/automator/addTestCaseAction.py +1 -1
  228. metadata/generated/schema/entity/applications/configuration/external/automator/addTierAction.py +1 -1
  229. metadata/generated/schema/entity/applications/configuration/external/automator/lineagePropagationAction.py +20 -2
  230. metadata/generated/schema/entity/applications/configuration/external/automator/mlTaggingAction.py +1 -1
  231. metadata/generated/schema/entity/applications/configuration/external/automator/propagationStopConfig.py +44 -0
  232. metadata/generated/schema/entity/applications/configuration/external/automator/removeCustomPropertiesAction.py +1 -1
  233. metadata/generated/schema/entity/applications/configuration/external/automator/removeDataProductAction.py +1 -1
  234. metadata/generated/schema/entity/applications/configuration/external/automator/removeDescriptionAction.py +1 -1
  235. metadata/generated/schema/entity/applications/configuration/external/automator/removeDomainAction.py +1 -1
  236. metadata/generated/schema/entity/applications/configuration/external/automator/removeOwnerAction.py +1 -1
  237. metadata/generated/schema/entity/applications/configuration/external/automator/removeTagsAction.py +1 -1
  238. metadata/generated/schema/entity/applications/configuration/external/automator/removeTestCaseAction.py +1 -1
  239. metadata/generated/schema/entity/applications/configuration/external/automator/removeTierAction.py +1 -1
  240. metadata/generated/schema/entity/applications/configuration/external/automatorAppConfig.py +1 -1
  241. metadata/generated/schema/entity/applications/configuration/external/collateAIAppConfig.py +1 -1
  242. metadata/generated/schema/entity/applications/configuration/external/slackAppTokenConfiguration.py +1 -1
  243. metadata/generated/schema/entity/applications/configuration/internal/__init__.py +1 -1
  244. metadata/generated/schema/entity/applications/configuration/internal/autoPilotAppConfig.py +1 -1
  245. metadata/generated/schema/entity/applications/configuration/internal/collateAIQualityAgentAppConfig.py +1 -1
  246. metadata/generated/schema/entity/applications/configuration/internal/collateAITierAgentAppConfig.py +1 -1
  247. metadata/generated/schema/entity/applications/configuration/internal/dataInsightsAppConfig.py +1 -1
  248. metadata/generated/schema/entity/applications/configuration/internal/dataInsightsReportAppConfig.py +1 -1
  249. metadata/generated/schema/entity/applications/configuration/internal/dataRetentionConfiguration.py +1 -1
  250. metadata/generated/schema/entity/applications/configuration/internal/helloPipelinesConfiguration.py +1 -1
  251. metadata/generated/schema/entity/applications/configuration/internal/searchIndexingAppConfig.py +1 -1
  252. metadata/generated/schema/entity/applications/configuration/private/__init__.py +1 -1
  253. metadata/generated/schema/entity/applications/configuration/private/external/__init__.py +1 -1
  254. metadata/generated/schema/entity/applications/configuration/private/external/collateAIAppPrivateConfig.py +1 -1
  255. metadata/generated/schema/entity/applications/configuration/private/internal/__init__.py +1 -1
  256. metadata/generated/schema/entity/applications/configuration/private/internal/collateAITierAgentAppPrivateConfig.py +1 -1
  257. metadata/generated/schema/entity/applications/configuration/private/limits.py +1 -1
  258. metadata/generated/schema/entity/applications/createAppRequest.py +1 -1
  259. metadata/generated/schema/entity/applications/jobStatus.py +1 -1
  260. metadata/generated/schema/entity/applications/liveExecutionContext.py +1 -1
  261. metadata/generated/schema/entity/applications/marketplace/__init__.py +1 -1
  262. metadata/generated/schema/entity/applications/marketplace/appMarketPlaceDefinition.py +1 -1
  263. metadata/generated/schema/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.py +1 -1
  264. metadata/generated/schema/entity/applications/scheduledExecutionContext.py +1 -1
  265. metadata/generated/schema/entity/automations/__init__.py +1 -1
  266. metadata/generated/schema/entity/automations/testServiceConnection.py +3 -1
  267. metadata/generated/schema/entity/automations/workflow.py +1 -1
  268. metadata/generated/schema/entity/bot.py +1 -1
  269. metadata/generated/schema/entity/classification/__init__.py +1 -1
  270. metadata/generated/schema/entity/classification/classification.py +1 -1
  271. metadata/generated/schema/entity/classification/tag.py +1 -1
  272. metadata/generated/schema/entity/data/__init__.py +1 -1
  273. metadata/generated/schema/entity/data/apiCollection.py +1 -1
  274. metadata/generated/schema/entity/data/apiEndpoint.py +1 -1
  275. metadata/generated/schema/entity/data/chart.py +1 -1
  276. metadata/generated/schema/entity/data/container.py +1 -1
  277. metadata/generated/schema/entity/data/dashboard.py +1 -1
  278. metadata/generated/schema/entity/data/dashboardDataModel.py +1 -1
  279. metadata/generated/schema/entity/data/dataContract.py +1 -1
  280. metadata/generated/schema/entity/data/database.py +1 -1
  281. metadata/generated/schema/entity/data/databaseSchema.py +1 -1
  282. metadata/generated/schema/entity/data/glossary.py +1 -1
  283. metadata/generated/schema/entity/data/glossaryTerm.py +1 -1
  284. metadata/generated/schema/entity/data/metric.py +1 -1
  285. metadata/generated/schema/entity/data/mlmodel.py +1 -1
  286. metadata/generated/schema/entity/data/pipeline.py +1 -1
  287. metadata/generated/schema/entity/data/query.py +1 -1
  288. metadata/generated/schema/entity/data/queryCostRecord.py +1 -1
  289. metadata/generated/schema/entity/data/queryCostSearchResult.py +1 -1
  290. metadata/generated/schema/entity/data/report.py +1 -1
  291. metadata/generated/schema/entity/data/searchIndex.py +1 -1
  292. metadata/generated/schema/entity/data/storedProcedure.py +1 -1
  293. metadata/generated/schema/entity/data/table.py +5 -1
  294. metadata/generated/schema/entity/data/topic.py +1 -1
  295. metadata/generated/schema/entity/docStore/__init__.py +1 -1
  296. metadata/generated/schema/entity/docStore/document.py +1 -1
  297. metadata/generated/schema/entity/domains/__init__.py +1 -1
  298. metadata/generated/schema/entity/domains/dataProduct.py +1 -1
  299. metadata/generated/schema/entity/domains/domain.py +1 -1
  300. metadata/generated/schema/entity/events/__init__.py +1 -1
  301. metadata/generated/schema/entity/events/webhook.py +1 -1
  302. metadata/generated/schema/entity/feed/__init__.py +1 -1
  303. metadata/generated/schema/entity/feed/assets.py +1 -1
  304. metadata/generated/schema/entity/feed/customProperty.py +1 -1
  305. metadata/generated/schema/entity/feed/description.py +1 -1
  306. metadata/generated/schema/entity/feed/domain.py +1 -1
  307. metadata/generated/schema/entity/feed/entityInfo.py +1 -1
  308. metadata/generated/schema/entity/feed/owner.py +1 -1
  309. metadata/generated/schema/entity/feed/suggestion.py +1 -1
  310. metadata/generated/schema/entity/feed/tag.py +1 -1
  311. metadata/generated/schema/entity/feed/testCaseResult.py +1 -1
  312. metadata/generated/schema/entity/feed/thread.py +1 -1
  313. metadata/generated/schema/entity/policies/__init__.py +1 -1
  314. metadata/generated/schema/entity/policies/accessControl/__init__.py +1 -1
  315. metadata/generated/schema/entity/policies/accessControl/resourceDescriptor.py +1 -1
  316. metadata/generated/schema/entity/policies/accessControl/resourcePermission.py +1 -1
  317. metadata/generated/schema/entity/policies/accessControl/rule.py +1 -1
  318. metadata/generated/schema/entity/policies/filters.py +1 -1
  319. metadata/generated/schema/entity/policies/policy.py +1 -1
  320. metadata/generated/schema/entity/services/__init__.py +1 -1
  321. metadata/generated/schema/entity/services/apiService.py +1 -1
  322. metadata/generated/schema/entity/services/connections/__init__.py +1 -1
  323. metadata/generated/schema/entity/services/connections/api/__init__.py +1 -1
  324. metadata/generated/schema/entity/services/connections/api/restConnection.py +1 -1
  325. metadata/generated/schema/entity/services/connections/common/__init__.py +1 -1
  326. metadata/generated/schema/entity/services/connections/common/sslCertPaths.py +1 -1
  327. metadata/generated/schema/entity/services/connections/common/sslCertValues.py +1 -1
  328. metadata/generated/schema/entity/services/connections/common/sslConfig.py +1 -1
  329. metadata/generated/schema/entity/services/connections/connectionBasicType.py +1 -1
  330. metadata/generated/schema/entity/services/connections/dashboard/__init__.py +1 -1
  331. metadata/generated/schema/entity/services/connections/dashboard/customDashboardConnection.py +1 -1
  332. metadata/generated/schema/entity/services/connections/dashboard/domoDashboardConnection.py +1 -1
  333. metadata/generated/schema/entity/services/connections/dashboard/lightdashConnection.py +1 -1
  334. metadata/generated/schema/entity/services/connections/dashboard/lookerConnection.py +1 -1
  335. metadata/generated/schema/entity/services/connections/dashboard/metabaseConnection.py +1 -1
  336. metadata/generated/schema/entity/services/connections/dashboard/microStrategyConnection.py +1 -1
  337. metadata/generated/schema/entity/services/connections/dashboard/modeConnection.py +1 -1
  338. metadata/generated/schema/entity/services/connections/dashboard/powerBIConnection.py +1 -1
  339. metadata/generated/schema/entity/services/connections/dashboard/powerBIReportServerConnection.py +1 -1
  340. metadata/generated/schema/entity/services/connections/dashboard/powerbi/__init__.py +1 -1
  341. metadata/generated/schema/entity/services/connections/dashboard/powerbi/azureConfig.py +1 -1
  342. metadata/generated/schema/entity/services/connections/dashboard/powerbi/bucketDetails.py +1 -1
  343. metadata/generated/schema/entity/services/connections/dashboard/powerbi/gcsConfig.py +1 -1
  344. metadata/generated/schema/entity/services/connections/dashboard/powerbi/s3Config.py +1 -1
  345. metadata/generated/schema/entity/services/connections/dashboard/qlikCloudConnection.py +1 -1
  346. metadata/generated/schema/entity/services/connections/dashboard/qlikSenseConnection.py +1 -1
  347. metadata/generated/schema/entity/services/connections/dashboard/quickSightConnection.py +1 -1
  348. metadata/generated/schema/entity/services/connections/dashboard/redashConnection.py +1 -1
  349. metadata/generated/schema/entity/services/connections/dashboard/sigmaConnection.py +1 -1
  350. metadata/generated/schema/entity/services/connections/dashboard/supersetConnection.py +1 -1
  351. metadata/generated/schema/entity/services/connections/dashboard/tableauConnection.py +1 -1
  352. metadata/generated/schema/entity/services/connections/dashboard/thoughtSpotConnection.py +1 -1
  353. metadata/generated/schema/entity/services/connections/database/__init__.py +1 -1
  354. metadata/generated/schema/entity/services/connections/database/athenaConnection.py +1 -1
  355. metadata/generated/schema/entity/services/connections/database/azureSQLConnection.py +1 -1
  356. metadata/generated/schema/entity/services/connections/database/bigQueryConnection.py +1 -1
  357. metadata/generated/schema/entity/services/connections/database/bigTableConnection.py +1 -1
  358. metadata/generated/schema/entity/services/connections/database/cassandra/__init__.py +1 -1
  359. metadata/generated/schema/entity/services/connections/database/cassandra/cloudConfig.py +1 -1
  360. metadata/generated/schema/entity/services/connections/database/cassandraConnection.py +1 -1
  361. metadata/generated/schema/entity/services/connections/database/clickhouseConnection.py +1 -1
  362. metadata/generated/schema/entity/services/connections/database/cockroachConnection.py +1 -1
  363. metadata/generated/schema/entity/services/connections/database/common/__init__.py +1 -1
  364. metadata/generated/schema/entity/services/connections/database/common/azureConfig.py +1 -1
  365. metadata/generated/schema/entity/services/connections/database/common/basicAuth.py +1 -1
  366. metadata/generated/schema/entity/services/connections/database/common/iamAuthConfig.py +1 -1
  367. metadata/generated/schema/entity/services/connections/database/common/jwtAuth.py +1 -1
  368. metadata/generated/schema/entity/services/connections/database/common/noConfigAuthenticationTypes.py +1 -1
  369. metadata/generated/schema/entity/services/connections/database/couchbaseConnection.py +1 -1
  370. metadata/generated/schema/entity/services/connections/database/customDatabaseConnection.py +1 -1
  371. metadata/generated/schema/entity/services/connections/database/databricksConnection.py +1 -1
  372. metadata/generated/schema/entity/services/connections/database/datalake/__init__.py +1 -1
  373. metadata/generated/schema/entity/services/connections/database/datalake/azureConfig.py +1 -1
  374. metadata/generated/schema/entity/services/connections/database/datalake/gcsConfig.py +1 -1
  375. metadata/generated/schema/entity/services/connections/database/datalake/s3Config.py +1 -1
  376. metadata/generated/schema/entity/services/connections/database/datalakeConnection.py +1 -1
  377. metadata/generated/schema/entity/services/connections/database/db2Connection.py +1 -1
  378. metadata/generated/schema/entity/services/connections/database/deltaLakeConnection.py +1 -1
  379. metadata/generated/schema/entity/services/connections/database/deltalake/__init__.py +1 -1
  380. metadata/generated/schema/entity/services/connections/database/deltalake/metastoreConfig.py +1 -1
  381. metadata/generated/schema/entity/services/connections/database/deltalake/storageConfig.py +1 -1
  382. metadata/generated/schema/entity/services/connections/database/domoDatabaseConnection.py +1 -1
  383. metadata/generated/schema/entity/services/connections/database/dorisConnection.py +1 -1
  384. metadata/generated/schema/entity/services/connections/database/druidConnection.py +1 -1
  385. metadata/generated/schema/entity/services/connections/database/dynamoDBConnection.py +1 -1
  386. metadata/generated/schema/entity/services/connections/database/exasolConnection.py +1 -1
  387. metadata/generated/schema/entity/services/connections/database/glueConnection.py +1 -1
  388. metadata/generated/schema/entity/services/connections/database/greenplumConnection.py +1 -1
  389. metadata/generated/schema/entity/services/connections/database/hiveConnection.py +1 -1
  390. metadata/generated/schema/entity/services/connections/database/iceberg/__init__.py +1 -1
  391. metadata/generated/schema/entity/services/connections/database/iceberg/dynamoDbCatalogConnection.py +1 -1
  392. metadata/generated/schema/entity/services/connections/database/iceberg/glueCatalogConnection.py +1 -1
  393. metadata/generated/schema/entity/services/connections/database/iceberg/hiveCatalogConnection.py +1 -1
  394. metadata/generated/schema/entity/services/connections/database/iceberg/icebergCatalog.py +1 -1
  395. metadata/generated/schema/entity/services/connections/database/iceberg/icebergFileSystem.py +1 -1
  396. metadata/generated/schema/entity/services/connections/database/iceberg/restCatalogConnection.py +1 -1
  397. metadata/generated/schema/entity/services/connections/database/icebergConnection.py +1 -1
  398. metadata/generated/schema/entity/services/connections/database/impalaConnection.py +1 -1
  399. metadata/generated/schema/entity/services/connections/database/mariaDBConnection.py +1 -1
  400. metadata/generated/schema/entity/services/connections/database/mongoDBConnection.py +1 -1
  401. metadata/generated/schema/entity/services/connections/database/mssqlConnection.py +1 -1
  402. metadata/generated/schema/entity/services/connections/database/mysqlConnection.py +1 -1
  403. metadata/generated/schema/entity/services/connections/database/oracleConnection.py +1 -1
  404. metadata/generated/schema/entity/services/connections/database/pinotDBConnection.py +1 -1
  405. metadata/generated/schema/entity/services/connections/database/postgresConnection.py +1 -1
  406. metadata/generated/schema/entity/services/connections/database/prestoConnection.py +1 -1
  407. metadata/generated/schema/entity/services/connections/database/redshiftConnection.py +1 -1
  408. metadata/generated/schema/entity/services/connections/database/salesforceConnection.py +1 -1
  409. metadata/generated/schema/entity/services/connections/database/sapErpConnection.py +1 -1
  410. metadata/generated/schema/entity/services/connections/database/sapHana/__init__.py +1 -1
  411. metadata/generated/schema/entity/services/connections/database/sapHana/sapHanaHDBConnection.py +1 -1
  412. metadata/generated/schema/entity/services/connections/database/sapHana/sapHanaSQLConnection.py +1 -1
  413. metadata/generated/schema/entity/services/connections/database/sapHanaConnection.py +1 -1
  414. metadata/generated/schema/entity/services/connections/database/sasConnection.py +1 -1
  415. metadata/generated/schema/entity/services/connections/database/singleStoreConnection.py +1 -1
  416. metadata/generated/schema/entity/services/connections/database/snowflakeConnection.py +1 -1
  417. metadata/generated/schema/entity/services/connections/database/sqliteConnection.py +1 -1
  418. metadata/generated/schema/entity/services/connections/database/ssasConnection.py +47 -0
  419. metadata/generated/schema/entity/services/connections/database/synapseConnection.py +27 -2
  420. metadata/generated/schema/entity/services/connections/database/teradataConnection.py +1 -1
  421. metadata/generated/schema/entity/services/connections/database/trinoConnection.py +1 -1
  422. metadata/generated/schema/entity/services/connections/database/unityCatalogConnection.py +1 -1
  423. metadata/generated/schema/entity/services/connections/database/verticaConnection.py +1 -1
  424. metadata/generated/schema/entity/services/connections/messaging/__init__.py +1 -1
  425. metadata/generated/schema/entity/services/connections/messaging/customMessagingConnection.py +1 -1
  426. metadata/generated/schema/entity/services/connections/messaging/kafkaConnection.py +1 -1
  427. metadata/generated/schema/entity/services/connections/messaging/kinesisConnection.py +1 -1
  428. metadata/generated/schema/entity/services/connections/messaging/pulsarConnection.py +1 -1
  429. metadata/generated/schema/entity/services/connections/messaging/redpandaConnection.py +1 -1
  430. metadata/generated/schema/entity/services/connections/messaging/saslMechanismType.py +1 -1
  431. metadata/generated/schema/entity/services/connections/metadata/__init__.py +1 -1
  432. metadata/generated/schema/entity/services/connections/metadata/alationConnection.py +1 -1
  433. metadata/generated/schema/entity/services/connections/metadata/alationSinkConnection.py +1 -1
  434. metadata/generated/schema/entity/services/connections/metadata/amundsenConnection.py +1 -1
  435. metadata/generated/schema/entity/services/connections/metadata/atlasConnection.py +1 -1
  436. metadata/generated/schema/entity/services/connections/metadata/metadataESConnection.py +1 -1
  437. metadata/generated/schema/entity/services/connections/metadata/openMetadataConnection.py +1 -1
  438. metadata/generated/schema/entity/services/connections/mlmodel/__init__.py +1 -1
  439. metadata/generated/schema/entity/services/connections/mlmodel/customMlModelConnection.py +1 -1
  440. metadata/generated/schema/entity/services/connections/mlmodel/mlflowConnection.py +1 -1
  441. metadata/generated/schema/entity/services/connections/mlmodel/sageMakerConnection.py +1 -1
  442. metadata/generated/schema/entity/services/connections/mlmodel/sklearnConnection.py +1 -1
  443. metadata/generated/schema/entity/services/connections/mlmodel/vertexaiConnection.py +1 -1
  444. metadata/generated/schema/entity/services/connections/pipeline/__init__.py +1 -1
  445. metadata/generated/schema/entity/services/connections/pipeline/airbyteConnection.py +1 -1
  446. metadata/generated/schema/entity/services/connections/pipeline/airflowConnection.py +1 -1
  447. metadata/generated/schema/entity/services/connections/pipeline/backendConnection.py +1 -1
  448. metadata/generated/schema/entity/services/connections/pipeline/customPipelineConnection.py +1 -1
  449. metadata/generated/schema/entity/services/connections/pipeline/dagsterConnection.py +1 -1
  450. metadata/generated/schema/entity/services/connections/pipeline/databricksPipelineConnection.py +1 -1
  451. metadata/generated/schema/entity/services/connections/pipeline/datafactoryConnection.py +1 -1
  452. metadata/generated/schema/entity/services/connections/pipeline/dbtCloudConnection.py +1 -1
  453. metadata/generated/schema/entity/services/connections/pipeline/domoPipelineConnection.py +1 -1
  454. metadata/generated/schema/entity/services/connections/pipeline/fivetranConnection.py +1 -1
  455. metadata/generated/schema/entity/services/connections/pipeline/flinkConnection.py +1 -1
  456. metadata/generated/schema/entity/services/connections/pipeline/gluePipelineConnection.py +1 -1
  457. metadata/generated/schema/entity/services/connections/pipeline/kafkaConnectConnection.py +1 -1
  458. metadata/generated/schema/entity/services/connections/pipeline/matillion/__init__.py +1 -1
  459. metadata/generated/schema/entity/services/connections/pipeline/matillion/matillionETL.py +1 -1
  460. metadata/generated/schema/entity/services/connections/pipeline/matillionConnection.py +1 -1
  461. metadata/generated/schema/entity/services/connections/pipeline/nifi/__init__.py +1 -1
  462. metadata/generated/schema/entity/services/connections/pipeline/nifi/basicAuth.py +1 -1
  463. metadata/generated/schema/entity/services/connections/pipeline/nifi/clientCertificateAuth.py +1 -1
  464. metadata/generated/schema/entity/services/connections/pipeline/nifiConnection.py +1 -1
  465. metadata/generated/schema/entity/services/connections/pipeline/openLineageConnection.py +1 -1
  466. metadata/generated/schema/entity/services/connections/pipeline/sparkConnection.py +1 -1
  467. metadata/generated/schema/entity/services/connections/pipeline/splineConnection.py +1 -1
  468. metadata/generated/schema/entity/services/connections/pipeline/ssisConnection.py +1 -1
  469. metadata/generated/schema/entity/services/connections/pipeline/stitchConnection.py +1 -1
  470. metadata/generated/schema/entity/services/connections/pipeline/wherescapeConnection.py +1 -1
  471. metadata/generated/schema/entity/services/connections/search/__init__.py +1 -1
  472. metadata/generated/schema/entity/services/connections/search/customSearchConnection.py +1 -1
  473. metadata/generated/schema/entity/services/connections/search/elasticSearch/__init__.py +1 -1
  474. metadata/generated/schema/entity/services/connections/search/elasticSearch/apiAuth.py +1 -1
  475. metadata/generated/schema/entity/services/connections/search/elasticSearch/basicAuth.py +1 -1
  476. metadata/generated/schema/entity/services/connections/search/elasticSearchConnection.py +1 -1
  477. metadata/generated/schema/entity/services/connections/search/openSearchConnection.py +1 -1
  478. metadata/generated/schema/entity/services/connections/security/__init__.py +3 -0
  479. metadata/generated/schema/entity/services/connections/security/ranger/__init__.py +3 -0
  480. metadata/generated/schema/entity/services/connections/security/ranger/basicAuth.py +26 -0
  481. metadata/generated/schema/entity/services/connections/security/rangerConnection.py +39 -0
  482. metadata/generated/schema/entity/services/connections/serviceConnection.py +4 -1
  483. metadata/generated/schema/entity/services/connections/storage/__init__.py +1 -1
  484. metadata/generated/schema/entity/services/connections/storage/adlsConnection.py +1 -1
  485. metadata/generated/schema/entity/services/connections/storage/customStorageConnection.py +1 -1
  486. metadata/generated/schema/entity/services/connections/storage/gcsConnection.py +1 -1
  487. metadata/generated/schema/entity/services/connections/storage/s3Connection.py +1 -1
  488. metadata/generated/schema/entity/services/connections/testConnectionDefinition.py +1 -1
  489. metadata/generated/schema/entity/services/connections/testConnectionResult.py +1 -1
  490. metadata/generated/schema/entity/services/dashboardService.py +1 -1
  491. metadata/generated/schema/entity/services/databaseService.py +4 -1
  492. metadata/generated/schema/entity/services/ingestionPipelines/__init__.py +1 -1
  493. metadata/generated/schema/entity/services/ingestionPipelines/ingestionPipeline.py +1 -1
  494. metadata/generated/schema/entity/services/ingestionPipelines/pipelineServiceClientResponse.py +1 -1
  495. metadata/generated/schema/entity/services/ingestionPipelines/reverseIngestionResponse.py +1 -1
  496. metadata/generated/schema/entity/services/ingestionPipelines/status.py +1 -1
  497. metadata/generated/schema/entity/services/messagingService.py +1 -1
  498. metadata/generated/schema/entity/services/metadataService.py +1 -1
  499. metadata/generated/schema/entity/services/mlmodelService.py +1 -1
  500. metadata/generated/schema/entity/services/pipelineService.py +1 -1
  501. metadata/generated/schema/entity/services/searchService.py +1 -1
  502. metadata/generated/schema/entity/services/securityService.py +135 -0
  503. metadata/generated/schema/entity/services/serviceType.py +2 -1
  504. metadata/generated/schema/entity/services/storageService.py +1 -1
  505. metadata/generated/schema/entity/teams/__init__.py +1 -1
  506. metadata/generated/schema/entity/teams/persona.py +1 -1
  507. metadata/generated/schema/entity/teams/role.py +1 -1
  508. metadata/generated/schema/entity/teams/team.py +1 -1
  509. metadata/generated/schema/entity/teams/teamHierarchy.py +1 -1
  510. metadata/generated/schema/entity/teams/user.py +1 -1
  511. metadata/generated/schema/entity/type.py +1 -1
  512. metadata/generated/schema/entity/utils/__init__.py +1 -1
  513. metadata/generated/schema/entity/utils/entitiesCount.py +1 -1
  514. metadata/generated/schema/entity/utils/servicesCount.py +1 -1
  515. metadata/generated/schema/entity/utils/supersetApiConnection.py +1 -1
  516. metadata/generated/schema/events/__init__.py +1 -1
  517. metadata/generated/schema/events/alertMetrics.py +1 -1
  518. metadata/generated/schema/events/api/__init__.py +1 -1
  519. metadata/generated/schema/events/api/createEventSubscription.py +1 -1
  520. metadata/generated/schema/events/api/eventSubscriptionDiagnosticInfo.py +1 -1
  521. metadata/generated/schema/events/api/eventsRecord.py +1 -1
  522. metadata/generated/schema/events/api/testEventSubscriptionDestination.py +1 -1
  523. metadata/generated/schema/events/api/typedEvent.py +1 -1
  524. metadata/generated/schema/events/emailAlertConfig.py +1 -1
  525. metadata/generated/schema/events/eventFilterRule.py +1 -1
  526. metadata/generated/schema/events/eventSubscription.py +1 -1
  527. metadata/generated/schema/events/eventSubscriptionOffset.py +1 -1
  528. metadata/generated/schema/events/failedEvent.py +1 -1
  529. metadata/generated/schema/events/failedEventResponse.py +1 -1
  530. metadata/generated/schema/events/filterResourceDescriptor.py +1 -1
  531. metadata/generated/schema/events/statusContext.py +1 -1
  532. metadata/generated/schema/events/subscriptionResourceDescriptor.py +1 -1
  533. metadata/generated/schema/events/subscriptionStatus.py +1 -1
  534. metadata/generated/schema/events/testDestinationStatus.py +1 -1
  535. metadata/generated/schema/governance/workflows/__init__.py +1 -1
  536. metadata/generated/schema/governance/workflows/elements/__init__.py +1 -1
  537. metadata/generated/schema/governance/workflows/elements/edge.py +1 -1
  538. metadata/generated/schema/governance/workflows/elements/nodeSubType.py +1 -1
  539. metadata/generated/schema/governance/workflows/elements/nodeType.py +1 -1
  540. metadata/generated/schema/governance/workflows/elements/nodes/__init__.py +1 -1
  541. metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/__init__.py +1 -1
  542. metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/checkEntityAttributesTask.py +1 -1
  543. metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/createAndRunIngestionPipelineTask.py +1 -1
  544. metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/runAppTask.py +1 -1
  545. metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/setEntityCertificationTask.py +1 -1
  546. metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/setGlossaryTermStatusTask.py +1 -1
  547. metadata/generated/schema/governance/workflows/elements/nodes/endEvent/__init__.py +1 -1
  548. metadata/generated/schema/governance/workflows/elements/nodes/endEvent/endEvent.py +1 -1
  549. metadata/generated/schema/governance/workflows/elements/nodes/gateway/__init__.py +1 -1
  550. metadata/generated/schema/governance/workflows/elements/nodes/gateway/parallelGateway.py +1 -1
  551. metadata/generated/schema/governance/workflows/elements/nodes/startEvent/__init__.py +1 -1
  552. metadata/generated/schema/governance/workflows/elements/nodes/startEvent/startEvent.py +1 -1
  553. metadata/generated/schema/governance/workflows/elements/nodes/userTask/__init__.py +1 -1
  554. metadata/generated/schema/governance/workflows/elements/nodes/userTask/userApprovalTask.py +1 -1
  555. metadata/generated/schema/governance/workflows/elements/triggers/__init__.py +1 -1
  556. metadata/generated/schema/governance/workflows/elements/triggers/eventBasedEntityTrigger.py +1 -1
  557. metadata/generated/schema/governance/workflows/elements/triggers/noOpTrigger.py +1 -1
  558. metadata/generated/schema/governance/workflows/elements/triggers/periodicBatchEntityTrigger.py +1 -1
  559. metadata/generated/schema/governance/workflows/workflowDefinition.py +1 -1
  560. metadata/generated/schema/governance/workflows/workflowInstance.py +1 -1
  561. metadata/generated/schema/governance/workflows/workflowInstanceState.py +1 -1
  562. metadata/generated/schema/jobs/__init__.py +1 -1
  563. metadata/generated/schema/jobs/backgroundJob.py +1 -1
  564. metadata/generated/schema/jobs/enumCleanupArgs.py +1 -1
  565. metadata/generated/schema/metadataIngestion/__init__.py +1 -1
  566. metadata/generated/schema/metadataIngestion/apiServiceMetadataPipeline.py +1 -1
  567. metadata/generated/schema/metadataIngestion/application.py +1 -1
  568. metadata/generated/schema/metadataIngestion/applicationPipeline.py +1 -1
  569. metadata/generated/schema/metadataIngestion/dashboardServiceMetadataPipeline.py +1 -1
  570. metadata/generated/schema/metadataIngestion/dataInsightPipeline.py +1 -1
  571. metadata/generated/schema/metadataIngestion/databaseServiceAutoClassificationPipeline.py +1 -1
  572. metadata/generated/schema/metadataIngestion/databaseServiceMetadataPipeline.py +1 -1
  573. metadata/generated/schema/metadataIngestion/databaseServiceProfilerPipeline.py +1 -1
  574. metadata/generated/schema/metadataIngestion/databaseServiceQueryLineagePipeline.py +1 -1
  575. metadata/generated/schema/metadataIngestion/databaseServiceQueryUsagePipeline.py +1 -1
  576. metadata/generated/schema/metadataIngestion/dbtPipeline.py +1 -1
  577. metadata/generated/schema/metadataIngestion/dbtconfig/__init__.py +1 -1
  578. metadata/generated/schema/metadataIngestion/dbtconfig/dbtAzureConfig.py +1 -1
  579. metadata/generated/schema/metadataIngestion/dbtconfig/dbtBucketDetails.py +1 -1
  580. metadata/generated/schema/metadataIngestion/dbtconfig/dbtCloudConfig.py +1 -1
  581. metadata/generated/schema/metadataIngestion/dbtconfig/dbtGCSConfig.py +1 -1
  582. metadata/generated/schema/metadataIngestion/dbtconfig/dbtHttpConfig.py +1 -1
  583. metadata/generated/schema/metadataIngestion/dbtconfig/dbtLocalConfig.py +1 -1
  584. metadata/generated/schema/metadataIngestion/dbtconfig/dbtS3Config.py +1 -1
  585. metadata/generated/schema/metadataIngestion/messagingServiceMetadataPipeline.py +1 -1
  586. metadata/generated/schema/metadataIngestion/metadataToElasticSearchPipeline.py +1 -1
  587. metadata/generated/schema/metadataIngestion/mlmodelServiceMetadataPipeline.py +1 -1
  588. metadata/generated/schema/metadataIngestion/pipelineServiceMetadataPipeline.py +1 -1
  589. metadata/generated/schema/metadataIngestion/reverseIngestionPipeline.py +1 -1
  590. metadata/generated/schema/metadataIngestion/reverseingestionconfig/__init__.py +1 -1
  591. metadata/generated/schema/metadataIngestion/reverseingestionconfig/descriptionConfig.py +1 -1
  592. metadata/generated/schema/metadataIngestion/reverseingestionconfig/ownerConfig.py +1 -1
  593. metadata/generated/schema/metadataIngestion/reverseingestionconfig/tagsConfig.py +1 -1
  594. metadata/generated/schema/metadataIngestion/searchServiceMetadataPipeline.py +1 -1
  595. metadata/generated/schema/metadataIngestion/securityServiceMetadataPipeline.py +27 -0
  596. metadata/generated/schema/metadataIngestion/storage/__init__.py +1 -1
  597. metadata/generated/schema/metadataIngestion/storage/containerMetadataConfig.py +1 -1
  598. metadata/generated/schema/metadataIngestion/storage/manifestMetadataConfig.py +1 -1
  599. metadata/generated/schema/metadataIngestion/storage/storageBucketDetails.py +1 -1
  600. metadata/generated/schema/metadataIngestion/storage/storageMetadataADLSConfig.py +1 -1
  601. metadata/generated/schema/metadataIngestion/storage/storageMetadataGCSConfig.py +1 -1
  602. metadata/generated/schema/metadataIngestion/storage/storageMetadataHttpConfig.py +1 -1
  603. metadata/generated/schema/metadataIngestion/storage/storageMetadataLocalConfig.py +1 -1
  604. metadata/generated/schema/metadataIngestion/storage/storageMetadataS3Config.py +1 -1
  605. metadata/generated/schema/metadataIngestion/storageServiceMetadataPipeline.py +1 -1
  606. metadata/generated/schema/metadataIngestion/testSuitePipeline.py +1 -1
  607. metadata/generated/schema/metadataIngestion/workflow.py +1 -1
  608. metadata/generated/schema/monitoring/__init__.py +1 -1
  609. metadata/generated/schema/monitoring/eventMonitorProvider.py +1 -1
  610. metadata/generated/schema/scim/__init__.py +1 -1
  611. metadata/generated/schema/scim/scimConfiguration.py +1 -1
  612. metadata/generated/schema/search/__init__.py +1 -1
  613. metadata/generated/schema/search/aggregationRequest.py +1 -1
  614. metadata/generated/schema/search/searchRequest.py +1 -1
  615. metadata/generated/schema/security/__init__.py +1 -1
  616. metadata/generated/schema/security/client/__init__.py +1 -1
  617. metadata/generated/schema/security/client/auth0SSOClientConfig.py +1 -1
  618. metadata/generated/schema/security/client/azureSSOClientConfig.py +1 -1
  619. metadata/generated/schema/security/client/customOidcSSOClientConfig.py +1 -1
  620. metadata/generated/schema/security/client/googleSSOClientConfig.py +1 -1
  621. metadata/generated/schema/security/client/oidcClientConfig.py +1 -1
  622. metadata/generated/schema/security/client/oktaSSOClientConfig.py +1 -1
  623. metadata/generated/schema/security/client/openMetadataJWTClientConfig.py +1 -1
  624. metadata/generated/schema/security/client/samlSSOClientConfig.py +1 -1
  625. metadata/generated/schema/security/credentials/__init__.py +1 -1
  626. metadata/generated/schema/security/credentials/accessTokenAuth.py +1 -1
  627. metadata/generated/schema/security/credentials/apiAccessTokenAuth.py +1 -1
  628. metadata/generated/schema/security/credentials/awsCredentials.py +1 -1
  629. metadata/generated/schema/security/credentials/azureCredentials.py +1 -1
  630. metadata/generated/schema/security/credentials/basicAuth.py +1 -1
  631. metadata/generated/schema/security/credentials/bitbucketCredentials.py +1 -1
  632. metadata/generated/schema/security/credentials/gcpCredentials.py +1 -1
  633. metadata/generated/schema/security/credentials/gcpExternalAccount.py +1 -1
  634. metadata/generated/schema/security/credentials/gcpValues.py +1 -1
  635. metadata/generated/schema/security/credentials/gitCredentials.py +1 -1
  636. metadata/generated/schema/security/credentials/githubCredentials.py +1 -1
  637. metadata/generated/schema/security/credentials/gitlabCredentials.py +1 -1
  638. metadata/generated/schema/security/sasl/__init__.py +1 -1
  639. metadata/generated/schema/security/sasl/saslClientConfig.py +1 -1
  640. metadata/generated/schema/security/secrets/__init__.py +1 -1
  641. metadata/generated/schema/security/secrets/secretsManagerClientLoader.py +1 -1
  642. metadata/generated/schema/security/secrets/secretsManagerConfiguration.py +1 -1
  643. metadata/generated/schema/security/secrets/secretsManagerProvider.py +1 -1
  644. metadata/generated/schema/security/securityConfiguration.py +1 -1
  645. metadata/generated/schema/security/ssl/__init__.py +1 -1
  646. metadata/generated/schema/security/ssl/validateSSLClientConfig.py +1 -1
  647. metadata/generated/schema/security/ssl/verifySSLConfig.py +1 -1
  648. metadata/generated/schema/settings/__init__.py +1 -1
  649. metadata/generated/schema/settings/settings.py +1 -1
  650. metadata/generated/schema/system/__init__.py +1 -1
  651. metadata/generated/schema/system/entityError.py +1 -1
  652. metadata/generated/schema/system/eventPublisherJob.py +1 -1
  653. metadata/generated/schema/system/indexingError.py +1 -1
  654. metadata/generated/schema/system/limitsResponse.py +1 -1
  655. metadata/generated/schema/system/ui/__init__.py +1 -1
  656. metadata/generated/schema/system/ui/knowledgePanel.py +1 -1
  657. metadata/generated/schema/system/ui/navigationItem.py +1 -1
  658. metadata/generated/schema/system/ui/page.py +1 -1
  659. metadata/generated/schema/system/ui/tab.py +1 -1
  660. metadata/generated/schema/system/ui/uiCustomization.py +1 -1
  661. metadata/generated/schema/system/validationResponse.py +1 -1
  662. metadata/generated/schema/tests/__init__.py +1 -1
  663. metadata/generated/schema/tests/assigned.py +1 -1
  664. metadata/generated/schema/tests/basic.py +1 -1
  665. metadata/generated/schema/tests/customMetric.py +1 -1
  666. metadata/generated/schema/tests/dataQualityReport.py +1 -1
  667. metadata/generated/schema/tests/resolved.py +1 -1
  668. metadata/generated/schema/tests/testCase.py +1 -1
  669. metadata/generated/schema/tests/testCaseResolutionStatus.py +1 -1
  670. metadata/generated/schema/tests/testDefinition.py +1 -1
  671. metadata/generated/schema/tests/testSuite.py +1 -1
  672. metadata/generated/schema/type/__init__.py +1 -1
  673. metadata/generated/schema/type/apiSchema.py +1 -1
  674. metadata/generated/schema/type/assetCertification.py +1 -1
  675. metadata/generated/schema/type/auditLog.py +1 -1
  676. metadata/generated/schema/type/basic.py +1 -1
  677. metadata/generated/schema/type/bulkOperationResult.py +1 -1
  678. metadata/generated/schema/type/changeEvent.py +1 -1
  679. metadata/generated/schema/type/changeEventType.py +1 -1
  680. metadata/generated/schema/type/changeSummaryMap.py +1 -1
  681. metadata/generated/schema/type/collectionDescriptor.py +1 -1
  682. metadata/generated/schema/type/csvDocumentation.py +1 -1
  683. metadata/generated/schema/type/csvErrorType.py +1 -1
  684. metadata/generated/schema/type/csvFile.py +1 -1
  685. metadata/generated/schema/type/csvImportResult.py +1 -1
  686. metadata/generated/schema/type/customProperties/__init__.py +1 -1
  687. metadata/generated/schema/type/customProperties/complexTypes.py +1 -1
  688. metadata/generated/schema/type/customProperties/enumConfig.py +1 -1
  689. metadata/generated/schema/type/customProperties/tableConfig.py +1 -1
  690. metadata/generated/schema/type/customProperty.py +1 -1
  691. metadata/generated/schema/type/dailyCount.py +1 -1
  692. metadata/generated/schema/type/databaseConnectionConfig.py +1 -1
  693. metadata/generated/schema/type/entityHierarchy.py +1 -1
  694. metadata/generated/schema/type/entityHistory.py +1 -1
  695. metadata/generated/schema/type/entityLineage.py +1 -1
  696. metadata/generated/schema/type/entityReference.py +1 -1
  697. metadata/generated/schema/type/entityReferenceList.py +1 -1
  698. metadata/generated/schema/type/entityRelationship.py +1 -1
  699. metadata/generated/schema/type/entityUsage.py +1 -1
  700. metadata/generated/schema/type/filterPattern.py +1 -1
  701. metadata/generated/schema/type/function.py +1 -1
  702. metadata/generated/schema/type/include.py +1 -1
  703. metadata/generated/schema/type/jdbcConnection.py +1 -1
  704. metadata/generated/schema/type/lifeCycle.py +1 -1
  705. metadata/generated/schema/type/paging.py +1 -1
  706. metadata/generated/schema/type/profile.py +1 -1
  707. metadata/generated/schema/type/queryParserData.py +1 -1
  708. metadata/generated/schema/type/reaction.py +1 -1
  709. metadata/generated/schema/type/schedule.py +1 -1
  710. metadata/generated/schema/type/schema.py +1 -1
  711. metadata/generated/schema/type/tableQuery.py +1 -1
  712. metadata/generated/schema/type/tableUsageCount.py +1 -1
  713. metadata/generated/schema/type/tagLabel.py +1 -1
  714. metadata/generated/schema/type/usageDetails.py +1 -1
  715. metadata/generated/schema/type/usageRequest.py +1 -1
  716. metadata/generated/schema/type/votes.py +1 -1
  717. metadata/ingestion/api/parser.py +25 -9
  718. metadata/ingestion/ometa/routes.py +6 -0
  719. metadata/ingestion/source/dashboard/tableau/client.py +11 -0
  720. metadata/ingestion/source/dashboard/tableau/connection.py +38 -9
  721. metadata/ingestion/source/dashboard/tableau/metadata.py +1 -1
  722. metadata/ingestion/source/database/athena/metadata.py +25 -2
  723. metadata/ingestion/source/database/athena/utils.py +63 -6
  724. metadata/ingestion/source/database/column_type_parser.py +7 -2
  725. metadata/ingestion/source/database/datalake/metadata.py +2 -2
  726. metadata/ingestion/source/database/dbt/metadata.py +32 -6
  727. metadata/ingestion/source/database/glue/metadata.py +76 -1
  728. metadata/ingestion/source/security/security_service.py +128 -0
  729. metadata/profiler/interface/sqlalchemy/profiler_interface.py +5 -1
  730. metadata/readers/dataframe/base.py +2 -0
  731. metadata/readers/dataframe/dsv.py +50 -5
  732. metadata/readers/dataframe/parquet.py +202 -15
  733. metadata/readers/dataframe/reader_factory.py +6 -1
  734. metadata/sampler/sqlalchemy/sampler.py +60 -64
  735. metadata/sampler/sqlalchemy/snowflake/sampler.py +2 -1
  736. metadata/utils/class_helper.py +1 -0
  737. metadata/utils/constants.py +2 -0
  738. metadata/utils/datalake/datalake_utils.py +1 -0
  739. metadata/utils/entity_utils.py +6 -0
  740. {openmetadata_ingestion-1.8.7.1.dist-info → openmetadata_ingestion-1.8.9.0.dist-info}/METADATA +510 -510
  741. {openmetadata_ingestion-1.8.7.1.dist-info → openmetadata_ingestion-1.8.9.0.dist-info}/RECORD +745 -735
  742. {openmetadata_ingestion-1.8.7.1.dist-info → openmetadata_ingestion-1.8.9.0.dist-info}/LICENSE +0 -0
  743. {openmetadata_ingestion-1.8.7.1.dist-info → openmetadata_ingestion-1.8.9.0.dist-info}/WHEEL +0 -0
  744. {openmetadata_ingestion-1.8.7.1.dist-info → openmetadata_ingestion-1.8.9.0.dist-info}/entry_points.txt +0 -0
  745. {openmetadata_ingestion-1.8.7.1.dist-info → openmetadata_ingestion-1.8.9.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,128 @@
1
+ # Copyright 2025 Collate
2
+ # Licensed under the Collate Community License, Version 1.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ # https://github.com/open-metadata/OpenMetadata/blob/main/ingestion/LICENSE
6
+ # Unless required by applicable law or agreed to in writing, software
7
+ # distributed under the License is distributed on an "AS IS" BASIS,
8
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9
+ # See the License for the specific language governing permissions and
10
+ # limitations under the License.
11
+ """
12
+ Base class for ingesting security services
13
+ """
14
+ from abc import ABC
15
+ from typing import Set
16
+
17
+ from pydantic import Field
18
+ from typing_extensions import Annotated
19
+
20
+ from metadata.generated.schema.entity.services.securityService import (
21
+ SecurityConnection,
22
+ SecurityService,
23
+ )
24
+ from metadata.generated.schema.metadataIngestion.securityServiceMetadataPipeline import (
25
+ SecurityServiceMetadataPipeline,
26
+ )
27
+ from metadata.generated.schema.metadataIngestion.workflow import (
28
+ Source as WorkflowSource,
29
+ )
30
+ from metadata.ingestion.api.models import Either
31
+ from metadata.ingestion.api.steps import Source
32
+ from metadata.ingestion.api.topology_runner import TopologyRunnerMixin
33
+ from metadata.ingestion.models.topology import (
34
+ NodeStage,
35
+ ServiceTopology,
36
+ TopologyContextManager,
37
+ TopologyNode,
38
+ )
39
+ from metadata.ingestion.ometa.ometa_api import OpenMetadata
40
+ from metadata.ingestion.source.connections import get_connection
41
+ from metadata.utils.logger import ingestion_logger
42
+
43
+ logger = ingestion_logger()
44
+
45
+
46
+ class SecurityServiceTopology(ServiceTopology):
47
+ """
48
+ Defines the hierarchy in Security Services.
49
+
50
+ We could have a topology validator. We can only consume
51
+ data that has been produced by any parent node.
52
+ """
53
+
54
+ root: Annotated[
55
+ TopologyNode, Field(description="Root node for the topology")
56
+ ] = TopologyNode(
57
+ producer="get_services",
58
+ stages=[
59
+ NodeStage(
60
+ type_=SecurityService,
61
+ context="security_service",
62
+ processor="yield_create_request_security_service",
63
+ overwrite=False,
64
+ must_return=True,
65
+ cache_entities=True,
66
+ ),
67
+ ],
68
+ children=[], # Security services typically don't have child entities like policies, roles, etc.
69
+ post_process=["mark_security_entities_as_deleted"],
70
+ )
71
+
72
+
73
+ from metadata.utils.helpers import clean_uri
74
+
75
+
76
+ class SecurityServiceSource(TopologyRunnerMixin, Source, ABC):
77
+ """
78
+ Base class for Security Services.
79
+ It implements the topology and context.
80
+ """
81
+
82
+ source_config: SecurityServiceMetadataPipeline
83
+ config: WorkflowSource
84
+ # Big union of types we want to fetch dynamically
85
+ service_connection: SecurityConnection.model_fields["config"].annotation
86
+
87
+ topology = SecurityServiceTopology()
88
+ context = TopologyContextManager(topology)
89
+ security_source_state: Set = set()
90
+
91
+ def __init__(
92
+ self,
93
+ config: WorkflowSource,
94
+ metadata: OpenMetadata,
95
+ ):
96
+ config.serviceConnection.root.config.hostPort = clean_uri(
97
+ config.serviceConnection.root.config.hostPort
98
+ )
99
+ super().__init__()
100
+ self.config = config
101
+ self.metadata = metadata
102
+ self.service_connection = self.config.serviceConnection.root.config
103
+ self.source_config: SecurityServiceMetadataPipeline = (
104
+ self.config.sourceConfig.config
105
+ )
106
+
107
+ self.connection = get_connection(self.service_connection)
108
+ # Flag the connection for the test connection
109
+ self.connection_obj = self.connection
110
+ self.client = self.get_client()
111
+ self.test_connection()
112
+
113
+ @property
114
+ def name(self) -> str:
115
+ return self.service_connection.type.name
116
+
117
+ def close(self):
118
+ pass
119
+
120
+ def yield_create_request_security_service(self, config: WorkflowSource):
121
+ yield Either(
122
+ right=self.metadata.get_create_service_from_source(
123
+ entity=SecurityService, config=config
124
+ )
125
+ )
126
+
127
+ def test_connection(self) -> None:
128
+ self.client.test_connection()
@@ -16,6 +16,7 @@ supporting sqlalchemy abstraction layer
16
16
  """
17
17
 
18
18
  import concurrent.futures
19
+ import gc
19
20
  import math
20
21
  import threading
21
22
  import time
@@ -26,7 +27,7 @@ from typing import Any, Dict, List, Optional, Type, Union
26
27
 
27
28
  from sqlalchemy import Column, inspect, text
28
29
  from sqlalchemy.exc import DBAPIError, ProgrammingError, ResourceClosedError
29
- from sqlalchemy.orm import Session, scoped_session
30
+ from sqlalchemy.orm import scoped_session
30
31
 
31
32
  from metadata.generated.schema.entity.data.table import (
32
33
  CustomMetricProfile,
@@ -461,6 +462,9 @@ class SQAProfilerInterface(ProfilerInterface, SQAInterfaceMixin):
461
462
  logger.error(error)
462
463
  self.status.failed_profiler(error, traceback.format_exc())
463
464
  break
465
+ finally:
466
+ # Force garbage collection to help with memory management
467
+ gc.collect()
464
468
 
465
469
  # If we've exhausted all retries without success, return a tuple of None values
466
470
  return None, None, None
@@ -22,6 +22,8 @@ from metadata.readers.file.config_source_factory import get_reader
22
22
  from metadata.readers.models import ConfigSource
23
23
  from metadata.utils.logger import ingestion_logger
24
24
 
25
+ MAX_FILE_SIZE_FOR_PREVIEW = 50 * 1024 * 1024 # 50MB
26
+
25
27
  logger = ingestion_logger()
26
28
 
27
29
 
@@ -54,16 +54,24 @@ class DSVDataFrameReader(DataFrameReader):
54
54
  super().__init__(config_source, client)
55
55
 
56
56
  def read_from_pandas(
57
- self, path: str, storage_options: Optional[Dict[str, Any]] = None
57
+ self,
58
+ path: str,
59
+ storage_options: Optional[Dict[str, Any]] = None,
60
+ compression: Optional[str] = None,
58
61
  ) -> DatalakeColumnWrapper:
59
62
  import pandas as pd # pylint: disable=import-outside-toplevel
60
63
 
64
+ # Determine compression based on file extension if not provided
65
+ if compression is None and path.endswith(".gz"):
66
+ compression = "gzip"
67
+
61
68
  chunk_list = []
62
69
  with pd.read_csv(
63
70
  path,
64
71
  sep=self.separator,
65
72
  chunksize=CHUNKSIZE,
66
73
  storage_options=storage_options,
74
+ compression=compression,
67
75
  ) as reader:
68
76
  for chunks in reader:
69
77
  chunk_list.append(chunks)
@@ -81,16 +89,47 @@ class DSVDataFrameReader(DataFrameReader):
81
89
  """
82
90
  Read the CSV file from the gcs bucket and return a dataframe
83
91
  """
92
+ # Determine compression based on file extension
93
+ compression = None
94
+ if key.endswith(".gz"):
95
+ compression = "gzip"
96
+
84
97
  path = f"gs://{bucket_name}/{key}"
85
- return self.read_from_pandas(path=path)
98
+ return self.read_from_pandas(path=path, compression=compression)
86
99
 
87
100
  @_read_dsv_dispatch.register
88
101
  def _(self, _: S3Config, key: str, bucket_name: str) -> DatalakeColumnWrapper:
89
- path = self.client.get_object(Bucket=bucket_name, Key=key)["Body"]
90
- return self.read_from_pandas(path=path)
102
+ import pandas as pd # pylint: disable=import-outside-toplevel
103
+
104
+ # Determine compression based on file extension
105
+ compression = None
106
+ if key.endswith(".gz"):
107
+ compression = "gzip"
108
+
109
+ # Get the file content from S3
110
+ response = self.client.get_object(Bucket=bucket_name, Key=key)
111
+ file_content = response["Body"]
112
+
113
+ # Read the CSV data directly from the StreamingBody
114
+ chunk_list = []
115
+ with pd.read_csv(
116
+ file_content,
117
+ sep=self.separator,
118
+ chunksize=CHUNKSIZE,
119
+ compression=compression,
120
+ ) as reader:
121
+ for chunks in reader:
122
+ chunk_list.append(chunks)
123
+
124
+ return DatalakeColumnWrapper(dataframes=chunk_list)
91
125
 
92
126
  @_read_dsv_dispatch.register
93
127
  def _(self, _: AzureConfig, key: str, bucket_name: str) -> DatalakeColumnWrapper:
128
+ # Determine compression based on file extension
129
+ compression = None
130
+ if key.endswith(".gz"):
131
+ compression = "gzip"
132
+
94
133
  storage_options = return_azure_storage_options(self.config_source)
95
134
  path = AZURE_PATH.format(
96
135
  bucket_name=bucket_name,
@@ -100,13 +139,19 @@ class DSVDataFrameReader(DataFrameReader):
100
139
  return self.read_from_pandas(
101
140
  path=path,
102
141
  storage_options=storage_options,
142
+ compression=compression,
103
143
  )
104
144
 
105
145
  @_read_dsv_dispatch.register
106
146
  def _( # pylint: disable=unused-argument
107
147
  self, _: LocalConfig, key: str, bucket_name: str
108
148
  ) -> DatalakeColumnWrapper:
109
- return self.read_from_pandas(path=key)
149
+ # Determine compression based on file extension
150
+ compression = None
151
+ if key.endswith(".gz"):
152
+ compression = "gzip"
153
+
154
+ return self.read_from_pandas(path=key, compression=compression)
110
155
 
111
156
  def _read(self, *, key: str, bucket_name: str, **__) -> DatalakeColumnWrapper:
112
157
  return self._read_dsv_dispatch(
@@ -14,6 +14,8 @@ Generic Delimiter-Separated-Values implementation
14
14
  """
15
15
  from functools import singledispatchmethod
16
16
 
17
+ from pyarrow.parquet import ParquetFile
18
+
17
19
  from metadata.generated.schema.entity.services.connections.database.datalake.azureConfig import (
18
20
  AzureConfig,
19
21
  )
@@ -26,11 +28,18 @@ from metadata.generated.schema.entity.services.connections.database.datalake.s3C
26
28
  from metadata.generated.schema.entity.services.connections.database.datalakeConnection import (
27
29
  LocalConfig,
28
30
  )
29
- from metadata.readers.dataframe.base import DataFrameReader, FileFormatException
31
+ from metadata.readers.dataframe.base import (
32
+ MAX_FILE_SIZE_FOR_PREVIEW,
33
+ DataFrameReader,
34
+ FileFormatException,
35
+ )
30
36
  from metadata.readers.dataframe.common import dataframe_to_chunks
31
37
  from metadata.readers.dataframe.models import DatalakeColumnWrapper
32
38
  from metadata.readers.file.adls import AZURE_PATH, return_azure_storage_options
33
39
  from metadata.readers.models import ConfigSource
40
+ from metadata.utils.logger import ingestion_logger
41
+
42
+ logger = ingestion_logger()
34
43
 
35
44
 
36
45
  class ParquetDataFrameReader(DataFrameReader):
@@ -39,6 +48,90 @@ class ParquetDataFrameReader(DataFrameReader):
39
48
  from any source based on its init client.
40
49
  """
41
50
 
51
def _read_parquet_in_batches(
    self, parquet_file: ParquetFile, batch_size: int = 10000
):
    """
    Read a large parquet file in batches to avoid memory issues.
    Includes multiple fallback strategies for older PyArrow versions.

    Strategies, in order:
      1. ``iter_batches`` when the object exposes it.
      2. Row group by row group when ``num_row_groups`` and
         ``read_row_group`` are available. Row groups that fail to load are
         logged and skipped.
      3. A single whole-file read.

    If any strategy raises, whatever was collected so far is discarded and
    the file is read once in full, so no rows are returned twice.

    Args:
        parquet_file: PyArrow ParquetFile or similar object
        batch_size: Number of rows to read per batch

    Returns:
        List of DataFrame chunks

    Raises:
        Exception: whatever the final whole-file read raises when it fails
            after a batched strategy has already failed
    """
    chunks = []
    batch_count = 0

    try:
        # Method 1: iter_batches (PyArrow >= 3.0 - preferred)
        if hasattr(parquet_file, "iter_batches"):
            logger.info(
                "Reading large parquet file in batches to avoid memory issues"
            )
            for batch in parquet_file.iter_batches(batch_size=batch_size):
                df_batch = batch.to_pandas()
                if not df_batch.empty:
                    chunks.extend(dataframe_to_chunks(df_batch))
                    batch_count += 1

            logger.info(
                f"Successfully processed {batch_count} batches from large parquet file"
            )
            return chunks

        # Method 2: Row group reading (PyArrow >= 0.15.0)
        if hasattr(parquet_file, "num_row_groups") and hasattr(
            parquet_file, "read_row_group"
        ):
            logger.warning(
                "iter_batches not available, using row group reading as fallback"
            )

            for i in range(parquet_file.num_row_groups):
                try:
                    row_group_table = parquet_file.read_row_group(i)
                    df_chunk = row_group_table.to_pandas()
                    if not df_chunk.empty:
                        # Further chunk if row group is still too large
                        if len(df_chunk) > batch_size:
                            chunks.extend(dataframe_to_chunks(df_chunk))
                        else:
                            chunks.append(df_chunk)
                        batch_count += 1
                except Exception as row_exc:
                    # Best effort: keep the row groups that could be read
                    logger.warning(f"Failed to read row group {i}: {row_exc}")
                    continue

            if chunks:
                logger.info(
                    f"Successfully processed {batch_count} row groups from large parquet file"
                )
                return chunks

        # Method 3: Regular reading (final fallback)
        logger.warning(
            "No chunking methods available, falling back to regular reading"
        )
        df = parquet_file.read().to_pandas()
        chunks.extend(dataframe_to_chunks(df))

    except Exception as exc:
        # If all chunking fails, try regular reading as final fallback
        logger.warning(
            f"Batched reading failed: {exc}. Falling back to regular reading "
            "- this may cause memory issues for large files"
        )
        # A batched strategy may have failed halfway; drop its partial output
        # so the full read below does not duplicate rows already collected.
        chunks = []
        try:
            df = parquet_file.read().to_pandas()
            chunks.extend(dataframe_to_chunks(df))
        except Exception as fallback_exc:
            logger.error(f"Failed to read parquet file: {fallback_exc}")
            raise

    return chunks
134
+
42
135
  @singledispatchmethod
43
136
  def _read_parquet_dispatch(
44
137
  self, config_source: ConfigSource, key: str, bucket_name: str
@@ -48,24 +141,46 @@ class ParquetDataFrameReader(DataFrameReader):
48
141
  @_read_parquet_dispatch.register
49
142
  def _(self, _: GCSConfig, key: str, bucket_name: str) -> DatalakeColumnWrapper:
50
143
  """
51
- Read the CSV file from the gcs bucket and return a dataframe
144
+ Read the Parquet file from the gcs bucket and return a dataframe
52
145
  """
53
146
  # pylint: disable=import-outside-toplevel
54
147
  from gcsfs import GCSFileSystem
55
- from pyarrow.parquet import ParquetFile
56
148
 
57
149
  gcs = GCSFileSystem()
58
- file = gcs.open(f"gs://{bucket_name}/{key}")
59
- dataframe_response = (
60
- ParquetFile(file).read().to_pandas(split_blocks=True, self_destruct=True)
61
- )
62
- return dataframe_to_chunks(dataframe_response)
150
+ file_path = f"gs://{bucket_name}/{key}"
151
+
152
+ # Check file size to determine reading strategy
153
+ try:
154
+ file_info = gcs.info(file_path)
155
+ file_size = file_info.get("size", 0)
156
+
157
+ file = gcs.open(file_path)
158
+ parquet_file = ParquetFile(file)
159
+
160
+ if self._should_use_chunking(file_size):
161
+ # Use batched reading for large files
162
+ return self._read_parquet_in_batches(parquet_file)
163
+ else:
164
+ # Use regular reading for smaller files
165
+ dataframe_response = parquet_file.read().to_pandas(
166
+ split_blocks=True, self_destruct=True
167
+ )
168
+ return dataframe_to_chunks(dataframe_response)
169
+
170
+ except Exception:
171
+ # Fallback to regular reading if size check fails
172
+ file = gcs.open(file_path)
173
+ parquet_file = ParquetFile(file)
174
+ dataframe_response = parquet_file.read().to_pandas(
175
+ split_blocks=True, self_destruct=True
176
+ )
177
+ return dataframe_to_chunks(dataframe_response)
63
178
 
64
179
  @_read_parquet_dispatch.register
65
180
  def _(self, _: S3Config, key: str, bucket_name: str) -> DatalakeColumnWrapper:
66
181
  # pylint: disable=import-outside-toplevel
67
182
  from pyarrow.fs import S3FileSystem
68
- from pyarrow.parquet import ParquetDataset
183
+ from pyarrow.parquet import ParquetDataset, ParquetFile
69
184
 
70
185
  client_kwargs = {
71
186
  "endpoint_override": (
@@ -90,13 +205,40 @@ class ParquetDataFrameReader(DataFrameReader):
90
205
  s3_fs = S3FileSystem(**client_kwargs)
91
206
 
92
207
  bucket_uri = f"{bucket_name}/{key}"
93
- dataset = ParquetDataset(bucket_uri, filesystem=s3_fs)
94
208
 
95
- return dataframe_to_chunks(dataset.read_pandas().to_pandas())
209
+ # Check file size to determine reading strategy
210
+ try:
211
+ file_info = s3_fs.get_file_info(bucket_uri)
212
+ file_size = file_info.size if hasattr(file_info, "size") else 0
213
+
214
+ if self._should_use_chunking(file_size):
215
+ # Use ParquetFile for batched reading of large files
216
+ logger.info(
217
+ f"Large parquet file detected ({file_size} bytes > {MAX_FILE_SIZE_FOR_PREVIEW} bytes). "
218
+ f"Using batched reading for file: {bucket_uri}"
219
+ )
220
+ parquet_file = ParquetFile(bucket_uri, filesystem=s3_fs)
221
+ return self._read_parquet_in_batches(parquet_file)
222
+ else:
223
+ # Use ParquetDataset for regular reading of smaller files
224
+ logger.debug(
225
+ f"Reading small parquet file ({file_size} bytes): {bucket_uri}"
226
+ )
227
+ dataset = ParquetDataset(bucket_uri, filesystem=s3_fs)
228
+ return dataframe_to_chunks(dataset.read_pandas().to_pandas())
229
+
230
+ except Exception as exc:
231
+ # Fallback to regular reading if size check fails
232
+ logger.warning(
233
+ f"Could not determine file size for {bucket_uri}: {exc}. Using regular reading"
234
+ )
235
+ dataset = ParquetDataset(bucket_uri, filesystem=s3_fs)
236
+ return dataframe_to_chunks(dataset.read_pandas().to_pandas())
96
237
 
97
238
  @_read_parquet_dispatch.register
98
239
  def _(self, _: AzureConfig, key: str, bucket_name: str) -> DatalakeColumnWrapper:
99
240
  import pandas as pd # pylint: disable=import-outside-toplevel
241
+ import pyarrow.fs as fs
100
242
 
101
243
  storage_options = return_azure_storage_options(self.config_source)
102
244
  account_url = AZURE_PATH.format(
@@ -104,8 +246,33 @@ class ParquetDataFrameReader(DataFrameReader):
104
246
  account_name=self.config_source.securityConfig.accountName,
105
247
  key=key,
106
248
  )
107
- dataframe = pd.read_parquet(account_url, storage_options=storage_options)
108
- return dataframe_to_chunks(dataframe)
249
+
250
+ # Check file size to determine reading strategy
251
+ try:
252
+ # Try to get file size from Azure
253
+ azure_fs = fs.SubTreeFileSystem(
254
+ account_url, fs.AzureFileSystem(**storage_options)
255
+ )
256
+ file_info = azure_fs.get_file_info("/")
257
+ file_size = file_info.size if hasattr(file_info, "size") else 0
258
+
259
+ if self._should_use_chunking(file_size):
260
+ # Use PyArrow ParquetFile for batched reading of large files
261
+ parquet_file = ParquetFile(
262
+ account_url, filesystem=fs.AzureFileSystem(**storage_options)
263
+ )
264
+ return self._read_parquet_in_batches(parquet_file)
265
+ else:
266
+ # Use pandas for regular reading of smaller files
267
+ dataframe = pd.read_parquet(
268
+ account_url, storage_options=storage_options
269
+ )
270
+ return dataframe_to_chunks(dataframe)
271
+
272
+ except Exception:
273
+ # Fallback to regular pandas reading if size check or batching fails
274
+ dataframe = pd.read_parquet(account_url, storage_options=storage_options)
275
+ return dataframe_to_chunks(dataframe)
109
276
 
110
277
  @_read_parquet_dispatch.register
111
278
  def _(
@@ -114,10 +281,27 @@ class ParquetDataFrameReader(DataFrameReader):
114
281
  key: str,
115
282
  bucket_name: str, # pylint: disable=unused-argument
116
283
  ) -> DatalakeColumnWrapper:
284
+ import os
285
+
117
286
  import pandas as pd # pylint: disable=import-outside-toplevel
118
287
 
119
- dataframe = pd.read_parquet(key)
120
- return dataframe_to_chunks(dataframe)
288
+ # Check file size to determine reading strategy
289
+ try:
290
+ file_size = os.path.getsize(key)
291
+
292
+ if self._should_use_chunking(file_size):
293
+ # Use PyArrow ParquetFile for batched reading of large files
294
+ parquet_file = ParquetFile(key)
295
+ return self._read_parquet_in_batches(parquet_file)
296
+ else:
297
+ # Use pandas for regular reading of smaller files
298
+ dataframe = pd.read_parquet(key)
299
+ return dataframe_to_chunks(dataframe)
300
+
301
+ except Exception:
302
+ # Fallback to regular pandas reading if size check fails
303
+ dataframe = pd.read_parquet(key)
304
+ return dataframe_to_chunks(dataframe)
121
305
 
122
306
  def _read(self, *, key: str, bucket_name: str, **__) -> DatalakeColumnWrapper:
123
307
  return DatalakeColumnWrapper(
@@ -125,3 +309,6 @@ class ParquetDataFrameReader(DataFrameReader):
125
309
  self.config_source, key=key, bucket_name=bucket_name
126
310
  )
127
311
  )
312
+
313
def _should_use_chunking(self, file_size: int) -> bool:
    """
    Tell whether a parquet file of ``file_size`` bytes should be read in batches.

    Batching is used when the size is above ``MAX_FILE_SIZE_FOR_PREVIEW`` and
    also when it is exactly 0, the value the readers in this class fall back
    to when the size lookup does not return one.
    """
    if file_size > MAX_FILE_SIZE_FOR_PREVIEW:
        return True
    return file_size == 0
@@ -36,6 +36,7 @@ logger = utils_logger()
36
36
 
37
37
  class SupportedTypes(Enum):
38
38
  CSV = "csv"
39
+ CSVGZ = "csv.gz"
39
40
  TSV = "tsv"
40
41
  AVRO = "avro"
41
42
  PARQUET = "parquet"
@@ -53,6 +54,7 @@ class SupportedTypes(Enum):
53
54
 
54
55
  DF_READER_MAP = {
55
56
  SupportedTypes.CSV.value: CSVDataFrameReader,
57
+ SupportedTypes.CSVGZ.value: CSVDataFrameReader,
56
58
  SupportedTypes.TSV.value: TSVDataFrameReader,
57
59
  SupportedTypes.AVRO.value: AvroDataFrameReader,
58
60
  SupportedTypes.PARQUET.value: ParquetDataFrameReader,
@@ -79,7 +81,10 @@ def get_df_reader(
79
81
  Load the File Reader based on the Config Source
80
82
  """
81
83
  # If we have a DSV file, build a reader dynamically based on the received separator
82
- if type_ in {SupportedTypes.CSV, SupportedTypes.TSV} and separator:
84
+ if (
85
+ type_ in {SupportedTypes.CSV, SupportedTypes.CSVGZ, SupportedTypes.TSV}
86
+ and separator
87
+ ):
83
88
  return get_dsv_reader_by_separator(separator=separator)(
84
89
  config_source=config_source, client=client
85
90
  )