openmetadata-ingestion 1.3.1.3__py3-none-any.whl → 1.3.2.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of openmetadata-ingestion might be problematic. Click here for more details.

Files changed (554)
  1. metadata/clients/azure_client.py +85 -0
  2. metadata/data_quality/source/test_suite.py +2 -2
  3. metadata/examples/workflows/datalake_azure_default.yaml +29 -0
  4. metadata/generated/schema/analytics/__init__.py +1 -1
  5. metadata/generated/schema/analytics/basic.py +1 -1
  6. metadata/generated/schema/analytics/reportData.py +1 -1
  7. metadata/generated/schema/analytics/reportDataType/__init__.py +1 -1
  8. metadata/generated/schema/analytics/reportDataType/aggregatedCostAnalysisReportData.py +1 -1
  9. metadata/generated/schema/analytics/reportDataType/entityReportData.py +1 -1
  10. metadata/generated/schema/analytics/reportDataType/rawCostAnalysisReportData.py +1 -1
  11. metadata/generated/schema/analytics/reportDataType/webAnalyticEntityViewReportData.py +1 -1
  12. metadata/generated/schema/analytics/reportDataType/webAnalyticUserActivityReportData.py +1 -1
  13. metadata/generated/schema/analytics/webAnalyticEvent.py +1 -1
  14. metadata/generated/schema/analytics/webAnalyticEventData.py +1 -1
  15. metadata/generated/schema/analytics/webAnalyticEventType/__init__.py +1 -1
  16. metadata/generated/schema/analytics/webAnalyticEventType/customEvent.py +1 -1
  17. metadata/generated/schema/analytics/webAnalyticEventType/pageViewEvent.py +1 -1
  18. metadata/generated/schema/api/__init__.py +1 -1
  19. metadata/generated/schema/api/addGlossaryToAssetsRequest.py +1 -1
  20. metadata/generated/schema/api/analytics/__init__.py +1 -1
  21. metadata/generated/schema/api/analytics/createWebAnalyticEvent.py +1 -1
  22. metadata/generated/schema/api/automations/__init__.py +1 -1
  23. metadata/generated/schema/api/automations/createWorkflow.py +1 -1
  24. metadata/generated/schema/api/bulkAssets.py +1 -1
  25. metadata/generated/schema/api/classification/__init__.py +1 -1
  26. metadata/generated/schema/api/classification/createClassification.py +1 -1
  27. metadata/generated/schema/api/classification/createTag.py +1 -1
  28. metadata/generated/schema/api/classification/loadTags.py +1 -1
  29. metadata/generated/schema/api/createBot.py +1 -1
  30. metadata/generated/schema/api/createEventPublisherJob.py +1 -1
  31. metadata/generated/schema/api/createType.py +1 -1
  32. metadata/generated/schema/api/data/__init__.py +1 -1
  33. metadata/generated/schema/api/data/createChart.py +1 -1
  34. metadata/generated/schema/api/data/createContainer.py +1 -1
  35. metadata/generated/schema/api/data/createCustomProperty.py +1 -1
  36. metadata/generated/schema/api/data/createDashboard.py +1 -1
  37. metadata/generated/schema/api/data/createDashboardDataModel.py +1 -1
  38. metadata/generated/schema/api/data/createDatabase.py +1 -1
  39. metadata/generated/schema/api/data/createDatabaseSchema.py +1 -1
  40. metadata/generated/schema/api/data/createGlossary.py +1 -1
  41. metadata/generated/schema/api/data/createGlossaryTerm.py +1 -1
  42. metadata/generated/schema/api/data/createMlModel.py +1 -1
  43. metadata/generated/schema/api/data/createPipeline.py +1 -1
  44. metadata/generated/schema/api/data/createQuery.py +1 -1
  45. metadata/generated/schema/api/data/createSearchIndex.py +1 -1
  46. metadata/generated/schema/api/data/createStoredProcedure.py +1 -1
  47. metadata/generated/schema/api/data/createTable.py +1 -1
  48. metadata/generated/schema/api/data/createTableProfile.py +1 -1
  49. metadata/generated/schema/api/data/createTopic.py +1 -1
  50. metadata/generated/schema/api/data/loadGlossary.py +1 -1
  51. metadata/generated/schema/api/data/restoreEntity.py +1 -1
  52. metadata/generated/schema/api/dataInsight/__init__.py +1 -1
  53. metadata/generated/schema/api/dataInsight/createDataInsightChart.py +1 -1
  54. metadata/generated/schema/api/dataInsight/kpi/__init__.py +1 -1
  55. metadata/generated/schema/api/dataInsight/kpi/createKpiRequest.py +1 -1
  56. metadata/generated/schema/api/docStore/__init__.py +1 -1
  57. metadata/generated/schema/api/docStore/createDocument.py +1 -1
  58. metadata/generated/schema/api/domains/__init__.py +1 -1
  59. metadata/generated/schema/api/domains/createDataProduct.py +1 -1
  60. metadata/generated/schema/api/domains/createDomain.py +1 -1
  61. metadata/generated/schema/api/feed/__init__.py +1 -1
  62. metadata/generated/schema/api/feed/closeTask.py +1 -1
  63. metadata/generated/schema/api/feed/createPost.py +1 -1
  64. metadata/generated/schema/api/feed/createSuggestion.py +1 -1
  65. metadata/generated/schema/api/feed/createThread.py +5 -1
  66. metadata/generated/schema/api/feed/resolveTask.py +1 -1
  67. metadata/generated/schema/api/feed/threadCount.py +1 -1
  68. metadata/generated/schema/api/lineage/__init__.py +1 -1
  69. metadata/generated/schema/api/lineage/addLineage.py +1 -1
  70. metadata/generated/schema/api/openMetadataServerVersion.py +1 -1
  71. metadata/generated/schema/api/policies/__init__.py +1 -1
  72. metadata/generated/schema/api/policies/createPolicy.py +1 -1
  73. metadata/generated/schema/api/services/__init__.py +1 -1
  74. metadata/generated/schema/api/services/createDashboardService.py +1 -1
  75. metadata/generated/schema/api/services/createDatabaseService.py +1 -1
  76. metadata/generated/schema/api/services/createMessagingService.py +1 -1
  77. metadata/generated/schema/api/services/createMetadataService.py +1 -1
  78. metadata/generated/schema/api/services/createMlModelService.py +1 -1
  79. metadata/generated/schema/api/services/createPipelineService.py +1 -1
  80. metadata/generated/schema/api/services/createSearchService.py +1 -1
  81. metadata/generated/schema/api/services/createStorageService.py +1 -1
  82. metadata/generated/schema/api/services/ingestionPipelines/__init__.py +1 -1
  83. metadata/generated/schema/api/services/ingestionPipelines/createIngestionPipeline.py +1 -1
  84. metadata/generated/schema/api/setOwner.py +1 -1
  85. metadata/generated/schema/api/teams/__init__.py +1 -1
  86. metadata/generated/schema/api/teams/createPersona.py +1 -1
  87. metadata/generated/schema/api/teams/createRole.py +1 -1
  88. metadata/generated/schema/api/teams/createTeam.py +1 -1
  89. metadata/generated/schema/api/teams/createUser.py +1 -1
  90. metadata/generated/schema/api/tests/__init__.py +1 -1
  91. metadata/generated/schema/api/tests/createCustomMetric.py +1 -1
  92. metadata/generated/schema/api/tests/createLogicalTestCases.py +1 -1
  93. metadata/generated/schema/api/tests/createTestCase.py +1 -1
  94. metadata/generated/schema/api/tests/createTestCaseResolutionStatus.py +1 -1
  95. metadata/generated/schema/api/tests/createTestDefinition.py +1 -1
  96. metadata/generated/schema/api/tests/createTestSuite.py +1 -1
  97. metadata/generated/schema/api/voteRequest.py +1 -1
  98. metadata/generated/schema/auth/__init__.py +1 -1
  99. metadata/generated/schema/auth/basicAuth.py +1 -1
  100. metadata/generated/schema/auth/basicLoginRequest.py +1 -1
  101. metadata/generated/schema/auth/changePasswordRequest.py +1 -1
  102. metadata/generated/schema/auth/createPersonalToken.py +1 -1
  103. metadata/generated/schema/auth/emailRequest.py +1 -1
  104. metadata/generated/schema/auth/emailVerificationToken.py +1 -1
  105. metadata/generated/schema/auth/generateToken.py +1 -1
  106. metadata/generated/schema/auth/jwtAuth.py +1 -1
  107. metadata/generated/schema/auth/loginRequest.py +1 -1
  108. metadata/generated/schema/auth/logoutRequest.py +1 -1
  109. metadata/generated/schema/auth/passwordResetRequest.py +1 -1
  110. metadata/generated/schema/auth/passwordResetToken.py +1 -1
  111. metadata/generated/schema/auth/personalAccessToken.py +1 -1
  112. metadata/generated/schema/auth/refreshToken.py +1 -1
  113. metadata/generated/schema/auth/registrationRequest.py +1 -1
  114. metadata/generated/schema/auth/revokePersonalToken.py +1 -1
  115. metadata/generated/schema/auth/revokeToken.py +1 -1
  116. metadata/generated/schema/auth/serviceTokenEnum.py +1 -1
  117. metadata/generated/schema/auth/ssoAuth.py +1 -1
  118. metadata/generated/schema/auth/tokenRefreshRequest.py +1 -1
  119. metadata/generated/schema/configuration/__init__.py +1 -1
  120. metadata/generated/schema/configuration/appsPrivateConfiguration.py +6 -2
  121. metadata/generated/schema/configuration/authConfig.py +1 -1
  122. metadata/generated/schema/configuration/authenticationConfiguration.py +13 -2
  123. metadata/generated/schema/configuration/authorizerConfiguration.py +1 -1
  124. metadata/generated/schema/configuration/changeEventConfiguration.py +1 -1
  125. metadata/generated/schema/configuration/dataQualityConfiguration.py +1 -1
  126. metadata/generated/schema/configuration/elasticSearchConfiguration.py +1 -1
  127. metadata/generated/schema/configuration/eventHandlerConfiguration.py +1 -1
  128. metadata/generated/schema/configuration/fernetConfiguration.py +1 -1
  129. metadata/generated/schema/configuration/jwtTokenConfiguration.py +1 -1
  130. metadata/generated/schema/configuration/kafkaEventConfiguration.py +1 -1
  131. metadata/generated/schema/configuration/ldapConfiguration.py +1 -1
  132. metadata/generated/schema/configuration/ldapTrustStoreConfig/__init__.py +1 -1
  133. metadata/generated/schema/configuration/ldapTrustStoreConfig/customTrustManagerConfig.py +1 -1
  134. metadata/generated/schema/configuration/ldapTrustStoreConfig/hostNameConfig.py +1 -1
  135. metadata/generated/schema/configuration/ldapTrustStoreConfig/jvmDefaultConfig.py +1 -1
  136. metadata/generated/schema/configuration/ldapTrustStoreConfig/trustAllConfig.py +1 -1
  137. metadata/generated/schema/configuration/ldapTrustStoreConfig/truststoreConfig.py +1 -1
  138. metadata/generated/schema/configuration/loginConfiguration.py +1 -1
  139. metadata/generated/schema/configuration/logoConfiguration.py +1 -1
  140. metadata/generated/schema/configuration/pipelineServiceClientConfiguration.py +1 -1
  141. metadata/generated/schema/configuration/slackAppConfiguration.py +1 -1
  142. metadata/generated/schema/configuration/taskNotificationConfiguration.py +1 -1
  143. metadata/generated/schema/configuration/testResultNotificationConfiguration.py +1 -1
  144. metadata/generated/schema/dataInsight/__init__.py +1 -1
  145. metadata/generated/schema/dataInsight/dataInsightChart.py +1 -1
  146. metadata/generated/schema/dataInsight/dataInsightChartResult.py +1 -1
  147. metadata/generated/schema/dataInsight/kpi/__init__.py +1 -1
  148. metadata/generated/schema/dataInsight/kpi/basic.py +1 -1
  149. metadata/generated/schema/dataInsight/kpi/kpi.py +1 -1
  150. metadata/generated/schema/dataInsight/type/__init__.py +1 -1
  151. metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsCount.py +1 -1
  152. metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsSize.py +1 -1
  153. metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsCount.py +1 -1
  154. metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsSize.py +1 -1
  155. metadata/generated/schema/dataInsight/type/dailyActiveUsers.py +1 -1
  156. metadata/generated/schema/dataInsight/type/mostActiveUsers.py +1 -1
  157. metadata/generated/schema/dataInsight/type/mostViewedEntities.py +1 -1
  158. metadata/generated/schema/dataInsight/type/pageViewsByEntities.py +1 -1
  159. metadata/generated/schema/dataInsight/type/percentageOfEntitiesWithDescriptionByType.py +1 -1
  160. metadata/generated/schema/dataInsight/type/percentageOfEntitiesWithOwnerByType.py +1 -1
  161. metadata/generated/schema/dataInsight/type/percentageOfServicesWithDescription.py +1 -1
  162. metadata/generated/schema/dataInsight/type/percentageOfServicesWithOwner.py +1 -1
  163. metadata/generated/schema/dataInsight/type/totalEntitiesByTier.py +1 -1
  164. metadata/generated/schema/dataInsight/type/totalEntitiesByType.py +1 -1
  165. metadata/generated/schema/dataInsight/type/unusedAssets.py +1 -1
  166. metadata/generated/schema/email/__init__.py +1 -1
  167. metadata/generated/schema/email/emailRequest.py +1 -1
  168. metadata/generated/schema/email/smtpSettings.py +1 -1
  169. metadata/generated/schema/entity/__init__.py +1 -1
  170. metadata/generated/schema/entity/applications/__init__.py +1 -1
  171. metadata/generated/schema/entity/applications/app.py +7 -2
  172. metadata/generated/schema/entity/applications/appRunRecord.py +2 -7
  173. metadata/generated/schema/entity/applications/configuration/__init__.py +1 -1
  174. metadata/generated/schema/entity/applications/configuration/applicationConfig.py +1 -1
  175. metadata/generated/schema/entity/applications/configuration/external/__init__.py +1 -1
  176. metadata/generated/schema/entity/applications/configuration/external/autoTaggerAppConfig.py +1 -1
  177. metadata/generated/schema/entity/applications/configuration/external/metaPilotAppConfig.py +1 -1
  178. metadata/generated/schema/entity/applications/configuration/internal/__init__.py +1 -1
  179. metadata/generated/schema/entity/applications/configuration/internal/dataInsightsAppConfig.py +1 -1
  180. metadata/generated/schema/entity/applications/configuration/internal/dataInsightsReportAppConfig.py +1 -1
  181. metadata/generated/schema/entity/applications/configuration/internal/searchIndexingAppConfig.py +1 -1
  182. metadata/generated/schema/entity/applications/configuration/private/external/__init__.py +1 -1
  183. metadata/generated/schema/entity/applications/configuration/private/external/metaPilotAppPrivateConfig.py +1 -1
  184. metadata/generated/schema/entity/applications/createAppRequest.py +1 -1
  185. metadata/generated/schema/entity/applications/jobStatus.py +1 -1
  186. metadata/generated/schema/entity/applications/liveExecutionContext.py +1 -1
  187. metadata/generated/schema/entity/applications/marketplace/__init__.py +1 -1
  188. metadata/generated/schema/entity/applications/marketplace/appMarketPlaceDefinition.py +1 -1
  189. metadata/generated/schema/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.py +1 -1
  190. metadata/generated/schema/entity/applications/scheduledExecutionContext.py +1 -1
  191. metadata/generated/schema/entity/automations/__init__.py +1 -1
  192. metadata/generated/schema/entity/automations/testServiceConnection.py +1 -1
  193. metadata/generated/schema/entity/automations/workflow.py +1 -1
  194. metadata/generated/schema/entity/bot.py +1 -1
  195. metadata/generated/schema/entity/classification/__init__.py +1 -1
  196. metadata/generated/schema/entity/classification/classification.py +1 -1
  197. metadata/generated/schema/entity/classification/tag.py +1 -1
  198. metadata/generated/schema/entity/data/__init__.py +1 -1
  199. metadata/generated/schema/entity/data/chart.py +1 -1
  200. metadata/generated/schema/entity/data/container.py +1 -1
  201. metadata/generated/schema/entity/data/dashboard.py +1 -1
  202. metadata/generated/schema/entity/data/dashboardDataModel.py +1 -1
  203. metadata/generated/schema/entity/data/database.py +1 -1
  204. metadata/generated/schema/entity/data/databaseSchema.py +1 -1
  205. metadata/generated/schema/entity/data/glossary.py +1 -1
  206. metadata/generated/schema/entity/data/glossaryTerm.py +1 -1
  207. metadata/generated/schema/entity/data/metrics.py +1 -1
  208. metadata/generated/schema/entity/data/mlmodel.py +1 -1
  209. metadata/generated/schema/entity/data/pipeline.py +1 -1
  210. metadata/generated/schema/entity/data/query.py +1 -1
  211. metadata/generated/schema/entity/data/report.py +1 -1
  212. metadata/generated/schema/entity/data/searchIndex.py +1 -1
  213. metadata/generated/schema/entity/data/storedProcedure.py +1 -1
  214. metadata/generated/schema/entity/data/table.py +1 -1
  215. metadata/generated/schema/entity/data/topic.py +1 -1
  216. metadata/generated/schema/entity/docStore/__init__.py +1 -1
  217. metadata/generated/schema/entity/docStore/document.py +1 -1
  218. metadata/generated/schema/entity/domains/__init__.py +1 -1
  219. metadata/generated/schema/entity/domains/dataProduct.py +1 -1
  220. metadata/generated/schema/entity/domains/domain.py +1 -1
  221. metadata/generated/schema/entity/events/__init__.py +1 -1
  222. metadata/generated/schema/entity/events/webhook.py +1 -1
  223. metadata/generated/schema/entity/feed/__init__.py +1 -1
  224. metadata/generated/schema/entity/feed/suggestion.py +1 -1
  225. metadata/generated/schema/entity/feed/thread.py +12 -1
  226. metadata/generated/schema/entity/policies/__init__.py +1 -1
  227. metadata/generated/schema/entity/policies/accessControl/__init__.py +1 -1
  228. metadata/generated/schema/entity/policies/accessControl/resourceDescriptor.py +1 -1
  229. metadata/generated/schema/entity/policies/accessControl/resourcePermission.py +1 -1
  230. metadata/generated/schema/entity/policies/accessControl/rule.py +1 -1
  231. metadata/generated/schema/entity/policies/filters.py +1 -1
  232. metadata/generated/schema/entity/policies/policy.py +1 -1
  233. metadata/generated/schema/entity/services/__init__.py +1 -1
  234. metadata/generated/schema/entity/services/connections/__init__.py +1 -1
  235. metadata/generated/schema/entity/services/connections/common/__init__.py +1 -1
  236. metadata/generated/schema/entity/services/connections/common/sslCertPaths.py +1 -1
  237. metadata/generated/schema/entity/services/connections/common/sslCertValues.py +1 -1
  238. metadata/generated/schema/entity/services/connections/common/sslConfig.py +1 -1
  239. metadata/generated/schema/entity/services/connections/connectionBasicType.py +6 -1
  240. metadata/generated/schema/entity/services/connections/dashboard/__init__.py +1 -1
  241. metadata/generated/schema/entity/services/connections/dashboard/customDashboardConnection.py +1 -1
  242. metadata/generated/schema/entity/services/connections/dashboard/domoDashboardConnection.py +1 -1
  243. metadata/generated/schema/entity/services/connections/dashboard/lightdashConnection.py +1 -1
  244. metadata/generated/schema/entity/services/connections/dashboard/lookerConnection.py +1 -1
  245. metadata/generated/schema/entity/services/connections/dashboard/metabaseConnection.py +1 -1
  246. metadata/generated/schema/entity/services/connections/dashboard/modeConnection.py +1 -1
  247. metadata/generated/schema/entity/services/connections/dashboard/mstrConnection.py +1 -1
  248. metadata/generated/schema/entity/services/connections/dashboard/powerBIConnection.py +1 -1
  249. metadata/generated/schema/entity/services/connections/dashboard/qlikSenseConnection.py +1 -1
  250. metadata/generated/schema/entity/services/connections/dashboard/quickSightConnection.py +1 -1
  251. metadata/generated/schema/entity/services/connections/dashboard/redashConnection.py +1 -1
  252. metadata/generated/schema/entity/services/connections/dashboard/supersetConnection.py +1 -1
  253. metadata/generated/schema/entity/services/connections/dashboard/tableauConnection.py +1 -1
  254. metadata/generated/schema/entity/services/connections/database/__init__.py +1 -1
  255. metadata/generated/schema/entity/services/connections/database/athenaConnection.py +1 -1
  256. metadata/generated/schema/entity/services/connections/database/azureSQLConnection.py +34 -1
  257. metadata/generated/schema/entity/services/connections/database/bigQueryConnection.py +1 -1
  258. metadata/generated/schema/entity/services/connections/database/bigTableConnection.py +1 -1
  259. metadata/generated/schema/entity/services/connections/database/clickhouseConnection.py +1 -1
  260. metadata/generated/schema/entity/services/connections/database/common/__init__.py +1 -1
  261. metadata/generated/schema/entity/services/connections/database/common/azureConfig.py +20 -0
  262. metadata/generated/schema/entity/services/connections/database/common/basicAuth.py +1 -1
  263. metadata/generated/schema/entity/services/connections/database/common/iamAuthConfig.py +1 -1
  264. metadata/generated/schema/entity/services/connections/database/common/jwtAuth.py +1 -1
  265. metadata/generated/schema/entity/services/connections/database/couchbaseConnection.py +1 -1
  266. metadata/generated/schema/entity/services/connections/database/customDatabaseConnection.py +1 -1
  267. metadata/generated/schema/entity/services/connections/database/databricksConnection.py +1 -1
  268. metadata/generated/schema/entity/services/connections/database/datalake/__init__.py +1 -1
  269. metadata/generated/schema/entity/services/connections/database/datalake/azureConfig.py +1 -1
  270. metadata/generated/schema/entity/services/connections/database/datalake/gcsConfig.py +1 -1
  271. metadata/generated/schema/entity/services/connections/database/datalake/s3Config.py +1 -1
  272. metadata/generated/schema/entity/services/connections/database/datalakeConnection.py +1 -1
  273. metadata/generated/schema/entity/services/connections/database/db2Connection.py +1 -1
  274. metadata/generated/schema/entity/services/connections/database/deltaLakeConnection.py +1 -1
  275. metadata/generated/schema/entity/services/connections/database/domoDatabaseConnection.py +1 -1
  276. metadata/generated/schema/entity/services/connections/database/dorisConnection.py +1 -1
  277. metadata/generated/schema/entity/services/connections/database/druidConnection.py +1 -1
  278. metadata/generated/schema/entity/services/connections/database/dynamoDBConnection.py +1 -1
  279. metadata/generated/schema/entity/services/connections/database/glueConnection.py +1 -1
  280. metadata/generated/schema/entity/services/connections/database/greenplumConnection.py +1 -1
  281. metadata/generated/schema/entity/services/connections/database/hiveConnection.py +1 -1
  282. metadata/generated/schema/entity/services/connections/database/iceberg/__init__.py +1 -1
  283. metadata/generated/schema/entity/services/connections/database/iceberg/dynamoDbCatalogConnection.py +1 -1
  284. metadata/generated/schema/entity/services/connections/database/iceberg/glueCatalogConnection.py +1 -1
  285. metadata/generated/schema/entity/services/connections/database/iceberg/hiveCatalogConnection.py +1 -1
  286. metadata/generated/schema/entity/services/connections/database/iceberg/icebergCatalog.py +1 -1
  287. metadata/generated/schema/entity/services/connections/database/iceberg/icebergFileSystem.py +1 -1
  288. metadata/generated/schema/entity/services/connections/database/iceberg/restCatalogConnection.py +1 -1
  289. metadata/generated/schema/entity/services/connections/database/icebergConnection.py +1 -1
  290. metadata/generated/schema/entity/services/connections/database/impalaConnection.py +1 -1
  291. metadata/generated/schema/entity/services/connections/database/mariaDBConnection.py +1 -1
  292. metadata/generated/schema/entity/services/connections/database/mongoDBConnection.py +1 -1
  293. metadata/generated/schema/entity/services/connections/database/mssqlConnection.py +1 -1
  294. metadata/generated/schema/entity/services/connections/database/mysqlConnection.py +7 -3
  295. metadata/generated/schema/entity/services/connections/database/oracleConnection.py +1 -1
  296. metadata/generated/schema/entity/services/connections/database/pinotDBConnection.py +1 -1
  297. metadata/generated/schema/entity/services/connections/database/postgresConnection.py +7 -3
  298. metadata/generated/schema/entity/services/connections/database/prestoConnection.py +1 -1
  299. metadata/generated/schema/entity/services/connections/database/redshiftConnection.py +1 -1
  300. metadata/generated/schema/entity/services/connections/database/salesforceConnection.py +1 -1
  301. metadata/generated/schema/entity/services/connections/database/sapHana/__init__.py +3 -0
  302. metadata/generated/schema/entity/services/connections/database/sapHana/sapHanaHDBConnection.py +20 -0
  303. metadata/generated/schema/entity/services/connections/database/sapHana/sapHanaSQLConnection.py +36 -0
  304. metadata/generated/schema/entity/services/connections/database/sapHanaConnection.py +6 -40
  305. metadata/generated/schema/entity/services/connections/database/sasConnection.py +1 -1
  306. metadata/generated/schema/entity/services/connections/database/singleStoreConnection.py +1 -1
  307. metadata/generated/schema/entity/services/connections/database/snowflakeConnection.py +1 -1
  308. metadata/generated/schema/entity/services/connections/database/sqliteConnection.py +1 -1
  309. metadata/generated/schema/entity/services/connections/database/trinoConnection.py +1 -1
  310. metadata/generated/schema/entity/services/connections/database/unityCatalogConnection.py +1 -1
  311. metadata/generated/schema/entity/services/connections/database/verticaConnection.py +1 -1
  312. metadata/generated/schema/entity/services/connections/messaging/__init__.py +1 -1
  313. metadata/generated/schema/entity/services/connections/messaging/customMessagingConnection.py +1 -1
  314. metadata/generated/schema/entity/services/connections/messaging/kafkaConnection.py +1 -1
  315. metadata/generated/schema/entity/services/connections/messaging/kinesisConnection.py +1 -1
  316. metadata/generated/schema/entity/services/connections/messaging/pulsarConnection.py +1 -1
  317. metadata/generated/schema/entity/services/connections/messaging/redpandaConnection.py +1 -1
  318. metadata/generated/schema/entity/services/connections/messaging/saslMechanismType.py +1 -1
  319. metadata/generated/schema/entity/services/connections/metadata/__init__.py +1 -1
  320. metadata/generated/schema/entity/services/connections/metadata/alationConnection.py +1 -1
  321. metadata/generated/schema/entity/services/connections/metadata/amundsenConnection.py +1 -1
  322. metadata/generated/schema/entity/services/connections/metadata/atlasConnection.py +1 -1
  323. metadata/generated/schema/entity/services/connections/metadata/metadataESConnection.py +1 -1
  324. metadata/generated/schema/entity/services/connections/metadata/openMetadataConnection.py +1 -1
  325. metadata/generated/schema/entity/services/connections/mlmodel/__init__.py +1 -1
  326. metadata/generated/schema/entity/services/connections/mlmodel/customMlModelConnection.py +1 -1
  327. metadata/generated/schema/entity/services/connections/mlmodel/mlflowConnection.py +1 -1
  328. metadata/generated/schema/entity/services/connections/mlmodel/sageMakerConnection.py +1 -1
  329. metadata/generated/schema/entity/services/connections/mlmodel/sklearnConnection.py +1 -1
  330. metadata/generated/schema/entity/services/connections/pipeline/__init__.py +1 -1
  331. metadata/generated/schema/entity/services/connections/pipeline/airbyteConnection.py +1 -1
  332. metadata/generated/schema/entity/services/connections/pipeline/airflowConnection.py +1 -1
  333. metadata/generated/schema/entity/services/connections/pipeline/backendConnection.py +1 -1
  334. metadata/generated/schema/entity/services/connections/pipeline/customPipelineConnection.py +1 -1
  335. metadata/generated/schema/entity/services/connections/pipeline/dagsterConnection.py +1 -1
  336. metadata/generated/schema/entity/services/connections/pipeline/databricksPipelineConnection.py +1 -1
  337. metadata/generated/schema/entity/services/connections/pipeline/domoPipelineConnection.py +1 -1
  338. metadata/generated/schema/entity/services/connections/pipeline/fivetranConnection.py +1 -1
  339. metadata/generated/schema/entity/services/connections/pipeline/gluePipelineConnection.py +1 -1
  340. metadata/generated/schema/entity/services/connections/pipeline/nifiConnection.py +1 -1
  341. metadata/generated/schema/entity/services/connections/pipeline/sparkConnection.py +1 -1
  342. metadata/generated/schema/entity/services/connections/pipeline/splineConnection.py +1 -1
  343. metadata/generated/schema/entity/services/connections/search/__init__.py +1 -1
  344. metadata/generated/schema/entity/services/connections/search/customSearchConnection.py +1 -1
  345. metadata/generated/schema/entity/services/connections/search/elasticSearch/__init__.py +1 -1
  346. metadata/generated/schema/entity/services/connections/search/elasticSearch/apiAuth.py +1 -1
  347. metadata/generated/schema/entity/services/connections/search/elasticSearch/basicAuth.py +1 -1
  348. metadata/generated/schema/entity/services/connections/search/elasticSearchConnection.py +1 -1
  349. metadata/generated/schema/entity/services/connections/search/openSearchConnection.py +1 -1
  350. metadata/generated/schema/entity/services/connections/serviceConnection.py +1 -1
  351. metadata/generated/schema/entity/services/connections/storage/__init__.py +1 -1
  352. metadata/generated/schema/entity/services/connections/storage/adlsConnection.py +1 -1
  353. metadata/generated/schema/entity/services/connections/storage/customStorageConnection.py +1 -1
  354. metadata/generated/schema/entity/services/connections/storage/gcsConnection.py +1 -1
  355. metadata/generated/schema/entity/services/connections/storage/s3Connection.py +1 -1
  356. metadata/generated/schema/entity/services/connections/testConnectionDefinition.py +1 -1
  357. metadata/generated/schema/entity/services/connections/testConnectionResult.py +1 -1
  358. metadata/generated/schema/entity/services/dashboardService.py +1 -1
  359. metadata/generated/schema/entity/services/databaseService.py +1 -1
  360. metadata/generated/schema/entity/services/ingestionPipelines/__init__.py +1 -1
  361. metadata/generated/schema/entity/services/ingestionPipelines/ingestionPipeline.py +1 -1
  362. metadata/generated/schema/entity/services/ingestionPipelines/pipelineServiceClientResponse.py +1 -1
  363. metadata/generated/schema/entity/services/ingestionPipelines/status.py +1 -1
  364. metadata/generated/schema/entity/services/messagingService.py +1 -1
  365. metadata/generated/schema/entity/services/metadataService.py +1 -1
  366. metadata/generated/schema/entity/services/mlmodelService.py +1 -1
  367. metadata/generated/schema/entity/services/pipelineService.py +1 -1
  368. metadata/generated/schema/entity/services/searchService.py +1 -1
  369. metadata/generated/schema/entity/services/serviceType.py +1 -1
  370. metadata/generated/schema/entity/services/storageService.py +1 -1
  371. metadata/generated/schema/entity/teams/__init__.py +1 -1
  372. metadata/generated/schema/entity/teams/persona.py +1 -1
  373. metadata/generated/schema/entity/teams/role.py +1 -1
  374. metadata/generated/schema/entity/teams/team.py +1 -1
  375. metadata/generated/schema/entity/teams/teamHierarchy.py +1 -1
  376. metadata/generated/schema/entity/teams/user.py +1 -1
  377. metadata/generated/schema/entity/type.py +1 -1
  378. metadata/generated/schema/entity/utils/__init__.py +1 -1
  379. metadata/generated/schema/entity/utils/entitiesCount.py +1 -1
  380. metadata/generated/schema/entity/utils/servicesCount.py +1 -1
  381. metadata/generated/schema/entity/utils/supersetApiConnection.py +1 -1
  382. metadata/generated/schema/events/__init__.py +1 -1
  383. metadata/generated/schema/events/alertMetrics.py +1 -1
  384. metadata/generated/schema/events/api/__init__.py +1 -1
  385. metadata/generated/schema/events/api/createEventSubscription.py +1 -1
  386. metadata/generated/schema/events/emailAlertConfig.py +1 -1
  387. metadata/generated/schema/events/eventFilterRule.py +1 -1
  388. metadata/generated/schema/events/eventSubscription.py +1 -1
  389. metadata/generated/schema/events/eventSubscriptionOffset.py +1 -1
  390. metadata/generated/schema/events/failedEvent.py +1 -1
  391. metadata/generated/schema/events/filterResourceDescriptor.py +1 -1
  392. metadata/generated/schema/events/subscriptionResourceDescriptor.py +1 -1
  393. metadata/generated/schema/metadataIngestion/__init__.py +1 -1
  394. metadata/generated/schema/metadataIngestion/application.py +1 -1
  395. metadata/generated/schema/metadataIngestion/applicationPipeline.py +1 -1
  396. metadata/generated/schema/metadataIngestion/dashboardServiceMetadataPipeline.py +1 -1
  397. metadata/generated/schema/metadataIngestion/dataInsightPipeline.py +1 -1
  398. metadata/generated/schema/metadataIngestion/databaseServiceMetadataPipeline.py +1 -1
  399. metadata/generated/schema/metadataIngestion/databaseServiceProfilerPipeline.py +1 -1
  400. metadata/generated/schema/metadataIngestion/databaseServiceQueryLineagePipeline.py +1 -1
  401. metadata/generated/schema/metadataIngestion/databaseServiceQueryUsagePipeline.py +1 -1
  402. metadata/generated/schema/metadataIngestion/dbtPipeline.py +1 -1
  403. metadata/generated/schema/metadataIngestion/dbtconfig/__init__.py +1 -1
  404. metadata/generated/schema/metadataIngestion/dbtconfig/dbtAzureConfig.py +1 -1
  405. metadata/generated/schema/metadataIngestion/dbtconfig/dbtBucketDetails.py +1 -1
  406. metadata/generated/schema/metadataIngestion/dbtconfig/dbtCloudConfig.py +1 -1
  407. metadata/generated/schema/metadataIngestion/dbtconfig/dbtGCSConfig.py +1 -1
  408. metadata/generated/schema/metadataIngestion/dbtconfig/dbtHttpConfig.py +1 -1
  409. metadata/generated/schema/metadataIngestion/dbtconfig/dbtLocalConfig.py +1 -1
  410. metadata/generated/schema/metadataIngestion/dbtconfig/dbtS3Config.py +1 -1
  411. metadata/generated/schema/metadataIngestion/messagingServiceMetadataPipeline.py +1 -1
  412. metadata/generated/schema/metadataIngestion/metadataToElasticSearchPipeline.py +1 -1
  413. metadata/generated/schema/metadataIngestion/mlmodelServiceMetadataPipeline.py +1 -1
  414. metadata/generated/schema/metadataIngestion/pipelineServiceMetadataPipeline.py +1 -1
  415. metadata/generated/schema/metadataIngestion/searchServiceMetadataPipeline.py +1 -1
  416. metadata/generated/schema/metadataIngestion/storage/__init__.py +1 -1
  417. metadata/generated/schema/metadataIngestion/storage/containerMetadataConfig.py +1 -1
  418. metadata/generated/schema/metadataIngestion/storage/manifestMetadataConfig.py +1 -1
  419. metadata/generated/schema/metadataIngestion/storage/storageBucketDetails.py +1 -1
  420. metadata/generated/schema/metadataIngestion/storage/storageMetadataADLSConfig.py +1 -1
  421. metadata/generated/schema/metadataIngestion/storage/storageMetadataGCSConfig.py +1 -1
  422. metadata/generated/schema/metadataIngestion/storage/storageMetadataHttpConfig.py +1 -1
  423. metadata/generated/schema/metadataIngestion/storage/storageMetadataLocalConfig.py +1 -1
  424. metadata/generated/schema/metadataIngestion/storage/storageMetadataS3Config.py +1 -1
  425. metadata/generated/schema/metadataIngestion/storageServiceMetadataPipeline.py +1 -1
  426. metadata/generated/schema/metadataIngestion/testSuitePipeline.py +1 -1
  427. metadata/generated/schema/metadataIngestion/workflow.py +1 -1
  428. metadata/generated/schema/monitoring/__init__.py +1 -1
  429. metadata/generated/schema/monitoring/eventMonitorProvider.py +1 -1
  430. metadata/generated/schema/security/__init__.py +1 -1
  431. metadata/generated/schema/security/client/__init__.py +1 -1
  432. metadata/generated/schema/security/client/auth0SSOClientConfig.py +1 -1
  433. metadata/generated/schema/security/client/azureSSOClientConfig.py +1 -1
  434. metadata/generated/schema/security/client/customOidcSSOClientConfig.py +1 -1
  435. metadata/generated/schema/security/client/googleSSOClientConfig.py +1 -1
  436. metadata/generated/schema/security/client/oidcClientConfig.py +46 -0
  437. metadata/generated/schema/security/client/oktaSSOClientConfig.py +1 -1
  438. metadata/generated/schema/security/client/openMetadataJWTClientConfig.py +1 -1
  439. metadata/generated/schema/security/client/samlSSOClientConfig.py +1 -1
  440. metadata/generated/schema/security/credentials/__init__.py +1 -1
  441. metadata/generated/schema/security/credentials/accessTokenAuth.py +1 -1
  442. metadata/generated/schema/security/credentials/apiAccessTokenAuth.py +1 -1
  443. metadata/generated/schema/security/credentials/awsCredentials.py +1 -1
  444. metadata/generated/schema/security/credentials/azureCredentials.py +6 -1
  445. metadata/generated/schema/security/credentials/basicAuth.py +1 -1
  446. metadata/generated/schema/security/credentials/bitbucketCredentials.py +1 -1
  447. metadata/generated/schema/security/credentials/gcpCredentials.py +7 -3
  448. metadata/generated/schema/security/credentials/gcpExternalAccount.py +37 -0
  449. metadata/generated/schema/security/credentials/gcpValues.py +2 -2
  450. metadata/generated/schema/security/credentials/gitCredentials.py +1 -1
  451. metadata/generated/schema/security/credentials/githubCredentials.py +1 -1
  452. metadata/generated/schema/security/secrets/__init__.py +1 -1
  453. metadata/generated/schema/security/secrets/secretsManagerClientLoader.py +1 -1
  454. metadata/generated/schema/security/secrets/secretsManagerConfiguration.py +1 -1
  455. metadata/generated/schema/security/secrets/secretsManagerProvider.py +1 -1
  456. metadata/generated/schema/security/securityConfiguration.py +1 -1
  457. metadata/generated/schema/security/ssl/__init__.py +1 -1
  458. metadata/generated/schema/security/ssl/validateSSLClientConfig.py +1 -1
  459. metadata/generated/schema/security/ssl/verifySSLConfig.py +1 -1
  460. metadata/generated/schema/settings/__init__.py +1 -1
  461. metadata/generated/schema/settings/settings.py +1 -1
  462. metadata/generated/schema/system/__init__.py +1 -1
  463. metadata/generated/schema/system/entityError.py +1 -1
  464. metadata/generated/schema/system/eventPublisherJob.py +1 -1
  465. metadata/generated/schema/system/indexingError.py +1 -1
  466. metadata/generated/schema/system/ui/__init__.py +1 -1
  467. metadata/generated/schema/system/ui/knowledgePanel.py +1 -1
  468. metadata/generated/schema/system/ui/page.py +1 -1
  469. metadata/generated/schema/system/validationResponse.py +43 -0
  470. metadata/generated/schema/tests/__init__.py +1 -1
  471. metadata/generated/schema/tests/assigned.py +1 -1
  472. metadata/generated/schema/tests/basic.py +1 -1
  473. metadata/generated/schema/tests/customMetric.py +1 -1
  474. metadata/generated/schema/tests/resolved.py +1 -1
  475. metadata/generated/schema/tests/testCase.py +1 -1
  476. metadata/generated/schema/tests/testCaseResolutionStatus.py +1 -1
  477. metadata/generated/schema/tests/testDefinition.py +1 -1
  478. metadata/generated/schema/tests/testSuite.py +1 -1
  479. metadata/generated/schema/type/__init__.py +1 -1
  480. metadata/generated/schema/type/auditLog.py +1 -1
  481. metadata/generated/schema/type/basic.py +1 -1
  482. metadata/generated/schema/type/bulkOperationResult.py +1 -1
  483. metadata/generated/schema/type/changeEvent.py +1 -1
  484. metadata/generated/schema/type/changeEventType.py +1 -1
  485. metadata/generated/schema/type/collectionDescriptor.py +1 -1
  486. metadata/generated/schema/type/csvDocumentation.py +1 -1
  487. metadata/generated/schema/type/csvErrorType.py +1 -1
  488. metadata/generated/schema/type/csvFile.py +1 -1
  489. metadata/generated/schema/type/csvImportResult.py +1 -1
  490. metadata/generated/schema/type/customProperties/__init__.py +1 -1
  491. metadata/generated/schema/type/customProperties/enumConfig.py +1 -1
  492. metadata/generated/schema/type/customProperty.py +1 -1
  493. metadata/generated/schema/type/dailyCount.py +1 -1
  494. metadata/generated/schema/type/databaseConnectionConfig.py +1 -1
  495. metadata/generated/schema/type/entityHistory.py +1 -1
  496. metadata/generated/schema/type/entityLineage.py +1 -1
  497. metadata/generated/schema/type/entityReference.py +1 -1
  498. metadata/generated/schema/type/entityReferenceList.py +1 -1
  499. metadata/generated/schema/type/entityRelationship.py +1 -1
  500. metadata/generated/schema/type/entityUsage.py +1 -1
  501. metadata/generated/schema/type/filterPattern.py +1 -1
  502. metadata/generated/schema/type/function.py +1 -1
  503. metadata/generated/schema/type/include.py +1 -1
  504. metadata/generated/schema/type/jdbcConnection.py +1 -1
  505. metadata/generated/schema/type/lifeCycle.py +1 -1
  506. metadata/generated/schema/type/paging.py +1 -1
  507. metadata/generated/schema/type/profile.py +1 -1
  508. metadata/generated/schema/type/queryParserData.py +1 -1
  509. metadata/generated/schema/type/reaction.py +1 -1
  510. metadata/generated/schema/type/schedule.py +1 -1
  511. metadata/generated/schema/type/schema.py +1 -1
  512. metadata/generated/schema/type/tableQuery.py +1 -1
  513. metadata/generated/schema/type/tableUsageCount.py +1 -1
  514. metadata/generated/schema/type/tagLabel.py +1 -1
  515. metadata/generated/schema/type/usageDetails.py +1 -1
  516. metadata/generated/schema/type/usageRequest.py +1 -1
  517. metadata/generated/schema/type/votes.py +1 -1
  518. metadata/great_expectations/action.py +5 -3
  519. metadata/ingestion/api/topology_runner.py +4 -3
  520. metadata/ingestion/lineage/parser.py +4 -1
  521. metadata/ingestion/ometa/mixins/patch_mixin.py +2 -4
  522. metadata/ingestion/source/dashboard/powerbi/client.py +4 -1
  523. metadata/ingestion/source/database/azuresql/connection.py +21 -3
  524. metadata/ingestion/source/database/datalake/connection.py +2 -14
  525. metadata/ingestion/source/database/datalake/metadata.py +3 -2
  526. metadata/ingestion/source/database/dbt/dbt_config.py +2 -14
  527. metadata/ingestion/source/database/mssql/lineage.py +1 -0
  528. metadata/ingestion/source/database/mssql/usage.py +5 -1
  529. metadata/ingestion/source/database/mysql/connection.py +14 -0
  530. metadata/ingestion/source/database/postgres/connection.py +15 -0
  531. metadata/ingestion/source/database/stored_procedures_mixin.py +1 -1
  532. metadata/ingestion/source/storage/storage_service.py +5 -2
  533. metadata/parsers/json_schema_parser.py +17 -7
  534. metadata/pii/scanners/ner_scanner.py +5 -3
  535. metadata/profiler/interface/profiler_interface.py +4 -4
  536. metadata/profiler/processor/sample_data_handler.py +45 -8
  537. metadata/profiler/source/databricks/profiler_source.py +36 -0
  538. metadata/profiler/source/metadata.py +7 -1
  539. metadata/profiler/source/profiler_source_factory.py +8 -0
  540. metadata/readers/dataframe/json.py +11 -6
  541. metadata/readers/dataframe/models.py +1 -0
  542. metadata/utils/credentials.py +36 -19
  543. metadata/utils/datalake/datalake_utils.py +34 -4
  544. metadata/utils/secrets/azure_kv_secrets_manager.py +3 -19
  545. metadata/utils/source_hash.py +23 -13
  546. metadata/utils/storage_metadata_config.py +2 -15
  547. metadata/workflow/base.py +1 -1
  548. {openmetadata_ingestion-1.3.1.3.dist-info → openmetadata_ingestion-1.3.2.0rc2.dist-info}/METADATA +292 -292
  549. {openmetadata_ingestion-1.3.1.3.dist-info → openmetadata_ingestion-1.3.2.0rc2.dist-info}/RECORD +554 -544
  550. /metadata/examples/workflows/{datalake_azure.yaml → datalake_azure_client_secret.yaml} +0 -0
  551. {openmetadata_ingestion-1.3.1.3.dist-info → openmetadata_ingestion-1.3.2.0rc2.dist-info}/LICENSE +0 -0
  552. {openmetadata_ingestion-1.3.1.3.dist-info → openmetadata_ingestion-1.3.2.0rc2.dist-info}/WHEEL +0 -0
  553. {openmetadata_ingestion-1.3.1.3.dist-info → openmetadata_ingestion-1.3.2.0rc2.dist-info}/entry_points.txt +0 -0
  554. {openmetadata_ingestion-1.3.1.3.dist-info → openmetadata_ingestion-1.3.2.0rc2.dist-info}/top_level.txt +0 -0
@@ -17,8 +17,13 @@ from datetime import datetime
17
17
  from functools import singledispatch
18
18
  from io import BytesIO
19
19
 
20
+ from pydantic.json import ENCODERS_BY_TYPE
21
+
20
22
  from metadata.clients.aws_client import AWSClient
21
23
  from metadata.generated.schema.entity.data.table import Table, TableData
24
+ from metadata.generated.schema.entity.services.connections.connectionBasicType import (
25
+ DataStorageConfig,
26
+ )
22
27
  from metadata.generated.schema.security.credentials.awsCredentials import AWSCredentials
23
28
  from metadata.profiler.interface.profiler_interface import ProfilerInterface
24
29
  from metadata.utils.helpers import clean_uri
@@ -27,15 +32,45 @@ from metadata.utils.logger import profiler_logger
27
32
  logger = profiler_logger()
28
33
 
29
34
 
30
- def _get_object_key(table: Table, prefix: str, overwrite_data: bool) -> str:
35
+ class PathPatternException(Exception):
36
+ """
37
+ Exception class need to validate the file path pattern
38
+ """
39
+
40
+
41
+ def validate_path_pattern(file_path_format: str) -> None:
42
+ if not (
43
+ "{service_name}" in file_path_format
44
+ and "{database_name}" in file_path_format
45
+ and "{database_schema_name}" in file_path_format
46
+ and "{table_name}" in file_path_format
47
+ and file_path_format.endswith(".parquet")
48
+ ):
49
+ raise PathPatternException(
50
+ "Please provide a valid path pattern, "
51
+ "the pattern should include these components {service_name}, "
52
+ "{database_name}, {database_schema_name}, {table_name} and "
53
+ "it should end with extension .parquet"
54
+ )
55
+
56
+
57
+ def _get_object_key(
58
+ table: Table, prefix: str, overwrite_data: bool, file_path_format: str
59
+ ) -> str:
60
+ validate_path_pattern(file_path_format)
61
+ file_name = file_path_format.format(
62
+ service_name=table.service.name,
63
+ database_name=table.database.name,
64
+ database_schema_name=table.databaseSchema.name,
65
+ table_name=table.name.__root__,
66
+ )
31
67
  if not overwrite_data:
32
- file_name = f"sample_data_{datetime.now().strftime('%Y_%m_%d')}.parquet"
33
- else:
34
- file_name = "sample_data.parquet"
35
- path = str(table.fullyQualifiedName.__root__).replace(".", "/")
68
+ file_name = file_name.replace(
69
+ ".parquet", f"_{datetime.now().strftime('%Y_%m_%d')}.parquet"
70
+ )
36
71
  if prefix:
37
- return f"{clean_uri(prefix)}/{path}/{file_name}"
38
- return f"{path}/{file_name}"
72
+ return f"{clean_uri(prefix)}/{file_name}"
73
+ return file_name
39
74
 
40
75
 
41
76
  def upload_sample_data(data: TableData, profiler_interface: ProfilerInterface) -> None:
@@ -45,9 +80,10 @@ def upload_sample_data(data: TableData, profiler_interface: ProfilerInterface) -
45
80
  import pandas as pd # pylint: disable=import-outside-toplevel
46
81
 
47
82
  try:
48
- sample_storage_config = profiler_interface.storage_config
83
+ sample_storage_config: DataStorageConfig = profiler_interface.storage_config
49
84
  if not sample_storage_config:
50
85
  return
86
+ ENCODERS_BY_TYPE[bytes] = lambda v: v.decode("utf-8", "ignore")
51
87
  deserialized_data = json.loads(data.json())
52
88
  df = pd.DataFrame(
53
89
  data=deserialized_data.get("rows", []),
@@ -59,6 +95,7 @@ def upload_sample_data(data: TableData, profiler_interface: ProfilerInterface) -
59
95
  table=profiler_interface.table_entity,
60
96
  prefix=sample_storage_config.prefix,
61
97
  overwrite_data=sample_storage_config.overwriteData,
98
+ file_path_format=sample_storage_config.filePathPattern,
62
99
  )
63
100
  upload_to_storage(
64
101
  sample_storage_config.storageConfig,
@@ -0,0 +1,36 @@
1
+ """Extend the ProfilerSource class to add support for Databricks is_disconnect SQA method"""
2
+
3
+ from metadata.generated.schema.entity.services.databaseService import DatabaseService
4
+ from metadata.generated.schema.metadataIngestion.workflow import (
5
+ OpenMetadataWorkflowConfig,
6
+ )
7
+ from metadata.ingestion.ometa.ometa_api import OpenMetadata
8
+ from metadata.profiler.source.base.profiler_source import ProfilerSource
9
+
10
+
11
+ def is_disconnect(self, e, connection, cursor):
12
+ """is_disconnect method for the Databricks dialect"""
13
+ if "Invalid SessionHandle: SessionHandle" in str(e):
14
+ return True
15
+ return False
16
+
17
+
18
+ class DataBricksProfilerSource(ProfilerSource):
19
+ """Databricks Profiler source"""
20
+
21
+ def __init__(
22
+ self,
23
+ config: OpenMetadataWorkflowConfig,
24
+ database: DatabaseService,
25
+ ometa_client: OpenMetadata,
26
+ ):
27
+ super().__init__(config, database, ometa_client)
28
+ self.set_is_disconnect()
29
+
30
+ def set_is_disconnect(self):
31
+ """Set the is_disconnect method for the Databricks dialect"""
32
+ from databricks.sqlalchemy import (
33
+ DatabricksDialect, # pylint: disable=import-outside-toplevel
34
+ )
35
+
36
+ DatabricksDialect.is_disconnect = is_disconnect
@@ -43,6 +43,10 @@ from metadata.utils.logger import profiler_logger
43
43
  logger = profiler_logger()
44
44
 
45
45
 
46
+ TABLE_FIELDS = ["tableProfilerConfig", "columns", "customMetrics"]
47
+ TAGS_FIELD = ["tags"]
48
+
49
+
46
50
  class ProfilerSourceAndEntity(BaseModel):
47
51
  """Return class for the OpenMetadata Profiler Source"""
48
52
 
@@ -273,7 +277,9 @@ class OpenMetadataSource(Source):
273
277
  """
274
278
  tables = self.metadata.list_all_entities(
275
279
  entity=Table,
276
- fields=["tableProfilerConfig", "columns", "customMetrics"],
280
+ fields=TABLE_FIELDS
281
+ if not self.source_config.processPiiSensitive
282
+ else TABLE_FIELDS + TAGS_FIELD,
277
283
  params={
278
284
  "service": self.config.source.serviceName,
279
285
  "database": fqn.build(
@@ -16,8 +16,12 @@ Factory class for creating profiler source objects
16
16
  from metadata.generated.schema.entity.services.connections.database.bigQueryConnection import (
17
17
  BigqueryType,
18
18
  )
19
+ from metadata.generated.schema.entity.services.connections.database.databricksConnection import (
20
+ DatabricksType,
21
+ )
19
22
  from metadata.profiler.source.base.profiler_source import ProfilerSource
20
23
  from metadata.profiler.source.bigquery.profiler_source import BigQueryProfilerSource
24
+ from metadata.profiler.source.databricks.profiler_source import DataBricksProfilerSource
21
25
 
22
26
 
23
27
  class ProfilerSourceFactory:
@@ -44,3 +48,7 @@ profiler_source_factory.register_source(
44
48
  BigqueryType.BigQuery.value.lower(),
45
49
  BigQueryProfilerSource,
46
50
  )
51
+ profiler_source_factory.register_source(
52
+ DatabricksType.Databricks.value.lower(),
53
+ DataBricksProfilerSource,
54
+ )
@@ -16,7 +16,7 @@ import gzip
16
16
  import io
17
17
  import json
18
18
  import zipfile
19
- from typing import List, Union
19
+ from typing import Any, Dict, List, Optional, Tuple, Union
20
20
 
21
21
  from metadata.readers.dataframe.base import DataFrameReader
22
22
  from metadata.readers.dataframe.common import dataframe_to_chunks
@@ -47,7 +47,7 @@ class JSONDataFrameReader(DataFrameReader):
47
47
  @staticmethod
48
48
  def read_from_json(
49
49
  key: str, json_text: bytes, decode: bool = False, **__
50
- ) -> List["DataFrame"]:
50
+ ) -> Tuple[List["DataFrame"], Optional[Dict[str, Any]]]:
51
51
  """
52
52
  Decompress a JSON file (if needed) and read its contents
53
53
  as a dataframe.
@@ -60,20 +60,25 @@ class JSONDataFrameReader(DataFrameReader):
60
60
  import pandas as pd
61
61
 
62
62
  json_text = _get_json_text(key=key, text=json_text, decode=decode)
63
+ raw_data = None
63
64
  try:
64
65
  data = json.loads(json_text)
66
+ if isinstance(data, dict) and data.get("$schema"):
67
+ raw_data = json_text
65
68
  except json.decoder.JSONDecodeError:
66
69
  logger.debug("Failed to read as JSON object. Trying to read as JSON Lines")
67
70
  data = [json.loads(json_obj) for json_obj in json_text.strip().split("\n")]
68
71
 
69
72
  # if we get a scalar value (e.g. {"a":"b"}) then we need to specify the index
70
73
  data = data if not isinstance(data, dict) else [data]
71
- return dataframe_to_chunks(pd.DataFrame.from_records(data))
74
+ return dataframe_to_chunks(pd.DataFrame.from_records(data)), raw_data
72
75
 
73
76
  def _read(self, *, key: str, bucket_name: str, **kwargs) -> DatalakeColumnWrapper:
74
77
  text = self.reader.read(key, bucket_name=bucket_name)
78
+ dataframes, raw_data = self.read_from_json(
79
+ key=key, json_text=text, decode=True, **kwargs
80
+ )
75
81
  return DatalakeColumnWrapper(
76
- dataframes=self.read_from_json(
77
- key=key, json_text=text, decode=True, **kwargs
78
- )
82
+ dataframes=dataframes,
83
+ raw_data=raw_data,
79
84
  )
@@ -29,6 +29,7 @@ class DatalakeColumnWrapper(BaseModel):
29
29
 
30
30
  columns: Optional[List[Column]]
31
31
  dataframes: Optional[List[Any]] # pandas.Dataframe does not have any validators
32
+ raw_data: Any # in special cases like json schema, we need to store the raw data
32
33
 
33
34
 
34
35
  class DatalakeTableSchemaWrapper(BaseModel):
@@ -15,7 +15,7 @@ import base64
15
15
  import json
16
16
  import os
17
17
  import tempfile
18
- from typing import Dict, List, Optional
18
+ from typing import Dict, List, Optional, Union
19
19
 
20
20
  from cryptography.hazmat.primitives import serialization
21
21
  from google import auth
@@ -25,6 +25,9 @@ from metadata.generated.schema.security.credentials.gcpCredentials import (
25
25
  GCPCredentials,
26
26
  GcpCredentialsPath,
27
27
  )
28
+ from metadata.generated.schema.security.credentials.gcpExternalAccount import (
29
+ GcpExternalAccount,
30
+ )
28
31
  from metadata.generated.schema.security.credentials.gcpValues import (
29
32
  GcpCredentialsValues,
30
33
  )
@@ -85,30 +88,44 @@ def create_credential_tmp_file(credentials: dict) -> str:
85
88
  return temp_file_path
86
89
 
87
90
 
88
- def build_google_credentials_dict(gcp_values: GcpCredentialsValues) -> Dict[str, str]:
91
+ def build_google_credentials_dict(
92
+ gcp_values: Union[GcpCredentialsValues, GcpExternalAccount]
93
+ ) -> Dict[str, str]:
89
94
  """
90
95
  Given GcPCredentialsValues, build a dictionary as the JSON file
91
96
  downloaded from GCP with the service_account
92
97
  :param gcp_values: GCP credentials
93
98
  :return: Dictionary with credentials
94
99
  """
95
- private_key_str = gcp_values.privateKey.get_secret_value()
96
- # adding the replace string here to escape line break if passed from env
97
- private_key_str = private_key_str.replace("\\n", "\n")
98
- validate_private_key(private_key_str)
99
-
100
- return {
101
- "type": gcp_values.type,
102
- "project_id": gcp_values.projectId.__root__,
103
- "private_key_id": gcp_values.privateKeyId,
104
- "private_key": private_key_str,
105
- "client_email": gcp_values.clientEmail,
106
- "client_id": gcp_values.clientId,
107
- "auth_uri": str(gcp_values.authUri),
108
- "token_uri": str(gcp_values.tokenUri),
109
- "auth_provider_x509_cert_url": str(gcp_values.authProviderX509CertUrl),
110
- "client_x509_cert_url": str(gcp_values.clientX509CertUrl),
111
- }
100
+ if isinstance(gcp_values, GcpCredentialsValues):
101
+ private_key_str = gcp_values.privateKey.get_secret_value()
102
+ # adding the replace string here to escape line break if passed from env
103
+ private_key_str = private_key_str.replace("\\n", "\n")
104
+ validate_private_key(private_key_str)
105
+
106
+ return {
107
+ "type": gcp_values.type,
108
+ "project_id": gcp_values.projectId.__root__,
109
+ "private_key_id": gcp_values.privateKeyId,
110
+ "private_key": private_key_str,
111
+ "client_email": gcp_values.clientEmail,
112
+ "client_id": gcp_values.clientId,
113
+ "auth_uri": str(gcp_values.authUri),
114
+ "token_uri": str(gcp_values.tokenUri),
115
+ "auth_provider_x509_cert_url": str(gcp_values.authProviderX509CertUrl),
116
+ "client_x509_cert_url": str(gcp_values.clientX509CertUrl),
117
+ }
118
+ if isinstance(gcp_values, GcpExternalAccount):
119
+ return {
120
+ "type": gcp_values.externalType,
121
+ "audience": gcp_values.audience,
122
+ "subject_token_type": gcp_values.subjectTokenType,
123
+ "token_url": gcp_values.tokenURL,
124
+ "credential_source": gcp_values.credentialSource,
125
+ }
126
+ raise InvalidGcpConfigException(
127
+ f"Error trying to build GCP credentials dict due to Invalid GCP config {type(gcp_values)}"
128
+ )
112
129
 
113
130
 
114
131
  def set_google_credentials(gcp_credentials: GCPCredentials) -> None:
@@ -17,10 +17,11 @@ import ast
17
17
  import json
18
18
  import random
19
19
  import traceback
20
- from typing import Dict, List, Optional, Union, cast
20
+ from typing import Any, Dict, List, Optional, Union, cast
21
21
 
22
22
  from metadata.generated.schema.entity.data.table import Column, DataType
23
23
  from metadata.ingestion.source.database.column_helpers import truncate_column_name
24
+ from metadata.parsers.json_schema_parser import parse_json_schema
24
25
  from metadata.readers.dataframe.models import (
25
26
  DatalakeColumnWrapper,
26
27
  DatalakeTableSchemaWrapper,
@@ -35,6 +36,7 @@ def fetch_dataframe(
35
36
  config_source,
36
37
  client,
37
38
  file_fqn: DatalakeTableSchemaWrapper,
39
+ fetch_raw_data: bool = False,
38
40
  **kwargs,
39
41
  ) -> Optional[List["DataFrame"]]:
40
42
  """
@@ -60,6 +62,8 @@ def fetch_dataframe(
60
62
  df_wrapper: DatalakeColumnWrapper = df_reader.read(
61
63
  key=key, bucket_name=bucket_name, **kwargs
62
64
  )
65
+ if fetch_raw_data:
66
+ return df_wrapper.dataframes, df_wrapper.raw_data
63
67
  return df_wrapper.dataframes
64
68
  except Exception as err:
65
69
  logger.error(
@@ -73,6 +77,8 @@ def fetch_dataframe(
73
77
  # Here we need to blow things up. Without the dataframe we cannot move forward
74
78
  raise err
75
79
 
80
+ if fetch_raw_data:
81
+ return None, None
76
82
  return None
77
83
 
78
84
 
@@ -112,6 +118,7 @@ class DataFrameColumnParser:
112
118
  file_type: Optional[SupportedTypes] = None,
113
119
  sample: bool = True,
114
120
  shuffle: bool = False,
121
+ raw_data: Any = None,
115
122
  ):
116
123
  """Instantiate a column parser object with the appropriate parser
117
124
 
@@ -126,8 +133,14 @@ class DataFrameColumnParser:
126
133
  data_frame = cls._get_data_frame(data_frame, sample, shuffle)
127
134
  if file_type == SupportedTypes.PARQUET:
128
135
  parser = ParquetDataFrameColumnParser(data_frame)
129
- return cls(parser)
130
- parser = GenericDataFrameColumnParser(data_frame)
136
+ elif file_type in {
137
+ SupportedTypes.JSON,
138
+ SupportedTypes.JSONGZ,
139
+ SupportedTypes.JSONZIP,
140
+ }:
141
+ parser = JsonDataFrameColumnParser(data_frame, raw_data=raw_data)
142
+ else:
143
+ parser = GenericDataFrameColumnParser(data_frame)
131
144
  return cls(parser)
132
145
 
133
146
  @staticmethod
@@ -172,8 +185,9 @@ class GenericDataFrameColumnParser:
172
185
  "bytes": DataType.BYTES,
173
186
  }
174
187
 
175
- def __init__(self, data_frame: "DataFrame"):
188
+ def __init__(self, data_frame: "DataFrame", raw_data: Any = None):
176
189
  self.data_frame = data_frame
190
+ self.raw_data = raw_data
177
191
 
178
192
  def get_columns(self):
179
193
  """
@@ -472,3 +486,19 @@ class ParquetDataFrameColumnParser:
472
486
  data_type = self._data_formats.get(str(column.type), DataType.UNKNOWN)
473
487
 
474
488
  return data_type
489
+
490
+
491
+ class JsonDataFrameColumnParser(GenericDataFrameColumnParser):
492
+ """Given a dataframe object generated from a json file, parse the columns and return a list of Column objects."""
493
+
494
+ def get_columns(self):
495
+ """
496
+ method to process column details for json files
497
+ """
498
+ if self.raw_data:
499
+ try:
500
+ return parse_json_schema(schema_text=self.raw_data, cls=Column)
501
+ except Exception as exc:
502
+ logger.warning(f"Unable to parse the json schema: {exc}")
503
+ logger.debug(traceback.format_exc())
504
+ return self._get_columns(self.data_frame)
@@ -17,9 +17,9 @@ import traceback
17
17
  from abc import ABC
18
18
  from typing import Optional
19
19
 
20
- from azure.identity import ClientSecretCredential, DefaultAzureCredential
21
- from azure.keyvault.secrets import KeyVaultSecret, SecretClient
20
+ from azure.keyvault.secrets import KeyVaultSecret
22
21
 
22
+ from metadata.clients.azure_client import AzureClient
23
23
  from metadata.generated.schema.security.secrets.secretsManagerClientLoader import (
24
24
  SecretsManagerClientLoader,
25
25
  )
@@ -105,23 +105,7 @@ class AzureKVSecretsManager(ExternalSecretsManager, ABC):
105
105
  ):
106
106
  super().__init__(provider=SecretsManagerProvider.azure_kv, loader=loader)
107
107
 
108
- if (
109
- self.credentials.tenantId
110
- and self.credentials.clientId
111
- and self.credentials.clientSecret
112
- ):
113
- azure_identity = ClientSecretCredential(
114
- tenant_id=self.credentials.tenantId,
115
- client_id=self.credentials.clientId,
116
- client_secret=self.credentials.clientSecret.get_secret_value(),
117
- )
118
- else:
119
- azure_identity = DefaultAzureCredential()
120
-
121
- self.client = SecretClient(
122
- vault_url=f"https://{self.credentials.vaultName}.vault.azure.net/",
123
- credential=azure_identity,
124
- )
108
+ self.client = AzureClient(self.credentials).create_secret_client()
125
109
 
126
110
  def get_string_value(self, secret_id: str) -> str:
127
111
  """
@@ -14,9 +14,14 @@ Source hash utils module
14
14
  """
15
15
 
16
16
  import hashlib
17
+ import traceback
17
18
  from typing import Dict, Optional
18
19
 
19
20
  from metadata.ingestion.ometa.ometa_api import C
21
+ from metadata.utils.logger import utils_logger
22
+
23
+ logger = utils_logger()
24
+
20
25
 
21
26
  SOURCE_HASH_EXCLUDE_FIELDS = {
22
27
  "sourceHash": True,
@@ -25,19 +30,24 @@ SOURCE_HASH_EXCLUDE_FIELDS = {
25
30
 
26
31
  def generate_source_hash(
27
32
  create_request: C, exclude_fields: Optional[Dict] = None
28
- ) -> str:
33
+ ) -> Optional[str]:
29
34
  """
30
35
  Given a create_request model convert it to json string and generate a hash value
31
36
  """
32
-
33
- # We always want to exclude the sourceHash when generating the fingerprint
34
- exclude_fields = (
35
- SOURCE_HASH_EXCLUDE_FIELDS.update(exclude_fields)
36
- if exclude_fields
37
- else SOURCE_HASH_EXCLUDE_FIELDS
38
- )
39
-
40
- create_request_json = create_request.json(exclude=exclude_fields)
41
-
42
- json_bytes = create_request_json.encode("utf-8")
43
- return hashlib.md5(json_bytes).hexdigest()
37
+ try:
38
+ # We always want to exclude the sourceHash when generating the fingerprint
39
+ exclude_fields = (
40
+ SOURCE_HASH_EXCLUDE_FIELDS.update(exclude_fields)
41
+ if exclude_fields
42
+ else SOURCE_HASH_EXCLUDE_FIELDS
43
+ )
44
+
45
+ create_request_json = create_request.json(exclude=exclude_fields)
46
+
47
+ json_bytes = create_request_json.encode("utf-8")
48
+ return hashlib.md5(json_bytes).hexdigest()
49
+
50
+ except Exception as exc:
51
+ logger.warning(f"Failed to generate source hash due to - {exc}")
52
+ logger.debug(traceback.format_exc())
53
+ return None
@@ -17,6 +17,7 @@ from functools import singledispatch
17
17
 
18
18
  import requests
19
19
 
20
+ from metadata.clients.azure_client import AzureClient
20
21
  from metadata.generated.schema.entity.services.connections.database.datalake.azureConfig import (
21
22
  AzureConfig,
22
23
  )
@@ -153,21 +154,7 @@ def _(config: StorageMetadataAdlsConfig) -> ManifestMetadataConfig:
153
154
  else STORAGE_METADATA_MANIFEST_FILE_NAME
154
155
  )
155
156
 
156
- from azure.identity import ( # pylint: disable=import-outside-toplevel
157
- ClientSecretCredential,
158
- )
159
- from azure.storage.blob import ( # pylint: disable=import-outside-toplevel
160
- BlobServiceClient,
161
- )
162
-
163
- blob_client = BlobServiceClient(
164
- account_url=f"https://{config.securityConfig.accountName}.blob.core.windows.net/",
165
- credential=ClientSecretCredential(
166
- config.securityConfig.tenantId,
167
- config.securityConfig.clientId,
168
- config.securityConfig.clientSecret.get_secret_value(),
169
- ),
170
- )
157
+ blob_client = AzureClient(config.securityConfig).create_blob_client()
171
158
 
172
159
  reader = get_reader(
173
160
  config_source=AzureConfig(securityConfig=config.securityConfig),
metadata/workflow/base.py CHANGED
@@ -108,7 +108,7 @@ class BaseWorkflow(ABC, WorkflowStatusMixin):
108
108
  @property
109
109
  def ingestion_pipeline(self):
110
110
  """Get or create the Ingestion Pipeline from the configuration"""
111
- if not self._ingestion_pipeline:
111
+ if not self._ingestion_pipeline and self.config.ingestionPipelineFQN:
112
112
  self._ingestion_pipeline = self.get_or_create_ingestion_pipeline()
113
113
 
114
114
  return self._ingestion_pipeline