openmetadata-ingestion 1.10.0.0__py3-none-any.whl → 1.10.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of openmetadata-ingestion might be problematic. Click here for more details.
- metadata/clients/aws_client.py +4 -0
- metadata/examples/workflows/confluent_cdc.yaml +70 -0
- metadata/examples/workflows/confluent_cdc_cloud.yaml +86 -0
- metadata/examples/workflows/confluent_cdc_local.yaml +120 -0
- metadata/examples/workflows/kinesisfirehose.yaml +99 -0
- metadata/examples/workflows/my_confluent_cloud.yaml +49 -0
- metadata/generated/schema/analytics/__init__.py +1 -1
- metadata/generated/schema/analytics/basic.py +1 -1
- metadata/generated/schema/analytics/reportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/__init__.py +1 -1
- metadata/generated/schema/analytics/reportDataType/aggregatedCostAnalysisReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/entityReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/rawCostAnalysisReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/webAnalyticEntityViewReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/webAnalyticUserActivityReportData.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEvent.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventData.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/__init__.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/customEvent.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/pageViewEvent.py +1 -1
- metadata/generated/schema/api/__init__.py +1 -1
- metadata/generated/schema/api/addGlossaryToAssetsRequest.py +1 -1
- metadata/generated/schema/api/addTagToAssetsRequest.py +1 -1
- metadata/generated/schema/api/analytics/__init__.py +1 -1
- metadata/generated/schema/api/analytics/createWebAnalyticEvent.py +1 -1
- metadata/generated/schema/api/automations/__init__.py +1 -1
- metadata/generated/schema/api/automations/createWorkflow.py +1 -1
- metadata/generated/schema/api/bulkAssets.py +1 -1
- metadata/generated/schema/api/classification/__init__.py +1 -1
- metadata/generated/schema/api/classification/createClassification.py +1 -1
- metadata/generated/schema/api/classification/createTag.py +1 -1
- metadata/generated/schema/api/classification/createTagWithRecognizers.py +1 -1
- metadata/generated/schema/api/classification/loadTags.py +1 -1
- metadata/generated/schema/api/configuration/__init__.py +1 -1
- metadata/generated/schema/api/configuration/rdfConfiguration.py +1 -1
- metadata/generated/schema/api/createBot.py +1 -1
- metadata/generated/schema/api/createEventPublisherJob.py +1 -1
- metadata/generated/schema/api/createType.py +1 -1
- metadata/generated/schema/api/data/__init__.py +1 -1
- metadata/generated/schema/api/data/createAPICollection.py +1 -1
- metadata/generated/schema/api/data/createAPIEndpoint.py +1 -1
- metadata/generated/schema/api/data/createChart.py +1 -1
- metadata/generated/schema/api/data/createContainer.py +1 -1
- metadata/generated/schema/api/data/createCustomProperty.py +1 -1
- metadata/generated/schema/api/data/createDashboard.py +1 -1
- metadata/generated/schema/api/data/createDashboardDataModel.py +1 -1
- metadata/generated/schema/api/data/createDataContract.py +1 -1
- metadata/generated/schema/api/data/createDatabase.py +1 -1
- metadata/generated/schema/api/data/createDatabaseSchema.py +1 -1
- metadata/generated/schema/api/data/createDirectory.py +1 -1
- metadata/generated/schema/api/data/createEntityProfile.py +1 -1
- metadata/generated/schema/api/data/createFile.py +1 -1
- metadata/generated/schema/api/data/createGlossary.py +1 -1
- metadata/generated/schema/api/data/createGlossaryTerm.py +1 -1
- metadata/generated/schema/api/data/createMetric.py +1 -1
- metadata/generated/schema/api/data/createMlModel.py +1 -1
- metadata/generated/schema/api/data/createPipeline.py +1 -1
- metadata/generated/schema/api/data/createQuery.py +1 -1
- metadata/generated/schema/api/data/createQueryCostRecord.py +1 -1
- metadata/generated/schema/api/data/createSearchIndex.py +1 -1
- metadata/generated/schema/api/data/createSpreadsheet.py +1 -1
- metadata/generated/schema/api/data/createStoredProcedure.py +1 -1
- metadata/generated/schema/api/data/createTable.py +1 -1
- metadata/generated/schema/api/data/createTableProfile.py +1 -1
- metadata/generated/schema/api/data/createTopic.py +1 -1
- metadata/generated/schema/api/data/createWorksheet.py +1 -1
- metadata/generated/schema/api/data/loadGlossary.py +1 -1
- metadata/generated/schema/api/data/restoreEntity.py +1 -1
- metadata/generated/schema/api/data/updateColumn.py +1 -1
- metadata/generated/schema/api/dataInsight/__init__.py +1 -1
- metadata/generated/schema/api/dataInsight/createDataInsightChart.py +1 -1
- metadata/generated/schema/api/dataInsight/custom/__init__.py +1 -1
- metadata/generated/schema/api/dataInsight/custom/createDataInsightCustomChart.py +1 -1
- metadata/generated/schema/api/dataInsight/kpi/__init__.py +1 -1
- metadata/generated/schema/api/dataInsight/kpi/createKpiRequest.py +1 -1
- metadata/generated/schema/api/docStore/__init__.py +1 -1
- metadata/generated/schema/api/docStore/createDocument.py +1 -1
- metadata/generated/schema/api/domains/__init__.py +1 -1
- metadata/generated/schema/api/domains/createDataProduct.py +1 -1
- metadata/generated/schema/api/domains/createDomain.py +1 -1
- metadata/generated/schema/api/entityRelationship/__init__.py +1 -1
- metadata/generated/schema/api/entityRelationship/entityRelationshipDirection.py +1 -1
- metadata/generated/schema/api/entityRelationship/esEntityRelationshipData.py +1 -1
- metadata/generated/schema/api/entityRelationship/relationshipRef.py +1 -1
- metadata/generated/schema/api/entityRelationship/searchEntityRelationshipRequest.py +1 -1
- metadata/generated/schema/api/entityRelationship/searchEntityRelationshipResult.py +1 -1
- metadata/generated/schema/api/entityRelationship/searchSchemaEntityRelationshipResult.py +1 -1
- metadata/generated/schema/api/events/__init__.py +1 -1
- metadata/generated/schema/api/events/createNotificationTemplate.py +1 -1
- metadata/generated/schema/api/events/notificationTemplateValidationRequest.py +1 -1
- metadata/generated/schema/api/events/notificationTemplateValidationResponse.py +1 -1
- metadata/generated/schema/api/feed/__init__.py +1 -1
- metadata/generated/schema/api/feed/closeTask.py +1 -1
- metadata/generated/schema/api/feed/createPost.py +1 -1
- metadata/generated/schema/api/feed/createSuggestion.py +1 -1
- metadata/generated/schema/api/feed/createThread.py +1 -1
- metadata/generated/schema/api/feed/resolveTask.py +1 -1
- metadata/generated/schema/api/feed/threadCount.py +1 -1
- metadata/generated/schema/api/governance/__init__.py +1 -1
- metadata/generated/schema/api/governance/createWorkflowDefinition.py +1 -1
- metadata/generated/schema/api/governance/createWorkflowInstanceState.py +1 -1
- metadata/generated/schema/api/lineage/__init__.py +1 -1
- metadata/generated/schema/api/lineage/addLineage.py +1 -1
- metadata/generated/schema/api/lineage/entityCountLineageRequest.py +1 -1
- metadata/generated/schema/api/lineage/esLineageData.py +1 -1
- metadata/generated/schema/api/lineage/lineageDirection.py +1 -1
- metadata/generated/schema/api/lineage/lineagePaginationInfo.py +1 -1
- metadata/generated/schema/api/lineage/nodeInformation.py +1 -1
- metadata/generated/schema/api/lineage/searchLineageRequest.py +1 -1
- metadata/generated/schema/api/lineage/searchLineageResult.py +1 -1
- metadata/generated/schema/api/mcp/__init__.py +1 -1
- metadata/generated/schema/api/mcp/mcpSearchResponse.py +1 -1
- metadata/generated/schema/api/mcp/mcpToolDefinition.py +1 -1
- metadata/generated/schema/api/openMetadataServerVersion.py +1 -1
- metadata/generated/schema/api/policies/__init__.py +1 -1
- metadata/generated/schema/api/policies/createPolicy.py +1 -1
- metadata/generated/schema/api/rdf/__init__.py +1 -1
- metadata/generated/schema/api/rdf/sparqlQuery.py +1 -1
- metadata/generated/schema/api/rdf/sparqlResponse.py +1 -1
- metadata/generated/schema/api/scim/__init__.py +1 -1
- metadata/generated/schema/api/scim/scimGroup.py +1 -1
- metadata/generated/schema/api/scim/scimPatchOp.py +1 -1
- metadata/generated/schema/api/scim/scimUser.py +1 -1
- metadata/generated/schema/api/search/__init__.py +1 -1
- metadata/generated/schema/api/search/previewSearchRequest.py +1 -1
- metadata/generated/schema/api/services/__init__.py +1 -1
- metadata/generated/schema/api/services/createApiService.py +1 -1
- metadata/generated/schema/api/services/createDashboardService.py +1 -1
- metadata/generated/schema/api/services/createDatabaseService.py +1 -1
- metadata/generated/schema/api/services/createDriveService.py +1 -1
- metadata/generated/schema/api/services/createMessagingService.py +1 -1
- metadata/generated/schema/api/services/createMetadataService.py +1 -1
- metadata/generated/schema/api/services/createMlModelService.py +1 -1
- metadata/generated/schema/api/services/createPipelineService.py +1 -1
- metadata/generated/schema/api/services/createSearchService.py +1 -1
- metadata/generated/schema/api/services/createSecurityService.py +1 -1
- metadata/generated/schema/api/services/createStorageService.py +1 -1
- metadata/generated/schema/api/services/ingestionPipelines/__init__.py +1 -1
- metadata/generated/schema/api/services/ingestionPipelines/createIngestionPipeline.py +1 -1
- metadata/generated/schema/api/setOwner.py +1 -1
- metadata/generated/schema/api/teams/__init__.py +1 -1
- metadata/generated/schema/api/teams/createPersona.py +1 -1
- metadata/generated/schema/api/teams/createRole.py +1 -1
- metadata/generated/schema/api/teams/createTeam.py +1 -1
- metadata/generated/schema/api/teams/createUser.py +1 -1
- metadata/generated/schema/api/tests/__init__.py +1 -1
- metadata/generated/schema/api/tests/createCustomMetric.py +1 -1
- metadata/generated/schema/api/tests/createLogicalTestCases.py +1 -1
- metadata/generated/schema/api/tests/createTestCase.py +1 -1
- metadata/generated/schema/api/tests/createTestCaseResolutionStatus.py +1 -1
- metadata/generated/schema/api/tests/createTestCaseResult.py +1 -1
- metadata/generated/schema/api/tests/createTestDefinition.py +1 -1
- metadata/generated/schema/api/tests/createTestSuite.py +1 -1
- metadata/generated/schema/api/tests/moveGlossaryTermRequest.py +1 -1
- metadata/generated/schema/api/validateGlossaryTagsRequest.py +1 -1
- metadata/generated/schema/api/voteRequest.py +1 -1
- metadata/generated/schema/auth/__init__.py +1 -1
- metadata/generated/schema/auth/basicAuth.py +1 -1
- metadata/generated/schema/auth/basicLoginRequest.py +1 -1
- metadata/generated/schema/auth/changePasswordRequest.py +1 -1
- metadata/generated/schema/auth/createPersonalToken.py +1 -1
- metadata/generated/schema/auth/emailRequest.py +1 -1
- metadata/generated/schema/auth/emailVerificationToken.py +1 -1
- metadata/generated/schema/auth/generateToken.py +1 -1
- metadata/generated/schema/auth/jwtAuth.py +1 -1
- metadata/generated/schema/auth/loginRequest.py +1 -1
- metadata/generated/schema/auth/logoutRequest.py +1 -1
- metadata/generated/schema/auth/passwordResetRequest.py +1 -1
- metadata/generated/schema/auth/passwordResetToken.py +1 -1
- metadata/generated/schema/auth/personalAccessToken.py +1 -1
- metadata/generated/schema/auth/refreshToken.py +1 -1
- metadata/generated/schema/auth/registrationRequest.py +1 -1
- metadata/generated/schema/auth/revokePersonalToken.py +1 -1
- metadata/generated/schema/auth/revokeToken.py +1 -1
- metadata/generated/schema/auth/serviceTokenEnum.py +1 -1
- metadata/generated/schema/auth/ssoAuth.py +1 -1
- metadata/generated/schema/auth/supportToken.py +1 -1
- metadata/generated/schema/auth/tokenRefreshRequest.py +1 -1
- metadata/generated/schema/configuration/__init__.py +1 -1
- metadata/generated/schema/configuration/aiPlatformConfiguration.py +1 -1
- metadata/generated/schema/configuration/appsPrivateConfiguration.py +1 -1
- metadata/generated/schema/configuration/assetCertificationSettings.py +1 -1
- metadata/generated/schema/configuration/authConfig.py +1 -1
- metadata/generated/schema/configuration/authenticationConfiguration.py +1 -1
- metadata/generated/schema/configuration/authorizerConfiguration.py +1 -1
- metadata/generated/schema/configuration/cacheConfiguration.py +1 -1
- metadata/generated/schema/configuration/changeEventConfiguration.py +1 -1
- metadata/generated/schema/configuration/dataQualityConfiguration.py +1 -1
- metadata/generated/schema/configuration/elasticSearchConfiguration.py +1 -1
- metadata/generated/schema/configuration/entityRulesSettings.py +1 -1
- metadata/generated/schema/configuration/eventHandlerConfiguration.py +1 -1
- metadata/generated/schema/configuration/fernetConfiguration.py +1 -1
- metadata/generated/schema/configuration/jwtTokenConfiguration.py +1 -1
- metadata/generated/schema/configuration/kafkaEventConfiguration.py +1 -1
- metadata/generated/schema/configuration/ldapConfiguration.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/__init__.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/customTrustManagerConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/hostNameConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/jvmDefaultConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/trustAllConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/truststoreConfig.py +1 -1
- metadata/generated/schema/configuration/limitsConfiguration.py +1 -1
- metadata/generated/schema/configuration/lineageSettings.py +1 -1
- metadata/generated/schema/configuration/logStorageConfiguration.py +1 -1
- metadata/generated/schema/configuration/loginConfiguration.py +1 -1
- metadata/generated/schema/configuration/logoConfiguration.py +1 -1
- metadata/generated/schema/configuration/openMetadataBaseUrlConfiguration.py +1 -1
- metadata/generated/schema/configuration/opertionalConfiguration.py +1 -1
- metadata/generated/schema/configuration/opsConfig.py +1 -1
- metadata/generated/schema/configuration/pipelineServiceClientConfiguration.py +1 -1
- metadata/generated/schema/configuration/profilerConfiguration.py +1 -1
- metadata/generated/schema/configuration/searchSettings.py +1 -1
- metadata/generated/schema/configuration/securityConfiguration.py +1 -1
- metadata/generated/schema/configuration/slackAppConfiguration.py +1 -1
- metadata/generated/schema/configuration/taskNotificationConfiguration.py +1 -1
- metadata/generated/schema/configuration/testResultNotificationConfiguration.py +1 -1
- metadata/generated/schema/configuration/themeConfiguration.py +1 -1
- metadata/generated/schema/configuration/uiThemePreference.py +1 -1
- metadata/generated/schema/configuration/workflowSettings.py +1 -1
- metadata/generated/schema/dataInsight/__init__.py +1 -1
- metadata/generated/schema/dataInsight/custom/__init__.py +1 -1
- metadata/generated/schema/dataInsight/custom/dataInsightCustomChart.py +1 -1
- metadata/generated/schema/dataInsight/custom/dataInsightCustomChartResult.py +1 -1
- metadata/generated/schema/dataInsight/custom/dataInsightCustomChartResultList.py +1 -1
- metadata/generated/schema/dataInsight/custom/formulaHolder.py +1 -1
- metadata/generated/schema/dataInsight/custom/lineChart.py +1 -1
- metadata/generated/schema/dataInsight/custom/summaryCard.py +1 -1
- metadata/generated/schema/dataInsight/dataInsightChart.py +1 -1
- metadata/generated/schema/dataInsight/dataInsightChartResult.py +1 -1
- metadata/generated/schema/dataInsight/kpi/__init__.py +1 -1
- metadata/generated/schema/dataInsight/kpi/basic.py +1 -1
- metadata/generated/schema/dataInsight/kpi/kpi.py +1 -1
- metadata/generated/schema/dataInsight/type/__init__.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsCount.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsSize.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsCount.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsSize.py +1 -1
- metadata/generated/schema/dataInsight/type/dailyActiveUsers.py +1 -1
- metadata/generated/schema/dataInsight/type/mostActiveUsers.py +1 -1
- metadata/generated/schema/dataInsight/type/mostViewedEntities.py +1 -1
- metadata/generated/schema/dataInsight/type/pageViewsByEntities.py +1 -1
- metadata/generated/schema/dataInsight/type/unusedAssets.py +1 -1
- metadata/generated/schema/email/__init__.py +1 -1
- metadata/generated/schema/email/emailRequest.py +1 -1
- metadata/generated/schema/email/emailTemplate.py +1 -1
- metadata/generated/schema/email/emailTemplatePlaceholder.py +1 -1
- metadata/generated/schema/email/smtpSettings.py +1 -1
- metadata/generated/schema/email/templateValidationReponse.py +1 -1
- metadata/generated/schema/entity/__init__.py +1 -1
- metadata/generated/schema/entity/applications/__init__.py +1 -1
- metadata/generated/schema/entity/applications/app.py +1 -1
- metadata/generated/schema/entity/applications/appExtension.py +1 -1
- metadata/generated/schema/entity/applications/appRunRecord.py +1 -1
- metadata/generated/schema/entity/applications/configuration/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/applicationConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addCustomProperties.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addDataProductAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addDescriptionAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addDomainAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addOwnerAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addTagsAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addTermsAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addTestCaseAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/addTierAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/lineagePropagationAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/mlTaggingAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/propagationStopConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeCustomPropertiesAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeDataProductAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeDescriptionAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeDomainAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeOwnerAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeTagsAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeTermsAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeTestCaseAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automator/removeTierAction.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/automatorAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/collateAIAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/metadataExporterAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/metadataExporterConnectors/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/metadataExporterConnectors/bigQueryConnection.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/metadataExporterConnectors/databricksConnection.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/metadataExporterConnectors/redshiftConnection.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/metadataExporterConnectors/snowflakeConnection.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/slackAppTokenConfiguration.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/autoPilotAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/collateAIQualityAgentAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/collateAITierAgentAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/dataInsightsAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/dataInsightsReportAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/dataRetentionConfiguration.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/helloPipelinesConfiguration.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/searchIndexingAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/external/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/external/collateAIAppPrivateConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/internal/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/internal/collateAITierAgentAppPrivateConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/limits.py +1 -1
- metadata/generated/schema/entity/applications/createAppRequest.py +1 -1
- metadata/generated/schema/entity/applications/jobStatus.py +1 -1
- metadata/generated/schema/entity/applications/liveExecutionContext.py +1 -1
- metadata/generated/schema/entity/applications/marketplace/__init__.py +1 -1
- metadata/generated/schema/entity/applications/marketplace/appMarketPlaceDefinition.py +1 -1
- metadata/generated/schema/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.py +1 -1
- metadata/generated/schema/entity/applications/scheduledExecutionContext.py +1 -1
- metadata/generated/schema/entity/automations/__init__.py +1 -1
- metadata/generated/schema/entity/automations/queryRunnerRequest.py +1 -1
- metadata/generated/schema/entity/automations/response/__init__.py +1 -1
- metadata/generated/schema/entity/automations/response/queryRunnerResponse.py +1 -1
- metadata/generated/schema/entity/automations/testServiceConnection.py +1 -1
- metadata/generated/schema/entity/automations/testSparkEngineConnection.py +1 -1
- metadata/generated/schema/entity/automations/workflow.py +1 -1
- metadata/generated/schema/entity/bot.py +1 -1
- metadata/generated/schema/entity/classification/__init__.py +1 -1
- metadata/generated/schema/entity/classification/classification.py +1 -1
- metadata/generated/schema/entity/classification/tag.py +1 -1
- metadata/generated/schema/entity/data/__init__.py +1 -1
- metadata/generated/schema/entity/data/apiCollection.py +1 -1
- metadata/generated/schema/entity/data/apiEndpoint.py +1 -1
- metadata/generated/schema/entity/data/chart.py +1 -1
- metadata/generated/schema/entity/data/container.py +1 -1
- metadata/generated/schema/entity/data/dashboard.py +1 -1
- metadata/generated/schema/entity/data/dashboardDataModel.py +1 -1
- metadata/generated/schema/entity/data/dataContract.py +1 -1
- metadata/generated/schema/entity/data/database.py +1 -1
- metadata/generated/schema/entity/data/databaseSchema.py +1 -1
- metadata/generated/schema/entity/data/directory.py +1 -1
- metadata/generated/schema/entity/data/file.py +1 -1
- metadata/generated/schema/entity/data/glossary.py +1 -1
- metadata/generated/schema/entity/data/glossaryTerm.py +1 -1
- metadata/generated/schema/entity/data/metric.py +1 -1
- metadata/generated/schema/entity/data/mlmodel.py +1 -1
- metadata/generated/schema/entity/data/pipeline.py +1 -1
- metadata/generated/schema/entity/data/query.py +1 -1
- metadata/generated/schema/entity/data/queryCostRecord.py +1 -1
- metadata/generated/schema/entity/data/queryCostSearchResult.py +1 -1
- metadata/generated/schema/entity/data/report.py +1 -1
- metadata/generated/schema/entity/data/searchIndex.py +1 -1
- metadata/generated/schema/entity/data/spreadsheet.py +1 -1
- metadata/generated/schema/entity/data/storedProcedure.py +1 -1
- metadata/generated/schema/entity/data/table.py +1 -1
- metadata/generated/schema/entity/data/topic.py +1 -1
- metadata/generated/schema/entity/data/worksheet.py +1 -1
- metadata/generated/schema/entity/datacontract/__init__.py +1 -1
- metadata/generated/schema/entity/datacontract/dataContractResult.py +1 -1
- metadata/generated/schema/entity/datacontract/qualityValidation.py +1 -1
- metadata/generated/schema/entity/datacontract/schemaValidation.py +1 -1
- metadata/generated/schema/entity/datacontract/semanticsValidation.py +1 -1
- metadata/generated/schema/entity/datacontract/slaValidation.py +1 -1
- metadata/generated/schema/entity/docStore/__init__.py +1 -1
- metadata/generated/schema/entity/docStore/document.py +1 -1
- metadata/generated/schema/entity/domains/__init__.py +1 -1
- metadata/generated/schema/entity/domains/dataProduct.py +1 -1
- metadata/generated/schema/entity/domains/domain.py +1 -1
- metadata/generated/schema/entity/events/__init__.py +1 -1
- metadata/generated/schema/entity/events/notificationTemplate.py +1 -1
- metadata/generated/schema/entity/events/webhook.py +1 -1
- metadata/generated/schema/entity/feed/__init__.py +1 -1
- metadata/generated/schema/entity/feed/assets.py +1 -1
- metadata/generated/schema/entity/feed/customProperty.py +1 -1
- metadata/generated/schema/entity/feed/description.py +1 -1
- metadata/generated/schema/entity/feed/domain.py +1 -1
- metadata/generated/schema/entity/feed/entityInfo.py +1 -1
- metadata/generated/schema/entity/feed/owner.py +1 -1
- metadata/generated/schema/entity/feed/suggestion.py +1 -1
- metadata/generated/schema/entity/feed/tag.py +1 -1
- metadata/generated/schema/entity/feed/testCaseResult.py +1 -1
- metadata/generated/schema/entity/feed/thread.py +1 -1
- metadata/generated/schema/entity/policies/__init__.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/__init__.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/resourceDescriptor.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/resourcePermission.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/rule.py +1 -1
- metadata/generated/schema/entity/policies/filters.py +1 -1
- metadata/generated/schema/entity/policies/policy.py +1 -1
- metadata/generated/schema/entity/services/__init__.py +1 -1
- metadata/generated/schema/entity/services/apiService.py +1 -1
- metadata/generated/schema/entity/services/connections/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/api/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/api/restConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/common/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslCertPaths.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslCertValues.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/connectionBasicType.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/customDashboardConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/domoDashboardConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/grafanaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/hexConnection.py +76 -0
- metadata/generated/schema/entity/services/connections/dashboard/lightdashConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/lookerConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/metabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/microStrategyConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/modeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerBIConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerBIReportServerConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerbi/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerbi/azureConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerbi/bucketDetails.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerbi/gcsConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerbi/s3Config.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/qlikCloudConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/qlikSenseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/quickSightConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/redashConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/sigmaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/supersetConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/tableauConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/thoughtSpotConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/athenaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/azureSQLConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/bigQueryConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/bigTableConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/cassandra/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/cassandra/cloudConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/cassandraConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/clickhouseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/cockroachConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/azureConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/basicAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/iamAuthConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/jwtAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/noConfigAuthenticationTypes.py +1 -1
- metadata/generated/schema/entity/services/connections/database/couchbaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/customDatabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/databricks/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/databricks/azureAdSetup.py +1 -1
- metadata/generated/schema/entity/services/connections/database/databricks/databricksOAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/database/databricks/personalAccessToken.py +1 -1
- metadata/generated/schema/entity/services/connections/database/databricksConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/azureConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/gcsConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/s3Config.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/db2Connection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/deltaLakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/deltalake/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/deltalake/metastoreConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/deltalake/storageConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/domoDatabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/dorisConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/druidConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/dynamoDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/epicConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/exasolConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/glueConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/greenplumConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/hiveConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/dynamoDbCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/glueCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/hiveCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/icebergCatalog.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/icebergFileSystem.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/restCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/icebergConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/impalaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mariaDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mongoDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mssqlConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mysqlConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/oracleConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/pinotDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/postgresConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/prestoConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/redshiftConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/salesforceConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapErpConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapHana/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapHana/sapHanaHDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapHana/sapHanaSQLConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapHanaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sasConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/serviceNowConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/singleStoreConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/snowflakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sqliteConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/ssasConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/synapseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/teradataConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/timescaleConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/trinoConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/unityCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/verticaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/drive/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/drive/customDriveConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/drive/googleDriveConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/drive/sharePointConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/customMessagingConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/kafkaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/kinesisConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/pulsarConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/redpandaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/saslMechanismType.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/alationConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/alationSinkConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/amundsenConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/atlasConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/metadataESConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/openMetadataConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/customMlModelConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/mlflowConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/sageMakerConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/sklearnConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/vertexaiConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/airbyteConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/airflowConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/backendConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/customPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/dagsterConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/databricksPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/datafactoryConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/dbtCloudConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/domoPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/fivetranConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/flinkConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/gluePipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/kafkaConnectConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/kinesisFirehoseConnection.py +58 -0
- metadata/generated/schema/entity/services/connections/pipeline/matillion/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/matillion/matillionETL.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/matillionConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/nifi/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/nifi/basicAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/nifi/clientCertificateAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/nifiConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/openLineageConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/snowplowConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/sparkConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/splineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/ssisConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/stitchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/wherescapeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/search/customSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/apiAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/basicAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/openSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/security/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/security/ranger/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/security/ranger/basicAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/security/rangerConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/serviceConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/adlsConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/customStorageConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/gcsConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/s3Connection.py +1 -1
- metadata/generated/schema/entity/services/connections/testConnectionDefinition.py +1 -1
- metadata/generated/schema/entity/services/connections/testConnectionResult.py +1 -1
- metadata/generated/schema/entity/services/dashboardService.py +4 -1
- metadata/generated/schema/entity/services/databaseService.py +1 -1
- metadata/generated/schema/entity/services/driveService.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/__init__.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/ingestionPipeline.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/pipelineServiceClientResponse.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/reverseIngestionResponse.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/status.py +1 -1
- metadata/generated/schema/entity/services/messagingService.py +1 -1
- metadata/generated/schema/entity/services/metadataService.py +1 -1
- metadata/generated/schema/entity/services/mlmodelService.py +1 -1
- metadata/generated/schema/entity/services/pipelineService.py +4 -1
- metadata/generated/schema/entity/services/searchService.py +1 -1
- metadata/generated/schema/entity/services/securityService.py +1 -1
- metadata/generated/schema/entity/services/serviceType.py +1 -1
- metadata/generated/schema/entity/services/storageService.py +1 -1
- metadata/generated/schema/entity/teams/__init__.py +1 -1
- metadata/generated/schema/entity/teams/persona.py +1 -1
- metadata/generated/schema/entity/teams/role.py +1 -1
- metadata/generated/schema/entity/teams/team.py +1 -1
- metadata/generated/schema/entity/teams/teamHierarchy.py +1 -1
- metadata/generated/schema/entity/teams/user.py +1 -1
- metadata/generated/schema/entity/type.py +1 -1
- metadata/generated/schema/entity/utils/__init__.py +1 -1
- metadata/generated/schema/entity/utils/entitiesCount.py +1 -1
- metadata/generated/schema/entity/utils/servicesCount.py +1 -1
- metadata/generated/schema/entity/utils/supersetApiConnection.py +1 -1
- metadata/generated/schema/events/__init__.py +1 -1
- metadata/generated/schema/events/alertMetrics.py +1 -1
- metadata/generated/schema/events/api/__init__.py +1 -1
- metadata/generated/schema/events/api/createEventSubscription.py +1 -1
- metadata/generated/schema/events/api/eventSubscriptionDiagnosticInfo.py +1 -1
- metadata/generated/schema/events/api/eventsRecord.py +1 -1
- metadata/generated/schema/events/api/testEventSubscriptionDestination.py +1 -1
- metadata/generated/schema/events/api/typedEvent.py +1 -1
- metadata/generated/schema/events/emailAlertConfig.py +1 -1
- metadata/generated/schema/events/eventFilterRule.py +1 -1
- metadata/generated/schema/events/eventSubscription.py +1 -1
- metadata/generated/schema/events/eventSubscriptionOffset.py +1 -1
- metadata/generated/schema/events/failedEvent.py +1 -1
- metadata/generated/schema/events/failedEventResponse.py +1 -1
- metadata/generated/schema/events/filterResourceDescriptor.py +1 -1
- metadata/generated/schema/events/statusContext.py +1 -1
- metadata/generated/schema/events/subscriptionResourceDescriptor.py +1 -1
- metadata/generated/schema/events/subscriptionStatus.py +1 -1
- metadata/generated/schema/events/testDestinationStatus.py +1 -1
- metadata/generated/schema/governance/workflows/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/edge.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodeSubType.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodeType.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/checkEntityAttributesTask.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/createAndRunIngestionPipelineTask.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/runAppTask.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/setEntityCertificationTask.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/automatedTask/setGlossaryTermStatusTask.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/endEvent/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/endEvent/endEvent.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/gateway/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/gateway/parallelGateway.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/startEvent/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/startEvent/startEvent.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/userTask/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/nodes/userTask/userApprovalTask.py +1 -1
- metadata/generated/schema/governance/workflows/elements/triggers/__init__.py +1 -1
- metadata/generated/schema/governance/workflows/elements/triggers/eventBasedEntityTrigger.py +1 -1
- metadata/generated/schema/governance/workflows/elements/triggers/noOpTrigger.py +1 -1
- metadata/generated/schema/governance/workflows/elements/triggers/periodicBatchEntityTrigger.py +1 -1
- metadata/generated/schema/governance/workflows/workflowDefinition.py +1 -1
- metadata/generated/schema/governance/workflows/workflowInstance.py +1 -1
- metadata/generated/schema/governance/workflows/workflowInstanceState.py +1 -1
- metadata/generated/schema/jobs/__init__.py +1 -1
- metadata/generated/schema/jobs/backgroundJob.py +1 -1
- metadata/generated/schema/jobs/enumCleanupArgs.py +1 -1
- metadata/generated/schema/metadataIngestion/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/apiServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/application.py +1 -1
- metadata/generated/schema/metadataIngestion/applicationPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dashboardServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dataInsightPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceAutoClassificationPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceProfilerPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceQueryLineagePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceQueryUsagePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtAzureConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtBucketDetails.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtCloudConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtGCSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtHttpConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtLocalConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtS3Config.py +1 -1
- metadata/generated/schema/metadataIngestion/driveServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/engine/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/engine/nativeEngineConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/engine/sparkEngineConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/messagingServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/metadataToElasticSearchPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/mlmodelServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/pipelineServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/reverseIngestionPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/reverseingestionconfig/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/reverseingestionconfig/descriptionConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/reverseingestionconfig/ownerConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/reverseingestionconfig/tagsConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/searchServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/securityServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/containerMetadataConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/manifestMetadataConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageBucketDetails.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataADLSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataGCSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataHttpConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataLocalConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataS3Config.py +1 -1
- metadata/generated/schema/metadataIngestion/storageServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/testSuitePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/workflow.py +1 -1
- metadata/generated/schema/monitoring/__init__.py +1 -1
- metadata/generated/schema/monitoring/eventMonitorProvider.py +1 -1
- metadata/generated/schema/scim/__init__.py +1 -1
- metadata/generated/schema/scim/scimConfiguration.py +1 -1
- metadata/generated/schema/search/__init__.py +1 -1
- metadata/generated/schema/search/aggregationRequest.py +1 -1
- metadata/generated/schema/search/searchRequest.py +1 -1
- metadata/generated/schema/security/__init__.py +1 -1
- metadata/generated/schema/security/client/__init__.py +1 -1
- metadata/generated/schema/security/client/auth0SSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/azureSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/customOidcSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/googleSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/oidcClientConfig.py +1 -1
- metadata/generated/schema/security/client/oktaSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/openMetadataJWTClientConfig.py +1 -1
- metadata/generated/schema/security/client/samlSSOClientConfig.py +1 -1
- metadata/generated/schema/security/credentials/__init__.py +1 -1
- metadata/generated/schema/security/credentials/accessTokenAuth.py +1 -1
- metadata/generated/schema/security/credentials/apiAccessTokenAuth.py +1 -1
- metadata/generated/schema/security/credentials/awsCredentials.py +1 -1
- metadata/generated/schema/security/credentials/azureCredentials.py +1 -1
- metadata/generated/schema/security/credentials/basicAuth.py +1 -1
- metadata/generated/schema/security/credentials/bitbucketCredentials.py +1 -1
- metadata/generated/schema/security/credentials/gcpCredentials.py +1 -1
- metadata/generated/schema/security/credentials/gcpExternalAccount.py +1 -1
- metadata/generated/schema/security/credentials/gcpValues.py +1 -1
- metadata/generated/schema/security/credentials/gitCredentials.py +1 -1
- metadata/generated/schema/security/credentials/githubCredentials.py +1 -1
- metadata/generated/schema/security/credentials/gitlabCredentials.py +1 -1
- metadata/generated/schema/security/credentials/kubernetesCredentials.py +1 -1
- metadata/generated/schema/security/sasl/__init__.py +1 -1
- metadata/generated/schema/security/sasl/saslClientConfig.py +1 -1
- metadata/generated/schema/security/secrets/__init__.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerClientLoader.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerConfiguration.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerProvider.py +1 -1
- metadata/generated/schema/security/securityConfiguration.py +1 -1
- metadata/generated/schema/security/ssl/__init__.py +1 -1
- metadata/generated/schema/security/ssl/validateSSLClientConfig.py +1 -1
- metadata/generated/schema/security/ssl/verifySSLConfig.py +1 -1
- metadata/generated/schema/settings/__init__.py +1 -1
- metadata/generated/schema/settings/settings.py +1 -1
- metadata/generated/schema/system/__init__.py +1 -1
- metadata/generated/schema/system/entityError.py +1 -1
- metadata/generated/schema/system/eventPublisherJob.py +1 -1
- metadata/generated/schema/system/indexingError.py +1 -1
- metadata/generated/schema/system/limitsResponse.py +1 -1
- metadata/generated/schema/system/securityValidationResponse.py +1 -1
- metadata/generated/schema/system/ui/__init__.py +1 -1
- metadata/generated/schema/system/ui/knowledgePanel.py +1 -1
- metadata/generated/schema/system/ui/navigationItem.py +1 -1
- metadata/generated/schema/system/ui/page.py +1 -1
- metadata/generated/schema/system/ui/tab.py +1 -1
- metadata/generated/schema/system/ui/uiCustomization.py +1 -1
- metadata/generated/schema/system/validationResponse.py +1 -1
- metadata/generated/schema/tests/__init__.py +1 -1
- metadata/generated/schema/tests/assigned.py +1 -1
- metadata/generated/schema/tests/basic.py +1 -1
- metadata/generated/schema/tests/customMetric.py +1 -1
- metadata/generated/schema/tests/dataQualityReport.py +1 -1
- metadata/generated/schema/tests/resolved.py +1 -1
- metadata/generated/schema/tests/testCase.py +1 -1
- metadata/generated/schema/tests/testCaseResolutionStatus.py +1 -1
- metadata/generated/schema/tests/testDefinition.py +1 -1
- metadata/generated/schema/tests/testSuite.py +1 -1
- metadata/generated/schema/type/__init__.py +1 -1
- metadata/generated/schema/type/apiSchema.py +1 -1
- metadata/generated/schema/type/assetCertification.py +1 -1
- metadata/generated/schema/type/auditLog.py +1 -1
- metadata/generated/schema/type/basic.py +1 -1
- metadata/generated/schema/type/bulkOperationResult.py +1 -1
- metadata/generated/schema/type/changeEvent.py +1 -1
- metadata/generated/schema/type/changeEventType.py +1 -1
- metadata/generated/schema/type/changeSummaryMap.py +1 -1
- metadata/generated/schema/type/collectionDescriptor.py +1 -1
- metadata/generated/schema/type/contextRecognizer.py +1 -1
- metadata/generated/schema/type/contractExecutionStatus.py +1 -1
- metadata/generated/schema/type/csvDocumentation.py +1 -1
- metadata/generated/schema/type/csvErrorType.py +1 -1
- metadata/generated/schema/type/csvFile.py +1 -1
- metadata/generated/schema/type/csvImportResult.py +1 -1
- metadata/generated/schema/type/customProperties/__init__.py +1 -1
- metadata/generated/schema/type/customProperties/complexTypes.py +1 -1
- metadata/generated/schema/type/customProperties/enumConfig.py +1 -1
- metadata/generated/schema/type/customProperties/tableConfig.py +1 -1
- metadata/generated/schema/type/customProperty.py +1 -1
- metadata/generated/schema/type/customRecognizer.py +1 -1
- metadata/generated/schema/type/dailyCount.py +1 -1
- metadata/generated/schema/type/databaseConnectionConfig.py +1 -1
- metadata/generated/schema/type/denyListRecognizer.py +1 -1
- metadata/generated/schema/type/entityHierarchy.py +1 -1
- metadata/generated/schema/type/entityHistory.py +1 -1
- metadata/generated/schema/type/entityLineage.py +1 -1
- metadata/generated/schema/type/entityProfile.py +1 -1
- metadata/generated/schema/type/entityReference.py +1 -1
- metadata/generated/schema/type/entityReferenceList.py +1 -1
- metadata/generated/schema/type/entityRelationship/__init__.py +1 -1
- metadata/generated/schema/type/entityRelationship/nodeInformation.py +1 -1
- metadata/generated/schema/type/entityUsage.py +1 -1
- metadata/generated/schema/type/filterPattern.py +1 -1
- metadata/generated/schema/type/function.py +1 -1
- metadata/generated/schema/type/include.py +1 -1
- metadata/generated/schema/type/jdbcConnection.py +1 -1
- metadata/generated/schema/type/layerPaging.py +1 -1
- metadata/generated/schema/type/lifeCycle.py +1 -1
- metadata/generated/schema/type/paging.py +1 -1
- metadata/generated/schema/type/patternRecognizer.py +1 -1
- metadata/generated/schema/type/personaPreferences.py +1 -1
- metadata/generated/schema/type/piiEntity.py +1 -1
- metadata/generated/schema/type/predefinedRecognizer.py +1 -1
- metadata/generated/schema/type/profile.py +1 -1
- metadata/generated/schema/type/queryParserData.py +1 -1
- metadata/generated/schema/type/reaction.py +1 -1
- metadata/generated/schema/type/recognizer.py +1 -1
- metadata/generated/schema/type/recognizerFeedback.py +1 -1
- metadata/generated/schema/type/recognizers/__init__.py +1 -1
- metadata/generated/schema/type/recognizers/patterns.py +1 -1
- metadata/generated/schema/type/recognizers/regexFlags.py +1 -1
- metadata/generated/schema/type/schedule.py +1 -1
- metadata/generated/schema/type/schema.py +1 -1
- metadata/generated/schema/type/status.py +1 -1
- metadata/generated/schema/type/tableQuery.py +1 -1
- metadata/generated/schema/type/tableUsageCount.py +1 -1
- metadata/generated/schema/type/tagLabel.py +1 -1
- metadata/generated/schema/type/usageDetails.py +1 -1
- metadata/generated/schema/type/usageRequest.py +1 -1
- metadata/generated/schema/type/votes.py +1 -1
- metadata/ingestion/ometa/mixins/es_mixin.py +5 -1
- metadata/ingestion/source/dashboard/grafana/models.py +1 -1
- metadata/ingestion/source/dashboard/hex/client.py +104 -0
- metadata/ingestion/source/dashboard/hex/connection.py +60 -0
- metadata/ingestion/source/dashboard/hex/metadata.py +363 -0
- metadata/ingestion/source/dashboard/hex/models.py +143 -0
- metadata/ingestion/source/dashboard/hex/query_fetcher.py +503 -0
- metadata/ingestion/source/dashboard/hex/service_spec.py +18 -0
- metadata/ingestion/source/dashboard/hex/warehouse_queries.py +225 -0
- metadata/ingestion/source/dashboard/powerbi/databricks_parser.py +102 -0
- metadata/ingestion/source/dashboard/powerbi/metadata.py +285 -90
- metadata/ingestion/source/dashboard/tableau/client.py +8 -4
- metadata/ingestion/source/database/bigquery/metadata.py +0 -8
- metadata/ingestion/source/database/databricks/client.py +120 -0
- metadata/ingestion/source/database/redshift/utils.py +2 -1
- metadata/ingestion/source/database/snowflake/lineage.py +44 -21
- metadata/ingestion/source/database/snowflake/queries.py +2 -0
- metadata/ingestion/source/database/snowflake/query_parser.py +12 -2
- metadata/ingestion/source/database/snowflake/usage.py +97 -0
- metadata/ingestion/source/database/unitycatalog/client.py +1 -1
- metadata/ingestion/source/database/unitycatalog/lineage.py +180 -5
- metadata/ingestion/source/database/unitycatalog/models.py +18 -2
- metadata/ingestion/source/pipeline/databrickspipeline/connection.py +1 -0
- metadata/ingestion/source/pipeline/databrickspipeline/kafka_parser.py +501 -0
- metadata/ingestion/source/pipeline/databrickspipeline/metadata.py +905 -36
- metadata/ingestion/source/pipeline/databrickspipeline/models.py +19 -3
- metadata/ingestion/source/pipeline/kafkaconnect/client.py +362 -30
- metadata/ingestion/source/pipeline/kafkaconnect/metadata.py +1666 -62
- metadata/ingestion/source/pipeline/kafkaconnect/models.py +35 -1
- metadata/ingestion/source/storage/s3/metadata.py +9 -2
- metadata/parsers/json_schema_parser.py +41 -1
- metadata/utils/deprecation.py +9 -6
- metadata/utils/fqn.py +59 -2
- {openmetadata_ingestion-1.10.0.0.dist-info → openmetadata_ingestion-1.10.2.0.dist-info}/METADATA +550 -550
- {openmetadata_ingestion-1.10.0.0.dist-info → openmetadata_ingestion-1.10.2.0.dist-info}/RECORD +854 -838
- {openmetadata_ingestion-1.10.0.0.dist-info → openmetadata_ingestion-1.10.2.0.dist-info}/LICENSE +0 -0
- {openmetadata_ingestion-1.10.0.0.dist-info → openmetadata_ingestion-1.10.2.0.dist-info}/WHEEL +0 -0
- {openmetadata_ingestion-1.10.0.0.dist-info → openmetadata_ingestion-1.10.2.0.dist-info}/entry_points.txt +0 -0
- {openmetadata_ingestion-1.10.0.0.dist-info → openmetadata_ingestion-1.10.2.0.dist-info}/top_level.txt +0 -0
|
@@ -13,7 +13,7 @@ KafkaConnect source to extract metadata from OM UI
|
|
|
13
13
|
"""
|
|
14
14
|
import traceback
|
|
15
15
|
from datetime import datetime
|
|
16
|
-
from typing import Iterable, Optional
|
|
16
|
+
from typing import Any, Iterable, List, Optional
|
|
17
17
|
|
|
18
18
|
from metadata.generated.schema.api.data.createPipeline import CreatePipelineRequest
|
|
19
19
|
from metadata.generated.schema.api.lineage.addLineage import AddLineageRequest
|
|
@@ -30,9 +30,11 @@ from metadata.generated.schema.entity.data.topic import Topic
|
|
|
30
30
|
from metadata.generated.schema.entity.services.connections.pipeline.kafkaConnectConnection import (
|
|
31
31
|
KafkaConnectConnection,
|
|
32
32
|
)
|
|
33
|
+
from metadata.generated.schema.entity.services.databaseService import DatabaseService
|
|
33
34
|
from metadata.generated.schema.entity.services.ingestionPipelines.status import (
|
|
34
35
|
StackTraceError,
|
|
35
36
|
)
|
|
37
|
+
from metadata.generated.schema.entity.services.messagingService import MessagingService
|
|
36
38
|
from metadata.generated.schema.metadataIngestion.workflow import (
|
|
37
39
|
Source as WorkflowSource,
|
|
38
40
|
)
|
|
@@ -42,15 +44,28 @@ from metadata.generated.schema.type.basic import (
|
|
|
42
44
|
SourceUrl,
|
|
43
45
|
Timestamp,
|
|
44
46
|
)
|
|
45
|
-
from metadata.generated.schema.type.entityLineage import
|
|
47
|
+
from metadata.generated.schema.type.entityLineage import (
|
|
48
|
+
ColumnLineage,
|
|
49
|
+
EntitiesEdge,
|
|
50
|
+
LineageDetails,
|
|
51
|
+
)
|
|
46
52
|
from metadata.generated.schema.type.entityLineage import Source as LineageSource
|
|
47
53
|
from metadata.generated.schema.type.entityReference import EntityReference
|
|
48
54
|
from metadata.ingestion.api.models import Either
|
|
49
55
|
from metadata.ingestion.api.steps import InvalidSourceException
|
|
56
|
+
from metadata.ingestion.lineage.sql_lineage import get_column_fqn
|
|
50
57
|
from metadata.ingestion.models.pipeline_status import OMetaPipelineStatus
|
|
51
58
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata, T
|
|
59
|
+
from metadata.ingestion.source.pipeline.kafkaconnect.client import (
|
|
60
|
+
CONNECTOR_CLASS_TO_SERVICE_TYPE,
|
|
61
|
+
MESSAGING_ENDPOINT_KEYS,
|
|
62
|
+
SERVICE_TYPE_HOSTNAME_KEYS,
|
|
63
|
+
parse_cdc_topic_name,
|
|
64
|
+
)
|
|
52
65
|
from metadata.ingestion.source.pipeline.kafkaconnect.models import (
|
|
66
|
+
ConnectorType,
|
|
53
67
|
KafkaConnectPipelineDetails,
|
|
68
|
+
KafkaConnectTopics,
|
|
54
69
|
)
|
|
55
70
|
from metadata.ingestion.source.pipeline.pipeline_service import PipelineServiceSource
|
|
56
71
|
from metadata.utils import fqn
|
|
@@ -67,6 +82,22 @@ STATUS_MAP = {
|
|
|
67
82
|
"UNASSIGNED": StatusType.Pending.value,
|
|
68
83
|
}
|
|
69
84
|
|
|
85
|
+
# CDC envelope field names used for Debezium detection and parsing
|
|
86
|
+
CDC_ENVELOPE_FIELDS = {"after", "before", "op"}
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def get_field_name(field_name: Any) -> str:
    """
    Return the plain name behind a field identifier.

    Args:
        field_name: Either an object that exposes its value through a
            ``root`` attribute (such as a FieldName wrapper) or any other
            value, typically a plain string.

    Returns:
        The ``root`` attribute when the object has one, otherwise
        ``str(field_name)``.
    """
    if hasattr(field_name, "root"):
        return field_name.root
    return str(field_name)
|
|
100
|
+
|
|
70
101
|
|
|
71
102
|
class KafkaconnectSource(PipelineServiceSource):
|
|
72
103
|
"""
|
|
@@ -74,6 +105,14 @@ class KafkaconnectSource(PipelineServiceSource):
|
|
|
74
105
|
Pipeline metadata from Kafka Connect
|
|
75
106
|
"""
|
|
76
107
|
|
|
108
|
+
def __init__(self, config: WorkflowSource, metadata: OpenMetadata):
    """
    Initialise the source and its per-run bookkeeping.

    Args:
        config: Workflow source configuration, forwarded to the parent class.
        metadata: OpenMetadata client, forwarded to the parent class.
    """
    super().__init__(config, metadata)
    # Service lookups used for hostname/broker matching start out empty
    # (None) and are filled in on first use.
    self._database_services_cache, self._messaging_services_cache = None, None
    # Collected lineage outcomes, kept for summary reporting.
    self.lineage_results = []
|
|
115
|
+
|
|
77
116
|
@classmethod
|
|
78
117
|
def create(
|
|
79
118
|
cls, config_dict, metadata: OpenMetadata, pipeline_name: Optional[str] = None
|
|
@@ -86,6 +125,257 @@ class KafkaconnectSource(PipelineServiceSource):
|
|
|
86
125
|
)
|
|
87
126
|
return cls(config, metadata)
|
|
88
127
|
|
|
128
|
+
@property
def database_services(self) -> List[DatabaseService]:
    """
    Database services used for hostname matching.

    The list is fetched from OpenMetadata the first time it is read and the
    same list object is returned on every later access.
    """
    cached = self._database_services_cache
    if cached is not None:
        return cached

    cached = list(
        self.metadata.list_all_entities(entity=DatabaseService, limit=100)
    )
    self._database_services_cache = cached
    logger.debug(f"Cached {len(cached)} database services for hostname matching")
    return cached
|
|
139
|
+
|
|
140
|
+
@property
def messaging_services(self) -> List[MessagingService]:
    """
    Messaging services used for broker matching.

    The list is fetched from OpenMetadata the first time it is read and the
    same list object is returned on every later access.
    """
    cached = self._messaging_services_cache
    if cached is not None:
        return cached

    cached = list(
        self.metadata.list_all_entities(entity=MessagingService, limit=100)
    )
    self._messaging_services_cache = cached
    logger.debug(f"Cached {len(cached)} messaging services for broker matching")
    return cached
|
|
151
|
+
|
|
152
|
+
def _extract_hostname(self, host_string: str) -> str:
|
|
153
|
+
"""
|
|
154
|
+
Extract just the hostname from a connection string by removing protocol and port.
|
|
155
|
+
|
|
156
|
+
Args:
|
|
157
|
+
host_string: Connection string (e.g., "SASL_SSL://host:9092", "host:3306", "host")
|
|
158
|
+
|
|
159
|
+
Returns:
|
|
160
|
+
Just the hostname part (e.g., "host")
|
|
161
|
+
"""
|
|
162
|
+
if not host_string:
|
|
163
|
+
return ""
|
|
164
|
+
|
|
165
|
+
# Remove protocol prefix (e.g., "SASL_SSL://", "http://", "jdbc:mysql://")
|
|
166
|
+
if "://" in host_string:
|
|
167
|
+
host_string = host_string.split("://", 1)[1]
|
|
168
|
+
|
|
169
|
+
# Remove port suffix (e.g., ":9092", ":3306")
|
|
170
|
+
if ":" in host_string:
|
|
171
|
+
host_string = host_string.split(":")[0]
|
|
172
|
+
|
|
173
|
+
return host_string.strip()
|
|
174
|
+
|
|
175
|
+
def find_database_service_by_hostname(
    self, service_type: str, hostname: str
) -> Optional[str]:
    """
    Find database service by matching serviceType and hostname.

    Hostnames are compared case-insensitively after protocol and port have
    been stripped from both sides. An empty hostname never matches.

    Args:
        service_type: OpenMetadata service type (e.g., "Mysql", "Postgres")
        hostname: Hostname from Kafka Connect config (e.g., "localhost:3306", "db.example.com")

    Returns:
        Service name if found, None otherwise (also when the lookup fails)
    """
    try:
        # Use cached database services
        all_services = self.database_services

        # Filter by serviceType first to reduce the search space
        filtered_services = [
            svc
            for svc in all_services
            if svc.serviceType and svc.serviceType.value == service_type
        ]

        logger.debug(
            f"Found {len(filtered_services)} services with serviceType={service_type} "
            f"out of {len(all_services)} total database services"
        )

        # Extract just the hostname (no protocol, no port)
        connector_host = self._extract_hostname(hostname).lower()
        if not connector_host:
            # Without this guard an empty connector hostname would "match"
            # any service whose hostPort also normalises to an empty string.
            logger.debug(
                f"No usable hostname could be extracted from '{hostname}'; "
                f"skipping database service matching"
            )
            return None

        # Match by hostname in service connection config
        for service in filtered_services:
            if not service.connection or not service.connection.config:
                continue

            service_config = service.connection.config

            # Different services use different field names for the endpoint
            host_port = getattr(service_config, "hostPort", None) or getattr(
                service_config, "host", None
            )
            if not host_port:
                continue

            # str() keeps non-string endpoint values from aborting the whole
            # search through the broad except below
            service_host = self._extract_hostname(str(host_port)).lower()

            # Match hostname (case-insensitive)
            if service_host == connector_host:
                service_name = str(
                    service.name.root
                    if hasattr(service.name, "root")
                    else service.name
                )
                logger.info(
                    f"Matched database service: {service_name} "
                    f"(type={service_type}, hostname={connector_host})"
                )
                return service_name

        logger.debug(
            f"No database service found matching serviceType={service_type}, hostname={connector_host}"
        )
        return None

    except Exception as exc:
        # Best effort: a failed lookup must not break the ingestion run
        logger.debug(traceback.format_exc())
        logger.warning(f"Unable to find database service by hostname: {exc}")
        return None
|
|
247
|
+
|
|
248
|
+
def find_messaging_service_by_brokers(self, brokers: str) -> Optional[str]:
    """
    Find messaging service by matching broker endpoints.

    A service matches when at least one of its bootstrap server hostnames
    equals (case-insensitively, ignoring protocol and port) one of the
    connector's broker hostnames. Empty entries, e.g. from a trailing comma,
    are ignored on both sides.

    Args:
        brokers: Comma-separated broker list from Kafka Connect config
            (e.g., "SASL_SSL://broker1:9092,broker2:9092")

    Returns:
        Service name if found, None otherwise (also when the lookup fails)
    """
    try:
        # Use cached messaging services
        all_services = self.messaging_services

        logger.debug(f"Searching for messaging service matching brokers: {brokers}")

        # Parse connector brokers into a set of hostnames (no protocol, no port)
        connector_brokers = {
            self._extract_hostname(broker.strip()).lower()
            for broker in brokers.split(",")
        }
        # "a:9092," yields an empty entry; keeping it would make every
        # service with a similarly malformed list look like a match
        connector_brokers.discard("")

        if not connector_brokers:
            logger.debug(
                f"No messaging service found matching broker hostnames: {connector_brokers}"
            )
            return None

        # Match by brokers in service connection config
        for service in all_services:
            if not service.connection or not service.connection.config:
                continue

            bootstrap_servers = getattr(
                service.connection.config, "bootstrapServers", None
            )
            if not bootstrap_servers:
                continue

            # Parse service brokers into hostnames (no protocol, no port)
            service_brokers = {
                self._extract_hostname(broker.strip()).lower()
                for broker in str(bootstrap_servers).split(",")
            }
            service_brokers.discard("")

            # Any shared broker hostname counts as a match
            matched_brokers = connector_brokers & service_brokers
            if matched_brokers:
                service_name = str(
                    service.name.root
                    if hasattr(service.name, "root")
                    else service.name
                )
                logger.info(
                    f"Matched messaging service: {service_name} "
                    f"(matched broker hostnames: {matched_brokers})"
                )
                return service_name

        logger.debug(
            f"No messaging service found matching broker hostnames: {connector_brokers}"
        )
        return None

    except Exception as exc:
        # Best effort: a failed lookup must not break the ingestion run
        logger.debug(traceback.format_exc())
        logger.warning(f"Unable to find messaging service by brokers: {exc}")
        return None
|
|
313
|
+
|
|
314
|
+
def get_service_from_connector_config(
    self, pipeline_details: KafkaConnectPipelineDetails
) -> tuple[Optional[str], Optional[str]]:
    """
    Resolve the database and messaging services a connector talks to.

    The connector class (last dotted segment of "connector.class") selects a
    service type; the hostname keys registered for that type are then read
    from the connector config and matched against known database services.
    Broker endpoints are read from the messaging endpoint keys and matched
    against known messaging services.

    Args:
        pipeline_details: Kafka Connect pipeline details with config

    Returns:
        Tuple of (database_service_name, messaging_service_name).
        Either or both can be None if not found; (None, None) on any error.
    """
    connector_config = pipeline_details.config
    if not connector_config:
        return None, None

    try:
        matched_db_service = None
        matched_messaging_service = None

        # e.g. "MySqlCdcSource" from "io.debezium.connector.mysql.MySqlCdcSource"
        connector_class = connector_config.get("connector.class", "")
        service_type = None
        if connector_class:
            short_class_name = connector_class.split(".")[-1]
            service_type = CONNECTOR_CLASS_TO_SERVICE_TYPE.get(short_class_name)

        if service_type:
            # Try each hostname key until one resolves to a database service
            for key in SERVICE_TYPE_HOSTNAME_KEYS.get(service_type, []):
                hostname = connector_config.get(key)
                if not hostname:
                    continue
                logger.debug(
                    f"Found hostname '{hostname}' for service type '{service_type}' "
                    f"from config key '{key}'"
                )
                matched_db_service = self.find_database_service_by_hostname(
                    service_type=service_type, hostname=hostname
                )
                if matched_db_service:
                    break

        # Try each broker key until one resolves to a messaging service
        for key in MESSAGING_ENDPOINT_KEYS:
            brokers = connector_config.get(key)
            if not brokers:
                continue
            logger.debug(f"Found brokers '{brokers}' from config key '{key}'")
            matched_messaging_service = self.find_messaging_service_by_brokers(
                brokers=brokers
            )
            if matched_messaging_service:
                break

        return matched_db_service, matched_messaging_service

    except Exception as exc:
        logger.debug(traceback.format_exc())
        logger.warning(
            f"Unable to extract service names from connector config: {exc}"
        )
        return None, None
|
|
378
|
+
|
|
89
379
|
def yield_pipeline(
|
|
90
380
|
self, pipeline_details: KafkaConnectPipelineDetails
|
|
91
381
|
) -> Iterable[Either[CreatePipelineRequest]]:
|
|
@@ -105,9 +395,11 @@ class KafkaconnectSource(PipelineServiceSource):
|
|
|
105
395
|
for task in pipeline_details.tasks or []
|
|
106
396
|
],
|
|
107
397
|
service=self.context.get().pipeline_service,
|
|
108
|
-
description=
|
|
109
|
-
|
|
110
|
-
|
|
398
|
+
description=(
|
|
399
|
+
Markdown(pipeline_details.description)
|
|
400
|
+
if pipeline_details.description
|
|
401
|
+
else None
|
|
402
|
+
),
|
|
111
403
|
)
|
|
112
404
|
yield Either(right=pipeline_request)
|
|
113
405
|
self.register_record(pipeline_request=pipeline_request)
|
|
@@ -130,9 +422,16 @@ class KafkaconnectSource(PipelineServiceSource):
|
|
|
130
422
|
dataset_details = pipeline_details.dataset
|
|
131
423
|
if dataset_details:
|
|
132
424
|
if dataset_details.dataset_type == Table:
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
425
|
+
# Try to match database service from connector config first
|
|
426
|
+
db_service_name, _ = self.get_service_from_connector_config(
|
|
427
|
+
pipeline_details
|
|
428
|
+
)
|
|
429
|
+
|
|
430
|
+
# Priority 1: Use matched service from connector config
|
|
431
|
+
if db_service_name:
|
|
432
|
+
logger.info(
|
|
433
|
+
f"Using matched database service '{db_service_name}' from connector config"
|
|
434
|
+
)
|
|
136
435
|
dataset_entity = self.metadata.get_by_name(
|
|
137
436
|
entity=dataset_details.dataset_type,
|
|
138
437
|
fqn=fqn.build(
|
|
@@ -141,31 +440,98 @@ class KafkaconnectSource(PipelineServiceSource):
|
|
|
141
440
|
table_name=dataset_details.table,
|
|
142
441
|
database_name=None,
|
|
143
442
|
schema_name=dataset_details.database,
|
|
144
|
-
service_name=
|
|
443
|
+
service_name=db_service_name,
|
|
145
444
|
),
|
|
146
445
|
)
|
|
147
|
-
|
|
148
446
|
if dataset_entity:
|
|
149
447
|
return dataset_entity
|
|
150
448
|
|
|
449
|
+
# Priority 2: Use configured dbServiceNames
|
|
450
|
+
if (
|
|
451
|
+
hasattr(self.source_config, "lineageInformation")
|
|
452
|
+
and hasattr(
|
|
453
|
+
self.source_config.lineageInformation, "dbServiceNames"
|
|
454
|
+
)
|
|
455
|
+
and self.source_config.lineageInformation.dbServiceNames
|
|
456
|
+
):
|
|
457
|
+
for (
|
|
458
|
+
dbservicename
|
|
459
|
+
) in self.source_config.lineageInformation.dbServiceNames:
|
|
460
|
+
dataset_entity = self.metadata.get_by_name(
|
|
461
|
+
entity=dataset_details.dataset_type,
|
|
462
|
+
fqn=fqn.build(
|
|
463
|
+
metadata=self.metadata,
|
|
464
|
+
entity_type=dataset_details.dataset_type,
|
|
465
|
+
table_name=dataset_details.table,
|
|
466
|
+
database_name=None,
|
|
467
|
+
schema_name=dataset_details.database,
|
|
468
|
+
service_name=dbservicename,
|
|
469
|
+
),
|
|
470
|
+
)
|
|
471
|
+
|
|
472
|
+
if dataset_entity:
|
|
473
|
+
return dataset_entity
|
|
474
|
+
|
|
475
|
+
# Priority 3: Fallback to search across all database services
|
|
476
|
+
logger.info(
|
|
477
|
+
f"No service match found - searching all database services for table {dataset_details.table}"
|
|
478
|
+
)
|
|
479
|
+
# Build search string: schema.table format (with proper quoting for special chars)
|
|
480
|
+
search_string = (
|
|
481
|
+
f"{fqn.quote_name(dataset_details.database)}.{fqn.quote_name(dataset_details.table)}"
|
|
482
|
+
if dataset_details.database
|
|
483
|
+
else fqn.quote_name(dataset_details.table)
|
|
484
|
+
)
|
|
485
|
+
dataset_entity = self.metadata.search_in_any_service(
|
|
486
|
+
entity_type=Table,
|
|
487
|
+
fqn_search_string=search_string,
|
|
488
|
+
)
|
|
489
|
+
if dataset_entity:
|
|
490
|
+
logger.debug(
|
|
491
|
+
f"Found table {dataset_details.table} via search in service {dataset_entity.service.name if dataset_entity.service else 'unknown'}"
|
|
492
|
+
)
|
|
493
|
+
return dataset_entity
|
|
494
|
+
|
|
151
495
|
if dataset_details.dataset_type == Container:
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
storage_entity = self.metadata.get_by_name(
|
|
158
|
-
entity=dataset_details.dataset_type,
|
|
159
|
-
fqn=fqn.build(
|
|
160
|
-
metadata=self.metadata,
|
|
161
|
-
entity_type=dataset_details.dataset_type,
|
|
162
|
-
container_name=dataset_details.container_name,
|
|
163
|
-
service_name=storageservicename,
|
|
164
|
-
parent_container=None,
|
|
165
|
-
),
|
|
496
|
+
# If storageServiceNames is configured, use it to build FQN directly
|
|
497
|
+
if (
|
|
498
|
+
hasattr(self.source_config, "lineageInformation")
|
|
499
|
+
and hasattr(
|
|
500
|
+
self.source_config.lineageInformation, "storageServiceNames"
|
|
166
501
|
)
|
|
502
|
+
and self.source_config.lineageInformation.storageServiceNames
|
|
503
|
+
):
|
|
504
|
+
for (
|
|
505
|
+
storageservicename
|
|
506
|
+
) in self.source_config.lineageInformation.storageServiceNames:
|
|
507
|
+
storage_entity = self.metadata.get_by_name(
|
|
508
|
+
entity=dataset_details.dataset_type,
|
|
509
|
+
fqn=fqn.build(
|
|
510
|
+
metadata=self.metadata,
|
|
511
|
+
entity_type=dataset_details.dataset_type,
|
|
512
|
+
container_name=dataset_details.container_name,
|
|
513
|
+
service_name=storageservicename,
|
|
514
|
+
parent_container=None,
|
|
515
|
+
),
|
|
516
|
+
)
|
|
167
517
|
|
|
518
|
+
if storage_entity:
|
|
519
|
+
return storage_entity
|
|
520
|
+
else:
|
|
521
|
+
# Search across all storage services
|
|
522
|
+
logger.info(
|
|
523
|
+
f"storageServiceNames not configured - searching all storage services for container {dataset_details.container_name}"
|
|
524
|
+
)
|
|
525
|
+
storage_entity = self.metadata.search_in_any_service(
|
|
526
|
+
entity_type=Container,
|
|
527
|
+
fqn_search_string=fqn.quote_name(
|
|
528
|
+
dataset_details.container_name
|
|
529
|
+
),
|
|
530
|
+
)
|
|
168
531
|
if storage_entity:
|
|
532
|
+
logger.debug(
|
|
533
|
+
f"Found container {dataset_details.container_name} via search in service {storage_entity.service.name if storage_entity.service else 'unknown'}"
|
|
534
|
+
)
|
|
169
535
|
return storage_entity
|
|
170
536
|
|
|
171
537
|
except Exception as exc:
|
|
@@ -174,6 +540,528 @@ class KafkaconnectSource(PipelineServiceSource):
|
|
|
174
540
|
|
|
175
541
|
return None
|
|
176
542
|
|
|
543
|
+
def _get_entity_column_fqn(self, entity: T, column_name: str) -> Optional[str]:
    """
    Resolve the fully qualified name of a column/field on an entity.

    Topics are resolved through the topic schema lookup, tables through the
    SQL lineage column helper. Any other entity type is logged and ignored.

    Args:
        entity: Table or Topic entity
        column_name: Column/field name

    Returns:
        Fully qualified column name or None
    """
    if isinstance(entity, Topic):
        return self._get_topic_field_fqn(entity, column_name)

    if isinstance(entity, Table):
        return get_column_fqn(table_entity=entity, column=column_name)

    logger.warning(
        f"Unsupported entity type for column FQN: {type(entity).__name__}"
    )
    return None
|
|
564
|
+
|
|
565
|
+
def _parse_cdc_schema_columns(self, schema_text: str) -> List[str]:
|
|
566
|
+
"""
|
|
567
|
+
Parse Debezium CDC schema JSON to extract table column names.
|
|
568
|
+
|
|
569
|
+
Looks for columns in 'after' or 'before' fields within the schema,
|
|
570
|
+
handling nullable oneOf structures.
|
|
571
|
+
|
|
572
|
+
Args:
|
|
573
|
+
schema_text: Raw JSON schema string from topic
|
|
574
|
+
|
|
575
|
+
Returns:
|
|
576
|
+
List of column names, or empty list if parsing fails
|
|
577
|
+
"""
|
|
578
|
+
try:
|
|
579
|
+
import json
|
|
580
|
+
|
|
581
|
+
schema_dict = json.loads(schema_text)
|
|
582
|
+
|
|
583
|
+
# Look for 'after' or 'before' field in the schema
|
|
584
|
+
for field_name in ["after", "before"]:
|
|
585
|
+
if field_name not in schema_dict.get("properties", {}):
|
|
586
|
+
continue
|
|
587
|
+
|
|
588
|
+
field_def = schema_dict["properties"][field_name]
|
|
589
|
+
|
|
590
|
+
# Handle oneOf (nullable types)
|
|
591
|
+
if "oneOf" not in field_def:
|
|
592
|
+
continue
|
|
593
|
+
|
|
594
|
+
for option in field_def["oneOf"]:
|
|
595
|
+
if isinstance(option, dict) and option.get("type") == "object":
|
|
596
|
+
columns = list(option.get("properties", {}).keys())
|
|
597
|
+
logger.debug(
|
|
598
|
+
f"Parsed {len(columns)} columns from CDC '{field_name}' field"
|
|
599
|
+
)
|
|
600
|
+
return columns
|
|
601
|
+
|
|
602
|
+
except Exception as exc:
|
|
603
|
+
logger.debug(f"Unable to parse CDC schema text: {exc}")
|
|
604
|
+
|
|
605
|
+
return []
|
|
606
|
+
|
|
607
|
+
def _extract_columns_from_entity(self, entity: T) -> List[str]:
    """
    Extract column/field names from Table or Topic entity.

    For Debezium CDC topics, extracts columns from the 'after' or 'before' field
    which contains the actual table structure, not the CDC envelope fields.
    A topic is treated as CDC when the envelope fields (CDC_ENVELOPE_FIELDS)
    are found at the top level of schemaFields, among the top-level
    properties of schemaText, or as children of a single wrapping field.

    Args:
        entity: Table or Topic entity

    Returns:
        List of column/field names; empty when the entity has no schema or,
        for a CDC topic, when no columns can be located
    """
    if isinstance(entity, Table):
        return [col.name.root for col in entity.columns or []]

    message_schema = getattr(entity, "messageSchema", None)
    if not message_schema:
        return []

    schema_fields = message_schema.schemaFields or []

    # Flat envelope: op/before/after sit at the top level of the schema
    field_names = {get_field_name(f.name) for f in schema_fields}
    is_debezium_cdc = CDC_ENVELOPE_FIELDS.issubset(field_names)

    # Fallback: look for the envelope fields in the raw schema text
    if not is_debezium_cdc and message_schema.schemaText:
        try:
            import json

            schema_dict = json.loads(message_schema.schemaText)
            schema_props = schema_dict.get("properties", {})
            is_debezium_cdc = CDC_ENVELOPE_FIELDS.issubset(set(schema_props.keys()))
        except Exception as exc:
            # Schema text that cannot be inspected this way is not an error;
            # record why the probe failed instead of dropping it
            logger.debug(
                f"Unable to inspect schemaText for CDC envelope fields: {exc}"
            )

    logger.debug(
        f"Topic {get_field_name(entity.name) if hasattr(entity, 'name') else 'unknown'}: field_names={field_names}, is_debezium_cdc={is_debezium_cdc}"
    )

    # Nested envelope: a single wrapping field whose children are op/before/after
    if not is_debezium_cdc and len(schema_fields) == 1:
        envelope_field = schema_fields[0]
        if envelope_field.children:
            envelope_child_names = {
                get_field_name(c.name) for c in envelope_field.children
            }
            is_debezium_cdc = CDC_ENVELOPE_FIELDS.issubset(envelope_child_names)
            if is_debezium_cdc:
                logger.debug(
                    f"Nested Debezium CDC envelope detected: {get_field_name(envelope_field.name)}"
                )
                # From here on work with the envelope's children
                schema_fields = envelope_field.children

    if is_debezium_cdc:
        # Prefer 'after' (record state after the change); fall back to
        # 'before' when 'after' carries no children
        for envelope_key in ("after", "before"):
            for field in schema_fields:
                if get_field_name(field.name) == envelope_key and field.children:
                    columns = [
                        get_field_name(child.name) for child in field.children
                    ]
                    logger.debug(
                        f"Debezium CDC: extracted {len(columns)} columns from '{envelope_key}' field"
                    )
                    return columns

        # Final fallback: parse schemaText if after/before don't have children
        if message_schema.schemaText:
            columns = self._parse_cdc_schema_columns(message_schema.schemaText)
            if columns:
                logger.debug(
                    f"Debezium CDC: extracted {len(columns)} columns from schemaText"
                )
                return columns

        logger.debug(
            "Debezium CDC detected but unable to extract columns from after/before fields"
        )
        return []

    # Non-CDC topic: a field with children contributes its children's names,
    # a leaf field contributes its own name
    columns = []
    for field in schema_fields:
        if field.children:
            columns.extend(get_field_name(child.name) for child in field.children)
        else:
            columns.append(get_field_name(field.name))
    return columns
|
|
720
|
+
|
|
721
|
+
def _get_topic_field_fqn(
    self, topic_entity: Topic, field_name: str
) -> Optional[str]:
    """
    Look up the fully qualified name of a field inside a Topic's schema.

    Search order, per top-level schema field: the field itself, its direct
    children, the children of an 'after' child, the children of a 'before'
    child, then the children of every other child. If nothing matches, the
    first top-level field whose name contains "Envelope" (and that has an
    FQN) is used to build "<envelopeFQN>.<field_name>" by hand.

    Returns:
        The FQN string, or None when the topic has no schema, the matching
        field has no FQN, or the field cannot be found.
    """
    message_schema = topic_entity.messageSchema
    if not message_schema or not message_schema.schemaFields:
        logger.debug(
            f"Topic {get_field_name(topic_entity.name)} has no message schema"
        )
        return None

    def fqn_or_none(node):
        # A matching node without an FQN still ends the search (returns None)
        return node.fullyQualifiedName.root if node.fullyQualifiedName else None

    for top_field in message_schema.schemaFields:
        # Direct hit on a top-level field
        if get_field_name(top_field.name) == field_name:
            return fqn_or_none(top_field)

        children = top_field.children
        if not children:
            continue

        # One level down: remember the CDC containers while checking for a
        # direct child match
        after_child = None
        before_child = None
        for child in children:
            child_name = get_field_name(child.name)
            if child_name == "after":
                after_child = child
            elif child_name == "before":
                before_child = child
            if child_name == field_name:
                return fqn_or_none(child)

        # Two levels down: 'after' first, then 'before', then everything else
        cdc_parents = [
            parent
            for parent in (after_child, before_child)
            if parent and parent.children
        ]
        other_parents = [
            child
            for child in children
            if child not in [after_child, before_child] and child.children
        ]
        for parent in cdc_parents + other_parents:
            for grandchild in parent.children:
                if get_field_name(grandchild.name) == field_name:
                    return fqn_or_none(grandchild)

    # For Debezium CDC topics, columns might only exist in schemaText (not as
    # field objects), so build the FQN manually: topicFQN.Envelope.columnName
    for top_field in message_schema.schemaFields:
        if "Envelope" in get_field_name(top_field.name) and top_field.fullyQualifiedName:
            envelope_fqn = top_field.fullyQualifiedName.root
            return f"{envelope_fqn}.{field_name}"

    logger.debug(
        f"Field {field_name} not found in topic {get_field_name(topic_entity.name)} schema"
    )
    return None
|
|
805
|
+
|
|
806
|
+
def build_column_lineage(
    self,
    from_entity: T,
    to_entity: T,
    topic_entity: Topic,
    pipeline_details: KafkaConnectPipelineDetails,
) -> Optional[List[ColumnLineage]]:
    """
    Derive column-level lineage edges for one connector / topic pair.

    The direction depends on the connector type:
      * sink connectors:   topic schema fields -> ``to_entity`` columns
      * any other type:    ``from_entity`` columns -> topic schema fields

    When ``pipeline_details.dataset`` carries explicit column mappings they
    are used as-is; otherwise columns of both sides are paired 1:1 by
    case-insensitive name.

    Args:
        from_entity: Upstream entity, used as the source side for non-sink connectors.
        to_entity: Downstream entity, used as the target side for sink connectors.
        topic_entity: Topic sitting on the other side of the edge.
        pipeline_details: Connector details (type, name, optional dataset mappings).

    Returns:
        A non-empty list of ColumnLineage objects, or None when no pair could
        be resolved or when any error occurred (errors are logged, not raised).
    """
    try:
        lineage_edges = []

        dataset = pipeline_details.dataset
        explicit_mappings = dataset.column_mappings if dataset else None

        # Resolve which entity plays "source" and which plays "target" once;
        # a sink reads from the topic, anything else writes to it.
        if pipeline_details.conn_type == ConnectorType.SINK.value:
            source_entity, target_entity = topic_entity, to_entity
        else:
            source_entity, target_entity = from_entity, topic_entity

        if explicit_mappings:
            # Connector config told us exactly which column maps to which.
            for mapping in explicit_mappings:
                from_col = get_column_fqn(
                    table_entity=source_entity, column=mapping.source_column
                )
                to_col = get_column_fqn(
                    table_entity=target_entity, column=mapping.target_column
                )
                if not (from_col and to_col):
                    continue
                lineage_edges.append(
                    ColumnLineage(
                        fromColumns=[from_col],
                        toColumn=to_col,
                        function=None,
                    )
                )
        else:
            # No explicit mapping: fall back to pairing columns by name.
            source_columns = self._extract_columns_from_entity(source_entity)
            target_columns = self._extract_columns_from_entity(target_entity)

            logger.debug(
                f"Column matching for {pipeline_details.name}: "
                f"source={len(source_columns)} cols from {source_entity.__class__.__name__}, "
                f"target={len(target_columns)} cols from {target_entity.__class__.__name__}"
            )
            logger.debug(f"Source columns: {source_columns[:5]}")  # only a sample
            logger.debug(f"Target columns: {target_columns}")

            # Lower-cased name -> original target column, for constant-time matching.
            targets_by_key = {str(col).lower(): col for col in target_columns}

            for source_col_name in source_columns:
                lookup_key = str(source_col_name).lower()
                if lookup_key not in targets_by_key:
                    continue

                target_col_name = targets_by_key[lookup_key]
                logger.debug(f"Matched column: {source_col_name} -> {target_col_name}")
                try:
                    from_col = self._get_entity_column_fqn(
                        source_entity, source_col_name
                    )
                    to_col = self._get_entity_column_fqn(
                        target_entity, target_col_name
                    )

                    logger.debug(f"FQNs: from_col={from_col}, to_col={to_col}")

                    if from_col and to_col:
                        lineage_edges.append(
                            ColumnLineage(
                                fromColumns=[from_col],
                                toColumn=to_col,
                                function=None,
                            )
                        )
                        logger.debug(f"Added column lineage: {from_col} -> {to_col}")
                except (KeyError, AttributeError) as exc:
                    # A single unresolvable pair must not abort the remaining ones.
                    logger.debug(
                        f"Error creating column lineage for {source_col_name} -> {target_col_name}: {exc}"
                    )

        if lineage_edges:
            logger.debug(
                f"Created {len(lineage_edges)} column lineages for {pipeline_details.name}"
            )
        return lineage_edges or None

    except Exception as exc:
        logger.debug(traceback.format_exc())
        logger.warning(f"Unable to build column lineage: {exc}")

    return None
|
|
918
|
+
|
|
919
|
+
def _search_topics_by_prefix(
    self, database_server_name: str, messaging_service_name: Optional[str] = None
) -> List[KafkaConnectTopics]:
    """
    Collect already-ingested topics whose name starts with ``<database_server_name>.``.

    Used as a fallback when the connector config does not list its tables.
    Topics are listed from the given messaging service and filtered locally
    by name prefix; when no messaging service name is given, no listing is
    performed and the result is empty.

    Args:
        database_server_name: Prefix taken from the connector config; an empty
            value short-circuits to an empty result.
        messaging_service_name: Messaging service whose topics are listed.

    Returns:
        List of KafkaConnectTopics (name + FQN) for every matching topic.
        Any error is logged and whatever was collected so far is returned.
    """
    matches = []

    try:
        if not database_server_name:
            return matches

        logger.info(
            f"Searching messaging service for topics with prefix: {database_server_name}"
        )

        if messaging_service_name:
            # Wildcard pattern <service>."<prefix>.*" - only logged, the actual
            # filtering below is a plain prefix check on the topic name.
            from metadata.utils import fqn as fqn_utils

            search_pattern = f"{database_server_name}.*"
            quoted_service = fqn_utils.quote_name(messaging_service_name)
            quoted_pattern = fqn_utils.quote_name(search_pattern)
            search_fqn = f"{quoted_service}.{quoted_pattern}"
            logger.debug(f"Searching for topics with FQN pattern: {search_fqn}")

            from metadata.generated.schema.entity.data.topic import Topic

            # Materialise the full listing first, then filter.
            service_topics = list(
                self.metadata.list_all_entities(
                    entity=Topic,
                    params={"service": messaging_service_name},
                )
            )

            for candidate in service_topics:
                # name / FQN may be wrapped models exposing ``.root`` or plain values
                topic_name = str(getattr(candidate.name, "root", candidate.name))
                if not topic_name.startswith(database_server_name + "."):
                    continue

                topic_fqn = getattr(
                    candidate.fullyQualifiedName,
                    "root",
                    candidate.fullyQualifiedName,
                )
                matches.append(KafkaConnectTopics(name=topic_name, fqn=topic_fqn))
                logger.debug(f"Found matching topic: {topic_name} (FQN: {topic_fqn})")

        if not matches:
            logger.warning(
                f"No topics found matching prefix '{database_server_name}'. "
                f"Ensure the messaging service has ingested topics before running Kafka Connect ingestion."
            )
        else:
            logger.info(
                f"Found {len(matches)} topics matching prefix '{database_server_name}' "
                f"in messaging service"
            )

    except Exception as exc:
        logger.debug(traceback.format_exc())
        logger.warning(f"Unable to search topics by prefix: {exc}")

    return matches
|
|
1001
|
+
|
|
1002
|
+
def _parse_cdc_topics_from_config(
    self, pipeline_details: KafkaConnectPipelineDetails, database_server_name: str
) -> List[KafkaConnectTopics]:
    """
    Build the expected CDC topic names from the connector's table list.

    Reads ``table.include.list`` (or, failing that, ``table.whitelist``) from
    the connector config, splits it on commas and prefixes every non-empty
    entry with ``database_server_name``, giving names of the form
    ``{database_server_name}.{entry}``.

    Args:
        pipeline_details: Connector details; only ``config`` and ``name`` are read.
        database_server_name: Prefix placed in front of every table entry.

    Returns:
        List of KafkaConnectTopics carrying only a name. Empty when there is no
        config, no table list, or an error occurred (errors are logged).
    """
    parsed_topics = []

    try:
        connector_config = pipeline_details.config
        if not connector_config:
            return parsed_topics

        # First non-empty key wins; the include list takes precedence.
        table_include_list = None
        for key in ("table.include.list", "table.whitelist"):
            configured_value = connector_config.get(key)
            if configured_value:
                table_include_list = configured_value
                logger.debug(
                    f"Found table list from config key '{key}': {table_include_list}"
                )
                break

        if not table_include_list:
            logger.warning(
                f"⚠️ CDC connector '{pipeline_details.name}' is missing table.include.list or table.whitelist.\n"
                f" Without this configuration, lineage cannot be created automatically.\n"
                f' Add to connector config: "table.include.list": "schema1.table1,schema2.table2"\n'
            )
            return parsed_topics

        # Expected shape: "schema1.table1,schema2.table2"
        stripped_entries = (piece.strip() for piece in table_include_list.split(","))
        for table_entry in stripped_entries:
            if not table_entry:
                continue  # tolerate stray / trailing commas

            # Each entry is appended verbatim after the server-name prefix.
            topic_name = f"{database_server_name}.{table_entry}"

            parsed_topics.append(KafkaConnectTopics(name=topic_name))
            logger.debug(f"Parsed CDC topic from config: {topic_name}")

        logger.info(f"Parsed {len(parsed_topics)} CDC topics from table.include.list")

    except Exception as exc:
        logger.debug(traceback.format_exc())
        logger.warning(f"Unable to parse CDC topics from connector config: {exc}")

    return parsed_topics
|
|
1064
|
+
|
|
177
1065
|
def yield_pipeline_lineage_details(
|
|
178
1066
|
self, pipeline_details: KafkaConnectPipelineDetails
|
|
179
1067
|
) -> Iterable[Either[AddLineageRequest]]:
|
|
@@ -181,9 +1069,27 @@ class KafkaconnectSource(PipelineServiceSource):
|
|
|
181
1069
|
Get lineage between pipeline and data sources
|
|
182
1070
|
"""
|
|
183
1071
|
try:
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
1072
|
+
# Try to match messaging service from connector config
|
|
1073
|
+
_, messaging_service_name = self.get_service_from_connector_config(
|
|
1074
|
+
pipeline_details
|
|
1075
|
+
)
|
|
1076
|
+
|
|
1077
|
+
# Use matched service if found, otherwise fall back to configured name
|
|
1078
|
+
effective_messaging_service = messaging_service_name or (
|
|
1079
|
+
self.service_connection.messagingServiceName
|
|
1080
|
+
if hasattr(self.service_connection, "messagingServiceName")
|
|
1081
|
+
else None
|
|
1082
|
+
)
|
|
1083
|
+
|
|
1084
|
+
if effective_messaging_service:
|
|
1085
|
+
logger.info(
|
|
1086
|
+
f"Using messaging service '{effective_messaging_service}' "
|
|
1087
|
+
f"({'matched from config' if messaging_service_name else 'from configuration'})"
|
|
1088
|
+
)
|
|
1089
|
+
else:
|
|
1090
|
+
logger.info(
|
|
1091
|
+
"No messaging service specified - will search all messaging services for topics"
|
|
1092
|
+
)
|
|
187
1093
|
|
|
188
1094
|
pipeline_fqn = fqn.build(
|
|
189
1095
|
metadata=self.metadata,
|
|
@@ -196,50 +1102,697 @@ class KafkaconnectSource(PipelineServiceSource):
|
|
|
196
1102
|
entity=Pipeline, fqn=pipeline_fqn
|
|
197
1103
|
)
|
|
198
1104
|
|
|
199
|
-
lineage_details = LineageDetails(
|
|
200
|
-
pipeline=EntityReference(id=pipeline_entity.id.root, type="pipeline"),
|
|
201
|
-
source=LineageSource.PipelineLineage,
|
|
202
|
-
)
|
|
203
|
-
|
|
204
1105
|
dataset_entity = self.get_dataset_entity(pipeline_details=pipeline_details)
|
|
205
1106
|
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
1107
|
+
# Get database.server.name or topic.prefix for CDC topic parsing
|
|
1108
|
+
# These are ONLY set by Debezium CDC connectors
|
|
1109
|
+
database_server_name = None
|
|
1110
|
+
if pipeline_details.config:
|
|
1111
|
+
database_server_name = pipeline_details.config.get(
|
|
1112
|
+
"database.server.name"
|
|
1113
|
+
) or pipeline_details.config.get("topic.prefix")
|
|
1114
|
+
|
|
1115
|
+
# For CDC connectors without explicit topics, parse topics from connector config
|
|
1116
|
+
# using table.include.list and database.server.name
|
|
1117
|
+
# Only do this for Debezium CDC connectors (identified by database.server.name or topic.prefix)
|
|
1118
|
+
topics_to_process = pipeline_details.topics or []
|
|
1119
|
+
if (
|
|
1120
|
+
not topics_to_process
|
|
1121
|
+
and database_server_name
|
|
1122
|
+
and pipeline_details.conn_type == ConnectorType.SOURCE.value
|
|
1123
|
+
):
|
|
1124
|
+
# Try to parse topics from table.include.list first
|
|
1125
|
+
topics_to_process = self._parse_cdc_topics_from_config(
|
|
1126
|
+
pipeline_details=pipeline_details,
|
|
1127
|
+
database_server_name=database_server_name,
|
|
212
1128
|
)
|
|
213
1129
|
|
|
214
|
-
|
|
1130
|
+
# If table.include.list is not available, fallback to searching topics by prefix
|
|
1131
|
+
# This requires topics to be already ingested in the messaging service
|
|
1132
|
+
if not topics_to_process and effective_messaging_service:
|
|
1133
|
+
logger.info(
|
|
1134
|
+
f"Falling back to searching topics by prefix in messaging service '{effective_messaging_service}'"
|
|
1135
|
+
)
|
|
1136
|
+
topics_to_process = self._search_topics_by_prefix(
|
|
1137
|
+
database_server_name=database_server_name,
|
|
1138
|
+
messaging_service_name=effective_messaging_service,
|
|
1139
|
+
)
|
|
1140
|
+
|
|
1141
|
+
for topic in topics_to_process:
|
|
1142
|
+
topic_entity = None
|
|
215
1143
|
|
|
216
|
-
|
|
217
|
-
continue
|
|
1144
|
+
logger.info(f"Processing topic: {topic.name}")
|
|
218
1145
|
|
|
219
|
-
|
|
220
|
-
|
|
1146
|
+
# If we have FQN from CDC topic discovery, use it directly
|
|
1147
|
+
if topic.fqn:
|
|
1148
|
+
logger.info(f"Searching for topic using pre-built FQN: {topic.fqn}")
|
|
1149
|
+
topic_entity = self.metadata.get_by_name(
|
|
1150
|
+
entity=Topic, fqn=topic.fqn
|
|
1151
|
+
)
|
|
1152
|
+
if topic_entity:
|
|
1153
|
+
logger.info(f"✓ Found topic using stored FQN: {topic.fqn}")
|
|
1154
|
+
else:
|
|
1155
|
+
logger.warning(f"✗ Topic NOT found using FQN: {topic.fqn}")
|
|
1156
|
+
# If messaging service is known (matched or configured), use it to build FQN
|
|
1157
|
+
elif effective_messaging_service:
|
|
1158
|
+
# fqn.build() already quotes each component (service_name and topic_name)
|
|
1159
|
+
topic_fqn = fqn.build(
|
|
1160
|
+
metadata=self.metadata,
|
|
1161
|
+
entity_type=Topic,
|
|
1162
|
+
service_name=effective_messaging_service,
|
|
1163
|
+
topic_name=str(topic.name),
|
|
1164
|
+
)
|
|
1165
|
+
logger.info(
|
|
1166
|
+
f"Built topic FQN: {topic_fqn} "
|
|
1167
|
+
f"(service={effective_messaging_service}, topic_name={topic.name})"
|
|
1168
|
+
)
|
|
1169
|
+
topic_entity = self.metadata.get_by_name(
|
|
1170
|
+
entity=Topic, fqn=topic_fqn
|
|
1171
|
+
)
|
|
1172
|
+
if topic_entity:
|
|
1173
|
+
logger.info(f"✓ Found topic using built FQN: {topic_fqn}")
|
|
1174
|
+
else:
|
|
1175
|
+
logger.warning(
|
|
1176
|
+
f"✗ Topic NOT found using FQN: {topic_fqn} "
|
|
1177
|
+
f"(service={effective_messaging_service}, topic_name={topic.name})"
|
|
1178
|
+
)
|
|
221
1179
|
else:
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
type=ENTITY_REFERENCE_TYPE_MAP[
|
|
236
|
-
type(to_entity).__name__
|
|
237
|
-
],
|
|
238
|
-
),
|
|
239
|
-
lineageDetails=lineage_details,
|
|
1180
|
+
# Fallback: Search across all messaging services
|
|
1181
|
+
search_string = f"*.{fqn.quote_name(str(topic.name))}"
|
|
1182
|
+
logger.info(
|
|
1183
|
+
f"Searching for topic across all services using pattern: {search_string}"
|
|
1184
|
+
)
|
|
1185
|
+
topic_entity = self.metadata.search_in_any_service(
|
|
1186
|
+
entity_type=Topic,
|
|
1187
|
+
fqn_search_string=search_string,
|
|
1188
|
+
)
|
|
1189
|
+
if topic_entity:
|
|
1190
|
+
logger.info(
|
|
1191
|
+
f"✓ Found topic via search: {topic.name} in service "
|
|
1192
|
+
f"{topic_entity.service.name if topic_entity.service else 'unknown'}"
|
|
240
1193
|
)
|
|
1194
|
+
else:
|
|
1195
|
+
logger.warning(f"✗ Topic NOT found via search: {search_string}")
|
|
1196
|
+
|
|
1197
|
+
# If topic not found, we'll still try to create table → pipeline lineage
|
|
1198
|
+
if topic_entity is None:
|
|
1199
|
+
logger.warning(
|
|
1200
|
+
f"Topic {topic.name} not found in OpenMetadata - will create direct table → pipeline lineage"
|
|
1201
|
+
)
|
|
1202
|
+
else:
|
|
1203
|
+
logger.info(f"✓ Successfully found topic entity: {topic.name}")
|
|
1204
|
+
|
|
1205
|
+
# If no dataset entity from config, try to parse table info from CDC topic name
|
|
1206
|
+
current_dataset_entity = dataset_entity
|
|
1207
|
+
if current_dataset_entity:
|
|
1208
|
+
logger.info(
|
|
1209
|
+
f"Using dataset entity from config: {current_dataset_entity.fullyQualifiedName.root if hasattr(current_dataset_entity.fullyQualifiedName, 'root') else current_dataset_entity.fullyQualifiedName}"
|
|
1210
|
+
)
|
|
1211
|
+
|
|
1212
|
+
if (
|
|
1213
|
+
current_dataset_entity is None
|
|
1214
|
+
and pipeline_details.conn_type == ConnectorType.SOURCE.value
|
|
1215
|
+
):
|
|
1216
|
+
# Parse CDC topic name to extract table information
|
|
1217
|
+
logger.info(
|
|
1218
|
+
f"Parsing CDC topic name to extract table info: {topic.name}"
|
|
241
1219
|
)
|
|
1220
|
+
topic_info = parse_cdc_topic_name(
|
|
1221
|
+
str(topic.name), database_server_name
|
|
1222
|
+
)
|
|
1223
|
+
if topic_info.get("database") and topic_info.get("table"):
|
|
1224
|
+
logger.info(
|
|
1225
|
+
f"Parsed CDC topic {topic.name}: schema={topic_info['database']}, table={topic_info['table']}"
|
|
1226
|
+
)
|
|
1227
|
+
|
|
1228
|
+
# Get matched database service name
|
|
1229
|
+
db_service_name, _ = self.get_service_from_connector_config(
|
|
1230
|
+
pipeline_details
|
|
1231
|
+
)
|
|
1232
|
+
|
|
1233
|
+
# Try to find the table entity
|
|
1234
|
+
# Use wildcard search pattern since we don't know the database name
|
|
1235
|
+
# Pattern: service.*.schema.table
|
|
1236
|
+
if db_service_name:
|
|
1237
|
+
# Use matched database service first with wildcard search
|
|
1238
|
+
logger.info(
|
|
1239
|
+
f"Using matched database service: {db_service_name}"
|
|
1240
|
+
)
|
|
1241
|
+
# Build wildcard FQN: service.*.schema.table
|
|
1242
|
+
search_pattern = f"{fqn.quote_name(db_service_name)}.*.{fqn.quote_name(topic_info['database'])}.{fqn.quote_name(topic_info['table'])}"
|
|
1243
|
+
logger.info(
|
|
1244
|
+
f"Searching for table with pattern: {search_pattern} "
|
|
1245
|
+
f"(service={db_service_name}, schema={topic_info['database']}, table={topic_info['table']})"
|
|
1246
|
+
)
|
|
1247
|
+
|
|
1248
|
+
current_dataset_entity = (
|
|
1249
|
+
self.metadata.search_in_any_service(
|
|
1250
|
+
entity_type=Table,
|
|
1251
|
+
fqn_search_string=search_pattern,
|
|
1252
|
+
)
|
|
1253
|
+
)
|
|
1254
|
+
if current_dataset_entity:
|
|
1255
|
+
logger.info(
|
|
1256
|
+
f"✓ Found table using matched service pattern: {search_pattern}"
|
|
1257
|
+
)
|
|
1258
|
+
else:
|
|
1259
|
+
logger.warning(
|
|
1260
|
+
f"✗ Table NOT found using matched service pattern: {search_pattern}"
|
|
1261
|
+
)
|
|
1262
|
+
|
|
1263
|
+
if (
|
|
1264
|
+
not current_dataset_entity
|
|
1265
|
+
and hasattr(self.source_config, "lineageInformation")
|
|
1266
|
+
and hasattr(
|
|
1267
|
+
self.source_config.lineageInformation, "dbServiceNames"
|
|
1268
|
+
)
|
|
1269
|
+
and self.source_config.lineageInformation.dbServiceNames
|
|
1270
|
+
):
|
|
1271
|
+
# Try configured database services with wildcard search
|
|
1272
|
+
logger.info(
|
|
1273
|
+
f"Trying configured database services: {self.source_config.lineageInformation.dbServiceNames}"
|
|
1274
|
+
)
|
|
1275
|
+
for (
|
|
1276
|
+
dbservicename
|
|
1277
|
+
) in self.source_config.lineageInformation.dbServiceNames:
|
|
1278
|
+
# Build wildcard FQN: service.*.schema.table
|
|
1279
|
+
search_pattern = f"{fqn.quote_name(dbservicename)}.*.{fqn.quote_name(topic_info['database'])}.{fqn.quote_name(topic_info['table'])}"
|
|
1280
|
+
logger.info(
|
|
1281
|
+
f"Searching for table with pattern: {search_pattern}"
|
|
1282
|
+
)
|
|
1283
|
+
|
|
1284
|
+
current_dataset_entity = (
|
|
1285
|
+
self.metadata.search_in_any_service(
|
|
1286
|
+
entity_type=Table,
|
|
1287
|
+
fqn_search_string=search_pattern,
|
|
1288
|
+
)
|
|
1289
|
+
)
|
|
1290
|
+
if current_dataset_entity:
|
|
1291
|
+
logger.info(
|
|
1292
|
+
f"✓ Found table in service {dbservicename}: {search_pattern}"
|
|
1293
|
+
)
|
|
1294
|
+
break
|
|
1295
|
+
else:
|
|
1296
|
+
logger.debug(
|
|
1297
|
+
f"✗ Table NOT found in service {dbservicename}"
|
|
1298
|
+
)
|
|
1299
|
+
|
|
1300
|
+
if not current_dataset_entity:
|
|
1301
|
+
# Search across all database services
|
|
1302
|
+
search_string = f"{fqn.quote_name(topic_info['database'])}.{fqn.quote_name(topic_info['table'])}"
|
|
1303
|
+
logger.info(
|
|
1304
|
+
f"Searching for table across all services using pattern: {search_string}"
|
|
1305
|
+
)
|
|
1306
|
+
current_dataset_entity = (
|
|
1307
|
+
self.metadata.search_in_any_service(
|
|
1308
|
+
entity_type=Table,
|
|
1309
|
+
fqn_search_string=search_string,
|
|
1310
|
+
)
|
|
1311
|
+
)
|
|
1312
|
+
if current_dataset_entity:
|
|
1313
|
+
logger.info(
|
|
1314
|
+
f"✓ Found table via search in service "
|
|
1315
|
+
f"{current_dataset_entity.service.name if current_dataset_entity.service else 'unknown'}: "
|
|
1316
|
+
f"{current_dataset_entity.fullyQualifiedName.root if hasattr(current_dataset_entity.fullyQualifiedName, 'root') else current_dataset_entity.fullyQualifiedName}"
|
|
1317
|
+
)
|
|
1318
|
+
else:
|
|
1319
|
+
logger.warning(
|
|
1320
|
+
f"✗ Table NOT found via search: {search_string}"
|
|
1321
|
+
)
|
|
1322
|
+
else:
|
|
1323
|
+
logger.warning(
|
|
1324
|
+
f"Failed to parse table info from CDC topic name: {topic.name}"
|
|
1325
|
+
)
|
|
1326
|
+
|
|
1327
|
+
# Lineage must always be between data assets (Table ↔ Topic)
|
|
1328
|
+
# The pipeline is referenced in lineageDetails, not as a node in the graph
|
|
1329
|
+
# Skip lineage if we don't have BOTH table and topic
|
|
1330
|
+
if current_dataset_entity is None or topic_entity is None:
|
|
1331
|
+
# Get table FQN for tracking
|
|
1332
|
+
if current_dataset_entity:
|
|
1333
|
+
table_fqn_str = (
|
|
1334
|
+
current_dataset_entity.fullyQualifiedName.root
|
|
1335
|
+
if hasattr(
|
|
1336
|
+
current_dataset_entity.fullyQualifiedName, "root"
|
|
1337
|
+
)
|
|
1338
|
+
else str(current_dataset_entity.fullyQualifiedName)
|
|
1339
|
+
)
|
|
1340
|
+
else:
|
|
1341
|
+
# Table not found - construct debug message with search details
|
|
1342
|
+
table_fqn_str = "NOT FOUND"
|
|
1343
|
+
|
|
1344
|
+
# Get matched database service name and hostname
|
|
1345
|
+
(
|
|
1346
|
+
db_service_name_for_debug,
|
|
1347
|
+
_,
|
|
1348
|
+
) = self.get_service_from_connector_config(pipeline_details)
|
|
1349
|
+
|
|
1350
|
+
# Extract hostname from connector config
|
|
1351
|
+
db_hostname_for_debug = "NOT SET"
|
|
1352
|
+
if pipeline_details.config:
|
|
1353
|
+
db_hostname_for_debug = (
|
|
1354
|
+
pipeline_details.config.get("database.hostname")
|
|
1355
|
+
or pipeline_details.config.get("database.server")
|
|
1356
|
+
or pipeline_details.config.get("connection.host")
|
|
1357
|
+
or "NOT SET"
|
|
1358
|
+
)
|
|
1359
|
+
|
|
1360
|
+
# Build debug message with what we searched for
|
|
1361
|
+
if (
|
|
1362
|
+
"topic_info" in locals()
|
|
1363
|
+
and topic_info.get("database")
|
|
1364
|
+
and topic_info.get("table")
|
|
1365
|
+
):
|
|
1366
|
+
search_details = (
|
|
1367
|
+
f"{topic_info['database']}.{topic_info['table']}"
|
|
1368
|
+
)
|
|
1369
|
+
if db_service_name_for_debug:
|
|
1370
|
+
table_fqn_str = f"NOT FOUND (service: {db_service_name_for_debug}, searched: {search_details})"
|
|
1371
|
+
else:
|
|
1372
|
+
table_fqn_str = f"NOT FOUND (searched: {search_details}, hostname: {db_hostname_for_debug}, no service matched)"
|
|
1373
|
+
else:
|
|
1374
|
+
table_fqn_str = f"NOT FOUND (hostname: {db_hostname_for_debug}, no CDC topic info)"
|
|
1375
|
+
|
|
1376
|
+
# Get topic FQN for tracking (show expected FQN even if not found)
|
|
1377
|
+
if topic_entity:
|
|
1378
|
+
# Topic exists - use actual FQN
|
|
1379
|
+
topic_fqn_str = (
|
|
1380
|
+
topic_entity.fullyQualifiedName.root
|
|
1381
|
+
if hasattr(topic_entity.fullyQualifiedName, "root")
|
|
1382
|
+
else str(topic_entity.fullyQualifiedName)
|
|
1383
|
+
)
|
|
1384
|
+
else:
|
|
1385
|
+
# Topic not found - construct expected FQN with service name and quoting
|
|
1386
|
+
if effective_messaging_service and topic:
|
|
1387
|
+
# Build expected FQN: service."topic.name"
|
|
1388
|
+
topic_fqn_str = fqn.build(
|
|
1389
|
+
metadata=self.metadata,
|
|
1390
|
+
entity_type=Topic,
|
|
1391
|
+
service_name=effective_messaging_service,
|
|
1392
|
+
topic_name=str(topic.name),
|
|
1393
|
+
)
|
|
1394
|
+
elif topic:
|
|
1395
|
+
# No service configured - show quoted topic name
|
|
1396
|
+
topic_fqn_str = f'"{fqn.quote_name(str(topic.name))}"'
|
|
1397
|
+
else:
|
|
1398
|
+
topic_fqn_str = "NOT FOUND"
|
|
1399
|
+
|
|
1400
|
+
# Track failure reason
|
|
1401
|
+
if current_dataset_entity is None and topic_entity is None:
|
|
1402
|
+
failure_reason = "Missing both table and topic"
|
|
1403
|
+
elif current_dataset_entity is None:
|
|
1404
|
+
failure_reason = "Missing table"
|
|
1405
|
+
else:
|
|
1406
|
+
failure_reason = "Missing topic"
|
|
1407
|
+
|
|
1408
|
+
self.lineage_results.append(
|
|
1409
|
+
{
|
|
1410
|
+
"connector": pipeline_details.name,
|
|
1411
|
+
"table_fqn": table_fqn_str,
|
|
1412
|
+
"topic_fqn": topic_fqn_str,
|
|
1413
|
+
"status": "FAILED",
|
|
1414
|
+
"reason": failure_reason,
|
|
1415
|
+
}
|
|
1416
|
+
)
|
|
1417
|
+
logger.warning("=" * 80)
|
|
1418
|
+
logger.warning(
|
|
1419
|
+
f"⚠️ SKIPPING LINEAGE for connector: {pipeline_details.name}"
|
|
1420
|
+
)
|
|
1421
|
+
logger.warning("=" * 80)
|
|
1422
|
+
|
|
1423
|
+
# Log connector configuration for debugging FQN construction
|
|
1424
|
+
logger.debug(
|
|
1425
|
+
"\n📋 CONNECTOR CONFIGURATION (used for FQN construction):"
|
|
1426
|
+
)
|
|
1427
|
+
if pipeline_details.config:
|
|
1428
|
+
# Extract key config values used for FQN building
|
|
1429
|
+
connector_class = pipeline_details.config.get(
|
|
1430
|
+
"connector.class", "NOT SET"
|
|
1431
|
+
)
|
|
1432
|
+
db_hostname = (
|
|
1433
|
+
pipeline_details.config.get("database.hostname")
|
|
1434
|
+
or pipeline_details.config.get("database.server")
|
|
1435
|
+
or pipeline_details.config.get("connection.host")
|
|
1436
|
+
or "NOT SET"
|
|
1437
|
+
)
|
|
1438
|
+
bootstrap_servers = (
|
|
1439
|
+
pipeline_details.config.get("kafka.bootstrap.servers")
|
|
1440
|
+
or pipeline_details.config.get("bootstrap.servers")
|
|
1441
|
+
or "NOT SET"
|
|
1442
|
+
)
|
|
1443
|
+
table_include_list = (
|
|
1444
|
+
pipeline_details.config.get("table.include.list")
|
|
1445
|
+
or pipeline_details.config.get("table.whitelist")
|
|
1446
|
+
or "NOT SET"
|
|
1447
|
+
)
|
|
1448
|
+
|
|
1449
|
+
logger.debug(
|
|
1450
|
+
f" • connector.class: {connector_class}\n"
|
|
1451
|
+
f" • database.server.name: {database_server_name or 'NOT SET'}\n"
|
|
1452
|
+
f" • database.hostname: {db_hostname}\n"
|
|
1453
|
+
f" • table.include.list: {table_include_list}\n"
|
|
1454
|
+
f" • bootstrap.servers: {bootstrap_servers}\n"
|
|
1455
|
+
f" • Connector type: {pipeline_details.conn_type}"
|
|
1456
|
+
)
|
|
1457
|
+
else:
|
|
1458
|
+
logger.debug(" NO CONFIG AVAILABLE")
|
|
1459
|
+
|
|
1460
|
+
# Build expected topic FQN with proper quoting
|
|
1461
|
+
expected_topic_fqn = None
|
|
1462
|
+
topic_fqn_params = {}
|
|
1463
|
+
if effective_messaging_service:
|
|
1464
|
+
topic_fqn_params = {
|
|
1465
|
+
"service_name": effective_messaging_service,
|
|
1466
|
+
"topic_name": str(topic.name),
|
|
1467
|
+
}
|
|
1468
|
+
expected_topic_fqn = fqn.build(
|
|
1469
|
+
metadata=self.metadata,
|
|
1470
|
+
entity_type=Topic,
|
|
1471
|
+
**topic_fqn_params,
|
|
1472
|
+
)
|
|
1473
|
+
|
|
1474
|
+
# Build expected table FQN if we parsed CDC topic info
|
|
1475
|
+
expected_table_fqn = None
|
|
1476
|
+
table_fqn_params = {}
|
|
1477
|
+
if (
|
|
1478
|
+
"topic_info" in locals()
|
|
1479
|
+
and topic_info.get("database")
|
|
1480
|
+
and topic_info.get("table")
|
|
1481
|
+
):
|
|
1482
|
+
# Get matched database service name
|
|
1483
|
+
(
|
|
1484
|
+
db_service_name_for_log,
|
|
1485
|
+
_,
|
|
1486
|
+
) = self.get_service_from_connector_config(pipeline_details)
|
|
1487
|
+
if db_service_name_for_log:
|
|
1488
|
+
# Use wildcard pattern since we don't know the database name
|
|
1489
|
+
# Pattern: service.*.schema.table
|
|
1490
|
+
table_fqn_params = {
|
|
1491
|
+
"service_name": db_service_name_for_log,
|
|
1492
|
+
"schema_name": topic_info["database"],
|
|
1493
|
+
"table_name": topic_info["table"],
|
|
1494
|
+
"database_name": "* (wildcard - database name unknown)",
|
|
1495
|
+
}
|
|
1496
|
+
expected_table_fqn = f"{fqn.quote_name(db_service_name_for_log)}.*.{fqn.quote_name(topic_info['database'])}.{fqn.quote_name(topic_info['table'])}"
|
|
1497
|
+
|
|
1498
|
+
# Log FQN construction details
|
|
1499
|
+
logger.debug("\n🔧 FQN CONSTRUCTION DETAILS:")
|
|
1500
|
+
if expected_topic_fqn:
|
|
1501
|
+
logger.debug(f" Topic FQN built with: {topic_fqn_params}")
|
|
1502
|
+
logger.debug(f" → Result: {expected_topic_fqn}")
|
|
1503
|
+
else:
|
|
1504
|
+
logger.debug(
|
|
1505
|
+
f" Topic FQN: NOT BUILT (messaging service not configured)"
|
|
1506
|
+
)
|
|
1507
|
+
|
|
1508
|
+
if expected_table_fqn:
|
|
1509
|
+
logger.debug(f" Table FQN built with: {table_fqn_params}")
|
|
1510
|
+
logger.debug(f" → Result: {expected_table_fqn}")
|
|
1511
|
+
elif "topic_info" in locals() and topic_info:
|
|
1512
|
+
logger.debug(
|
|
1513
|
+
f" Table FQN: NOT BUILT (parsed topic_info: {topic_info}, but no db service matched)"
|
|
1514
|
+
)
|
|
1515
|
+
else:
|
|
1516
|
+
logger.debug(
|
|
1517
|
+
f" Table FQN: NOT BUILT (no CDC topic info parsed)"
|
|
1518
|
+
)
|
|
1519
|
+
|
|
1520
|
+
# Get bootstrap servers from config
|
|
1521
|
+
bootstrap_servers = "NOT SET"
|
|
1522
|
+
if pipeline_details.config:
|
|
1523
|
+
bootstrap_servers = (
|
|
1524
|
+
pipeline_details.config.get("kafka.bootstrap.servers")
|
|
1525
|
+
or pipeline_details.config.get("bootstrap.servers")
|
|
1526
|
+
or "NOT SET"
|
|
1527
|
+
)
|
|
1528
|
+
|
|
1529
|
+
if current_dataset_entity is None and topic_entity is None:
|
|
1530
|
+
expected_fqn_display = (
|
|
1531
|
+
expected_topic_fqn
|
|
1532
|
+
or f'<messaging-service>."{topic.name}" (messaging service not configured)'
|
|
1533
|
+
)
|
|
1534
|
+
|
|
1535
|
+
logger.warning(
|
|
1536
|
+
f"❌ MISSING BOTH SOURCE AND SINK:\n"
|
|
1537
|
+
f" • Table: NOT FOUND (searched for table related to topic '{topic.name}')\n"
|
|
1538
|
+
f" • Topic: NOT FOUND (searched for topic '{topic.name}')\n"
|
|
1539
|
+
f"\n"
|
|
1540
|
+
f"💡 ACTION REQUIRED:\n"
|
|
1541
|
+
f" 1. Ensure the topic is ingested in OpenMetadata:\n"
|
|
1542
|
+
f" - Topic name: {topic.name}\n"
|
|
1543
|
+
f" - Expected FQN: {expected_fqn_display}\n"
|
|
1544
|
+
f" - Messaging service: {effective_messaging_service or 'NOT CONFIGURED - will search all services'}\n"
|
|
1545
|
+
f" - Run messaging service metadata ingestion if needed\n"
|
|
1546
|
+
f" 2. Ensure the source table exists in OpenMetadata:\n"
|
|
1547
|
+
f" - Verify database service is connected and metadata is ingested\n"
|
|
1548
|
+
f"\n"
|
|
1549
|
+
f"⚠️ Lineage requires BOTH table and topic to be present in OpenMetadata"
|
|
1550
|
+
)
|
|
1551
|
+
elif current_dataset_entity is None:
|
|
1552
|
+
# Extract topic details
|
|
1553
|
+
topic_service = (
|
|
1554
|
+
topic_entity.service.name
|
|
1555
|
+
if hasattr(topic_entity, "service") and topic_entity.service
|
|
1556
|
+
else "UNKNOWN"
|
|
1557
|
+
)
|
|
1558
|
+
topic_fqn_full = (
|
|
1559
|
+
topic_entity.fullyQualifiedName.root
|
|
1560
|
+
if hasattr(topic_entity.fullyQualifiedName, "root")
|
|
1561
|
+
else topic_entity.fullyQualifiedName
|
|
1562
|
+
)
|
|
1563
|
+
|
|
1564
|
+
logger.warning(
|
|
1565
|
+
f"❌ MISSING SOURCE (Table):\n"
|
|
1566
|
+
f" • Table: NOT FOUND\n"
|
|
1567
|
+
f" • Topic: FOUND ✓\n"
|
|
1568
|
+
f" - FQN: {topic_fqn_full}\n"
|
|
1569
|
+
f" - Service: {topic_service}\n"
|
|
1570
|
+
f" - Topic name: {topic.name}\n"
|
|
1571
|
+
f"\n"
|
|
1572
|
+
f"💡 ACTION REQUIRED:\n"
|
|
1573
|
+
f" 1. Ensure the source table is ingested in OpenMetadata:\n"
|
|
1574
|
+
f" - For CDC connectors: Check table from schema '{topic_info.get('database') if 'topic_info' in locals() else 'UNKNOWN'}'\n"
|
|
1575
|
+
f" - Table name: {topic_info.get('table') if 'topic_info' in locals() else 'UNKNOWN'}\n"
|
|
1576
|
+
f" - Verify the table exists in database service\n"
|
|
1577
|
+
f" 2. Check connector configuration:\n"
|
|
1578
|
+
f" - Connector type: {pipeline_details.conn_type}\n"
|
|
1579
|
+
f" - Database server: {database_server_name or 'NOT SET'}\n"
|
|
1580
|
+
f" - Table include list: {pipeline_details.config.get('table.include.list', 'NOT SET') if pipeline_details.config else 'NO CONFIG'}\n"
|
|
1581
|
+
f"\n"
|
|
1582
|
+
f"⚠️ Cannot create lineage without both source (table) and sink (topic)"
|
|
1583
|
+
)
|
|
1584
|
+
else:
|
|
1585
|
+
# Extract table details - split FQN to show components
|
|
1586
|
+
table_fqn_full = (
|
|
1587
|
+
current_dataset_entity.fullyQualifiedName.root
|
|
1588
|
+
if hasattr(
|
|
1589
|
+
current_dataset_entity.fullyQualifiedName, "root"
|
|
1590
|
+
)
|
|
1591
|
+
else current_dataset_entity.fullyQualifiedName
|
|
1592
|
+
)
|
|
1593
|
+
table_service = (
|
|
1594
|
+
current_dataset_entity.service.name
|
|
1595
|
+
if hasattr(current_dataset_entity, "service")
|
|
1596
|
+
and current_dataset_entity.service
|
|
1597
|
+
else "UNKNOWN"
|
|
1598
|
+
)
|
|
1599
|
+
|
|
1600
|
+
# Parse table FQN: service.database.schema.table
|
|
1601
|
+
table_fqn_parts = str(table_fqn_full).split(".")
|
|
1602
|
+
if len(table_fqn_parts) >= 4:
|
|
1603
|
+
table_db_service = table_fqn_parts[0]
|
|
1604
|
+
table_database = table_fqn_parts[1]
|
|
1605
|
+
table_schema = table_fqn_parts[2]
|
|
1606
|
+
table_name = ".".join(
|
|
1607
|
+
table_fqn_parts[3:]
|
|
1608
|
+
) # Handle quoted names with dots
|
|
1609
|
+
else:
|
|
1610
|
+
table_db_service = table_service
|
|
1611
|
+
table_database = "UNKNOWN"
|
|
1612
|
+
table_schema = "UNKNOWN"
|
|
1613
|
+
table_name = "UNKNOWN"
|
|
1614
|
+
|
|
1615
|
+
expected_topic_fqn_display = (
|
|
1616
|
+
expected_topic_fqn or f'<messaging-service>."{topic.name}"'
|
|
1617
|
+
)
|
|
1618
|
+
|
|
1619
|
+
logger.warning(
|
|
1620
|
+
f"❌ MISSING SINK (Topic):\n"
|
|
1621
|
+
f" • Table: FOUND ✓\n"
|
|
1622
|
+
f" - FQN: {table_fqn_full}\n"
|
|
1623
|
+
f" - Service: {table_db_service}\n"
|
|
1624
|
+
f" - Database: {table_database}\n"
|
|
1625
|
+
f" - Schema: {table_schema}\n"
|
|
1626
|
+
f" - Table: {table_name}\n"
|
|
1627
|
+
f" • Topic: NOT FOUND\n"
|
|
1628
|
+
f" - Searched for: {topic.name}\n"
|
|
1629
|
+
f" - Expected FQN: {expected_topic_fqn_display}\n"
|
|
1630
|
+
f"\n"
|
|
1631
|
+
f"💡 ACTION REQUIRED:\n"
|
|
1632
|
+
f" 1. Ensure the topic is ingested in OpenMetadata:\n"
|
|
1633
|
+
f" - Topic name in Kafka: {topic.name}\n"
|
|
1634
|
+
f" - Expected FQN in OM: {expected_topic_fqn_display}\n"
|
|
1635
|
+
f" - Messaging service: {effective_messaging_service or 'NOT CONFIGURED - will search all services'}\n"
|
|
1636
|
+
f" - Note: Topics with dots (.) in the name are quoted in FQN\n"
|
|
1637
|
+
f" 2. Run messaging service metadata ingestion:\n"
|
|
1638
|
+
f" - Ingest topics from messaging service '{effective_messaging_service or 'your-kafka-service'}'\n"
|
|
1639
|
+
f" - Verify topic '{topic.name}' exists in Kafka cluster\n"
|
|
1640
|
+
f" 3. Check Kafka Connect configuration:\n"
|
|
1641
|
+
f" - Connector type: {pipeline_details.conn_type}\n"
|
|
1642
|
+
f" - Bootstrap servers: {bootstrap_servers}\n"
|
|
1643
|
+
f" - Database server (CDC): {database_server_name or 'NOT SET'}\n"
|
|
1644
|
+
f"\n"
|
|
1645
|
+
f"⚠️ Cannot create lineage without both source (table) and sink (topic)"
|
|
1646
|
+
)
|
|
1647
|
+
|
|
1648
|
+
logger.warning("=" * 80)
|
|
1649
|
+
continue
|
|
1650
|
+
|
|
1651
|
+
# We have both table and topic - create lineage between them
|
|
1652
|
+
logger.info(f"✓ Found both table and topic entities for lineage")
|
|
1653
|
+
|
|
1654
|
+
# Determine lineage direction based on connector type
|
|
1655
|
+
if pipeline_details.conn_type == ConnectorType.SINK.value:
|
|
1656
|
+
# SINK: topic → table
|
|
1657
|
+
from_entity, to_entity = topic_entity, current_dataset_entity
|
|
1658
|
+
logger.info(
|
|
1659
|
+
f"Creating SINK lineage: {topic_entity.fullyQualifiedName.root if hasattr(topic_entity.fullyQualifiedName, 'root') else topic_entity.fullyQualifiedName} "
|
|
1660
|
+
f"→ {current_dataset_entity.fullyQualifiedName.root if hasattr(current_dataset_entity.fullyQualifiedName, 'root') else current_dataset_entity.fullyQualifiedName}"
|
|
1661
|
+
)
|
|
1662
|
+
else:
|
|
1663
|
+
# SOURCE: table → topic
|
|
1664
|
+
from_entity, to_entity = current_dataset_entity, topic_entity
|
|
1665
|
+
logger.info(
|
|
1666
|
+
f"Creating SOURCE lineage: {current_dataset_entity.fullyQualifiedName.root if hasattr(current_dataset_entity.fullyQualifiedName, 'root') else current_dataset_entity.fullyQualifiedName} "
|
|
1667
|
+
f"→ {topic_entity.fullyQualifiedName.root if hasattr(topic_entity.fullyQualifiedName, 'root') else topic_entity.fullyQualifiedName}"
|
|
1668
|
+
)
|
|
1669
|
+
|
|
1670
|
+
# Build column-level lineage (best effort - don't fail entity-level lineage)
|
|
1671
|
+
column_lineage = None
|
|
1672
|
+
try:
|
|
1673
|
+
column_lineage = self.build_column_lineage(
|
|
1674
|
+
from_entity=from_entity,
|
|
1675
|
+
to_entity=to_entity,
|
|
1676
|
+
topic_entity=topic_entity,
|
|
1677
|
+
pipeline_details=pipeline_details,
|
|
1678
|
+
)
|
|
1679
|
+
except Exception as exc:
|
|
1680
|
+
logger.warning(
|
|
1681
|
+
f"Failed to build column-level lineage for {pipeline_details.name}: {exc}. "
|
|
1682
|
+
"Entity-level lineage will still be created."
|
|
1683
|
+
)
|
|
1684
|
+
logger.debug(traceback.format_exc())
|
|
1685
|
+
|
|
1686
|
+
# Log entity details before creating lineage request
|
|
1687
|
+
from_entity_type = type(from_entity).__name__
|
|
1688
|
+
to_entity_type = type(to_entity).__name__
|
|
1689
|
+
from_entity_id = (
|
|
1690
|
+
from_entity.id.root
|
|
1691
|
+
if hasattr(from_entity.id, "root")
|
|
1692
|
+
else from_entity.id
|
|
1693
|
+
)
|
|
1694
|
+
to_entity_id = (
|
|
1695
|
+
to_entity.id.root if hasattr(to_entity.id, "root") else to_entity.id
|
|
1696
|
+
)
|
|
1697
|
+
from_entity_fqn = (
|
|
1698
|
+
from_entity.fullyQualifiedName.root
|
|
1699
|
+
if hasattr(from_entity.fullyQualifiedName, "root")
|
|
1700
|
+
else from_entity.fullyQualifiedName
|
|
1701
|
+
)
|
|
1702
|
+
to_entity_fqn = (
|
|
1703
|
+
to_entity.fullyQualifiedName.root
|
|
1704
|
+
if hasattr(to_entity.fullyQualifiedName, "root")
|
|
1705
|
+
else to_entity.fullyQualifiedName
|
|
1706
|
+
)
|
|
1707
|
+
|
|
1708
|
+
logger.info(
|
|
1709
|
+
f"Creating lineage edge:\n"
|
|
1710
|
+
f" FROM: {from_entity_type} | ID={from_entity_id} | FQN={from_entity_fqn}\n"
|
|
1711
|
+
f" TO: {to_entity_type} | ID={to_entity_id} | FQN={to_entity_fqn}"
|
|
1712
|
+
)
|
|
1713
|
+
|
|
1714
|
+
lineage_details = LineageDetails(
|
|
1715
|
+
pipeline=EntityReference(
|
|
1716
|
+
id=pipeline_entity.id.root, type="pipeline"
|
|
1717
|
+
),
|
|
1718
|
+
source=LineageSource.PipelineLineage,
|
|
1719
|
+
columnsLineage=column_lineage,
|
|
1720
|
+
)
|
|
1721
|
+
|
|
1722
|
+
lineage_request = AddLineageRequest(
|
|
1723
|
+
edge=EntitiesEdge(
|
|
1724
|
+
fromEntity=EntityReference(
|
|
1725
|
+
id=from_entity.id,
|
|
1726
|
+
type=ENTITY_REFERENCE_TYPE_MAP[type(from_entity).__name__],
|
|
1727
|
+
),
|
|
1728
|
+
toEntity=EntityReference(
|
|
1729
|
+
id=to_entity.id,
|
|
1730
|
+
type=ENTITY_REFERENCE_TYPE_MAP[type(to_entity).__name__],
|
|
1731
|
+
),
|
|
1732
|
+
lineageDetails=lineage_details,
|
|
1733
|
+
)
|
|
1734
|
+
)
|
|
1735
|
+
|
|
1736
|
+
# Track successful lineage creation
|
|
1737
|
+
table_fqn_str = (
|
|
1738
|
+
current_dataset_entity.fullyQualifiedName.root
|
|
1739
|
+
if hasattr(current_dataset_entity.fullyQualifiedName, "root")
|
|
1740
|
+
else str(current_dataset_entity.fullyQualifiedName)
|
|
242
1741
|
)
|
|
1742
|
+
topic_fqn_str = (
|
|
1743
|
+
topic_entity.fullyQualifiedName.root
|
|
1744
|
+
if hasattr(topic_entity.fullyQualifiedName, "root")
|
|
1745
|
+
else str(topic_entity.fullyQualifiedName)
|
|
1746
|
+
)
|
|
1747
|
+
self.lineage_results.append(
|
|
1748
|
+
{
|
|
1749
|
+
"connector": pipeline_details.name,
|
|
1750
|
+
"table_fqn": table_fqn_str,
|
|
1751
|
+
"topic_fqn": topic_fqn_str,
|
|
1752
|
+
"status": "SUCCESS",
|
|
1753
|
+
"reason": f"{from_entity_type} → {to_entity_type}",
|
|
1754
|
+
}
|
|
1755
|
+
)
|
|
1756
|
+
|
|
1757
|
+
# Log successful lineage creation (debug level - details in summary table)
|
|
1758
|
+
logger.debug("=" * 80)
|
|
1759
|
+
logger.debug(
|
|
1760
|
+
f"✅ LINEAGE CREATED SUCCESSFULLY for connector: {pipeline_details.name}"
|
|
1761
|
+
)
|
|
1762
|
+
logger.debug("=" * 80)
|
|
1763
|
+
|
|
1764
|
+
# Extract service names for logging
|
|
1765
|
+
from_service = "UNKNOWN"
|
|
1766
|
+
to_service = "UNKNOWN"
|
|
1767
|
+
if hasattr(from_entity, "service") and from_entity.service:
|
|
1768
|
+
from_service = (
|
|
1769
|
+
from_entity.service.name
|
|
1770
|
+
if hasattr(from_entity.service.name, "root")
|
|
1771
|
+
else from_entity.service.name
|
|
1772
|
+
)
|
|
1773
|
+
if hasattr(to_entity, "service") and to_entity.service:
|
|
1774
|
+
to_service = (
|
|
1775
|
+
to_entity.service.name
|
|
1776
|
+
if hasattr(to_entity.service.name, "root")
|
|
1777
|
+
else to_entity.service.name
|
|
1778
|
+
)
|
|
1779
|
+
|
|
1780
|
+
logger.debug(
|
|
1781
|
+
f"📊 LINEAGE DETAILS:\n"
|
|
1782
|
+
f" • FROM: {from_entity_type}\n"
|
|
1783
|
+
f" - FQN: {from_entity_fqn}\n"
|
|
1784
|
+
f" - Service: {from_service}\n"
|
|
1785
|
+
f" • TO: {to_entity_type}\n"
|
|
1786
|
+
f" - FQN: {to_entity_fqn}\n"
|
|
1787
|
+
f" - Service: {to_service}\n"
|
|
1788
|
+
f" • PIPELINE: {pipeline_details.name}\n"
|
|
1789
|
+
f" - Type: {pipeline_details.conn_type}\n"
|
|
1790
|
+
f" - Pipeline FQN: {pipeline_fqn}\n"
|
|
1791
|
+
f" • COLUMN LINEAGE: {len(column_lineage) if column_lineage else 0} column mappings\n"
|
|
1792
|
+
)
|
|
1793
|
+
logger.debug("=" * 80)
|
|
1794
|
+
|
|
1795
|
+
yield Either(right=lineage_request)
|
|
243
1796
|
except Exception as exc:
|
|
244
1797
|
yield Either(
|
|
245
1798
|
left=StackTraceError(
|
|
@@ -291,7 +1844,7 @@ class KafkaconnectSource(PipelineServiceSource):
|
|
|
291
1844
|
pipeline_details.status, StatusType.Pending
|
|
292
1845
|
),
|
|
293
1846
|
taskStatus=task_status,
|
|
294
|
-
timestamp=Timestamp(datetime_to_ts(datetime.now()))
|
|
1847
|
+
timestamp=Timestamp(datetime_to_ts(datetime.now())),
|
|
295
1848
|
# Kafka connect doesn't provide any details with exec time
|
|
296
1849
|
)
|
|
297
1850
|
|
|
@@ -317,3 +1870,54 @@ class KafkaconnectSource(PipelineServiceSource):
|
|
|
317
1870
|
stackTrace=traceback.format_exc(),
|
|
318
1871
|
)
|
|
319
1872
|
)
|
|
1873
|
+
|
|
1874
|
+
def print_lineage_summary(self):
    """
    Log a summary table of lineage creation results.

    Reads ``self.lineage_results`` (a list of dicts keyed by ``connector``,
    ``table_fqn``, ``topic_fqn``, ``status`` and ``reason``) and logs, at
    INFO level, a header, one row per record, and the total / success /
    failed counts. Nothing is logged when no results were recorded.

    Every cell is converted to text before padding, so a record holding
    ``None`` (or lacking a key) is rendered as "UNKNOWN" instead of raising
    while the table is being formatted.
    """
    results = self.lineage_results
    if not results:
        return

    def _cell(record, key):
        """Return the record's value for ``key`` as display text."""
        value = record.get(key)
        # A width spec such as ``:<50`` is only valid on str; None or any
        # other object would raise TypeError inside the f-string.
        return "UNKNOWN" if value is None else str(value)

    banner = "=" * 180
    logger.info("\n" + banner)
    logger.info("LINEAGE CREATION SUMMARY")
    logger.info(banner)

    # Print header
    logger.info(
        f"{'Connector':<35} | {'Table FQN':<50} | {'Topic FQN':<50} | {'Status':<10} | {'Details':<20}"
    )
    logger.info("-" * 180)

    # Print all results
    for result in results:
        status = _cell(result, "status")
        status_icon = "✅" if status == "SUCCESS" else "❌"
        logger.info(
            f"{_cell(result, 'connector'):<35} | "
            f"{_cell(result, 'table_fqn'):<50} | "
            f"{_cell(result, 'topic_fqn'):<50} | "
            f"{status_icon} {status:<8} | "
            f"{_cell(result, 'reason'):<20}"
        )

    # Print summary stats; total is at least 1 here because empty results
    # returned early, so the percentage division is always safe.
    logger.info(banner)
    total = len(results)
    success_count = sum(1 for r in results if r.get("status") == "SUCCESS")
    failure_count = sum(1 for r in results if r.get("status") == "FAILED")
    success_pct = success_count / total * 100

    logger.info(
        f"Total: {total} | Success: {success_count} ({success_pct:.1f}%) | Failed: {failure_count}"
    )
    logger.info(banner + "\n")
|
|
1917
|
+
|
|
1918
|
+
def close(self):
    """
    Called at the end of the ingestion workflow to cleanup and print summary.

    The lineage summary is diagnostic output only, so it is treated as
    best-effort: a failure while printing it is logged and does not prevent
    the parent class ``close()`` from running.
    """
    try:
        self.print_lineage_summary()
    except Exception as exc:
        logger.warning(f"Failed to print lineage summary: {exc}")
        logger.debug(traceback.format_exc())
    finally:
        # Always hand over to the parent cleanup, even if the summary failed.
        super().close()
|