openmetadata-ingestion 1.3.1.2__py3-none-any.whl → 1.3.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of openmetadata-ingestion might be problematic. Click here for more details.
- metadata/clients/azure_client.py +85 -0
- metadata/data_quality/source/test_suite.py +2 -2
- metadata/examples/workflows/datalake_azure_default.yaml +29 -0
- metadata/examples/workflows/dbt.yaml +17 -6
- metadata/generated/schema/analytics/__init__.py +1 -1
- metadata/generated/schema/analytics/basic.py +1 -1
- metadata/generated/schema/analytics/reportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/__init__.py +1 -1
- metadata/generated/schema/analytics/reportDataType/aggregatedCostAnalysisReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/entityReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/rawCostAnalysisReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/webAnalyticEntityViewReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/webAnalyticUserActivityReportData.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEvent.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventData.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/__init__.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/customEvent.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/pageViewEvent.py +1 -1
- metadata/generated/schema/api/__init__.py +1 -1
- metadata/generated/schema/api/addGlossaryToAssetsRequest.py +1 -1
- metadata/generated/schema/api/analytics/__init__.py +1 -1
- metadata/generated/schema/api/analytics/createWebAnalyticEvent.py +1 -1
- metadata/generated/schema/api/automations/__init__.py +1 -1
- metadata/generated/schema/api/automations/createWorkflow.py +1 -1
- metadata/generated/schema/api/bulkAssets.py +1 -1
- metadata/generated/schema/api/classification/__init__.py +1 -1
- metadata/generated/schema/api/classification/createClassification.py +1 -1
- metadata/generated/schema/api/classification/createTag.py +1 -1
- metadata/generated/schema/api/classification/loadTags.py +1 -1
- metadata/generated/schema/api/createBot.py +1 -1
- metadata/generated/schema/api/createEventPublisherJob.py +1 -1
- metadata/generated/schema/api/createType.py +1 -1
- metadata/generated/schema/api/data/__init__.py +1 -1
- metadata/generated/schema/api/data/createChart.py +1 -1
- metadata/generated/schema/api/data/createContainer.py +1 -1
- metadata/generated/schema/api/data/createCustomProperty.py +1 -1
- metadata/generated/schema/api/data/createDashboard.py +1 -1
- metadata/generated/schema/api/data/createDashboardDataModel.py +1 -1
- metadata/generated/schema/api/data/createDatabase.py +1 -1
- metadata/generated/schema/api/data/createDatabaseSchema.py +1 -1
- metadata/generated/schema/api/data/createGlossary.py +1 -1
- metadata/generated/schema/api/data/createGlossaryTerm.py +1 -1
- metadata/generated/schema/api/data/createMlModel.py +1 -1
- metadata/generated/schema/api/data/createPipeline.py +1 -1
- metadata/generated/schema/api/data/createQuery.py +1 -1
- metadata/generated/schema/api/data/createSearchIndex.py +1 -1
- metadata/generated/schema/api/data/createStoredProcedure.py +1 -1
- metadata/generated/schema/api/data/createTable.py +1 -1
- metadata/generated/schema/api/data/createTableProfile.py +1 -1
- metadata/generated/schema/api/data/createTopic.py +1 -1
- metadata/generated/schema/api/data/loadGlossary.py +1 -1
- metadata/generated/schema/api/data/restoreEntity.py +1 -1
- metadata/generated/schema/api/dataInsight/__init__.py +1 -1
- metadata/generated/schema/api/dataInsight/createDataInsightChart.py +1 -1
- metadata/generated/schema/api/dataInsight/kpi/__init__.py +1 -1
- metadata/generated/schema/api/dataInsight/kpi/createKpiRequest.py +1 -1
- metadata/generated/schema/api/docStore/__init__.py +1 -1
- metadata/generated/schema/api/docStore/createDocument.py +1 -1
- metadata/generated/schema/api/domains/__init__.py +1 -1
- metadata/generated/schema/api/domains/createDataProduct.py +1 -1
- metadata/generated/schema/api/domains/createDomain.py +1 -1
- metadata/generated/schema/api/feed/__init__.py +1 -1
- metadata/generated/schema/api/feed/closeTask.py +1 -1
- metadata/generated/schema/api/feed/createPost.py +1 -1
- metadata/generated/schema/api/feed/createSuggestion.py +1 -1
- metadata/generated/schema/api/feed/createThread.py +5 -1
- metadata/generated/schema/api/feed/resolveTask.py +1 -1
- metadata/generated/schema/api/feed/threadCount.py +1 -1
- metadata/generated/schema/api/lineage/__init__.py +1 -1
- metadata/generated/schema/api/lineage/addLineage.py +1 -1
- metadata/generated/schema/api/openMetadataServerVersion.py +1 -1
- metadata/generated/schema/api/policies/__init__.py +1 -1
- metadata/generated/schema/api/policies/createPolicy.py +1 -1
- metadata/generated/schema/api/services/__init__.py +1 -1
- metadata/generated/schema/api/services/createDashboardService.py +1 -1
- metadata/generated/schema/api/services/createDatabaseService.py +1 -1
- metadata/generated/schema/api/services/createMessagingService.py +1 -1
- metadata/generated/schema/api/services/createMetadataService.py +1 -1
- metadata/generated/schema/api/services/createMlModelService.py +1 -1
- metadata/generated/schema/api/services/createPipelineService.py +1 -1
- metadata/generated/schema/api/services/createSearchService.py +1 -1
- metadata/generated/schema/api/services/createStorageService.py +1 -1
- metadata/generated/schema/api/services/ingestionPipelines/__init__.py +1 -1
- metadata/generated/schema/api/services/ingestionPipelines/createIngestionPipeline.py +1 -1
- metadata/generated/schema/api/setOwner.py +1 -1
- metadata/generated/schema/api/teams/__init__.py +1 -1
- metadata/generated/schema/api/teams/createPersona.py +1 -1
- metadata/generated/schema/api/teams/createRole.py +1 -1
- metadata/generated/schema/api/teams/createTeam.py +1 -1
- metadata/generated/schema/api/teams/createUser.py +1 -1
- metadata/generated/schema/api/tests/__init__.py +1 -1
- metadata/generated/schema/api/tests/createCustomMetric.py +1 -1
- metadata/generated/schema/api/tests/createLogicalTestCases.py +1 -1
- metadata/generated/schema/api/tests/createTestCase.py +1 -1
- metadata/generated/schema/api/tests/createTestCaseResolutionStatus.py +1 -1
- metadata/generated/schema/api/tests/createTestDefinition.py +1 -1
- metadata/generated/schema/api/tests/createTestSuite.py +1 -1
- metadata/generated/schema/api/voteRequest.py +1 -1
- metadata/generated/schema/auth/__init__.py +1 -1
- metadata/generated/schema/auth/basicAuth.py +1 -1
- metadata/generated/schema/auth/basicLoginRequest.py +1 -1
- metadata/generated/schema/auth/changePasswordRequest.py +1 -1
- metadata/generated/schema/auth/createPersonalToken.py +1 -1
- metadata/generated/schema/auth/emailRequest.py +1 -1
- metadata/generated/schema/auth/emailVerificationToken.py +1 -1
- metadata/generated/schema/auth/generateToken.py +1 -1
- metadata/generated/schema/auth/jwtAuth.py +1 -1
- metadata/generated/schema/auth/loginRequest.py +1 -1
- metadata/generated/schema/auth/logoutRequest.py +1 -1
- metadata/generated/schema/auth/passwordResetRequest.py +1 -1
- metadata/generated/schema/auth/passwordResetToken.py +1 -1
- metadata/generated/schema/auth/personalAccessToken.py +1 -1
- metadata/generated/schema/auth/refreshToken.py +1 -1
- metadata/generated/schema/auth/registrationRequest.py +1 -1
- metadata/generated/schema/auth/revokePersonalToken.py +1 -1
- metadata/generated/schema/auth/revokeToken.py +1 -1
- metadata/generated/schema/auth/serviceTokenEnum.py +1 -1
- metadata/generated/schema/auth/ssoAuth.py +1 -1
- metadata/generated/schema/auth/tokenRefreshRequest.py +1 -1
- metadata/generated/schema/configuration/__init__.py +1 -1
- metadata/generated/schema/configuration/appsPrivateConfiguration.py +6 -2
- metadata/generated/schema/configuration/authConfig.py +1 -1
- metadata/generated/schema/configuration/authenticationConfiguration.py +13 -2
- metadata/generated/schema/configuration/authorizerConfiguration.py +1 -1
- metadata/generated/schema/configuration/changeEventConfiguration.py +1 -1
- metadata/generated/schema/configuration/dataQualityConfiguration.py +1 -1
- metadata/generated/schema/configuration/elasticSearchConfiguration.py +1 -1
- metadata/generated/schema/configuration/eventHandlerConfiguration.py +1 -1
- metadata/generated/schema/configuration/fernetConfiguration.py +1 -1
- metadata/generated/schema/configuration/jwtTokenConfiguration.py +1 -1
- metadata/generated/schema/configuration/kafkaEventConfiguration.py +1 -1
- metadata/generated/schema/configuration/ldapConfiguration.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/__init__.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/customTrustManagerConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/hostNameConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/jvmDefaultConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/trustAllConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/truststoreConfig.py +1 -1
- metadata/generated/schema/configuration/loginConfiguration.py +1 -1
- metadata/generated/schema/configuration/logoConfiguration.py +1 -1
- metadata/generated/schema/configuration/pipelineServiceClientConfiguration.py +1 -1
- metadata/generated/schema/configuration/slackAppConfiguration.py +1 -1
- metadata/generated/schema/configuration/taskNotificationConfiguration.py +1 -1
- metadata/generated/schema/configuration/testResultNotificationConfiguration.py +1 -1
- metadata/generated/schema/dataInsight/__init__.py +1 -1
- metadata/generated/schema/dataInsight/dataInsightChart.py +1 -1
- metadata/generated/schema/dataInsight/dataInsightChartResult.py +1 -1
- metadata/generated/schema/dataInsight/kpi/__init__.py +1 -1
- metadata/generated/schema/dataInsight/kpi/basic.py +1 -1
- metadata/generated/schema/dataInsight/kpi/kpi.py +1 -1
- metadata/generated/schema/dataInsight/type/__init__.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsCount.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsSize.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsCount.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsSize.py +1 -1
- metadata/generated/schema/dataInsight/type/dailyActiveUsers.py +1 -1
- metadata/generated/schema/dataInsight/type/mostActiveUsers.py +1 -1
- metadata/generated/schema/dataInsight/type/mostViewedEntities.py +1 -1
- metadata/generated/schema/dataInsight/type/pageViewsByEntities.py +1 -1
- metadata/generated/schema/dataInsight/type/percentageOfEntitiesWithDescriptionByType.py +1 -1
- metadata/generated/schema/dataInsight/type/percentageOfEntitiesWithOwnerByType.py +1 -1
- metadata/generated/schema/dataInsight/type/percentageOfServicesWithDescription.py +1 -1
- metadata/generated/schema/dataInsight/type/percentageOfServicesWithOwner.py +1 -1
- metadata/generated/schema/dataInsight/type/totalEntitiesByTier.py +1 -1
- metadata/generated/schema/dataInsight/type/totalEntitiesByType.py +1 -1
- metadata/generated/schema/dataInsight/type/unusedAssets.py +1 -1
- metadata/generated/schema/email/__init__.py +1 -1
- metadata/generated/schema/email/emailRequest.py +1 -1
- metadata/generated/schema/email/smtpSettings.py +1 -1
- metadata/generated/schema/entity/__init__.py +1 -1
- metadata/generated/schema/entity/applications/__init__.py +1 -1
- metadata/generated/schema/entity/applications/app.py +12 -2
- metadata/generated/schema/entity/applications/appRunRecord.py +2 -7
- metadata/generated/schema/entity/applications/configuration/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/applicationConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/autoTaggerAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/metaPilotAppConfig.py +8 -17
- metadata/generated/schema/entity/applications/configuration/internal/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/dataInsightsAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/dataInsightsReportAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/searchIndexingAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/external/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/external/metaPilotAppPrivateConfig.py +1 -1
- metadata/generated/schema/entity/applications/createAppRequest.py +2 -2
- metadata/generated/schema/entity/applications/jobStatus.py +1 -1
- metadata/generated/schema/entity/applications/liveExecutionContext.py +1 -1
- metadata/generated/schema/entity/applications/marketplace/__init__.py +1 -1
- metadata/generated/schema/entity/applications/marketplace/appMarketPlaceDefinition.py +4 -1
- metadata/generated/schema/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.py +5 -2
- metadata/generated/schema/entity/applications/scheduledExecutionContext.py +1 -1
- metadata/generated/schema/entity/automations/__init__.py +1 -1
- metadata/generated/schema/entity/automations/testServiceConnection.py +1 -1
- metadata/generated/schema/entity/automations/workflow.py +1 -1
- metadata/generated/schema/entity/bot.py +1 -1
- metadata/generated/schema/entity/classification/__init__.py +1 -1
- metadata/generated/schema/entity/classification/classification.py +1 -1
- metadata/generated/schema/entity/classification/tag.py +1 -1
- metadata/generated/schema/entity/data/__init__.py +1 -1
- metadata/generated/schema/entity/data/chart.py +1 -1
- metadata/generated/schema/entity/data/container.py +1 -1
- metadata/generated/schema/entity/data/dashboard.py +1 -1
- metadata/generated/schema/entity/data/dashboardDataModel.py +1 -1
- metadata/generated/schema/entity/data/database.py +1 -1
- metadata/generated/schema/entity/data/databaseSchema.py +1 -1
- metadata/generated/schema/entity/data/glossary.py +1 -1
- metadata/generated/schema/entity/data/glossaryTerm.py +1 -1
- metadata/generated/schema/entity/data/metrics.py +1 -1
- metadata/generated/schema/entity/data/mlmodel.py +1 -1
- metadata/generated/schema/entity/data/pipeline.py +1 -1
- metadata/generated/schema/entity/data/query.py +1 -1
- metadata/generated/schema/entity/data/report.py +1 -1
- metadata/generated/schema/entity/data/searchIndex.py +1 -1
- metadata/generated/schema/entity/data/storedProcedure.py +1 -1
- metadata/generated/schema/entity/data/table.py +1 -1
- metadata/generated/schema/entity/data/topic.py +1 -1
- metadata/generated/schema/entity/docStore/__init__.py +1 -1
- metadata/generated/schema/entity/docStore/document.py +1 -1
- metadata/generated/schema/entity/domains/__init__.py +1 -1
- metadata/generated/schema/entity/domains/dataProduct.py +1 -1
- metadata/generated/schema/entity/domains/domain.py +1 -1
- metadata/generated/schema/entity/events/__init__.py +1 -1
- metadata/generated/schema/entity/events/webhook.py +1 -1
- metadata/generated/schema/entity/feed/__init__.py +1 -1
- metadata/generated/schema/entity/feed/suggestion.py +1 -1
- metadata/generated/schema/entity/feed/thread.py +12 -1
- metadata/generated/schema/entity/policies/__init__.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/__init__.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/resourceDescriptor.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/resourcePermission.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/rule.py +1 -1
- metadata/generated/schema/entity/policies/filters.py +1 -1
- metadata/generated/schema/entity/policies/policy.py +1 -1
- metadata/generated/schema/entity/services/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/common/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslCertPaths.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslCertValues.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/connectionBasicType.py +6 -1
- metadata/generated/schema/entity/services/connections/dashboard/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/customDashboardConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/domoDashboardConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/lightdashConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/lookerConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/metabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/modeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/mstrConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerBIConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/qlikSenseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/quickSightConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/redashConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/supersetConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/tableauConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/athenaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/azureSQLConnection.py +34 -1
- metadata/generated/schema/entity/services/connections/database/bigQueryConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/bigTableConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/clickhouseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/azureConfig.py +20 -0
- metadata/generated/schema/entity/services/connections/database/common/basicAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/iamAuthConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/jwtAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/database/couchbaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/customDatabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/databricksConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/azureConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/gcsConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/s3Config.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/db2Connection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/deltaLakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/domoDatabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/dorisConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/druidConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/dynamoDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/glueConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/greenplumConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/hiveConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/dynamoDbCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/glueCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/hiveCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/icebergCatalog.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/icebergFileSystem.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/restCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/icebergConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/impalaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mariaDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mongoDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mssqlConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mysqlConnection.py +7 -3
- metadata/generated/schema/entity/services/connections/database/oracleConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/pinotDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/postgresConnection.py +7 -3
- metadata/generated/schema/entity/services/connections/database/prestoConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/redshiftConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/salesforceConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapHana/__init__.py +3 -0
- metadata/generated/schema/entity/services/connections/database/sapHana/sapHanaHDBConnection.py +20 -0
- metadata/generated/schema/entity/services/connections/database/sapHana/sapHanaSQLConnection.py +36 -0
- metadata/generated/schema/entity/services/connections/database/sapHanaConnection.py +6 -40
- metadata/generated/schema/entity/services/connections/database/sasConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/singleStoreConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/snowflakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sqliteConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/trinoConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/unityCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/verticaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/customMessagingConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/kafkaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/kinesisConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/pulsarConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/redpandaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/saslMechanismType.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/alationConnection.py +34 -1
- metadata/generated/schema/entity/services/connections/metadata/amundsenConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/atlasConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/metadataESConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/openMetadataConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/customMlModelConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/mlflowConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/sageMakerConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/sklearnConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/airbyteConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/airflowConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/backendConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/customPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/dagsterConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/databricksPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/domoPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/fivetranConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/gluePipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/nifiConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/sparkConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/splineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/search/customSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/apiAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/basicAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/openSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/serviceConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/adlsConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/customStorageConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/gcsConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/s3Connection.py +1 -1
- metadata/generated/schema/entity/services/connections/testConnectionDefinition.py +1 -1
- metadata/generated/schema/entity/services/connections/testConnectionResult.py +1 -1
- metadata/generated/schema/entity/services/dashboardService.py +1 -1
- metadata/generated/schema/entity/services/databaseService.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/__init__.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/ingestionPipeline.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/pipelineServiceClientResponse.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/status.py +1 -1
- metadata/generated/schema/entity/services/messagingService.py +1 -1
- metadata/generated/schema/entity/services/metadataService.py +1 -1
- metadata/generated/schema/entity/services/mlmodelService.py +1 -1
- metadata/generated/schema/entity/services/pipelineService.py +1 -1
- metadata/generated/schema/entity/services/searchService.py +1 -1
- metadata/generated/schema/entity/services/serviceType.py +1 -1
- metadata/generated/schema/entity/services/storageService.py +1 -1
- metadata/generated/schema/entity/teams/__init__.py +1 -1
- metadata/generated/schema/entity/teams/persona.py +1 -1
- metadata/generated/schema/entity/teams/role.py +1 -1
- metadata/generated/schema/entity/teams/team.py +1 -1
- metadata/generated/schema/entity/teams/teamHierarchy.py +1 -1
- metadata/generated/schema/entity/teams/user.py +1 -1
- metadata/generated/schema/entity/type.py +1 -1
- metadata/generated/schema/entity/utils/__init__.py +1 -1
- metadata/generated/schema/entity/utils/entitiesCount.py +1 -1
- metadata/generated/schema/entity/utils/servicesCount.py +1 -1
- metadata/generated/schema/entity/utils/supersetApiConnection.py +1 -1
- metadata/generated/schema/events/__init__.py +1 -1
- metadata/generated/schema/events/alertMetrics.py +1 -1
- metadata/generated/schema/events/api/__init__.py +1 -1
- metadata/generated/schema/events/api/createEventSubscription.py +1 -1
- metadata/generated/schema/events/emailAlertConfig.py +1 -1
- metadata/generated/schema/events/eventFilterRule.py +1 -1
- metadata/generated/schema/events/eventSubscription.py +1 -1
- metadata/generated/schema/events/eventSubscriptionOffset.py +1 -1
- metadata/generated/schema/events/failedEvent.py +1 -1
- metadata/generated/schema/events/filterResourceDescriptor.py +1 -1
- metadata/generated/schema/events/subscriptionResourceDescriptor.py +1 -1
- metadata/generated/schema/metadataIngestion/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/application.py +1 -1
- metadata/generated/schema/metadataIngestion/applicationPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dashboardServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dataInsightPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceProfilerPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceQueryLineagePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceQueryUsagePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtPipeline.py +9 -11
- metadata/generated/schema/metadataIngestion/dbtconfig/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtAzureConfig.py +7 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtBucketDetails.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtCloudConfig.py +7 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtGCSConfig.py +7 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtHttpConfig.py +7 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtLocalConfig.py +7 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtS3Config.py +7 -1
- metadata/generated/schema/metadataIngestion/messagingServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/metadataToElasticSearchPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/mlmodelServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/pipelineServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/searchServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/containerMetadataConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/manifestMetadataConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageBucketDetails.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataADLSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataGCSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataHttpConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataLocalConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataS3Config.py +1 -1
- metadata/generated/schema/metadataIngestion/storageServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/testSuitePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/workflow.py +1 -1
- metadata/generated/schema/monitoring/__init__.py +1 -1
- metadata/generated/schema/monitoring/eventMonitorProvider.py +1 -1
- metadata/generated/schema/security/__init__.py +1 -1
- metadata/generated/schema/security/client/__init__.py +1 -1
- metadata/generated/schema/security/client/auth0SSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/azureSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/customOidcSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/googleSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/oidcClientConfig.py +46 -0
- metadata/generated/schema/security/client/oktaSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/openMetadataJWTClientConfig.py +1 -1
- metadata/generated/schema/security/client/samlSSOClientConfig.py +1 -1
- metadata/generated/schema/security/credentials/__init__.py +1 -1
- metadata/generated/schema/security/credentials/accessTokenAuth.py +1 -1
- metadata/generated/schema/security/credentials/apiAccessTokenAuth.py +1 -1
- metadata/generated/schema/security/credentials/awsCredentials.py +1 -1
- metadata/generated/schema/security/credentials/azureCredentials.py +6 -1
- metadata/generated/schema/security/credentials/basicAuth.py +1 -1
- metadata/generated/schema/security/credentials/bitbucketCredentials.py +1 -1
- metadata/generated/schema/security/credentials/gcpCredentials.py +7 -3
- metadata/generated/schema/security/credentials/gcpExternalAccount.py +37 -0
- metadata/generated/schema/security/credentials/gcpValues.py +2 -2
- metadata/generated/schema/security/credentials/gitCredentials.py +1 -1
- metadata/generated/schema/security/credentials/githubCredentials.py +1 -1
- metadata/generated/schema/security/secrets/__init__.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerClientLoader.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerConfiguration.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerProvider.py +1 -1
- metadata/generated/schema/security/securityConfiguration.py +1 -1
- metadata/generated/schema/security/ssl/__init__.py +1 -1
- metadata/generated/schema/security/ssl/validateSSLClientConfig.py +1 -1
- metadata/generated/schema/security/ssl/verifySSLConfig.py +1 -1
- metadata/generated/schema/settings/__init__.py +1 -1
- metadata/generated/schema/settings/settings.py +1 -1
- metadata/generated/schema/system/__init__.py +1 -1
- metadata/generated/schema/system/entityError.py +1 -1
- metadata/generated/schema/system/eventPublisherJob.py +1 -1
- metadata/generated/schema/system/indexingError.py +1 -1
- metadata/generated/schema/system/ui/__init__.py +1 -1
- metadata/generated/schema/system/ui/knowledgePanel.py +1 -1
- metadata/generated/schema/system/ui/page.py +1 -1
- metadata/generated/schema/system/validationResponse.py +43 -0
- metadata/generated/schema/tests/__init__.py +1 -1
- metadata/generated/schema/tests/assigned.py +1 -1
- metadata/generated/schema/tests/basic.py +1 -1
- metadata/generated/schema/tests/customMetric.py +1 -1
- metadata/generated/schema/tests/resolved.py +1 -1
- metadata/generated/schema/tests/testCase.py +1 -1
- metadata/generated/schema/tests/testCaseResolutionStatus.py +1 -1
- metadata/generated/schema/tests/testDefinition.py +1 -1
- metadata/generated/schema/tests/testSuite.py +1 -1
- metadata/generated/schema/type/__init__.py +1 -1
- metadata/generated/schema/type/auditLog.py +1 -1
- metadata/generated/schema/type/basic.py +1 -1
- metadata/generated/schema/type/bulkOperationResult.py +1 -1
- metadata/generated/schema/type/changeEvent.py +1 -1
- metadata/generated/schema/type/changeEventType.py +1 -1
- metadata/generated/schema/type/collectionDescriptor.py +1 -1
- metadata/generated/schema/type/csvDocumentation.py +1 -1
- metadata/generated/schema/type/csvErrorType.py +1 -1
- metadata/generated/schema/type/csvFile.py +1 -1
- metadata/generated/schema/type/csvImportResult.py +1 -1
- metadata/generated/schema/type/customProperties/__init__.py +1 -1
- metadata/generated/schema/type/customProperties/enumConfig.py +1 -1
- metadata/generated/schema/type/customProperty.py +1 -1
- metadata/generated/schema/type/dailyCount.py +1 -1
- metadata/generated/schema/type/databaseConnectionConfig.py +1 -1
- metadata/generated/schema/type/entityHistory.py +1 -1
- metadata/generated/schema/type/entityLineage.py +1 -1
- metadata/generated/schema/type/entityReference.py +1 -1
- metadata/generated/schema/type/entityReferenceList.py +1 -1
- metadata/generated/schema/type/entityRelationship.py +1 -1
- metadata/generated/schema/type/entityUsage.py +1 -1
- metadata/generated/schema/type/filterPattern.py +1 -1
- metadata/generated/schema/type/function.py +1 -1
- metadata/generated/schema/type/include.py +1 -1
- metadata/generated/schema/type/jdbcConnection.py +1 -1
- metadata/generated/schema/type/lifeCycle.py +1 -1
- metadata/generated/schema/type/paging.py +1 -1
- metadata/generated/schema/type/profile.py +1 -1
- metadata/generated/schema/type/queryParserData.py +1 -1
- metadata/generated/schema/type/reaction.py +1 -1
- metadata/generated/schema/type/schedule.py +1 -1
- metadata/generated/schema/type/schema.py +1 -1
- metadata/generated/schema/type/tableQuery.py +1 -1
- metadata/generated/schema/type/tableUsageCount.py +1 -1
- metadata/generated/schema/type/tagLabel.py +1 -1
- metadata/generated/schema/type/usageDetails.py +1 -1
- metadata/generated/schema/type/usageRequest.py +1 -1
- metadata/generated/schema/type/votes.py +1 -1
- metadata/great_expectations/action.py +5 -3
- metadata/ingestion/api/parser.py +87 -23
- metadata/ingestion/api/topology_runner.py +4 -3
- metadata/ingestion/lineage/parser.py +35 -20
- metadata/ingestion/ometa/mixins/patch_mixin.py +2 -4
- metadata/ingestion/ometa/routes.py +15 -0
- metadata/ingestion/source/dashboard/looker/utils.py +1 -1
- metadata/ingestion/source/dashboard/powerbi/client.py +4 -1
- metadata/ingestion/source/database/azuresql/connection.py +21 -3
- metadata/ingestion/source/database/datalake/connection.py +2 -14
- metadata/ingestion/source/database/datalake/metadata.py +3 -2
- metadata/ingestion/source/database/dbt/dbt_config.py +3 -15
- metadata/ingestion/source/database/mssql/lineage.py +1 -0
- metadata/ingestion/source/database/mssql/usage.py +5 -1
- metadata/ingestion/source/database/mysql/connection.py +14 -0
- metadata/ingestion/source/database/oracle/connection.py +5 -0
- metadata/ingestion/source/database/oracle/queries.py +1 -1
- metadata/ingestion/source/database/postgres/connection.py +15 -0
- metadata/ingestion/source/database/stored_procedures_mixin.py +1 -1
- metadata/ingestion/source/database/unitycatalog/connection.py +12 -8
- metadata/ingestion/source/pipeline/dagster/metadata.py +3 -1
- metadata/ingestion/source/storage/storage_service.py +5 -2
- metadata/parsers/json_schema_parser.py +17 -7
- metadata/pii/scanners/ner_scanner.py +5 -3
- metadata/profiler/interface/profiler_interface.py +4 -4
- metadata/profiler/processor/sample_data_handler.py +45 -8
- metadata/profiler/source/databricks/profiler_source.py +36 -0
- metadata/profiler/source/metadata.py +7 -1
- metadata/profiler/source/profiler_source_factory.py +8 -0
- metadata/readers/dataframe/json.py +11 -6
- metadata/readers/dataframe/models.py +1 -0
- metadata/utils/credentials.py +36 -19
- metadata/utils/datalake/datalake_utils.py +34 -4
- metadata/utils/secrets/azure_kv_secrets_manager.py +3 -19
- metadata/utils/source_hash.py +23 -13
- metadata/utils/storage_metadata_config.py +2 -15
- metadata/workflow/base.py +1 -1
- {openmetadata_ingestion-1.3.1.2.dist-info → openmetadata_ingestion-1.3.2.0.dist-info}/METADATA +289 -289
- {openmetadata_ingestion-1.3.1.2.dist-info → openmetadata_ingestion-1.3.2.0.dist-info}/RECORD +562 -552
- /metadata/examples/workflows/{datalake_azure.yaml → datalake_azure_client_secret.yaml} +0 -0
- {openmetadata_ingestion-1.3.1.2.dist-info → openmetadata_ingestion-1.3.2.0.dist-info}/LICENSE +0 -0
- {openmetadata_ingestion-1.3.1.2.dist-info → openmetadata_ingestion-1.3.2.0.dist-info}/WHEEL +0 -0
- {openmetadata_ingestion-1.3.1.2.dist-info → openmetadata_ingestion-1.3.2.0.dist-info}/entry_points.txt +0 -0
- {openmetadata_ingestion-1.3.1.2.dist-info → openmetadata_ingestion-1.3.2.0.dist-info}/top_level.txt +0 -0
|
@@ -260,7 +260,7 @@ class StorageServiceSource(TopologyRunnerMixin, Source, ABC):
|
|
|
260
260
|
metadata_entry: MetadataEntry,
|
|
261
261
|
) -> List[Column]:
|
|
262
262
|
"""Extract Column related metadata from s3"""
|
|
263
|
-
data_structure_details = fetch_dataframe(
|
|
263
|
+
data_structure_details, raw_data = fetch_dataframe(
|
|
264
264
|
config_source=config_source,
|
|
265
265
|
client=client,
|
|
266
266
|
file_fqn=DatalakeTableSchemaWrapper(
|
|
@@ -269,10 +269,13 @@ class StorageServiceSource(TopologyRunnerMixin, Source, ABC):
|
|
|
269
269
|
file_extension=SupportedTypes(metadata_entry.structureFormat),
|
|
270
270
|
separator=metadata_entry.separator,
|
|
271
271
|
),
|
|
272
|
+
fetch_raw_data=True,
|
|
272
273
|
)
|
|
273
274
|
columns = []
|
|
274
275
|
column_parser = DataFrameColumnParser.create(
|
|
275
|
-
data_structure_details,
|
|
276
|
+
data_structure_details,
|
|
277
|
+
SupportedTypes(metadata_entry.structureFormat),
|
|
278
|
+
raw_data=raw_data,
|
|
276
279
|
)
|
|
277
280
|
columns = column_parser.get_columns()
|
|
278
281
|
return columns
|
|
@@ -18,6 +18,8 @@ import traceback
|
|
|
18
18
|
from enum import Enum
|
|
19
19
|
from typing import List, Optional
|
|
20
20
|
|
|
21
|
+
from pydantic.main import ModelMetaclass
|
|
22
|
+
|
|
21
23
|
from metadata.generated.schema.type.schema import FieldModel
|
|
22
24
|
from metadata.utils.logger import ingestion_logger
|
|
23
25
|
|
|
@@ -36,20 +38,25 @@ class JsonSchemaDataTypes(Enum):
|
|
|
36
38
|
NULL = "null"
|
|
37
39
|
RECORD = "object"
|
|
38
40
|
ARRAY = "array"
|
|
41
|
+
UNKNOWN = "unknown"
|
|
39
42
|
|
|
40
43
|
|
|
41
|
-
def parse_json_schema(
|
|
44
|
+
def parse_json_schema(
|
|
45
|
+
schema_text: str, cls: ModelMetaclass = FieldModel
|
|
46
|
+
) -> Optional[List[FieldModel]]:
|
|
42
47
|
"""
|
|
43
48
|
Method to parse the jsonschema
|
|
44
49
|
"""
|
|
45
50
|
try:
|
|
46
51
|
json_schema_data = json.loads(schema_text)
|
|
47
52
|
field_models = [
|
|
48
|
-
|
|
53
|
+
cls(
|
|
49
54
|
name=json_schema_data.get("title", "default"),
|
|
50
55
|
dataType=JsonSchemaDataTypes(json_schema_data.get("type")).name,
|
|
51
56
|
description=json_schema_data.get("description"),
|
|
52
|
-
children=get_json_schema_fields(
|
|
57
|
+
children=get_json_schema_fields(
|
|
58
|
+
json_schema_data.get("properties", {}), cls=cls
|
|
59
|
+
),
|
|
53
60
|
)
|
|
54
61
|
]
|
|
55
62
|
return field_models
|
|
@@ -59,7 +66,9 @@ def parse_json_schema(schema_text: str) -> Optional[List[FieldModel]]:
|
|
|
59
66
|
return None
|
|
60
67
|
|
|
61
68
|
|
|
62
|
-
def get_json_schema_fields(
|
|
69
|
+
def get_json_schema_fields(
|
|
70
|
+
properties, cls: ModelMetaclass = FieldModel
|
|
71
|
+
) -> Optional[List[FieldModel]]:
|
|
63
72
|
"""
|
|
64
73
|
Recursively convert the parsed schema into required models
|
|
65
74
|
"""
|
|
@@ -67,9 +76,10 @@ def get_json_schema_fields(properties) -> Optional[List[FieldModel]]:
|
|
|
67
76
|
for key, value in properties.items():
|
|
68
77
|
try:
|
|
69
78
|
field_models.append(
|
|
70
|
-
|
|
71
|
-
name=
|
|
72
|
-
|
|
79
|
+
cls(
|
|
80
|
+
name=key,
|
|
81
|
+
displayName=value.get("title"),
|
|
82
|
+
dataType=JsonSchemaDataTypes(value.get("type", "unknown")).name,
|
|
73
83
|
description=value.get("description"),
|
|
74
84
|
children=get_json_schema_fields(value.get("properties"))
|
|
75
85
|
if value.get("type") == "object"
|
|
@@ -21,7 +21,7 @@ from pydantic import BaseModel
|
|
|
21
21
|
|
|
22
22
|
from metadata.generated.schema.entity.classification.tag import Tag
|
|
23
23
|
from metadata.pii.constants import PII, SPACY_EN_MODEL
|
|
24
|
-
from metadata.pii.models import TagAndConfidence
|
|
24
|
+
from metadata.pii.models import TagAndConfidence
|
|
25
25
|
from metadata.pii.ner import NEREntity
|
|
26
26
|
from metadata.utils import fqn
|
|
27
27
|
from metadata.utils.logger import pii_logger
|
|
@@ -119,13 +119,15 @@ class NERScanner:
|
|
|
119
119
|
|
|
120
120
|
if entities_score:
|
|
121
121
|
label, score = self.get_highest_score_label(entities_score)
|
|
122
|
-
tag_type = NEREntity.__members__.get(label
|
|
122
|
+
tag_type = NEREntity.__members__.get(label)
|
|
123
|
+
if not tag_type:
|
|
124
|
+
return None
|
|
123
125
|
return TagAndConfidence(
|
|
124
126
|
tag_fqn=fqn.build(
|
|
125
127
|
metadata=None,
|
|
126
128
|
entity_type=Tag,
|
|
127
129
|
classification_name=PII,
|
|
128
|
-
tag_name=tag_type,
|
|
130
|
+
tag_name=tag_type.value,
|
|
129
131
|
),
|
|
130
132
|
confidence=score,
|
|
131
133
|
)
|
|
@@ -33,7 +33,7 @@ from metadata.generated.schema.entity.data.table import (
|
|
|
33
33
|
TableData,
|
|
34
34
|
)
|
|
35
35
|
from metadata.generated.schema.entity.services.connections.connectionBasicType import (
|
|
36
|
-
|
|
36
|
+
DataStorageConfig,
|
|
37
37
|
)
|
|
38
38
|
from metadata.generated.schema.entity.services.connections.database.datalakeConnection import (
|
|
39
39
|
DatalakeConnection,
|
|
@@ -93,7 +93,7 @@ class ProfilerInterface(ABC):
|
|
|
93
93
|
service_connection_config: Union[DatabaseConnection, DatalakeConnection],
|
|
94
94
|
ometa_client: OpenMetadata,
|
|
95
95
|
entity: Table,
|
|
96
|
-
storage_config:
|
|
96
|
+
storage_config: DataStorageConfig,
|
|
97
97
|
profile_sample_config: Optional[ProfileSampleConfig],
|
|
98
98
|
source_config: DatabaseServiceProfilerPipeline,
|
|
99
99
|
sample_query: Optional[str],
|
|
@@ -248,7 +248,7 @@ class ProfilerInterface(ABC):
|
|
|
248
248
|
DatabaseProfilerConfig,
|
|
249
249
|
DatabaseAndSchemaConfig,
|
|
250
250
|
]
|
|
251
|
-
):
|
|
251
|
+
) -> Optional[DataStorageConfig]:
|
|
252
252
|
if (
|
|
253
253
|
config
|
|
254
254
|
and config.sampleDataStorageConfig
|
|
@@ -264,7 +264,7 @@ class ProfilerInterface(ABC):
|
|
|
264
264
|
database_profiler_config: Optional[DatabaseProfilerConfig],
|
|
265
265
|
db_service: Optional[DatabaseService],
|
|
266
266
|
profiler_config: ProfilerProcessorConfig,
|
|
267
|
-
) -> Optional[
|
|
267
|
+
) -> Optional[DataStorageConfig]:
|
|
268
268
|
"""Get config for a specific entity
|
|
269
269
|
|
|
270
270
|
Args:
|
|
@@ -17,8 +17,13 @@ from datetime import datetime
|
|
|
17
17
|
from functools import singledispatch
|
|
18
18
|
from io import BytesIO
|
|
19
19
|
|
|
20
|
+
from pydantic.json import ENCODERS_BY_TYPE
|
|
21
|
+
|
|
20
22
|
from metadata.clients.aws_client import AWSClient
|
|
21
23
|
from metadata.generated.schema.entity.data.table import Table, TableData
|
|
24
|
+
from metadata.generated.schema.entity.services.connections.connectionBasicType import (
|
|
25
|
+
DataStorageConfig,
|
|
26
|
+
)
|
|
22
27
|
from metadata.generated.schema.security.credentials.awsCredentials import AWSCredentials
|
|
23
28
|
from metadata.profiler.interface.profiler_interface import ProfilerInterface
|
|
24
29
|
from metadata.utils.helpers import clean_uri
|
|
@@ -27,15 +32,45 @@ from metadata.utils.logger import profiler_logger
|
|
|
27
32
|
logger = profiler_logger()
|
|
28
33
|
|
|
29
34
|
|
|
30
|
-
|
|
35
|
+
class PathPatternException(Exception):
|
|
36
|
+
"""
|
|
37
|
+
Exception class need to validate the file path pattern
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def validate_path_pattern(file_path_format: str) -> None:
|
|
42
|
+
if not (
|
|
43
|
+
"{service_name}" in file_path_format
|
|
44
|
+
and "{database_name}" in file_path_format
|
|
45
|
+
and "{database_schema_name}" in file_path_format
|
|
46
|
+
and "{table_name}" in file_path_format
|
|
47
|
+
and file_path_format.endswith(".parquet")
|
|
48
|
+
):
|
|
49
|
+
raise PathPatternException(
|
|
50
|
+
"Please provide a valid path pattern, "
|
|
51
|
+
"the pattern should include these components {service_name}, "
|
|
52
|
+
"{database_name}, {database_schema_name}, {table_name} and "
|
|
53
|
+
"it should end with extension .parquet"
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _get_object_key(
|
|
58
|
+
table: Table, prefix: str, overwrite_data: bool, file_path_format: str
|
|
59
|
+
) -> str:
|
|
60
|
+
validate_path_pattern(file_path_format)
|
|
61
|
+
file_name = file_path_format.format(
|
|
62
|
+
service_name=table.service.name,
|
|
63
|
+
database_name=table.database.name,
|
|
64
|
+
database_schema_name=table.databaseSchema.name,
|
|
65
|
+
table_name=table.name.__root__,
|
|
66
|
+
)
|
|
31
67
|
if not overwrite_data:
|
|
32
|
-
file_name =
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
path = str(table.fullyQualifiedName.__root__).replace(".", "/")
|
|
68
|
+
file_name = file_name.replace(
|
|
69
|
+
".parquet", f"_{datetime.now().strftime('%Y_%m_%d')}.parquet"
|
|
70
|
+
)
|
|
36
71
|
if prefix:
|
|
37
|
-
return f"{clean_uri(prefix)}/{
|
|
38
|
-
return
|
|
72
|
+
return f"{clean_uri(prefix)}/{file_name}"
|
|
73
|
+
return file_name
|
|
39
74
|
|
|
40
75
|
|
|
41
76
|
def upload_sample_data(data: TableData, profiler_interface: ProfilerInterface) -> None:
|
|
@@ -45,9 +80,10 @@ def upload_sample_data(data: TableData, profiler_interface: ProfilerInterface) -
|
|
|
45
80
|
import pandas as pd # pylint: disable=import-outside-toplevel
|
|
46
81
|
|
|
47
82
|
try:
|
|
48
|
-
sample_storage_config = profiler_interface.storage_config
|
|
83
|
+
sample_storage_config: DataStorageConfig = profiler_interface.storage_config
|
|
49
84
|
if not sample_storage_config:
|
|
50
85
|
return
|
|
86
|
+
ENCODERS_BY_TYPE[bytes] = lambda v: v.decode("utf-8", "ignore")
|
|
51
87
|
deserialized_data = json.loads(data.json())
|
|
52
88
|
df = pd.DataFrame(
|
|
53
89
|
data=deserialized_data.get("rows", []),
|
|
@@ -59,6 +95,7 @@ def upload_sample_data(data: TableData, profiler_interface: ProfilerInterface) -
|
|
|
59
95
|
table=profiler_interface.table_entity,
|
|
60
96
|
prefix=sample_storage_config.prefix,
|
|
61
97
|
overwrite_data=sample_storage_config.overwriteData,
|
|
98
|
+
file_path_format=sample_storage_config.filePathPattern,
|
|
62
99
|
)
|
|
63
100
|
upload_to_storage(
|
|
64
101
|
sample_storage_config.storageConfig,
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Extend the ProfilerSource class to add support for Databricks is_disconnect SQA method"""
|
|
2
|
+
|
|
3
|
+
from metadata.generated.schema.entity.services.databaseService import DatabaseService
|
|
4
|
+
from metadata.generated.schema.metadataIngestion.workflow import (
|
|
5
|
+
OpenMetadataWorkflowConfig,
|
|
6
|
+
)
|
|
7
|
+
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
|
8
|
+
from metadata.profiler.source.base.profiler_source import ProfilerSource
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def is_disconnect(self, e, connection, cursor):
|
|
12
|
+
"""is_disconnect method for the Databricks dialect"""
|
|
13
|
+
if "Invalid SessionHandle: SessionHandle" in str(e):
|
|
14
|
+
return True
|
|
15
|
+
return False
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class DataBricksProfilerSource(ProfilerSource):
|
|
19
|
+
"""Databricks Profiler source"""
|
|
20
|
+
|
|
21
|
+
def __init__(
|
|
22
|
+
self,
|
|
23
|
+
config: OpenMetadataWorkflowConfig,
|
|
24
|
+
database: DatabaseService,
|
|
25
|
+
ometa_client: OpenMetadata,
|
|
26
|
+
):
|
|
27
|
+
super().__init__(config, database, ometa_client)
|
|
28
|
+
self.set_is_disconnect()
|
|
29
|
+
|
|
30
|
+
def set_is_disconnect(self):
|
|
31
|
+
"""Set the is_disconnect method for the Databricks dialect"""
|
|
32
|
+
from databricks.sqlalchemy import (
|
|
33
|
+
DatabricksDialect, # pylint: disable=import-outside-toplevel
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
DatabricksDialect.is_disconnect = is_disconnect
|
|
@@ -43,6 +43,10 @@ from metadata.utils.logger import profiler_logger
|
|
|
43
43
|
logger = profiler_logger()
|
|
44
44
|
|
|
45
45
|
|
|
46
|
+
TABLE_FIELDS = ["tableProfilerConfig", "columns", "customMetrics"]
|
|
47
|
+
TAGS_FIELD = ["tags"]
|
|
48
|
+
|
|
49
|
+
|
|
46
50
|
class ProfilerSourceAndEntity(BaseModel):
|
|
47
51
|
"""Return class for the OpenMetadata Profiler Source"""
|
|
48
52
|
|
|
@@ -273,7 +277,9 @@ class OpenMetadataSource(Source):
|
|
|
273
277
|
"""
|
|
274
278
|
tables = self.metadata.list_all_entities(
|
|
275
279
|
entity=Table,
|
|
276
|
-
fields=
|
|
280
|
+
fields=TABLE_FIELDS
|
|
281
|
+
if not self.source_config.processPiiSensitive
|
|
282
|
+
else TABLE_FIELDS + TAGS_FIELD,
|
|
277
283
|
params={
|
|
278
284
|
"service": self.config.source.serviceName,
|
|
279
285
|
"database": fqn.build(
|
|
@@ -16,8 +16,12 @@ Factory class for creating profiler source objects
|
|
|
16
16
|
from metadata.generated.schema.entity.services.connections.database.bigQueryConnection import (
|
|
17
17
|
BigqueryType,
|
|
18
18
|
)
|
|
19
|
+
from metadata.generated.schema.entity.services.connections.database.databricksConnection import (
|
|
20
|
+
DatabricksType,
|
|
21
|
+
)
|
|
19
22
|
from metadata.profiler.source.base.profiler_source import ProfilerSource
|
|
20
23
|
from metadata.profiler.source.bigquery.profiler_source import BigQueryProfilerSource
|
|
24
|
+
from metadata.profiler.source.databricks.profiler_source import DataBricksProfilerSource
|
|
21
25
|
|
|
22
26
|
|
|
23
27
|
class ProfilerSourceFactory:
|
|
@@ -44,3 +48,7 @@ profiler_source_factory.register_source(
|
|
|
44
48
|
BigqueryType.BigQuery.value.lower(),
|
|
45
49
|
BigQueryProfilerSource,
|
|
46
50
|
)
|
|
51
|
+
profiler_source_factory.register_source(
|
|
52
|
+
DatabricksType.Databricks.value.lower(),
|
|
53
|
+
DataBricksProfilerSource,
|
|
54
|
+
)
|
|
@@ -16,7 +16,7 @@ import gzip
|
|
|
16
16
|
import io
|
|
17
17
|
import json
|
|
18
18
|
import zipfile
|
|
19
|
-
from typing import List, Union
|
|
19
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
20
20
|
|
|
21
21
|
from metadata.readers.dataframe.base import DataFrameReader
|
|
22
22
|
from metadata.readers.dataframe.common import dataframe_to_chunks
|
|
@@ -47,7 +47,7 @@ class JSONDataFrameReader(DataFrameReader):
|
|
|
47
47
|
@staticmethod
|
|
48
48
|
def read_from_json(
|
|
49
49
|
key: str, json_text: bytes, decode: bool = False, **__
|
|
50
|
-
) -> List["DataFrame"]:
|
|
50
|
+
) -> Tuple[List["DataFrame"], Optional[Dict[str, Any]]]:
|
|
51
51
|
"""
|
|
52
52
|
Decompress a JSON file (if needed) and read its contents
|
|
53
53
|
as a dataframe.
|
|
@@ -60,20 +60,25 @@ class JSONDataFrameReader(DataFrameReader):
|
|
|
60
60
|
import pandas as pd
|
|
61
61
|
|
|
62
62
|
json_text = _get_json_text(key=key, text=json_text, decode=decode)
|
|
63
|
+
raw_data = None
|
|
63
64
|
try:
|
|
64
65
|
data = json.loads(json_text)
|
|
66
|
+
if isinstance(data, dict) and data.get("$schema"):
|
|
67
|
+
raw_data = json_text
|
|
65
68
|
except json.decoder.JSONDecodeError:
|
|
66
69
|
logger.debug("Failed to read as JSON object. Trying to read as JSON Lines")
|
|
67
70
|
data = [json.loads(json_obj) for json_obj in json_text.strip().split("\n")]
|
|
68
71
|
|
|
69
72
|
# if we get a scalar value (e.g. {"a":"b"}) then we need to specify the index
|
|
70
73
|
data = data if not isinstance(data, dict) else [data]
|
|
71
|
-
return dataframe_to_chunks(pd.DataFrame.from_records(data))
|
|
74
|
+
return dataframe_to_chunks(pd.DataFrame.from_records(data)), raw_data
|
|
72
75
|
|
|
73
76
|
def _read(self, *, key: str, bucket_name: str, **kwargs) -> DatalakeColumnWrapper:
|
|
74
77
|
text = self.reader.read(key, bucket_name=bucket_name)
|
|
78
|
+
dataframes, raw_data = self.read_from_json(
|
|
79
|
+
key=key, json_text=text, decode=True, **kwargs
|
|
80
|
+
)
|
|
75
81
|
return DatalakeColumnWrapper(
|
|
76
|
-
dataframes=
|
|
77
|
-
|
|
78
|
-
)
|
|
82
|
+
dataframes=dataframes,
|
|
83
|
+
raw_data=raw_data,
|
|
79
84
|
)
|
|
@@ -29,6 +29,7 @@ class DatalakeColumnWrapper(BaseModel):
|
|
|
29
29
|
|
|
30
30
|
columns: Optional[List[Column]]
|
|
31
31
|
dataframes: Optional[List[Any]] # pandas.Dataframe does not have any validators
|
|
32
|
+
raw_data: Any # in special cases like json schema, we need to store the raw data
|
|
32
33
|
|
|
33
34
|
|
|
34
35
|
class DatalakeTableSchemaWrapper(BaseModel):
|
metadata/utils/credentials.py
CHANGED
|
@@ -15,7 +15,7 @@ import base64
|
|
|
15
15
|
import json
|
|
16
16
|
import os
|
|
17
17
|
import tempfile
|
|
18
|
-
from typing import Dict, List, Optional
|
|
18
|
+
from typing import Dict, List, Optional, Union
|
|
19
19
|
|
|
20
20
|
from cryptography.hazmat.primitives import serialization
|
|
21
21
|
from google import auth
|
|
@@ -25,6 +25,9 @@ from metadata.generated.schema.security.credentials.gcpCredentials import (
|
|
|
25
25
|
GCPCredentials,
|
|
26
26
|
GcpCredentialsPath,
|
|
27
27
|
)
|
|
28
|
+
from metadata.generated.schema.security.credentials.gcpExternalAccount import (
|
|
29
|
+
GcpExternalAccount,
|
|
30
|
+
)
|
|
28
31
|
from metadata.generated.schema.security.credentials.gcpValues import (
|
|
29
32
|
GcpCredentialsValues,
|
|
30
33
|
)
|
|
@@ -85,30 +88,44 @@ def create_credential_tmp_file(credentials: dict) -> str:
|
|
|
85
88
|
return temp_file_path
|
|
86
89
|
|
|
87
90
|
|
|
88
|
-
def build_google_credentials_dict(
|
|
91
|
+
def build_google_credentials_dict(
|
|
92
|
+
gcp_values: Union[GcpCredentialsValues, GcpExternalAccount]
|
|
93
|
+
) -> Dict[str, str]:
|
|
89
94
|
"""
|
|
90
95
|
Given GcPCredentialsValues, build a dictionary as the JSON file
|
|
91
96
|
downloaded from GCP with the service_account
|
|
92
97
|
:param gcp_values: GCP credentials
|
|
93
98
|
:return: Dictionary with credentials
|
|
94
99
|
"""
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
100
|
+
if isinstance(gcp_values, GcpCredentialsValues):
|
|
101
|
+
private_key_str = gcp_values.privateKey.get_secret_value()
|
|
102
|
+
# adding the replace string here to escape line break if passed from env
|
|
103
|
+
private_key_str = private_key_str.replace("\\n", "\n")
|
|
104
|
+
validate_private_key(private_key_str)
|
|
105
|
+
|
|
106
|
+
return {
|
|
107
|
+
"type": gcp_values.type,
|
|
108
|
+
"project_id": gcp_values.projectId.__root__,
|
|
109
|
+
"private_key_id": gcp_values.privateKeyId,
|
|
110
|
+
"private_key": private_key_str,
|
|
111
|
+
"client_email": gcp_values.clientEmail,
|
|
112
|
+
"client_id": gcp_values.clientId,
|
|
113
|
+
"auth_uri": str(gcp_values.authUri),
|
|
114
|
+
"token_uri": str(gcp_values.tokenUri),
|
|
115
|
+
"auth_provider_x509_cert_url": str(gcp_values.authProviderX509CertUrl),
|
|
116
|
+
"client_x509_cert_url": str(gcp_values.clientX509CertUrl),
|
|
117
|
+
}
|
|
118
|
+
if isinstance(gcp_values, GcpExternalAccount):
|
|
119
|
+
return {
|
|
120
|
+
"type": gcp_values.externalType,
|
|
121
|
+
"audience": gcp_values.audience,
|
|
122
|
+
"subject_token_type": gcp_values.subjectTokenType,
|
|
123
|
+
"token_url": gcp_values.tokenURL,
|
|
124
|
+
"credential_source": gcp_values.credentialSource,
|
|
125
|
+
}
|
|
126
|
+
raise InvalidGcpConfigException(
|
|
127
|
+
f"Error trying to build GCP credentials dict due to Invalid GCP config {type(gcp_values)}"
|
|
128
|
+
)
|
|
112
129
|
|
|
113
130
|
|
|
114
131
|
def set_google_credentials(gcp_credentials: GCPCredentials) -> None:
|
|
@@ -17,10 +17,11 @@ import ast
|
|
|
17
17
|
import json
|
|
18
18
|
import random
|
|
19
19
|
import traceback
|
|
20
|
-
from typing import Dict, List, Optional, Union, cast
|
|
20
|
+
from typing import Any, Dict, List, Optional, Union, cast
|
|
21
21
|
|
|
22
22
|
from metadata.generated.schema.entity.data.table import Column, DataType
|
|
23
23
|
from metadata.ingestion.source.database.column_helpers import truncate_column_name
|
|
24
|
+
from metadata.parsers.json_schema_parser import parse_json_schema
|
|
24
25
|
from metadata.readers.dataframe.models import (
|
|
25
26
|
DatalakeColumnWrapper,
|
|
26
27
|
DatalakeTableSchemaWrapper,
|
|
@@ -35,6 +36,7 @@ def fetch_dataframe(
|
|
|
35
36
|
config_source,
|
|
36
37
|
client,
|
|
37
38
|
file_fqn: DatalakeTableSchemaWrapper,
|
|
39
|
+
fetch_raw_data: bool = False,
|
|
38
40
|
**kwargs,
|
|
39
41
|
) -> Optional[List["DataFrame"]]:
|
|
40
42
|
"""
|
|
@@ -60,6 +62,8 @@ def fetch_dataframe(
|
|
|
60
62
|
df_wrapper: DatalakeColumnWrapper = df_reader.read(
|
|
61
63
|
key=key, bucket_name=bucket_name, **kwargs
|
|
62
64
|
)
|
|
65
|
+
if fetch_raw_data:
|
|
66
|
+
return df_wrapper.dataframes, df_wrapper.raw_data
|
|
63
67
|
return df_wrapper.dataframes
|
|
64
68
|
except Exception as err:
|
|
65
69
|
logger.error(
|
|
@@ -73,6 +77,8 @@ def fetch_dataframe(
|
|
|
73
77
|
# Here we need to blow things up. Without the dataframe we cannot move forward
|
|
74
78
|
raise err
|
|
75
79
|
|
|
80
|
+
if fetch_raw_data:
|
|
81
|
+
return None, None
|
|
76
82
|
return None
|
|
77
83
|
|
|
78
84
|
|
|
@@ -112,6 +118,7 @@ class DataFrameColumnParser:
|
|
|
112
118
|
file_type: Optional[SupportedTypes] = None,
|
|
113
119
|
sample: bool = True,
|
|
114
120
|
shuffle: bool = False,
|
|
121
|
+
raw_data: Any = None,
|
|
115
122
|
):
|
|
116
123
|
"""Instantiate a column parser object with the appropriate parser
|
|
117
124
|
|
|
@@ -126,8 +133,14 @@ class DataFrameColumnParser:
|
|
|
126
133
|
data_frame = cls._get_data_frame(data_frame, sample, shuffle)
|
|
127
134
|
if file_type == SupportedTypes.PARQUET:
|
|
128
135
|
parser = ParquetDataFrameColumnParser(data_frame)
|
|
129
|
-
|
|
130
|
-
|
|
136
|
+
elif file_type in {
|
|
137
|
+
SupportedTypes.JSON,
|
|
138
|
+
SupportedTypes.JSONGZ,
|
|
139
|
+
SupportedTypes.JSONZIP,
|
|
140
|
+
}:
|
|
141
|
+
parser = JsonDataFrameColumnParser(data_frame, raw_data=raw_data)
|
|
142
|
+
else:
|
|
143
|
+
parser = GenericDataFrameColumnParser(data_frame)
|
|
131
144
|
return cls(parser)
|
|
132
145
|
|
|
133
146
|
@staticmethod
|
|
@@ -172,8 +185,9 @@ class GenericDataFrameColumnParser:
|
|
|
172
185
|
"bytes": DataType.BYTES,
|
|
173
186
|
}
|
|
174
187
|
|
|
175
|
-
def __init__(self, data_frame: "DataFrame"):
|
|
188
|
+
def __init__(self, data_frame: "DataFrame", raw_data: Any = None):
|
|
176
189
|
self.data_frame = data_frame
|
|
190
|
+
self.raw_data = raw_data
|
|
177
191
|
|
|
178
192
|
def get_columns(self):
|
|
179
193
|
"""
|
|
@@ -472,3 +486,19 @@ class ParquetDataFrameColumnParser:
|
|
|
472
486
|
data_type = self._data_formats.get(str(column.type), DataType.UNKNOWN)
|
|
473
487
|
|
|
474
488
|
return data_type
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
class JsonDataFrameColumnParser(GenericDataFrameColumnParser):
|
|
492
|
+
"""Given a dataframe object generated from a json file, parse the columns and return a list of Column objects."""
|
|
493
|
+
|
|
494
|
+
def get_columns(self):
|
|
495
|
+
"""
|
|
496
|
+
method to process column details for json files
|
|
497
|
+
"""
|
|
498
|
+
if self.raw_data:
|
|
499
|
+
try:
|
|
500
|
+
return parse_json_schema(schema_text=self.raw_data, cls=Column)
|
|
501
|
+
except Exception as exc:
|
|
502
|
+
logger.warning(f"Unable to parse the json schema: {exc}")
|
|
503
|
+
logger.debug(traceback.format_exc())
|
|
504
|
+
return self._get_columns(self.data_frame)
|
|
@@ -17,9 +17,9 @@ import traceback
|
|
|
17
17
|
from abc import ABC
|
|
18
18
|
from typing import Optional
|
|
19
19
|
|
|
20
|
-
from azure.
|
|
21
|
-
from azure.keyvault.secrets import KeyVaultSecret, SecretClient
|
|
20
|
+
from azure.keyvault.secrets import KeyVaultSecret
|
|
22
21
|
|
|
22
|
+
from metadata.clients.azure_client import AzureClient
|
|
23
23
|
from metadata.generated.schema.security.secrets.secretsManagerClientLoader import (
|
|
24
24
|
SecretsManagerClientLoader,
|
|
25
25
|
)
|
|
@@ -105,23 +105,7 @@ class AzureKVSecretsManager(ExternalSecretsManager, ABC):
|
|
|
105
105
|
):
|
|
106
106
|
super().__init__(provider=SecretsManagerProvider.azure_kv, loader=loader)
|
|
107
107
|
|
|
108
|
-
|
|
109
|
-
self.credentials.tenantId
|
|
110
|
-
and self.credentials.clientId
|
|
111
|
-
and self.credentials.clientSecret
|
|
112
|
-
):
|
|
113
|
-
azure_identity = ClientSecretCredential(
|
|
114
|
-
tenant_id=self.credentials.tenantId,
|
|
115
|
-
client_id=self.credentials.clientId,
|
|
116
|
-
client_secret=self.credentials.clientSecret.get_secret_value(),
|
|
117
|
-
)
|
|
118
|
-
else:
|
|
119
|
-
azure_identity = DefaultAzureCredential()
|
|
120
|
-
|
|
121
|
-
self.client = SecretClient(
|
|
122
|
-
vault_url=f"https://{self.credentials.vaultName}.vault.azure.net/",
|
|
123
|
-
credential=azure_identity,
|
|
124
|
-
)
|
|
108
|
+
self.client = AzureClient(self.credentials).create_secret_client()
|
|
125
109
|
|
|
126
110
|
def get_string_value(self, secret_id: str) -> str:
|
|
127
111
|
"""
|
metadata/utils/source_hash.py
CHANGED
|
@@ -14,9 +14,14 @@ Source hash utils module
|
|
|
14
14
|
"""
|
|
15
15
|
|
|
16
16
|
import hashlib
|
|
17
|
+
import traceback
|
|
17
18
|
from typing import Dict, Optional
|
|
18
19
|
|
|
19
20
|
from metadata.ingestion.ometa.ometa_api import C
|
|
21
|
+
from metadata.utils.logger import utils_logger
|
|
22
|
+
|
|
23
|
+
logger = utils_logger()
|
|
24
|
+
|
|
20
25
|
|
|
21
26
|
SOURCE_HASH_EXCLUDE_FIELDS = {
|
|
22
27
|
"sourceHash": True,
|
|
@@ -25,19 +30,24 @@ SOURCE_HASH_EXCLUDE_FIELDS = {
|
|
|
25
30
|
|
|
26
31
|
def generate_source_hash(
    create_request: C, exclude_fields: Optional[Dict] = None
) -> Optional[str]:
    """
    Given a create_request model convert it to json string and generate a hash value

    Args:
        create_request: create request model exposing a `json(exclude=...)` method
        exclude_fields: extra fields to leave out of the fingerprint, on top of
            the ones listed in SOURCE_HASH_EXCLUDE_FIELDS

    Returns:
        The md5 hex digest of the serialized request, or None if the hash
        could not be generated (the failure is logged, not raised)
    """
    try:
        # We always want to exclude the sourceHash when generating the fingerprint.
        # Build a fresh dict instead of calling SOURCE_HASH_EXCLUDE_FIELDS.update():
        # dict.update() returns None (which would disable every exclusion) and
        # it would also leak the caller's fields into the shared module-level
        # defaults for all later calls.
        fields_to_exclude = {**SOURCE_HASH_EXCLUDE_FIELDS, **(exclude_fields or {})}

        create_request_json = create_request.json(exclude=fields_to_exclude)

        json_bytes = create_request_json.encode("utf-8")
        return hashlib.md5(json_bytes).hexdigest()

    except Exception as exc:
        logger.warning(f"Failed to generate source hash due to - {exc}")
        logger.debug(traceback.format_exc())
        return None
|
|
@@ -17,6 +17,7 @@ from functools import singledispatch
|
|
|
17
17
|
|
|
18
18
|
import requests
|
|
19
19
|
|
|
20
|
+
from metadata.clients.azure_client import AzureClient
|
|
20
21
|
from metadata.generated.schema.entity.services.connections.database.datalake.azureConfig import (
|
|
21
22
|
AzureConfig,
|
|
22
23
|
)
|
|
@@ -153,21 +154,7 @@ def _(config: StorageMetadataAdlsConfig) -> ManifestMetadataConfig:
|
|
|
153
154
|
else STORAGE_METADATA_MANIFEST_FILE_NAME
|
|
154
155
|
)
|
|
155
156
|
|
|
156
|
-
|
|
157
|
-
ClientSecretCredential,
|
|
158
|
-
)
|
|
159
|
-
from azure.storage.blob import ( # pylint: disable=import-outside-toplevel
|
|
160
|
-
BlobServiceClient,
|
|
161
|
-
)
|
|
162
|
-
|
|
163
|
-
blob_client = BlobServiceClient(
|
|
164
|
-
account_url=f"https://{config.securityConfig.accountName}.blob.core.windows.net/",
|
|
165
|
-
credential=ClientSecretCredential(
|
|
166
|
-
config.securityConfig.tenantId,
|
|
167
|
-
config.securityConfig.clientId,
|
|
168
|
-
config.securityConfig.clientSecret.get_secret_value(),
|
|
169
|
-
),
|
|
170
|
-
)
|
|
157
|
+
blob_client = AzureClient(config.securityConfig).create_blob_client()
|
|
171
158
|
|
|
172
159
|
reader = get_reader(
|
|
173
160
|
config_source=AzureConfig(securityConfig=config.securityConfig),
|