openmetadata-ingestion 1.3.2.0rc1__py3-none-any.whl → 1.3.2.0rc3__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of openmetadata-ingestion might be problematic. Click here for more details.
- metadata/examples/workflows/dbt.yaml +17 -6
- metadata/generated/schema/analytics/__init__.py +1 -1
- metadata/generated/schema/analytics/basic.py +1 -1
- metadata/generated/schema/analytics/reportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/__init__.py +1 -1
- metadata/generated/schema/analytics/reportDataType/aggregatedCostAnalysisReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/entityReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/rawCostAnalysisReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/webAnalyticEntityViewReportData.py +1 -1
- metadata/generated/schema/analytics/reportDataType/webAnalyticUserActivityReportData.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEvent.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventData.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/__init__.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/customEvent.py +1 -1
- metadata/generated/schema/analytics/webAnalyticEventType/pageViewEvent.py +1 -1
- metadata/generated/schema/api/__init__.py +1 -1
- metadata/generated/schema/api/addGlossaryToAssetsRequest.py +1 -1
- metadata/generated/schema/api/analytics/__init__.py +1 -1
- metadata/generated/schema/api/analytics/createWebAnalyticEvent.py +1 -1
- metadata/generated/schema/api/automations/__init__.py +1 -1
- metadata/generated/schema/api/automations/createWorkflow.py +1 -1
- metadata/generated/schema/api/bulkAssets.py +1 -1
- metadata/generated/schema/api/classification/__init__.py +1 -1
- metadata/generated/schema/api/classification/createClassification.py +1 -1
- metadata/generated/schema/api/classification/createTag.py +1 -1
- metadata/generated/schema/api/classification/loadTags.py +1 -1
- metadata/generated/schema/api/createBot.py +1 -1
- metadata/generated/schema/api/createEventPublisherJob.py +1 -1
- metadata/generated/schema/api/createType.py +1 -1
- metadata/generated/schema/api/data/__init__.py +1 -1
- metadata/generated/schema/api/data/createChart.py +1 -1
- metadata/generated/schema/api/data/createContainer.py +1 -1
- metadata/generated/schema/api/data/createCustomProperty.py +1 -1
- metadata/generated/schema/api/data/createDashboard.py +1 -1
- metadata/generated/schema/api/data/createDashboardDataModel.py +1 -1
- metadata/generated/schema/api/data/createDatabase.py +1 -1
- metadata/generated/schema/api/data/createDatabaseSchema.py +1 -1
- metadata/generated/schema/api/data/createGlossary.py +1 -1
- metadata/generated/schema/api/data/createGlossaryTerm.py +1 -1
- metadata/generated/schema/api/data/createMlModel.py +1 -1
- metadata/generated/schema/api/data/createPipeline.py +1 -1
- metadata/generated/schema/api/data/createQuery.py +1 -1
- metadata/generated/schema/api/data/createSearchIndex.py +1 -1
- metadata/generated/schema/api/data/createStoredProcedure.py +1 -1
- metadata/generated/schema/api/data/createTable.py +1 -1
- metadata/generated/schema/api/data/createTableProfile.py +1 -1
- metadata/generated/schema/api/data/createTopic.py +1 -1
- metadata/generated/schema/api/data/loadGlossary.py +1 -1
- metadata/generated/schema/api/data/restoreEntity.py +1 -1
- metadata/generated/schema/api/dataInsight/__init__.py +1 -1
- metadata/generated/schema/api/dataInsight/createDataInsightChart.py +1 -1
- metadata/generated/schema/api/dataInsight/kpi/__init__.py +1 -1
- metadata/generated/schema/api/dataInsight/kpi/createKpiRequest.py +1 -1
- metadata/generated/schema/api/docStore/__init__.py +1 -1
- metadata/generated/schema/api/docStore/createDocument.py +1 -1
- metadata/generated/schema/api/domains/__init__.py +1 -1
- metadata/generated/schema/api/domains/createDataProduct.py +1 -1
- metadata/generated/schema/api/domains/createDomain.py +1 -1
- metadata/generated/schema/api/feed/__init__.py +1 -1
- metadata/generated/schema/api/feed/closeTask.py +1 -1
- metadata/generated/schema/api/feed/createPost.py +1 -1
- metadata/generated/schema/api/feed/createSuggestion.py +1 -1
- metadata/generated/schema/api/feed/createThread.py +1 -1
- metadata/generated/schema/api/feed/resolveTask.py +1 -1
- metadata/generated/schema/api/feed/threadCount.py +1 -1
- metadata/generated/schema/api/lineage/__init__.py +1 -1
- metadata/generated/schema/api/lineage/addLineage.py +1 -1
- metadata/generated/schema/api/openMetadataServerVersion.py +1 -1
- metadata/generated/schema/api/policies/__init__.py +1 -1
- metadata/generated/schema/api/policies/createPolicy.py +1 -1
- metadata/generated/schema/api/services/__init__.py +1 -1
- metadata/generated/schema/api/services/createDashboardService.py +1 -1
- metadata/generated/schema/api/services/createDatabaseService.py +1 -1
- metadata/generated/schema/api/services/createMessagingService.py +1 -1
- metadata/generated/schema/api/services/createMetadataService.py +1 -1
- metadata/generated/schema/api/services/createMlModelService.py +1 -1
- metadata/generated/schema/api/services/createPipelineService.py +1 -1
- metadata/generated/schema/api/services/createSearchService.py +1 -1
- metadata/generated/schema/api/services/createStorageService.py +1 -1
- metadata/generated/schema/api/services/ingestionPipelines/__init__.py +1 -1
- metadata/generated/schema/api/services/ingestionPipelines/createIngestionPipeline.py +1 -1
- metadata/generated/schema/api/setOwner.py +1 -1
- metadata/generated/schema/api/teams/__init__.py +1 -1
- metadata/generated/schema/api/teams/createPersona.py +1 -1
- metadata/generated/schema/api/teams/createRole.py +1 -1
- metadata/generated/schema/api/teams/createTeam.py +1 -1
- metadata/generated/schema/api/teams/createUser.py +1 -1
- metadata/generated/schema/api/tests/__init__.py +1 -1
- metadata/generated/schema/api/tests/createCustomMetric.py +1 -1
- metadata/generated/schema/api/tests/createLogicalTestCases.py +1 -1
- metadata/generated/schema/api/tests/createTestCase.py +1 -1
- metadata/generated/schema/api/tests/createTestCaseResolutionStatus.py +1 -1
- metadata/generated/schema/api/tests/createTestDefinition.py +1 -1
- metadata/generated/schema/api/tests/createTestSuite.py +1 -1
- metadata/generated/schema/api/voteRequest.py +1 -1
- metadata/generated/schema/auth/__init__.py +1 -1
- metadata/generated/schema/auth/basicAuth.py +1 -1
- metadata/generated/schema/auth/basicLoginRequest.py +1 -1
- metadata/generated/schema/auth/changePasswordRequest.py +1 -1
- metadata/generated/schema/auth/createPersonalToken.py +1 -1
- metadata/generated/schema/auth/emailRequest.py +1 -1
- metadata/generated/schema/auth/emailVerificationToken.py +1 -1
- metadata/generated/schema/auth/generateToken.py +1 -1
- metadata/generated/schema/auth/jwtAuth.py +1 -1
- metadata/generated/schema/auth/loginRequest.py +1 -1
- metadata/generated/schema/auth/logoutRequest.py +1 -1
- metadata/generated/schema/auth/passwordResetRequest.py +1 -1
- metadata/generated/schema/auth/passwordResetToken.py +1 -1
- metadata/generated/schema/auth/personalAccessToken.py +1 -1
- metadata/generated/schema/auth/refreshToken.py +1 -1
- metadata/generated/schema/auth/registrationRequest.py +1 -1
- metadata/generated/schema/auth/revokePersonalToken.py +1 -1
- metadata/generated/schema/auth/revokeToken.py +1 -1
- metadata/generated/schema/auth/serviceTokenEnum.py +1 -1
- metadata/generated/schema/auth/ssoAuth.py +1 -1
- metadata/generated/schema/auth/tokenRefreshRequest.py +1 -1
- metadata/generated/schema/configuration/__init__.py +1 -1
- metadata/generated/schema/configuration/appsPrivateConfiguration.py +1 -1
- metadata/generated/schema/configuration/authConfig.py +1 -1
- metadata/generated/schema/configuration/authenticationConfiguration.py +1 -1
- metadata/generated/schema/configuration/authorizerConfiguration.py +1 -1
- metadata/generated/schema/configuration/changeEventConfiguration.py +1 -1
- metadata/generated/schema/configuration/dataQualityConfiguration.py +1 -1
- metadata/generated/schema/configuration/elasticSearchConfiguration.py +1 -1
- metadata/generated/schema/configuration/eventHandlerConfiguration.py +1 -1
- metadata/generated/schema/configuration/fernetConfiguration.py +1 -1
- metadata/generated/schema/configuration/jwtTokenConfiguration.py +1 -1
- metadata/generated/schema/configuration/kafkaEventConfiguration.py +1 -1
- metadata/generated/schema/configuration/ldapConfiguration.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/__init__.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/customTrustManagerConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/hostNameConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/jvmDefaultConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/trustAllConfig.py +1 -1
- metadata/generated/schema/configuration/ldapTrustStoreConfig/truststoreConfig.py +1 -1
- metadata/generated/schema/configuration/loginConfiguration.py +1 -1
- metadata/generated/schema/configuration/logoConfiguration.py +1 -1
- metadata/generated/schema/configuration/pipelineServiceClientConfiguration.py +1 -1
- metadata/generated/schema/configuration/slackAppConfiguration.py +1 -1
- metadata/generated/schema/configuration/taskNotificationConfiguration.py +1 -1
- metadata/generated/schema/configuration/testResultNotificationConfiguration.py +1 -1
- metadata/generated/schema/dataInsight/__init__.py +1 -1
- metadata/generated/schema/dataInsight/dataInsightChart.py +1 -1
- metadata/generated/schema/dataInsight/dataInsightChartResult.py +1 -1
- metadata/generated/schema/dataInsight/kpi/__init__.py +1 -1
- metadata/generated/schema/dataInsight/kpi/basic.py +1 -1
- metadata/generated/schema/dataInsight/kpi/kpi.py +1 -1
- metadata/generated/schema/dataInsight/type/__init__.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsCount.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUnusedAssetsSize.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsCount.py +1 -1
- metadata/generated/schema/dataInsight/type/aggregatedUsedVsUnusedAssetsSize.py +1 -1
- metadata/generated/schema/dataInsight/type/dailyActiveUsers.py +1 -1
- metadata/generated/schema/dataInsight/type/mostActiveUsers.py +1 -1
- metadata/generated/schema/dataInsight/type/mostViewedEntities.py +1 -1
- metadata/generated/schema/dataInsight/type/pageViewsByEntities.py +1 -1
- metadata/generated/schema/dataInsight/type/percentageOfEntitiesWithDescriptionByType.py +1 -1
- metadata/generated/schema/dataInsight/type/percentageOfEntitiesWithOwnerByType.py +1 -1
- metadata/generated/schema/dataInsight/type/percentageOfServicesWithDescription.py +1 -1
- metadata/generated/schema/dataInsight/type/percentageOfServicesWithOwner.py +1 -1
- metadata/generated/schema/dataInsight/type/totalEntitiesByTier.py +1 -1
- metadata/generated/schema/dataInsight/type/totalEntitiesByType.py +1 -1
- metadata/generated/schema/dataInsight/type/unusedAssets.py +1 -1
- metadata/generated/schema/email/__init__.py +1 -1
- metadata/generated/schema/email/emailRequest.py +1 -1
- metadata/generated/schema/email/smtpSettings.py +1 -1
- metadata/generated/schema/entity/__init__.py +1 -1
- metadata/generated/schema/entity/applications/__init__.py +1 -1
- metadata/generated/schema/entity/applications/app.py +6 -1
- metadata/generated/schema/entity/applications/appRunRecord.py +1 -1
- metadata/generated/schema/entity/applications/configuration/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/applicationConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/autoTaggerAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/external/metaPilotAppConfig.py +8 -17
- metadata/generated/schema/entity/applications/configuration/internal/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/dataInsightsAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/dataInsightsReportAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/internal/searchIndexingAppConfig.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/external/__init__.py +1 -1
- metadata/generated/schema/entity/applications/configuration/private/external/metaPilotAppPrivateConfig.py +1 -1
- metadata/generated/schema/entity/applications/createAppRequest.py +2 -2
- metadata/generated/schema/entity/applications/jobStatus.py +1 -1
- metadata/generated/schema/entity/applications/liveExecutionContext.py +1 -1
- metadata/generated/schema/entity/applications/marketplace/__init__.py +1 -1
- metadata/generated/schema/entity/applications/marketplace/appMarketPlaceDefinition.py +4 -1
- metadata/generated/schema/entity/applications/marketplace/createAppMarketPlaceDefinitionReq.py +5 -2
- metadata/generated/schema/entity/applications/scheduledExecutionContext.py +1 -1
- metadata/generated/schema/entity/automations/__init__.py +1 -1
- metadata/generated/schema/entity/automations/testServiceConnection.py +1 -1
- metadata/generated/schema/entity/automations/workflow.py +1 -1
- metadata/generated/schema/entity/bot.py +1 -1
- metadata/generated/schema/entity/classification/__init__.py +1 -1
- metadata/generated/schema/entity/classification/classification.py +1 -1
- metadata/generated/schema/entity/classification/tag.py +1 -1
- metadata/generated/schema/entity/data/__init__.py +1 -1
- metadata/generated/schema/entity/data/chart.py +1 -1
- metadata/generated/schema/entity/data/container.py +1 -1
- metadata/generated/schema/entity/data/dashboard.py +1 -1
- metadata/generated/schema/entity/data/dashboardDataModel.py +1 -1
- metadata/generated/schema/entity/data/database.py +1 -1
- metadata/generated/schema/entity/data/databaseSchema.py +1 -1
- metadata/generated/schema/entity/data/glossary.py +1 -1
- metadata/generated/schema/entity/data/glossaryTerm.py +1 -1
- metadata/generated/schema/entity/data/metrics.py +1 -1
- metadata/generated/schema/entity/data/mlmodel.py +1 -1
- metadata/generated/schema/entity/data/pipeline.py +1 -1
- metadata/generated/schema/entity/data/query.py +1 -1
- metadata/generated/schema/entity/data/report.py +1 -1
- metadata/generated/schema/entity/data/searchIndex.py +1 -1
- metadata/generated/schema/entity/data/storedProcedure.py +1 -1
- metadata/generated/schema/entity/data/table.py +1 -1
- metadata/generated/schema/entity/data/topic.py +1 -1
- metadata/generated/schema/entity/docStore/__init__.py +1 -1
- metadata/generated/schema/entity/docStore/document.py +1 -1
- metadata/generated/schema/entity/domains/__init__.py +1 -1
- metadata/generated/schema/entity/domains/dataProduct.py +1 -1
- metadata/generated/schema/entity/domains/domain.py +1 -1
- metadata/generated/schema/entity/events/__init__.py +1 -1
- metadata/generated/schema/entity/events/webhook.py +1 -1
- metadata/generated/schema/entity/feed/__init__.py +1 -1
- metadata/generated/schema/entity/feed/suggestion.py +1 -1
- metadata/generated/schema/entity/feed/thread.py +1 -1
- metadata/generated/schema/entity/policies/__init__.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/__init__.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/resourceDescriptor.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/resourcePermission.py +1 -1
- metadata/generated/schema/entity/policies/accessControl/rule.py +1 -1
- metadata/generated/schema/entity/policies/filters.py +1 -1
- metadata/generated/schema/entity/policies/policy.py +1 -1
- metadata/generated/schema/entity/services/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/common/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslCertPaths.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslCertValues.py +1 -1
- metadata/generated/schema/entity/services/connections/common/sslConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/connectionBasicType.py +6 -1
- metadata/generated/schema/entity/services/connections/dashboard/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/customDashboardConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/domoDashboardConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/lightdashConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/lookerConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/metabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/modeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/mstrConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/powerBIConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/qlikSenseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/quickSightConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/redashConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/supersetConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/dashboard/tableauConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/athenaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/azureSQLConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/bigQueryConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/bigTableConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/clickhouseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/azureConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/basicAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/iamAuthConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/common/jwtAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/database/couchbaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/customDatabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/databricksConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/azureConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/gcsConfig.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalake/s3Config.py +1 -1
- metadata/generated/schema/entity/services/connections/database/datalakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/db2Connection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/deltaLakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/domoDatabaseConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/dorisConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/druidConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/dynamoDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/glueConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/greenplumConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/hiveConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/dynamoDbCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/glueCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/hiveCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/icebergCatalog.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/icebergFileSystem.py +1 -1
- metadata/generated/schema/entity/services/connections/database/iceberg/restCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/icebergConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/impalaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mariaDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mongoDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mssqlConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/mysqlConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/oracleConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/pinotDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/postgresConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/prestoConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/redshiftConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/salesforceConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapHana/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapHana/sapHanaHDBConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapHana/sapHanaSQLConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sapHanaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sasConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/singleStoreConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/snowflakeConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/sqliteConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/trinoConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/unityCatalogConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/database/verticaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/customMessagingConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/kafkaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/kinesisConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/pulsarConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/redpandaConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/messaging/saslMechanismType.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/alationConnection.py +34 -1
- metadata/generated/schema/entity/services/connections/metadata/amundsenConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/atlasConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/metadataESConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/metadata/openMetadataConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/customMlModelConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/mlflowConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/sageMakerConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/mlmodel/sklearnConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/airbyteConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/airflowConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/backendConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/customPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/dagsterConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/databricksPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/domoPipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/fivetranConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/gluePipelineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/nifiConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/sparkConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/pipeline/splineConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/search/customSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/apiAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearch/basicAuth.py +1 -1
- metadata/generated/schema/entity/services/connections/search/elasticSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/search/openSearchConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/serviceConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/__init__.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/adlsConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/customStorageConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/gcsConnection.py +1 -1
- metadata/generated/schema/entity/services/connections/storage/s3Connection.py +1 -1
- metadata/generated/schema/entity/services/connections/testConnectionDefinition.py +1 -1
- metadata/generated/schema/entity/services/connections/testConnectionResult.py +1 -1
- metadata/generated/schema/entity/services/dashboardService.py +1 -1
- metadata/generated/schema/entity/services/databaseService.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/__init__.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/ingestionPipeline.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/pipelineServiceClientResponse.py +1 -1
- metadata/generated/schema/entity/services/ingestionPipelines/status.py +1 -1
- metadata/generated/schema/entity/services/messagingService.py +1 -1
- metadata/generated/schema/entity/services/metadataService.py +1 -1
- metadata/generated/schema/entity/services/mlmodelService.py +1 -1
- metadata/generated/schema/entity/services/pipelineService.py +1 -1
- metadata/generated/schema/entity/services/searchService.py +1 -1
- metadata/generated/schema/entity/services/serviceType.py +1 -1
- metadata/generated/schema/entity/services/storageService.py +1 -1
- metadata/generated/schema/entity/teams/__init__.py +1 -1
- metadata/generated/schema/entity/teams/persona.py +1 -1
- metadata/generated/schema/entity/teams/role.py +1 -1
- metadata/generated/schema/entity/teams/team.py +1 -1
- metadata/generated/schema/entity/teams/teamHierarchy.py +1 -1
- metadata/generated/schema/entity/teams/user.py +1 -1
- metadata/generated/schema/entity/type.py +1 -1
- metadata/generated/schema/entity/utils/__init__.py +1 -1
- metadata/generated/schema/entity/utils/entitiesCount.py +1 -1
- metadata/generated/schema/entity/utils/servicesCount.py +1 -1
- metadata/generated/schema/entity/utils/supersetApiConnection.py +1 -1
- metadata/generated/schema/events/__init__.py +1 -1
- metadata/generated/schema/events/alertMetrics.py +1 -1
- metadata/generated/schema/events/api/__init__.py +1 -1
- metadata/generated/schema/events/api/createEventSubscription.py +1 -1
- metadata/generated/schema/events/emailAlertConfig.py +1 -1
- metadata/generated/schema/events/eventFilterRule.py +1 -1
- metadata/generated/schema/events/eventSubscription.py +1 -1
- metadata/generated/schema/events/eventSubscriptionOffset.py +1 -1
- metadata/generated/schema/events/failedEvent.py +1 -1
- metadata/generated/schema/events/filterResourceDescriptor.py +1 -1
- metadata/generated/schema/events/subscriptionResourceDescriptor.py +1 -1
- metadata/generated/schema/metadataIngestion/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/application.py +1 -1
- metadata/generated/schema/metadataIngestion/applicationPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dashboardServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dataInsightPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceProfilerPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceQueryLineagePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/databaseServiceQueryUsagePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtPipeline.py +9 -11
- metadata/generated/schema/metadataIngestion/dbtconfig/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtAzureConfig.py +7 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtBucketDetails.py +1 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtCloudConfig.py +7 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtGCSConfig.py +7 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtHttpConfig.py +7 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtLocalConfig.py +7 -1
- metadata/generated/schema/metadataIngestion/dbtconfig/dbtS3Config.py +7 -1
- metadata/generated/schema/metadataIngestion/messagingServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/metadataToElasticSearchPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/mlmodelServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/pipelineServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/searchServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/__init__.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/containerMetadataConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/manifestMetadataConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageBucketDetails.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataADLSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataGCSConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataHttpConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataLocalConfig.py +1 -1
- metadata/generated/schema/metadataIngestion/storage/storageMetadataS3Config.py +1 -1
- metadata/generated/schema/metadataIngestion/storageServiceMetadataPipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/testSuitePipeline.py +1 -1
- metadata/generated/schema/metadataIngestion/workflow.py +1 -1
- metadata/generated/schema/monitoring/__init__.py +1 -1
- metadata/generated/schema/monitoring/eventMonitorProvider.py +1 -1
- metadata/generated/schema/security/__init__.py +1 -1
- metadata/generated/schema/security/client/__init__.py +1 -1
- metadata/generated/schema/security/client/auth0SSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/azureSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/customOidcSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/googleSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/oidcClientConfig.py +1 -1
- metadata/generated/schema/security/client/oktaSSOClientConfig.py +1 -1
- metadata/generated/schema/security/client/openMetadataJWTClientConfig.py +1 -1
- metadata/generated/schema/security/client/samlSSOClientConfig.py +1 -1
- metadata/generated/schema/security/credentials/__init__.py +1 -1
- metadata/generated/schema/security/credentials/accessTokenAuth.py +1 -1
- metadata/generated/schema/security/credentials/apiAccessTokenAuth.py +1 -1
- metadata/generated/schema/security/credentials/awsCredentials.py +1 -1
- metadata/generated/schema/security/credentials/azureCredentials.py +1 -1
- metadata/generated/schema/security/credentials/basicAuth.py +1 -1
- metadata/generated/schema/security/credentials/bitbucketCredentials.py +1 -1
- metadata/generated/schema/security/credentials/gcpCredentials.py +1 -1
- metadata/generated/schema/security/credentials/gcpExternalAccount.py +1 -1
- metadata/generated/schema/security/credentials/gcpValues.py +1 -1
- metadata/generated/schema/security/credentials/gitCredentials.py +1 -1
- metadata/generated/schema/security/credentials/githubCredentials.py +1 -1
- metadata/generated/schema/security/secrets/__init__.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerClientLoader.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerConfiguration.py +1 -1
- metadata/generated/schema/security/secrets/secretsManagerProvider.py +1 -1
- metadata/generated/schema/security/securityConfiguration.py +1 -1
- metadata/generated/schema/security/ssl/__init__.py +1 -1
- metadata/generated/schema/security/ssl/validateSSLClientConfig.py +1 -1
- metadata/generated/schema/security/ssl/verifySSLConfig.py +1 -1
- metadata/generated/schema/settings/__init__.py +1 -1
- metadata/generated/schema/settings/settings.py +1 -1
- metadata/generated/schema/system/__init__.py +1 -1
- metadata/generated/schema/system/entityError.py +1 -1
- metadata/generated/schema/system/eventPublisherJob.py +1 -1
- metadata/generated/schema/system/indexingError.py +1 -1
- metadata/generated/schema/system/ui/__init__.py +1 -1
- metadata/generated/schema/system/ui/knowledgePanel.py +1 -1
- metadata/generated/schema/system/ui/page.py +1 -1
- metadata/generated/schema/system/validationResponse.py +1 -1
- metadata/generated/schema/tests/__init__.py +1 -1
- metadata/generated/schema/tests/assigned.py +1 -1
- metadata/generated/schema/tests/basic.py +1 -1
- metadata/generated/schema/tests/customMetric.py +1 -1
- metadata/generated/schema/tests/resolved.py +1 -1
- metadata/generated/schema/tests/testCase.py +1 -1
- metadata/generated/schema/tests/testCaseResolutionStatus.py +1 -1
- metadata/generated/schema/tests/testDefinition.py +1 -1
- metadata/generated/schema/tests/testSuite.py +1 -1
- metadata/generated/schema/type/__init__.py +1 -1
- metadata/generated/schema/type/auditLog.py +1 -1
- metadata/generated/schema/type/basic.py +1 -1
- metadata/generated/schema/type/bulkOperationResult.py +1 -1
- metadata/generated/schema/type/changeEvent.py +1 -1
- metadata/generated/schema/type/changeEventType.py +1 -1
- metadata/generated/schema/type/collectionDescriptor.py +1 -1
- metadata/generated/schema/type/csvDocumentation.py +1 -1
- metadata/generated/schema/type/csvErrorType.py +1 -1
- metadata/generated/schema/type/csvFile.py +1 -1
- metadata/generated/schema/type/csvImportResult.py +1 -1
- metadata/generated/schema/type/customProperties/__init__.py +1 -1
- metadata/generated/schema/type/customProperties/enumConfig.py +1 -1
- metadata/generated/schema/type/customProperty.py +1 -1
- metadata/generated/schema/type/dailyCount.py +1 -1
- metadata/generated/schema/type/databaseConnectionConfig.py +1 -1
- metadata/generated/schema/type/entityHistory.py +1 -1
- metadata/generated/schema/type/entityLineage.py +1 -1
- metadata/generated/schema/type/entityReference.py +1 -1
- metadata/generated/schema/type/entityReferenceList.py +1 -1
- metadata/generated/schema/type/entityRelationship.py +1 -1
- metadata/generated/schema/type/entityUsage.py +1 -1
- metadata/generated/schema/type/filterPattern.py +1 -1
- metadata/generated/schema/type/function.py +1 -1
- metadata/generated/schema/type/include.py +1 -1
- metadata/generated/schema/type/jdbcConnection.py +1 -1
- metadata/generated/schema/type/lifeCycle.py +1 -1
- metadata/generated/schema/type/paging.py +1 -1
- metadata/generated/schema/type/profile.py +1 -1
- metadata/generated/schema/type/queryParserData.py +1 -1
- metadata/generated/schema/type/reaction.py +1 -1
- metadata/generated/schema/type/schedule.py +1 -1
- metadata/generated/schema/type/schema.py +1 -1
- metadata/generated/schema/type/tableQuery.py +1 -1
- metadata/generated/schema/type/tableUsageCount.py +1 -1
- metadata/generated/schema/type/tagLabel.py +1 -1
- metadata/generated/schema/type/usageDetails.py +1 -1
- metadata/generated/schema/type/usageRequest.py +1 -1
- metadata/generated/schema/type/votes.py +1 -1
- metadata/ingestion/api/parser.py +87 -23
- metadata/ingestion/api/topology_runner.py +4 -3
- metadata/ingestion/lineage/parser.py +35 -23
- metadata/ingestion/ometa/routes.py +15 -0
- metadata/ingestion/source/database/datalake/metadata.py +3 -2
- metadata/ingestion/source/database/dbt/dbt_config.py +1 -1
- metadata/ingestion/source/database/oracle/connection.py +5 -0
- metadata/ingestion/source/database/oracle/queries.py +1 -1
- metadata/ingestion/source/database/stored_procedures_mixin.py +1 -1
- metadata/ingestion/source/database/unitycatalog/connection.py +12 -8
- metadata/ingestion/source/pipeline/dagster/metadata.py +3 -1
- metadata/ingestion/source/storage/storage_service.py +5 -2
- metadata/parsers/json_schema_parser.py +17 -7
- metadata/profiler/interface/profiler_interface.py +4 -4
- metadata/profiler/processor/sample_data_handler.py +45 -8
- metadata/readers/dataframe/json.py +11 -6
- metadata/readers/dataframe/models.py +1 -0
- metadata/utils/datalake/datalake_utils.py +34 -4
- metadata/utils/source_hash.py +23 -13
- {openmetadata_ingestion-1.3.2.0rc1.dist-info → openmetadata_ingestion-1.3.2.0rc3.dist-info}/METADATA +295 -295
- {openmetadata_ingestion-1.3.2.0rc1.dist-info → openmetadata_ingestion-1.3.2.0rc3.dist-info}/RECORD +540 -540
- {openmetadata_ingestion-1.3.2.0rc1.dist-info → openmetadata_ingestion-1.3.2.0rc3.dist-info}/LICENSE +0 -0
- {openmetadata_ingestion-1.3.2.0rc1.dist-info → openmetadata_ingestion-1.3.2.0rc3.dist-info}/WHEEL +0 -0
- {openmetadata_ingestion-1.3.2.0rc1.dist-info → openmetadata_ingestion-1.3.2.0rc3.dist-info}/entry_points.txt +0 -0
- {openmetadata_ingestion-1.3.2.0rc1.dist-info → openmetadata_ingestion-1.3.2.0rc3.dist-info}/top_level.txt +0 -0
metadata/ingestion/api/parser.py
CHANGED
|
@@ -69,6 +69,28 @@ from metadata.generated.schema.metadataIngestion.databaseServiceQueryUsagePipeli
|
|
|
69
69
|
DatabaseServiceQueryUsagePipeline,
|
|
70
70
|
DatabaseUsageConfigType,
|
|
71
71
|
)
|
|
72
|
+
from metadata.generated.schema.metadataIngestion.dbtconfig.dbtAzureConfig import (
|
|
73
|
+
DbtAzureConfig,
|
|
74
|
+
)
|
|
75
|
+
from metadata.generated.schema.metadataIngestion.dbtconfig.dbtCloudConfig import (
|
|
76
|
+
DbtCloudConfig,
|
|
77
|
+
)
|
|
78
|
+
from metadata.generated.schema.metadataIngestion.dbtconfig.dbtGCSConfig import (
|
|
79
|
+
DbtGcsConfig,
|
|
80
|
+
)
|
|
81
|
+
from metadata.generated.schema.metadataIngestion.dbtconfig.dbtHttpConfig import (
|
|
82
|
+
DbtHttpConfig,
|
|
83
|
+
)
|
|
84
|
+
from metadata.generated.schema.metadataIngestion.dbtconfig.dbtLocalConfig import (
|
|
85
|
+
DbtLocalConfig,
|
|
86
|
+
)
|
|
87
|
+
from metadata.generated.schema.metadataIngestion.dbtconfig.dbtS3Config import (
|
|
88
|
+
DbtS3Config,
|
|
89
|
+
)
|
|
90
|
+
from metadata.generated.schema.metadataIngestion.dbtPipeline import (
|
|
91
|
+
DbtConfigType,
|
|
92
|
+
DbtPipeline,
|
|
93
|
+
)
|
|
72
94
|
from metadata.generated.schema.metadataIngestion.messagingServiceMetadataPipeline import (
|
|
73
95
|
MessagingMetadataConfigType,
|
|
74
96
|
MessagingServiceMetadataPipeline,
|
|
@@ -125,6 +147,16 @@ SOURCE_CONFIG_CLASS_MAP = {
|
|
|
125
147
|
DatabaseMetadataConfigType.DatabaseMetadata.value: DatabaseServiceMetadataPipeline,
|
|
126
148
|
StorageMetadataConfigType.StorageMetadata.value: StorageServiceMetadataPipeline,
|
|
127
149
|
SearchMetadataConfigType.SearchMetadata.value: SearchServiceMetadataPipeline,
|
|
150
|
+
DbtConfigType.DBT.value: DbtPipeline,
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
DBT_CONFIG_TYPE_MAP = {
|
|
154
|
+
"cloud": DbtCloudConfig,
|
|
155
|
+
"local": DbtLocalConfig,
|
|
156
|
+
"http": DbtHttpConfig,
|
|
157
|
+
"s3": DbtS3Config,
|
|
158
|
+
"gcs": DbtGcsConfig,
|
|
159
|
+
"azure": DbtAzureConfig,
|
|
128
160
|
}
|
|
129
161
|
|
|
130
162
|
|
|
@@ -171,6 +203,7 @@ def get_source_config_class(
|
|
|
171
203
|
Type[PipelineServiceMetadataPipeline],
|
|
172
204
|
Type[MlModelServiceMetadataPipeline],
|
|
173
205
|
Type[DatabaseServiceMetadataPipeline],
|
|
206
|
+
Type[DbtPipeline],
|
|
174
207
|
]:
|
|
175
208
|
"""
|
|
176
209
|
Return the source config type for a source string
|
|
@@ -179,7 +212,7 @@ def get_source_config_class(
|
|
|
179
212
|
"""
|
|
180
213
|
source_config_class = SOURCE_CONFIG_CLASS_MAP.get(source_config_type)
|
|
181
214
|
|
|
182
|
-
if
|
|
215
|
+
if source_config_class:
|
|
183
216
|
return source_config_class
|
|
184
217
|
|
|
185
218
|
raise ValueError(f"Cannot find the service type of {source_config_type}")
|
|
@@ -266,6 +299,27 @@ def _unsafe_parse_config(config: dict, cls: Type[T], message: str) -> None:
|
|
|
266
299
|
raise err
|
|
267
300
|
|
|
268
301
|
|
|
302
|
+
def _unsafe_parse_dbt_config(config: dict, cls: Type[T], message: str) -> None:
|
|
303
|
+
"""
|
|
304
|
+
Given a config dictionary and the class it should match,
|
|
305
|
+
try to parse it or log the given message
|
|
306
|
+
"""
|
|
307
|
+
logger.debug(f"Parsing message: [{message}]")
|
|
308
|
+
try:
|
|
309
|
+
# Parse the oneOf config types of dbt to check
|
|
310
|
+
dbt_config_type = config["dbtConfigSource"]["dbtConfigType"]
|
|
311
|
+
dbt_config_class = DBT_CONFIG_TYPE_MAP.get(dbt_config_type)
|
|
312
|
+
dbt_config_class.parse_obj(config["dbtConfigSource"])
|
|
313
|
+
|
|
314
|
+
# Parse the entire dbtPipeline object
|
|
315
|
+
cls.parse_obj(config)
|
|
316
|
+
except ValidationError as err:
|
|
317
|
+
logger.debug(
|
|
318
|
+
f"The supported properties for {cls.__name__} are {list(cls.__fields__.keys())}"
|
|
319
|
+
)
|
|
320
|
+
raise err
|
|
321
|
+
|
|
322
|
+
|
|
269
323
|
def _parse_inner_connection(config_dict: dict, source_type: str) -> None:
|
|
270
324
|
"""
|
|
271
325
|
Parse the inner connection of the flagged connectors
|
|
@@ -291,32 +345,35 @@ def parse_service_connection(config_dict: dict) -> None:
|
|
|
291
345
|
:param config_dict: JSON configuration
|
|
292
346
|
"""
|
|
293
347
|
# Unsafe access to the keys. Allow a KeyError if the config is not well formatted
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
348
|
+
if config_dict["source"].get("serviceConnection"):
|
|
349
|
+
source_type = config_dict["source"]["serviceConnection"]["config"].get("type")
|
|
350
|
+
if source_type is None:
|
|
351
|
+
raise InvalidWorkflowException(
|
|
352
|
+
"Missing type in the serviceConnection config"
|
|
353
|
+
)
|
|
297
354
|
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
355
|
+
logger.debug(
|
|
356
|
+
f"Error parsing the Workflow Configuration for {source_type} ingestion"
|
|
357
|
+
)
|
|
301
358
|
|
|
302
|
-
|
|
303
|
-
|
|
359
|
+
service_type = get_service_type(source_type)
|
|
360
|
+
connection_class = get_connection_class(source_type, service_type)
|
|
304
361
|
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
362
|
+
if source_type in HAS_INNER_CONNECTION:
|
|
363
|
+
# We will first parse the inner `connection` configuration
|
|
364
|
+
_parse_inner_connection(
|
|
365
|
+
config_dict["source"]["serviceConnection"]["config"]["connection"][
|
|
366
|
+
"config"
|
|
367
|
+
]["connection"],
|
|
368
|
+
source_type,
|
|
369
|
+
)
|
|
313
370
|
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
371
|
+
# Parse the service connection dictionary with the scoped class
|
|
372
|
+
_unsafe_parse_config(
|
|
373
|
+
config=config_dict["source"]["serviceConnection"]["config"],
|
|
374
|
+
cls=connection_class,
|
|
375
|
+
message="Error parsing the service connection",
|
|
376
|
+
)
|
|
320
377
|
|
|
321
378
|
|
|
322
379
|
def parse_source_config(config_dict: dict) -> None:
|
|
@@ -334,6 +391,13 @@ def parse_source_config(config_dict: dict) -> None:
|
|
|
334
391
|
|
|
335
392
|
source_config_class = get_source_config_class(source_config_type)
|
|
336
393
|
|
|
394
|
+
if source_config_class == DbtPipeline:
|
|
395
|
+
_unsafe_parse_dbt_config(
|
|
396
|
+
config=config_dict["source"]["sourceConfig"]["config"],
|
|
397
|
+
cls=source_config_class,
|
|
398
|
+
message="Error parsing the dbt source config",
|
|
399
|
+
)
|
|
400
|
+
|
|
337
401
|
_unsafe_parse_config(
|
|
338
402
|
config=config_dict["source"]["sourceConfig"]["config"],
|
|
339
403
|
cls=source_config_class,
|
|
@@ -265,11 +265,12 @@ class TopologyRunnerMixin(Generic[C]):
|
|
|
265
265
|
if entity:
|
|
266
266
|
same_fingerprint = True
|
|
267
267
|
|
|
268
|
-
create_entity_request_hash =
|
|
269
|
-
create_request=entity_request.right,
|
|
270
|
-
)
|
|
268
|
+
create_entity_request_hash = None
|
|
271
269
|
|
|
272
270
|
if hasattr(entity_request.right, "sourceHash"):
|
|
271
|
+
create_entity_request_hash = generate_source_hash(
|
|
272
|
+
create_request=entity_request.right,
|
|
273
|
+
)
|
|
273
274
|
entity_request.right.sourceHash = create_entity_request_hash
|
|
274
275
|
|
|
275
276
|
if entity is None and stage.use_cache:
|
|
@@ -217,6 +217,11 @@ class LineageParser:
|
|
|
217
217
|
"""
|
|
218
218
|
aliases = self.table_aliases
|
|
219
219
|
values = identifier.value.split(".")
|
|
220
|
+
|
|
221
|
+
if len(values) > 4:
|
|
222
|
+
logger.debug(f"Invalid comparison element from identifier: {identifier}")
|
|
223
|
+
return None, None
|
|
224
|
+
|
|
220
225
|
database_name, schema_name, table_or_alias, column_name = (
|
|
221
226
|
[None] * (4 - len(values))
|
|
222
227
|
) + values
|
|
@@ -307,32 +312,39 @@ class LineageParser:
|
|
|
307
312
|
comparisons.append(sub)
|
|
308
313
|
|
|
309
314
|
for comparison in comparisons:
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
f"Can't extract table names when parsing JOIN information from {comparison}"
|
|
315
|
+
try:
|
|
316
|
+
if (
|
|
317
|
+
"." not in comparison.left.value
|
|
318
|
+
or "." not in comparison.right.value
|
|
319
|
+
):
|
|
320
|
+
logger.debug(f"Ignoring comparison {comparison}")
|
|
321
|
+
continue
|
|
322
|
+
|
|
323
|
+
table_left, column_left = self.get_comparison_elements(
|
|
324
|
+
identifier=comparison.left
|
|
325
|
+
)
|
|
326
|
+
table_right, column_right = self.get_comparison_elements(
|
|
327
|
+
identifier=comparison.right
|
|
324
328
|
)
|
|
325
|
-
logger.debug(f"Query: {sql_statement}")
|
|
326
|
-
continue
|
|
327
329
|
|
|
328
|
-
|
|
329
|
-
|
|
330
|
+
if not table_left or not table_right:
|
|
331
|
+
logger.warning(
|
|
332
|
+
f"Can't extract table names when parsing JOIN information from {comparison}"
|
|
333
|
+
)
|
|
334
|
+
logger.debug(f"Query: {sql_statement}")
|
|
335
|
+
continue
|
|
336
|
+
|
|
337
|
+
left_table_column = TableColumn(table=table_left, column=column_left)
|
|
338
|
+
right_table_column = TableColumn(table=table_right, column=column_right)
|
|
330
339
|
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
340
|
+
# We just send the info once, from Left -> Right.
|
|
341
|
+
# The backend will prepare the symmetric information.
|
|
342
|
+
self.stateful_add_table_joins(
|
|
343
|
+
join_data, left_table_column, right_table_column
|
|
344
|
+
)
|
|
345
|
+
except Exception as exc:
|
|
346
|
+
logger.debug(f"Cannot process comparison {comparison}: {exc}")
|
|
347
|
+
logger.debug(traceback.format_exc())
|
|
336
348
|
|
|
337
349
|
@cached_property
|
|
338
350
|
def table_joins(self) -> Dict[str, List[TableColumnJoin]]:
|
|
@@ -90,6 +90,16 @@ from metadata.generated.schema.api.tests.createTestDefinition import (
|
|
|
90
90
|
from metadata.generated.schema.api.tests.createTestSuite import CreateTestSuiteRequest
|
|
91
91
|
from metadata.generated.schema.dataInsight.dataInsightChart import DataInsightChart
|
|
92
92
|
from metadata.generated.schema.dataInsight.kpi.kpi import Kpi
|
|
93
|
+
from metadata.generated.schema.entity.applications.app import App
|
|
94
|
+
from metadata.generated.schema.entity.applications.createAppRequest import (
|
|
95
|
+
CreateAppRequest,
|
|
96
|
+
)
|
|
97
|
+
from metadata.generated.schema.entity.applications.marketplace.appMarketPlaceDefinition import (
|
|
98
|
+
AppMarketPlaceDefinition,
|
|
99
|
+
)
|
|
100
|
+
from metadata.generated.schema.entity.applications.marketplace.createAppMarketPlaceDefinitionReq import (
|
|
101
|
+
CreateAppMarketPlaceDefinitionRequest,
|
|
102
|
+
)
|
|
93
103
|
from metadata.generated.schema.entity.automations.workflow import Workflow
|
|
94
104
|
from metadata.generated.schema.entity.bot import Bot
|
|
95
105
|
from metadata.generated.schema.entity.classification.classification import (
|
|
@@ -232,4 +242,9 @@ ROUTES = {
|
|
|
232
242
|
# Suggestions
|
|
233
243
|
Suggestion.__name__: "/suggestions",
|
|
234
244
|
CreateSuggestionRequest.__name__: "/suggestions",
|
|
245
|
+
# Apps
|
|
246
|
+
App.__name__: "/apps",
|
|
247
|
+
CreateAppRequest.__name__: "/apps",
|
|
248
|
+
AppMarketPlaceDefinition.__name__: "/apps/marketplace",
|
|
249
|
+
CreateAppMarketPlaceDefinitionRequest.__name__: "/apps/marketplace",
|
|
235
250
|
}
|
|
@@ -407,7 +407,7 @@ class DatalakeSource(DatabaseServiceSource):
|
|
|
407
407
|
schema_name = self.context.database_schema
|
|
408
408
|
try:
|
|
409
409
|
table_constraints = None
|
|
410
|
-
data_frame = fetch_dataframe(
|
|
410
|
+
data_frame, raw_data = fetch_dataframe(
|
|
411
411
|
config_source=self.config_source,
|
|
412
412
|
client=self.client,
|
|
413
413
|
file_fqn=DatalakeTableSchemaWrapper(
|
|
@@ -415,10 +415,11 @@ class DatalakeSource(DatabaseServiceSource):
|
|
|
415
415
|
bucket_name=schema_name,
|
|
416
416
|
file_extension=table_extension,
|
|
417
417
|
),
|
|
418
|
+
fetch_raw_data=True,
|
|
418
419
|
)
|
|
419
420
|
if data_frame:
|
|
420
421
|
column_parser = DataFrameColumnParser.create(
|
|
421
|
-
data_frame[0], table_extension
|
|
422
|
+
data_frame[0], table_extension, raw_data=raw_data
|
|
422
423
|
)
|
|
423
424
|
columns = column_parser.get_columns()
|
|
424
425
|
else:
|
|
@@ -173,7 +173,7 @@ def _(config: DbtCloudConfig): # pylint: disable=too-many-locals
|
|
|
173
173
|
params_data["job_definition_id"] = job_id
|
|
174
174
|
|
|
175
175
|
response = client.get(f"/accounts/{account_id}/runs", data=params_data)
|
|
176
|
-
if not response
|
|
176
|
+
if not response or not response.get("data"):
|
|
177
177
|
raise DBTConfigException(
|
|
178
178
|
"Unable to get the dbt job runs information.\n"
|
|
179
179
|
"Please check if the auth token is correct and has the necessary scopes to fetch dbt runs"
|
|
@@ -38,6 +38,7 @@ from metadata.ingestion.connections.builders import (
|
|
|
38
38
|
)
|
|
39
39
|
from metadata.ingestion.connections.test_connections import test_connection_db_common
|
|
40
40
|
from metadata.ingestion.ometa.ometa_api import OpenMetadata
|
|
41
|
+
from metadata.ingestion.source.database.oracle.queries import CHECK_ACCESS_TO_DBA
|
|
41
42
|
from metadata.utils.logger import ingestion_logger
|
|
42
43
|
|
|
43
44
|
CX_ORACLE_LIB_VERSION = "8.3.0"
|
|
@@ -136,9 +137,13 @@ def test_connection(
|
|
|
136
137
|
Test connection. This can be executed either as part
|
|
137
138
|
of a metadata workflow or during an Automation Workflow
|
|
138
139
|
"""
|
|
140
|
+
|
|
141
|
+
test_conn_queries = {"CheckAccess": CHECK_ACCESS_TO_DBA}
|
|
142
|
+
|
|
139
143
|
test_connection_db_common(
|
|
140
144
|
metadata=metadata,
|
|
141
145
|
engine=engine,
|
|
142
146
|
service_connection=service_connection,
|
|
143
147
|
automation_workflow=automation_workflow,
|
|
148
|
+
queries=test_conn_queries,
|
|
144
149
|
)
|
|
@@ -138,7 +138,7 @@ class StoredProcedureMixin(ABC):
|
|
|
138
138
|
return True
|
|
139
139
|
|
|
140
140
|
if query_type == "INSERT" and re.search(
|
|
141
|
-
"^.*insert.*into.*select.*$", query_text, re.IGNORECASE
|
|
141
|
+
"^.*insert.*into.*select.*$", query_text.replace("\n", " "), re.IGNORECASE
|
|
142
142
|
):
|
|
143
143
|
return True
|
|
144
144
|
|
|
@@ -68,16 +68,20 @@ def test_connection(
|
|
|
68
68
|
break
|
|
69
69
|
|
|
70
70
|
def get_schemas(connection: WorkspaceClient, table_obj: DatabricksTable):
|
|
71
|
-
for
|
|
72
|
-
|
|
73
|
-
|
|
71
|
+
for catalog in connection.catalogs.list():
|
|
72
|
+
for schema in connection.schemas.list(catalog_name=catalog.name):
|
|
73
|
+
if schema.name:
|
|
74
|
+
table_obj.schema_name = schema.name
|
|
75
|
+
table_obj.catalog_name = catalog.name
|
|
76
|
+
return
|
|
74
77
|
|
|
75
78
|
def get_tables(connection: WorkspaceClient, table_obj: DatabricksTable):
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
79
|
+
if table_obj.catalog_name and table_obj.schema_name:
|
|
80
|
+
for table in connection.tables.list(
|
|
81
|
+
catalog_name=table_obj.catalog_name, schema_name=table_obj.schema_name
|
|
82
|
+
):
|
|
83
|
+
table_obj.name = table.name
|
|
84
|
+
break
|
|
81
85
|
|
|
82
86
|
test_fn = {
|
|
83
87
|
"CheckAccess": connection.catalogs.list,
|
|
@@ -214,7 +214,9 @@ class DagsterSource(PipelineServiceSource):
|
|
|
214
214
|
service_name=self.context.pipeline_service,
|
|
215
215
|
pipeline_name=self.context.pipeline,
|
|
216
216
|
)
|
|
217
|
-
pipeline_entity = self.metadata.get_by_name(
|
|
217
|
+
pipeline_entity = self.metadata.get_by_name(
|
|
218
|
+
entity=Pipeline, fqn=pipeline_fqn, fields=["tasks"]
|
|
219
|
+
)
|
|
218
220
|
for task in pipeline_entity.tasks or []:
|
|
219
221
|
try:
|
|
220
222
|
runs = self.client.get_task_runs(
|
|
@@ -260,7 +260,7 @@ class StorageServiceSource(TopologyRunnerMixin, Source, ABC):
|
|
|
260
260
|
metadata_entry: MetadataEntry,
|
|
261
261
|
) -> List[Column]:
|
|
262
262
|
"""Extract Column related metadata from s3"""
|
|
263
|
-
data_structure_details = fetch_dataframe(
|
|
263
|
+
data_structure_details, raw_data = fetch_dataframe(
|
|
264
264
|
config_source=config_source,
|
|
265
265
|
client=client,
|
|
266
266
|
file_fqn=DatalakeTableSchemaWrapper(
|
|
@@ -269,10 +269,13 @@ class StorageServiceSource(TopologyRunnerMixin, Source, ABC):
|
|
|
269
269
|
file_extension=SupportedTypes(metadata_entry.structureFormat),
|
|
270
270
|
separator=metadata_entry.separator,
|
|
271
271
|
),
|
|
272
|
+
fetch_raw_data=True,
|
|
272
273
|
)
|
|
273
274
|
columns = []
|
|
274
275
|
column_parser = DataFrameColumnParser.create(
|
|
275
|
-
data_structure_details,
|
|
276
|
+
data_structure_details,
|
|
277
|
+
SupportedTypes(metadata_entry.structureFormat),
|
|
278
|
+
raw_data=raw_data,
|
|
276
279
|
)
|
|
277
280
|
columns = column_parser.get_columns()
|
|
278
281
|
return columns
|
|
@@ -18,6 +18,8 @@ import traceback
|
|
|
18
18
|
from enum import Enum
|
|
19
19
|
from typing import List, Optional
|
|
20
20
|
|
|
21
|
+
from pydantic.main import ModelMetaclass
|
|
22
|
+
|
|
21
23
|
from metadata.generated.schema.type.schema import FieldModel
|
|
22
24
|
from metadata.utils.logger import ingestion_logger
|
|
23
25
|
|
|
@@ -36,20 +38,25 @@ class JsonSchemaDataTypes(Enum):
|
|
|
36
38
|
NULL = "null"
|
|
37
39
|
RECORD = "object"
|
|
38
40
|
ARRAY = "array"
|
|
41
|
+
UNKNOWN = "unknown"
|
|
39
42
|
|
|
40
43
|
|
|
41
|
-
def parse_json_schema(
|
|
44
|
+
def parse_json_schema(
|
|
45
|
+
schema_text: str, cls: ModelMetaclass = FieldModel
|
|
46
|
+
) -> Optional[List[FieldModel]]:
|
|
42
47
|
"""
|
|
43
48
|
Method to parse the jsonschema
|
|
44
49
|
"""
|
|
45
50
|
try:
|
|
46
51
|
json_schema_data = json.loads(schema_text)
|
|
47
52
|
field_models = [
|
|
48
|
-
|
|
53
|
+
cls(
|
|
49
54
|
name=json_schema_data.get("title", "default"),
|
|
50
55
|
dataType=JsonSchemaDataTypes(json_schema_data.get("type")).name,
|
|
51
56
|
description=json_schema_data.get("description"),
|
|
52
|
-
children=get_json_schema_fields(
|
|
57
|
+
children=get_json_schema_fields(
|
|
58
|
+
json_schema_data.get("properties", {}), cls=cls
|
|
59
|
+
),
|
|
53
60
|
)
|
|
54
61
|
]
|
|
55
62
|
return field_models
|
|
@@ -59,7 +66,9 @@ def parse_json_schema(schema_text: str) -> Optional[List[FieldModel]]:
|
|
|
59
66
|
return None
|
|
60
67
|
|
|
61
68
|
|
|
62
|
-
def get_json_schema_fields(
|
|
69
|
+
def get_json_schema_fields(
|
|
70
|
+
properties, cls: ModelMetaclass = FieldModel
|
|
71
|
+
) -> Optional[List[FieldModel]]:
|
|
63
72
|
"""
|
|
64
73
|
Recursively convert the parsed schema into required models
|
|
65
74
|
"""
|
|
@@ -67,9 +76,10 @@ def get_json_schema_fields(properties) -> Optional[List[FieldModel]]:
|
|
|
67
76
|
for key, value in properties.items():
|
|
68
77
|
try:
|
|
69
78
|
field_models.append(
|
|
70
|
-
|
|
71
|
-
name=
|
|
72
|
-
|
|
79
|
+
cls(
|
|
80
|
+
name=key,
|
|
81
|
+
displayName=value.get("title"),
|
|
82
|
+
dataType=JsonSchemaDataTypes(value.get("type", "unknown")).name,
|
|
73
83
|
description=value.get("description"),
|
|
74
84
|
children=get_json_schema_fields(value.get("properties"))
|
|
75
85
|
if value.get("type") == "object"
|
|
@@ -33,7 +33,7 @@ from metadata.generated.schema.entity.data.table import (
|
|
|
33
33
|
TableData,
|
|
34
34
|
)
|
|
35
35
|
from metadata.generated.schema.entity.services.connections.connectionBasicType import (
|
|
36
|
-
|
|
36
|
+
DataStorageConfig,
|
|
37
37
|
)
|
|
38
38
|
from metadata.generated.schema.entity.services.connections.database.datalakeConnection import (
|
|
39
39
|
DatalakeConnection,
|
|
@@ -93,7 +93,7 @@ class ProfilerInterface(ABC):
|
|
|
93
93
|
service_connection_config: Union[DatabaseConnection, DatalakeConnection],
|
|
94
94
|
ometa_client: OpenMetadata,
|
|
95
95
|
entity: Table,
|
|
96
|
-
storage_config:
|
|
96
|
+
storage_config: DataStorageConfig,
|
|
97
97
|
profile_sample_config: Optional[ProfileSampleConfig],
|
|
98
98
|
source_config: DatabaseServiceProfilerPipeline,
|
|
99
99
|
sample_query: Optional[str],
|
|
@@ -248,7 +248,7 @@ class ProfilerInterface(ABC):
|
|
|
248
248
|
DatabaseProfilerConfig,
|
|
249
249
|
DatabaseAndSchemaConfig,
|
|
250
250
|
]
|
|
251
|
-
):
|
|
251
|
+
) -> Optional[DataStorageConfig]:
|
|
252
252
|
if (
|
|
253
253
|
config
|
|
254
254
|
and config.sampleDataStorageConfig
|
|
@@ -264,7 +264,7 @@ class ProfilerInterface(ABC):
|
|
|
264
264
|
database_profiler_config: Optional[DatabaseProfilerConfig],
|
|
265
265
|
db_service: Optional[DatabaseService],
|
|
266
266
|
profiler_config: ProfilerProcessorConfig,
|
|
267
|
-
) -> Optional[
|
|
267
|
+
) -> Optional[DataStorageConfig]:
|
|
268
268
|
"""Get config for a specific entity
|
|
269
269
|
|
|
270
270
|
Args:
|
|
@@ -17,8 +17,13 @@ from datetime import datetime
|
|
|
17
17
|
from functools import singledispatch
|
|
18
18
|
from io import BytesIO
|
|
19
19
|
|
|
20
|
+
from pydantic.json import ENCODERS_BY_TYPE
|
|
21
|
+
|
|
20
22
|
from metadata.clients.aws_client import AWSClient
|
|
21
23
|
from metadata.generated.schema.entity.data.table import Table, TableData
|
|
24
|
+
from metadata.generated.schema.entity.services.connections.connectionBasicType import (
|
|
25
|
+
DataStorageConfig,
|
|
26
|
+
)
|
|
22
27
|
from metadata.generated.schema.security.credentials.awsCredentials import AWSCredentials
|
|
23
28
|
from metadata.profiler.interface.profiler_interface import ProfilerInterface
|
|
24
29
|
from metadata.utils.helpers import clean_uri
|
|
@@ -27,15 +32,45 @@ from metadata.utils.logger import profiler_logger
|
|
|
27
32
|
logger = profiler_logger()
|
|
28
33
|
|
|
29
34
|
|
|
30
|
-
|
|
35
|
+
class PathPatternException(Exception):
|
|
36
|
+
"""
|
|
37
|
+
Exception class need to validate the file path pattern
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def validate_path_pattern(file_path_format: str) -> None:
|
|
42
|
+
if not (
|
|
43
|
+
"{service_name}" in file_path_format
|
|
44
|
+
and "{database_name}" in file_path_format
|
|
45
|
+
and "{database_schema_name}" in file_path_format
|
|
46
|
+
and "{table_name}" in file_path_format
|
|
47
|
+
and file_path_format.endswith(".parquet")
|
|
48
|
+
):
|
|
49
|
+
raise PathPatternException(
|
|
50
|
+
"Please provide a valid path pattern, "
|
|
51
|
+
"the pattern should include these components {service_name}, "
|
|
52
|
+
"{database_name}, {database_schema_name}, {table_name} and "
|
|
53
|
+
"it should end with extension .parquet"
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _get_object_key(
|
|
58
|
+
table: Table, prefix: str, overwrite_data: bool, file_path_format: str
|
|
59
|
+
) -> str:
|
|
60
|
+
validate_path_pattern(file_path_format)
|
|
61
|
+
file_name = file_path_format.format(
|
|
62
|
+
service_name=table.service.name,
|
|
63
|
+
database_name=table.database.name,
|
|
64
|
+
database_schema_name=table.databaseSchema.name,
|
|
65
|
+
table_name=table.name.__root__,
|
|
66
|
+
)
|
|
31
67
|
if not overwrite_data:
|
|
32
|
-
file_name =
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
path = str(table.fullyQualifiedName.__root__).replace(".", "/")
|
|
68
|
+
file_name = file_name.replace(
|
|
69
|
+
".parquet", f"_{datetime.now().strftime('%Y_%m_%d')}.parquet"
|
|
70
|
+
)
|
|
36
71
|
if prefix:
|
|
37
|
-
return f"{clean_uri(prefix)}/{
|
|
38
|
-
return
|
|
72
|
+
return f"{clean_uri(prefix)}/{file_name}"
|
|
73
|
+
return file_name
|
|
39
74
|
|
|
40
75
|
|
|
41
76
|
def upload_sample_data(data: TableData, profiler_interface: ProfilerInterface) -> None:
|
|
@@ -45,9 +80,10 @@ def upload_sample_data(data: TableData, profiler_interface: ProfilerInterface) -
|
|
|
45
80
|
import pandas as pd # pylint: disable=import-outside-toplevel
|
|
46
81
|
|
|
47
82
|
try:
|
|
48
|
-
sample_storage_config = profiler_interface.storage_config
|
|
83
|
+
sample_storage_config: DataStorageConfig = profiler_interface.storage_config
|
|
49
84
|
if not sample_storage_config:
|
|
50
85
|
return
|
|
86
|
+
ENCODERS_BY_TYPE[bytes] = lambda v: v.decode("utf-8", "ignore")
|
|
51
87
|
deserialized_data = json.loads(data.json())
|
|
52
88
|
df = pd.DataFrame(
|
|
53
89
|
data=deserialized_data.get("rows", []),
|
|
@@ -59,6 +95,7 @@ def upload_sample_data(data: TableData, profiler_interface: ProfilerInterface) -
|
|
|
59
95
|
table=profiler_interface.table_entity,
|
|
60
96
|
prefix=sample_storage_config.prefix,
|
|
61
97
|
overwrite_data=sample_storage_config.overwriteData,
|
|
98
|
+
file_path_format=sample_storage_config.filePathPattern,
|
|
62
99
|
)
|
|
63
100
|
upload_to_storage(
|
|
64
101
|
sample_storage_config.storageConfig,
|
|
@@ -16,7 +16,7 @@ import gzip
|
|
|
16
16
|
import io
|
|
17
17
|
import json
|
|
18
18
|
import zipfile
|
|
19
|
-
from typing import List, Union
|
|
19
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
20
20
|
|
|
21
21
|
from metadata.readers.dataframe.base import DataFrameReader
|
|
22
22
|
from metadata.readers.dataframe.common import dataframe_to_chunks
|
|
@@ -47,7 +47,7 @@ class JSONDataFrameReader(DataFrameReader):
|
|
|
47
47
|
@staticmethod
|
|
48
48
|
def read_from_json(
|
|
49
49
|
key: str, json_text: bytes, decode: bool = False, **__
|
|
50
|
-
) -> List["DataFrame"]:
|
|
50
|
+
) -> Tuple[List["DataFrame"], Optional[Dict[str, Any]]]:
|
|
51
51
|
"""
|
|
52
52
|
Decompress a JSON file (if needed) and read its contents
|
|
53
53
|
as a dataframe.
|
|
@@ -60,20 +60,25 @@ class JSONDataFrameReader(DataFrameReader):
|
|
|
60
60
|
import pandas as pd
|
|
61
61
|
|
|
62
62
|
json_text = _get_json_text(key=key, text=json_text, decode=decode)
|
|
63
|
+
raw_data = None
|
|
63
64
|
try:
|
|
64
65
|
data = json.loads(json_text)
|
|
66
|
+
if isinstance(data, dict) and data.get("$schema"):
|
|
67
|
+
raw_data = json_text
|
|
65
68
|
except json.decoder.JSONDecodeError:
|
|
66
69
|
logger.debug("Failed to read as JSON object. Trying to read as JSON Lines")
|
|
67
70
|
data = [json.loads(json_obj) for json_obj in json_text.strip().split("\n")]
|
|
68
71
|
|
|
69
72
|
# if we get a scalar value (e.g. {"a":"b"}) then we need to specify the index
|
|
70
73
|
data = data if not isinstance(data, dict) else [data]
|
|
71
|
-
return dataframe_to_chunks(pd.DataFrame.from_records(data))
|
|
74
|
+
return dataframe_to_chunks(pd.DataFrame.from_records(data)), raw_data
|
|
72
75
|
|
|
73
76
|
def _read(self, *, key: str, bucket_name: str, **kwargs) -> DatalakeColumnWrapper:
|
|
74
77
|
text = self.reader.read(key, bucket_name=bucket_name)
|
|
78
|
+
dataframes, raw_data = self.read_from_json(
|
|
79
|
+
key=key, json_text=text, decode=True, **kwargs
|
|
80
|
+
)
|
|
75
81
|
return DatalakeColumnWrapper(
|
|
76
|
-
dataframes=
|
|
77
|
-
|
|
78
|
-
)
|
|
82
|
+
dataframes=dataframes,
|
|
83
|
+
raw_data=raw_data,
|
|
79
84
|
)
|