oracle-ads 2.13.9rc0__py3-none-any.whl → 2.13.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/__init__.py +40 -0
- ads/aqua/app.py +507 -0
- ads/aqua/cli.py +96 -0
- ads/aqua/client/__init__.py +3 -0
- ads/aqua/client/client.py +836 -0
- ads/aqua/client/openai_client.py +305 -0
- ads/aqua/common/__init__.py +5 -0
- ads/aqua/common/decorator.py +125 -0
- ads/aqua/common/entities.py +274 -0
- ads/aqua/common/enums.py +134 -0
- ads/aqua/common/errors.py +109 -0
- ads/aqua/common/utils.py +1295 -0
- ads/aqua/config/__init__.py +4 -0
- ads/aqua/config/container_config.py +246 -0
- ads/aqua/config/evaluation/__init__.py +4 -0
- ads/aqua/config/evaluation/evaluation_service_config.py +147 -0
- ads/aqua/config/utils/__init__.py +4 -0
- ads/aqua/config/utils/serializer.py +339 -0
- ads/aqua/constants.py +116 -0
- ads/aqua/data.py +14 -0
- ads/aqua/dummy_data/icon.txt +1 -0
- ads/aqua/dummy_data/oci_model_deployments.json +56 -0
- ads/aqua/dummy_data/oci_models.json +1 -0
- ads/aqua/dummy_data/readme.md +26 -0
- ads/aqua/evaluation/__init__.py +8 -0
- ads/aqua/evaluation/constants.py +53 -0
- ads/aqua/evaluation/entities.py +186 -0
- ads/aqua/evaluation/errors.py +70 -0
- ads/aqua/evaluation/evaluation.py +1814 -0
- ads/aqua/extension/__init__.py +42 -0
- ads/aqua/extension/aqua_ws_msg_handler.py +76 -0
- ads/aqua/extension/base_handler.py +90 -0
- ads/aqua/extension/common_handler.py +121 -0
- ads/aqua/extension/common_ws_msg_handler.py +36 -0
- ads/aqua/extension/deployment_handler.py +381 -0
- ads/aqua/extension/deployment_ws_msg_handler.py +54 -0
- ads/aqua/extension/errors.py +30 -0
- ads/aqua/extension/evaluation_handler.py +129 -0
- ads/aqua/extension/evaluation_ws_msg_handler.py +61 -0
- ads/aqua/extension/finetune_handler.py +96 -0
- ads/aqua/extension/model_handler.py +390 -0
- ads/aqua/extension/models/__init__.py +0 -0
- ads/aqua/extension/models/ws_models.py +145 -0
- ads/aqua/extension/models_ws_msg_handler.py +50 -0
- ads/aqua/extension/ui_handler.py +300 -0
- ads/aqua/extension/ui_websocket_handler.py +130 -0
- ads/aqua/extension/utils.py +133 -0
- ads/aqua/finetuning/__init__.py +7 -0
- ads/aqua/finetuning/constants.py +23 -0
- ads/aqua/finetuning/entities.py +181 -0
- ads/aqua/finetuning/finetuning.py +749 -0
- ads/aqua/model/__init__.py +8 -0
- ads/aqua/model/constants.py +60 -0
- ads/aqua/model/entities.py +385 -0
- ads/aqua/model/enums.py +32 -0
- ads/aqua/model/model.py +2134 -0
- ads/aqua/model/utils.py +52 -0
- ads/aqua/modeldeployment/__init__.py +6 -0
- ads/aqua/modeldeployment/constants.py +10 -0
- ads/aqua/modeldeployment/deployment.py +1315 -0
- ads/aqua/modeldeployment/entities.py +653 -0
- ads/aqua/modeldeployment/utils.py +543 -0
- ads/aqua/resources/gpu_shapes_index.json +94 -0
- ads/aqua/server/__init__.py +4 -0
- ads/aqua/server/__main__.py +24 -0
- ads/aqua/server/app.py +47 -0
- ads/aqua/server/aqua_spec.yml +1291 -0
- ads/aqua/training/__init__.py +4 -0
- ads/aqua/training/exceptions.py +476 -0
- ads/aqua/ui.py +519 -0
- ads/automl/__init__.py +9 -0
- ads/automl/driver.py +330 -0
- ads/automl/provider.py +975 -0
- ads/bds/__init__.py +5 -0
- ads/bds/auth.py +127 -0
- ads/bds/big_data_service.py +255 -0
- ads/catalog/__init__.py +19 -0
- ads/catalog/model.py +1576 -0
- ads/catalog/notebook.py +461 -0
- ads/catalog/project.py +468 -0
- ads/catalog/summary.py +178 -0
- ads/common/__init__.py +11 -0
- ads/common/analyzer.py +65 -0
- ads/common/artifact/.model-ignore +63 -0
- ads/common/artifact/__init__.py +10 -0
- ads/common/auth.py +1122 -0
- ads/common/card_identifier.py +83 -0
- ads/common/config.py +647 -0
- ads/common/data.py +165 -0
- ads/common/decorator/__init__.py +9 -0
- ads/common/decorator/argument_to_case.py +88 -0
- ads/common/decorator/deprecate.py +69 -0
- ads/common/decorator/require_nonempty_arg.py +65 -0
- ads/common/decorator/runtime_dependency.py +178 -0
- ads/common/decorator/threaded.py +97 -0
- ads/common/decorator/utils.py +35 -0
- ads/common/dsc_file_system.py +303 -0
- ads/common/error.py +14 -0
- ads/common/extended_enum.py +81 -0
- ads/common/function/__init__.py +5 -0
- ads/common/function/fn_util.py +142 -0
- ads/common/function/func_conf.yaml +25 -0
- ads/common/ipython.py +76 -0
- ads/common/model.py +679 -0
- ads/common/model_artifact.py +1759 -0
- ads/common/model_artifact_schema.json +107 -0
- ads/common/model_export_util.py +664 -0
- ads/common/model_metadata.py +24 -0
- ads/common/object_storage_details.py +296 -0
- ads/common/oci_client.py +179 -0
- ads/common/oci_datascience.py +46 -0
- ads/common/oci_logging.py +1144 -0
- ads/common/oci_mixin.py +957 -0
- ads/common/oci_resource.py +136 -0
- ads/common/serializer.py +559 -0
- ads/common/utils.py +1852 -0
- ads/common/word_lists.py +1491 -0
- ads/common/work_request.py +189 -0
- ads/config.py +1 -0
- ads/data_labeling/__init__.py +13 -0
- ads/data_labeling/boundingbox.py +253 -0
- ads/data_labeling/constants.py +47 -0
- ads/data_labeling/data_labeling_service.py +244 -0
- ads/data_labeling/interface/__init__.py +5 -0
- ads/data_labeling/interface/loader.py +16 -0
- ads/data_labeling/interface/parser.py +16 -0
- ads/data_labeling/interface/reader.py +23 -0
- ads/data_labeling/loader/__init__.py +5 -0
- ads/data_labeling/loader/file_loader.py +241 -0
- ads/data_labeling/metadata.py +110 -0
- ads/data_labeling/mixin/__init__.py +5 -0
- ads/data_labeling/mixin/data_labeling.py +232 -0
- ads/data_labeling/ner.py +129 -0
- ads/data_labeling/parser/__init__.py +5 -0
- ads/data_labeling/parser/dls_record_parser.py +388 -0
- ads/data_labeling/parser/export_metadata_parser.py +94 -0
- ads/data_labeling/parser/export_record_parser.py +473 -0
- ads/data_labeling/reader/__init__.py +5 -0
- ads/data_labeling/reader/dataset_reader.py +574 -0
- ads/data_labeling/reader/dls_record_reader.py +121 -0
- ads/data_labeling/reader/export_record_reader.py +62 -0
- ads/data_labeling/reader/jsonl_reader.py +75 -0
- ads/data_labeling/reader/metadata_reader.py +203 -0
- ads/data_labeling/reader/record_reader.py +263 -0
- ads/data_labeling/record.py +52 -0
- ads/data_labeling/visualizer/__init__.py +5 -0
- ads/data_labeling/visualizer/image_visualizer.py +525 -0
- ads/data_labeling/visualizer/text_visualizer.py +357 -0
- ads/database/__init__.py +5 -0
- ads/database/connection.py +338 -0
- ads/dataset/__init__.py +10 -0
- ads/dataset/capabilities.md +51 -0
- ads/dataset/classification_dataset.py +339 -0
- ads/dataset/correlation.py +226 -0
- ads/dataset/correlation_plot.py +563 -0
- ads/dataset/dask_series.py +173 -0
- ads/dataset/dataframe_transformer.py +110 -0
- ads/dataset/dataset.py +1979 -0
- ads/dataset/dataset_browser.py +360 -0
- ads/dataset/dataset_with_target.py +995 -0
- ads/dataset/exception.py +25 -0
- ads/dataset/factory.py +987 -0
- ads/dataset/feature_engineering_transformer.py +35 -0
- ads/dataset/feature_selection.py +107 -0
- ads/dataset/forecasting_dataset.py +26 -0
- ads/dataset/helper.py +1450 -0
- ads/dataset/label_encoder.py +99 -0
- ads/dataset/mixin/__init__.py +5 -0
- ads/dataset/mixin/dataset_accessor.py +134 -0
- ads/dataset/pipeline.py +58 -0
- ads/dataset/plot.py +710 -0
- ads/dataset/progress.py +86 -0
- ads/dataset/recommendation.py +297 -0
- ads/dataset/recommendation_transformer.py +502 -0
- ads/dataset/regression_dataset.py +14 -0
- ads/dataset/sampled_dataset.py +1050 -0
- ads/dataset/target.py +98 -0
- ads/dataset/timeseries.py +18 -0
- ads/dbmixin/__init__.py +5 -0
- ads/dbmixin/db_pandas_accessor.py +153 -0
- ads/environment/__init__.py +9 -0
- ads/environment/ml_runtime.py +66 -0
- ads/evaluations/README.md +14 -0
- ads/evaluations/__init__.py +109 -0
- ads/evaluations/evaluation_plot.py +983 -0
- ads/evaluations/evaluator.py +1334 -0
- ads/evaluations/statistical_metrics.py +543 -0
- ads/experiments/__init__.py +9 -0
- ads/experiments/capabilities.md +0 -0
- ads/explanations/__init__.py +21 -0
- ads/explanations/base_explainer.py +142 -0
- ads/explanations/capabilities.md +83 -0
- ads/explanations/explainer.py +190 -0
- ads/explanations/mlx_global_explainer.py +1050 -0
- ads/explanations/mlx_interface.py +386 -0
- ads/explanations/mlx_local_explainer.py +287 -0
- ads/explanations/mlx_whatif_explainer.py +201 -0
- ads/feature_engineering/__init__.py +20 -0
- ads/feature_engineering/accessor/__init__.py +5 -0
- ads/feature_engineering/accessor/dataframe_accessor.py +535 -0
- ads/feature_engineering/accessor/mixin/__init__.py +5 -0
- ads/feature_engineering/accessor/mixin/correlation.py +166 -0
- ads/feature_engineering/accessor/mixin/eda_mixin.py +266 -0
- ads/feature_engineering/accessor/mixin/eda_mixin_series.py +85 -0
- ads/feature_engineering/accessor/mixin/feature_types_mixin.py +211 -0
- ads/feature_engineering/accessor/mixin/utils.py +65 -0
- ads/feature_engineering/accessor/series_accessor.py +431 -0
- ads/feature_engineering/adsimage/__init__.py +5 -0
- ads/feature_engineering/adsimage/image.py +192 -0
- ads/feature_engineering/adsimage/image_reader.py +170 -0
- ads/feature_engineering/adsimage/interface/__init__.py +5 -0
- ads/feature_engineering/adsimage/interface/reader.py +19 -0
- ads/feature_engineering/adsstring/__init__.py +7 -0
- ads/feature_engineering/adsstring/oci_language/__init__.py +8 -0
- ads/feature_engineering/adsstring/string/__init__.py +8 -0
- ads/feature_engineering/data_schema.json +57 -0
- ads/feature_engineering/dataset/__init__.py +5 -0
- ads/feature_engineering/dataset/zip_code_data.py +42062 -0
- ads/feature_engineering/exceptions.py +40 -0
- ads/feature_engineering/feature_type/__init__.py +133 -0
- ads/feature_engineering/feature_type/address.py +184 -0
- ads/feature_engineering/feature_type/adsstring/__init__.py +5 -0
- ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +164 -0
- ads/feature_engineering/feature_type/adsstring/oci_language.py +93 -0
- ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +5 -0
- ads/feature_engineering/feature_type/adsstring/parsers/base.py +47 -0
- ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +96 -0
- ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +221 -0
- ads/feature_engineering/feature_type/adsstring/string.py +258 -0
- ads/feature_engineering/feature_type/base.py +58 -0
- ads/feature_engineering/feature_type/boolean.py +183 -0
- ads/feature_engineering/feature_type/category.py +146 -0
- ads/feature_engineering/feature_type/constant.py +137 -0
- ads/feature_engineering/feature_type/continuous.py +151 -0
- ads/feature_engineering/feature_type/creditcard.py +314 -0
- ads/feature_engineering/feature_type/datetime.py +190 -0
- ads/feature_engineering/feature_type/discrete.py +134 -0
- ads/feature_engineering/feature_type/document.py +43 -0
- ads/feature_engineering/feature_type/gis.py +251 -0
- ads/feature_engineering/feature_type/handler/__init__.py +5 -0
- ads/feature_engineering/feature_type/handler/feature_validator.py +524 -0
- ads/feature_engineering/feature_type/handler/feature_warning.py +319 -0
- ads/feature_engineering/feature_type/handler/warnings.py +128 -0
- ads/feature_engineering/feature_type/integer.py +142 -0
- ads/feature_engineering/feature_type/ip_address.py +144 -0
- ads/feature_engineering/feature_type/ip_address_v4.py +138 -0
- ads/feature_engineering/feature_type/ip_address_v6.py +138 -0
- ads/feature_engineering/feature_type/lat_long.py +256 -0
- ads/feature_engineering/feature_type/object.py +43 -0
- ads/feature_engineering/feature_type/ordinal.py +132 -0
- ads/feature_engineering/feature_type/phone_number.py +135 -0
- ads/feature_engineering/feature_type/string.py +171 -0
- ads/feature_engineering/feature_type/text.py +93 -0
- ads/feature_engineering/feature_type/unknown.py +43 -0
- ads/feature_engineering/feature_type/zip_code.py +164 -0
- ads/feature_engineering/feature_type_manager.py +406 -0
- ads/feature_engineering/schema.py +795 -0
- ads/feature_engineering/utils.py +245 -0
- ads/feature_store/.readthedocs.yaml +19 -0
- ads/feature_store/README.md +65 -0
- ads/feature_store/__init__.py +9 -0
- ads/feature_store/common/__init__.py +0 -0
- ads/feature_store/common/enums.py +339 -0
- ads/feature_store/common/exceptions.py +18 -0
- ads/feature_store/common/spark_session_singleton.py +125 -0
- ads/feature_store/common/utils/__init__.py +0 -0
- ads/feature_store/common/utils/base64_encoder_decoder.py +72 -0
- ads/feature_store/common/utils/feature_schema_mapper.py +283 -0
- ads/feature_store/common/utils/transformation_utils.py +82 -0
- ads/feature_store/common/utils/utility.py +403 -0
- ads/feature_store/data_validation/__init__.py +0 -0
- ads/feature_store/data_validation/great_expectation.py +129 -0
- ads/feature_store/dataset.py +1230 -0
- ads/feature_store/dataset_job.py +530 -0
- ads/feature_store/docs/Dockerfile +7 -0
- ads/feature_store/docs/Makefile +44 -0
- ads/feature_store/docs/conf.py +28 -0
- ads/feature_store/docs/requirements.txt +14 -0
- ads/feature_store/docs/source/ads.feature_store.query.rst +20 -0
- ads/feature_store/docs/source/cicd.rst +137 -0
- ads/feature_store/docs/source/conf.py +86 -0
- ads/feature_store/docs/source/data_versioning.rst +33 -0
- ads/feature_store/docs/source/dataset.rst +388 -0
- ads/feature_store/docs/source/dataset_job.rst +27 -0
- ads/feature_store/docs/source/demo.rst +70 -0
- ads/feature_store/docs/source/entity.rst +78 -0
- ads/feature_store/docs/source/feature_group.rst +624 -0
- ads/feature_store/docs/source/feature_group_job.rst +29 -0
- ads/feature_store/docs/source/feature_store.rst +122 -0
- ads/feature_store/docs/source/feature_store_class.rst +123 -0
- ads/feature_store/docs/source/feature_validation.rst +66 -0
- ads/feature_store/docs/source/figures/cicd.png +0 -0
- ads/feature_store/docs/source/figures/data_validation.png +0 -0
- ads/feature_store/docs/source/figures/data_versioning.png +0 -0
- ads/feature_store/docs/source/figures/dataset.gif +0 -0
- ads/feature_store/docs/source/figures/dataset.png +0 -0
- ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
- ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
- ads/feature_store/docs/source/figures/entity.png +0 -0
- ads/feature_store/docs/source/figures/feature_group.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
- ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
- ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
- ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
- ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
- ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
- ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
- ads/feature_store/docs/source/figures/overview.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
- ads/feature_store/docs/source/figures/stats_1.png +0 -0
- ads/feature_store/docs/source/figures/stats_2.png +0 -0
- ads/feature_store/docs/source/figures/stats_d.png +0 -0
- ads/feature_store/docs/source/figures/stats_fg.png +0 -0
- ads/feature_store/docs/source/figures/transformation.png +0 -0
- ads/feature_store/docs/source/figures/transformations.gif +0 -0
- ads/feature_store/docs/source/figures/validation.png +0 -0
- ads/feature_store/docs/source/figures/validation_fg.png +0 -0
- ads/feature_store/docs/source/figures/validation_results.png +0 -0
- ads/feature_store/docs/source/figures/validation_summary.png +0 -0
- ads/feature_store/docs/source/index.rst +81 -0
- ads/feature_store/docs/source/module.rst +8 -0
- ads/feature_store/docs/source/notebook.rst +94 -0
- ads/feature_store/docs/source/overview.rst +47 -0
- ads/feature_store/docs/source/quickstart.rst +176 -0
- ads/feature_store/docs/source/release_notes.rst +194 -0
- ads/feature_store/docs/source/setup_feature_store.rst +81 -0
- ads/feature_store/docs/source/statistics.rst +58 -0
- ads/feature_store/docs/source/transformation.rst +199 -0
- ads/feature_store/docs/source/ui.rst +65 -0
- ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +66 -0
- ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +192 -0
- ads/feature_store/docs/source/user_guides.setup.terraform.rst +338 -0
- ads/feature_store/entity.py +718 -0
- ads/feature_store/execution_strategy/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +375 -0
- ads/feature_store/execution_strategy/engine/__init__.py +0 -0
- ads/feature_store/execution_strategy/engine/spark_engine.py +316 -0
- ads/feature_store/execution_strategy/execution_strategy.py +113 -0
- ads/feature_store/execution_strategy/execution_strategy_provider.py +47 -0
- ads/feature_store/execution_strategy/spark/__init__.py +0 -0
- ads/feature_store/execution_strategy/spark/spark_execution.py +618 -0
- ads/feature_store/feature.py +192 -0
- ads/feature_store/feature_group.py +1494 -0
- ads/feature_store/feature_group_expectation.py +346 -0
- ads/feature_store/feature_group_job.py +602 -0
- ads/feature_store/feature_lineage/__init__.py +0 -0
- ads/feature_store/feature_lineage/graphviz_service.py +180 -0
- ads/feature_store/feature_option_details.py +50 -0
- ads/feature_store/feature_statistics/__init__.py +0 -0
- ads/feature_store/feature_statistics/statistics_service.py +99 -0
- ads/feature_store/feature_store.py +699 -0
- ads/feature_store/feature_store_registrar.py +518 -0
- ads/feature_store/input_feature_detail.py +149 -0
- ads/feature_store/mixin/__init__.py +4 -0
- ads/feature_store/mixin/oci_feature_store.py +145 -0
- ads/feature_store/model_details.py +73 -0
- ads/feature_store/query/__init__.py +0 -0
- ads/feature_store/query/filter.py +266 -0
- ads/feature_store/query/generator/__init__.py +0 -0
- ads/feature_store/query/generator/query_generator.py +298 -0
- ads/feature_store/query/join.py +161 -0
- ads/feature_store/query/query.py +403 -0
- ads/feature_store/query/validator/__init__.py +0 -0
- ads/feature_store/query/validator/query_validator.py +57 -0
- ads/feature_store/response/__init__.py +0 -0
- ads/feature_store/response/response_builder.py +68 -0
- ads/feature_store/service/__init__.py +0 -0
- ads/feature_store/service/oci_dataset.py +139 -0
- ads/feature_store/service/oci_dataset_job.py +199 -0
- ads/feature_store/service/oci_entity.py +125 -0
- ads/feature_store/service/oci_feature_group.py +164 -0
- ads/feature_store/service/oci_feature_group_job.py +214 -0
- ads/feature_store/service/oci_feature_store.py +182 -0
- ads/feature_store/service/oci_lineage.py +87 -0
- ads/feature_store/service/oci_transformation.py +104 -0
- ads/feature_store/statistics/__init__.py +0 -0
- ads/feature_store/statistics/abs_feature_value.py +49 -0
- ads/feature_store/statistics/charts/__init__.py +0 -0
- ads/feature_store/statistics/charts/abstract_feature_plot.py +37 -0
- ads/feature_store/statistics/charts/box_plot.py +148 -0
- ads/feature_store/statistics/charts/frequency_distribution.py +65 -0
- ads/feature_store/statistics/charts/probability_distribution.py +68 -0
- ads/feature_store/statistics/charts/top_k_frequent_elements.py +98 -0
- ads/feature_store/statistics/feature_stat.py +126 -0
- ads/feature_store/statistics/generic_feature_value.py +33 -0
- ads/feature_store/statistics/statistics.py +41 -0
- ads/feature_store/statistics_config.py +101 -0
- ads/feature_store/templates/feature_store_template.yaml +45 -0
- ads/feature_store/transformation.py +499 -0
- ads/feature_store/validation_output.py +57 -0
- ads/hpo/__init__.py +9 -0
- ads/hpo/_imports.py +91 -0
- ads/hpo/ads_search_space.py +439 -0
- ads/hpo/distributions.py +325 -0
- ads/hpo/objective.py +280 -0
- ads/hpo/search_cv.py +1657 -0
- ads/hpo/stopping_criterion.py +75 -0
- ads/hpo/tuner_artifact.py +413 -0
- ads/hpo/utils.py +91 -0
- ads/hpo/validation.py +140 -0
- ads/hpo/visualization/__init__.py +5 -0
- ads/hpo/visualization/_contour.py +23 -0
- ads/hpo/visualization/_edf.py +20 -0
- ads/hpo/visualization/_intermediate_values.py +21 -0
- ads/hpo/visualization/_optimization_history.py +25 -0
- ads/hpo/visualization/_parallel_coordinate.py +169 -0
- ads/hpo/visualization/_param_importances.py +26 -0
- ads/jobs/__init__.py +53 -0
- ads/jobs/ads_job.py +663 -0
- ads/jobs/builders/__init__.py +5 -0
- ads/jobs/builders/base.py +156 -0
- ads/jobs/builders/infrastructure/__init__.py +6 -0
- ads/jobs/builders/infrastructure/base.py +165 -0
- ads/jobs/builders/infrastructure/dataflow.py +1252 -0
- ads/jobs/builders/infrastructure/dsc_job.py +1894 -0
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +1233 -0
- ads/jobs/builders/infrastructure/utils.py +65 -0
- ads/jobs/builders/runtimes/__init__.py +5 -0
- ads/jobs/builders/runtimes/artifact.py +338 -0
- ads/jobs/builders/runtimes/base.py +325 -0
- ads/jobs/builders/runtimes/container_runtime.py +242 -0
- ads/jobs/builders/runtimes/python_runtime.py +1016 -0
- ads/jobs/builders/runtimes/pytorch_runtime.py +204 -0
- ads/jobs/cli.py +104 -0
- ads/jobs/env_var_parser.py +131 -0
- ads/jobs/extension.py +160 -0
- ads/jobs/schema/__init__.py +5 -0
- ads/jobs/schema/infrastructure_schema.json +116 -0
- ads/jobs/schema/job_schema.json +42 -0
- ads/jobs/schema/runtime_schema.json +183 -0
- ads/jobs/schema/validator.py +141 -0
- ads/jobs/serializer.py +296 -0
- ads/jobs/templates/__init__.py +5 -0
- ads/jobs/templates/container.py +6 -0
- ads/jobs/templates/driver_notebook.py +177 -0
- ads/jobs/templates/driver_oci.py +500 -0
- ads/jobs/templates/driver_python.py +48 -0
- ads/jobs/templates/driver_pytorch.py +852 -0
- ads/jobs/templates/driver_utils.py +615 -0
- ads/jobs/templates/hostname_from_env.c +55 -0
- ads/jobs/templates/oci_metrics.py +181 -0
- ads/jobs/utils.py +104 -0
- ads/llm/__init__.py +28 -0
- ads/llm/autogen/__init__.py +2 -0
- ads/llm/autogen/constants.py +15 -0
- ads/llm/autogen/reports/__init__.py +2 -0
- ads/llm/autogen/reports/base.py +67 -0
- ads/llm/autogen/reports/data.py +103 -0
- ads/llm/autogen/reports/session.py +526 -0
- ads/llm/autogen/reports/templates/chat_box.html +13 -0
- ads/llm/autogen/reports/templates/chat_box_lt.html +5 -0
- ads/llm/autogen/reports/templates/chat_box_rt.html +6 -0
- ads/llm/autogen/reports/utils.py +56 -0
- ads/llm/autogen/v02/__init__.py +4 -0
- ads/llm/autogen/v02/client.py +295 -0
- ads/llm/autogen/v02/log_handlers/__init__.py +2 -0
- ads/llm/autogen/v02/log_handlers/oci_file_handler.py +83 -0
- ads/llm/autogen/v02/loggers/__init__.py +6 -0
- ads/llm/autogen/v02/loggers/metric_logger.py +320 -0
- ads/llm/autogen/v02/loggers/session_logger.py +580 -0
- ads/llm/autogen/v02/loggers/utils.py +86 -0
- ads/llm/autogen/v02/runtime_logging.py +163 -0
- ads/llm/chain.py +268 -0
- ads/llm/chat_template.py +31 -0
- ads/llm/deploy.py +63 -0
- ads/llm/guardrails/__init__.py +5 -0
- ads/llm/guardrails/base.py +442 -0
- ads/llm/guardrails/huggingface.py +44 -0
- ads/llm/langchain/__init__.py +5 -0
- ads/llm/langchain/plugins/__init__.py +5 -0
- ads/llm/langchain/plugins/chat_models/__init__.py +5 -0
- ads/llm/langchain/plugins/chat_models/oci_data_science.py +1027 -0
- ads/llm/langchain/plugins/embeddings/__init__.py +4 -0
- ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +184 -0
- ads/llm/langchain/plugins/llms/__init__.py +5 -0
- ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +979 -0
- ads/llm/requirements.txt +3 -0
- ads/llm/serialize.py +219 -0
- ads/llm/serializers/__init__.py +0 -0
- ads/llm/serializers/retrieval_qa.py +153 -0
- ads/llm/serializers/runnable_parallel.py +27 -0
- ads/llm/templates/score_chain.jinja2 +155 -0
- ads/llm/templates/tool_chat_template_hermes.jinja +130 -0
- ads/llm/templates/tool_chat_template_mistral_parallel.jinja +94 -0
- ads/model/__init__.py +52 -0
- ads/model/artifact.py +573 -0
- ads/model/artifact_downloader.py +254 -0
- ads/model/artifact_uploader.py +267 -0
- ads/model/base_properties.py +238 -0
- ads/model/common/.model-ignore +66 -0
- ads/model/common/__init__.py +5 -0
- ads/model/common/utils.py +142 -0
- ads/model/datascience_model.py +2635 -0
- ads/model/deployment/__init__.py +20 -0
- ads/model/deployment/common/__init__.py +5 -0
- ads/model/deployment/common/utils.py +308 -0
- ads/model/deployment/model_deployer.py +466 -0
- ads/model/deployment/model_deployment.py +1846 -0
- ads/model/deployment/model_deployment_infrastructure.py +671 -0
- ads/model/deployment/model_deployment_properties.py +493 -0
- ads/model/deployment/model_deployment_runtime.py +838 -0
- ads/model/extractor/__init__.py +5 -0
- ads/model/extractor/automl_extractor.py +74 -0
- ads/model/extractor/embedding_onnx_extractor.py +80 -0
- ads/model/extractor/huggingface_extractor.py +88 -0
- ads/model/extractor/keras_extractor.py +84 -0
- ads/model/extractor/lightgbm_extractor.py +93 -0
- ads/model/extractor/model_info_extractor.py +114 -0
- ads/model/extractor/model_info_extractor_factory.py +105 -0
- ads/model/extractor/pytorch_extractor.py +87 -0
- ads/model/extractor/sklearn_extractor.py +112 -0
- ads/model/extractor/spark_extractor.py +89 -0
- ads/model/extractor/tensorflow_extractor.py +85 -0
- ads/model/extractor/xgboost_extractor.py +94 -0
- ads/model/framework/__init__.py +5 -0
- ads/model/framework/automl_model.py +178 -0
- ads/model/framework/embedding_onnx_model.py +438 -0
- ads/model/framework/huggingface_model.py +399 -0
- ads/model/framework/lightgbm_model.py +266 -0
- ads/model/framework/pytorch_model.py +266 -0
- ads/model/framework/sklearn_model.py +250 -0
- ads/model/framework/spark_model.py +326 -0
- ads/model/framework/tensorflow_model.py +254 -0
- ads/model/framework/xgboost_model.py +258 -0
- ads/model/generic_model.py +3518 -0
- ads/model/model_artifact_boilerplate/README.md +381 -0
- ads/model/model_artifact_boilerplate/__init__.py +5 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +5 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +427 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +2 -0
- ads/model/model_artifact_boilerplate/runtime.yaml +7 -0
- ads/model/model_artifact_boilerplate/score.py +61 -0
- ads/model/model_file_description_schema.json +68 -0
- ads/model/model_introspect.py +331 -0
- ads/model/model_metadata.py +1810 -0
- ads/model/model_metadata_mixin.py +460 -0
- ads/model/model_properties.py +63 -0
- ads/model/model_version_set.py +739 -0
- ads/model/runtime/__init__.py +5 -0
- ads/model/runtime/env_info.py +306 -0
- ads/model/runtime/model_deployment_details.py +37 -0
- ads/model/runtime/model_provenance_details.py +58 -0
- ads/model/runtime/runtime_info.py +81 -0
- ads/model/runtime/schemas/inference_env_info_schema.yaml +16 -0
- ads/model/runtime/schemas/model_provenance_schema.yaml +36 -0
- ads/model/runtime/schemas/training_env_info_schema.yaml +16 -0
- ads/model/runtime/utils.py +201 -0
- ads/model/serde/__init__.py +5 -0
- ads/model/serde/common.py +40 -0
- ads/model/serde/model_input.py +547 -0
- ads/model/serde/model_serializer.py +1184 -0
- ads/model/service/__init__.py +5 -0
- ads/model/service/oci_datascience_model.py +1076 -0
- ads/model/service/oci_datascience_model_deployment.py +500 -0
- ads/model/service/oci_datascience_model_version_set.py +176 -0
- ads/model/transformer/__init__.py +5 -0
- ads/model/transformer/onnx_transformer.py +324 -0
- ads/mysqldb/__init__.py +5 -0
- ads/mysqldb/mysql_db.py +227 -0
- ads/opctl/__init__.py +18 -0
- ads/opctl/anomaly_detection.py +11 -0
- ads/opctl/backend/__init__.py +5 -0
- ads/opctl/backend/ads_dataflow.py +353 -0
- ads/opctl/backend/ads_ml_job.py +710 -0
- ads/opctl/backend/ads_ml_pipeline.py +164 -0
- ads/opctl/backend/ads_model_deployment.py +209 -0
- ads/opctl/backend/base.py +146 -0
- ads/opctl/backend/local.py +1053 -0
- ads/opctl/backend/marketplace/__init__.py +9 -0
- ads/opctl/backend/marketplace/helm_helper.py +173 -0
- ads/opctl/backend/marketplace/local_marketplace.py +271 -0
- ads/opctl/backend/marketplace/marketplace_backend_runner.py +71 -0
- ads/opctl/backend/marketplace/marketplace_operator_interface.py +44 -0
- ads/opctl/backend/marketplace/marketplace_operator_runner.py +24 -0
- ads/opctl/backend/marketplace/marketplace_utils.py +212 -0
- ads/opctl/backend/marketplace/models/__init__.py +5 -0
- ads/opctl/backend/marketplace/models/bearer_token.py +94 -0
- ads/opctl/backend/marketplace/models/marketplace_type.py +70 -0
- ads/opctl/backend/marketplace/models/ocir_details.py +56 -0
- ads/opctl/backend/marketplace/prerequisite_checker.py +238 -0
- ads/opctl/cli.py +707 -0
- ads/opctl/cmds.py +869 -0
- ads/opctl/conda/__init__.py +5 -0
- ads/opctl/conda/cli.py +193 -0
- ads/opctl/conda/cmds.py +749 -0
- ads/opctl/conda/config.yaml +34 -0
- ads/opctl/conda/manifest_template.yaml +13 -0
- ads/opctl/conda/multipart_uploader.py +188 -0
- ads/opctl/conda/pack.py +89 -0
- ads/opctl/config/__init__.py +5 -0
- ads/opctl/config/base.py +57 -0
- ads/opctl/config/diagnostics/__init__.py +5 -0
- ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +62 -0
- ads/opctl/config/merger.py +255 -0
- ads/opctl/config/resolver.py +297 -0
- ads/opctl/config/utils.py +79 -0
- ads/opctl/config/validator.py +17 -0
- ads/opctl/config/versioner.py +68 -0
- ads/opctl/config/yaml_parsers/__init__.py +7 -0
- ads/opctl/config/yaml_parsers/base.py +58 -0
- ads/opctl/config/yaml_parsers/distributed/__init__.py +7 -0
- ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +201 -0
- ads/opctl/constants.py +66 -0
- ads/opctl/decorator/__init__.py +5 -0
- ads/opctl/decorator/common.py +129 -0
- ads/opctl/diagnostics/__init__.py +5 -0
- ads/opctl/diagnostics/__main__.py +25 -0
- ads/opctl/diagnostics/check_distributed_job_requirements.py +212 -0
- ads/opctl/diagnostics/check_requirements.py +144 -0
- ads/opctl/diagnostics/requirement_exception.py +9 -0
- ads/opctl/distributed/README.md +109 -0
- ads/opctl/distributed/__init__.py +5 -0
- ads/opctl/distributed/certificates.py +32 -0
- ads/opctl/distributed/cli.py +207 -0
- ads/opctl/distributed/cmds.py +731 -0
- ads/opctl/distributed/common/__init__.py +5 -0
- ads/opctl/distributed/common/abstract_cluster_provider.py +449 -0
- ads/opctl/distributed/common/abstract_framework_spec_builder.py +88 -0
- ads/opctl/distributed/common/cluster_config_helper.py +103 -0
- ads/opctl/distributed/common/cluster_provider_factory.py +21 -0
- ads/opctl/distributed/common/cluster_runner.py +54 -0
- ads/opctl/distributed/common/framework_factory.py +29 -0
- ads/opctl/docker/Dockerfile.job +103 -0
- ads/opctl/docker/Dockerfile.job.arm +107 -0
- ads/opctl/docker/Dockerfile.job.gpu +175 -0
- ads/opctl/docker/base-env.yaml +13 -0
- ads/opctl/docker/cuda.repo +6 -0
- ads/opctl/docker/operator/.dockerignore +0 -0
- ads/opctl/docker/operator/Dockerfile +41 -0
- ads/opctl/docker/operator/Dockerfile.gpu +85 -0
- ads/opctl/docker/operator/cuda.repo +6 -0
- ads/opctl/docker/operator/environment.yaml +8 -0
- ads/opctl/forecast.py +11 -0
- ads/opctl/index.yaml +3 -0
- ads/opctl/model/__init__.py +5 -0
- ads/opctl/model/cli.py +65 -0
- ads/opctl/model/cmds.py +73 -0
- ads/opctl/operator/README.md +4 -0
- ads/opctl/operator/__init__.py +31 -0
- ads/opctl/operator/cli.py +344 -0
- ads/opctl/operator/cmd.py +596 -0
- ads/opctl/operator/common/__init__.py +5 -0
- ads/opctl/operator/common/backend_factory.py +460 -0
- ads/opctl/operator/common/const.py +27 -0
- ads/opctl/operator/common/data/synthetic.csv +16001 -0
- ads/opctl/operator/common/dictionary_merger.py +148 -0
- ads/opctl/operator/common/errors.py +42 -0
- ads/opctl/operator/common/operator_config.py +99 -0
- ads/opctl/operator/common/operator_loader.py +811 -0
- ads/opctl/operator/common/operator_schema.yaml +130 -0
- ads/opctl/operator/common/operator_yaml_generator.py +152 -0
- ads/opctl/operator/common/utils.py +208 -0
- ads/opctl/operator/lowcode/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/MLoperator +16 -0
- ads/opctl/operator/lowcode/anomaly/README.md +207 -0
- ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/__main__.py +103 -0
- ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
- ads/opctl/operator/lowcode/anomaly/const.py +167 -0
- ads/opctl/operator/lowcode/anomaly/environment.yaml +10 -0
- ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +146 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +162 -0
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +99 -0
- ads/opctl/operator/lowcode/anomaly/model/autots.py +115 -0
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +404 -0
- ads/opctl/operator/lowcode/anomaly/model/factory.py +110 -0
- ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +78 -0
- ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +78 -0
- ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +120 -0
- ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
- ads/opctl/operator/lowcode/anomaly/operator_config.py +127 -0
- ads/opctl/operator/lowcode/anomaly/schema.yaml +401 -0
- ads/opctl/operator/lowcode/anomaly/utils.py +88 -0
- ads/opctl/operator/lowcode/common/__init__.py +5 -0
- ads/opctl/operator/lowcode/common/const.py +10 -0
- ads/opctl/operator/lowcode/common/data.py +116 -0
- ads/opctl/operator/lowcode/common/errors.py +47 -0
- ads/opctl/operator/lowcode/common/transformations.py +296 -0
- ads/opctl/operator/lowcode/common/utils.py +384 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +13 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/README.md +30 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +5 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +116 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +85 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/const.py +15 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +4 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +32 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +43 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +120 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +34 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +386 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +160 -0
- ads/opctl/operator/lowcode/forecast/MLoperator +25 -0
- ads/opctl/operator/lowcode/forecast/README.md +209 -0
- ads/opctl/operator/lowcode/forecast/__init__.py +5 -0
- ads/opctl/operator/lowcode/forecast/__main__.py +89 -0
- ads/opctl/operator/lowcode/forecast/cmd.py +40 -0
- ads/opctl/operator/lowcode/forecast/const.py +92 -0
- ads/opctl/operator/lowcode/forecast/environment.yaml +20 -0
- ads/opctl/operator/lowcode/forecast/errors.py +26 -0
- ads/opctl/operator/lowcode/forecast/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/forecast/model/arima.py +279 -0
- ads/opctl/operator/lowcode/forecast/model/automlx.py +553 -0
- ads/opctl/operator/lowcode/forecast/model/autots.py +312 -0
- ads/opctl/operator/lowcode/forecast/model/base_model.py +875 -0
- ads/opctl/operator/lowcode/forecast/model/factory.py +106 -0
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +492 -0
- ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +243 -0
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +482 -0
- ads/opctl/operator/lowcode/forecast/model/prophet.py +450 -0
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +244 -0
- ads/opctl/operator/lowcode/forecast/operator_config.py +234 -0
- ads/opctl/operator/lowcode/forecast/schema.yaml +506 -0
- ads/opctl/operator/lowcode/forecast/utils.py +397 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +7 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +285 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/score.py +246 -0
- ads/opctl/operator/lowcode/pii/MLoperator +17 -0
- ads/opctl/operator/lowcode/pii/README.md +208 -0
- ads/opctl/operator/lowcode/pii/__init__.py +5 -0
- ads/opctl/operator/lowcode/pii/__main__.py +78 -0
- ads/opctl/operator/lowcode/pii/cmd.py +39 -0
- ads/opctl/operator/lowcode/pii/constant.py +84 -0
- ads/opctl/operator/lowcode/pii/environment.yaml +17 -0
- ads/opctl/operator/lowcode/pii/errors.py +27 -0
- ads/opctl/operator/lowcode/pii/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/pii/model/factory.py +82 -0
- ads/opctl/operator/lowcode/pii/model/guardrails.py +167 -0
- ads/opctl/operator/lowcode/pii/model/pii.py +145 -0
- ads/opctl/operator/lowcode/pii/model/processor/__init__.py +34 -0
- ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +34 -0
- ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +35 -0
- ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +225 -0
- ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +73 -0
- ads/opctl/operator/lowcode/pii/model/processor/remover.py +26 -0
- ads/opctl/operator/lowcode/pii/model/report.py +487 -0
- ads/opctl/operator/lowcode/pii/operator_config.py +95 -0
- ads/opctl/operator/lowcode/pii/schema.yaml +108 -0
- ads/opctl/operator/lowcode/pii/utils.py +43 -0
- ads/opctl/operator/lowcode/recommender/MLoperator +16 -0
- ads/opctl/operator/lowcode/recommender/README.md +206 -0
- ads/opctl/operator/lowcode/recommender/__init__.py +5 -0
- ads/opctl/operator/lowcode/recommender/__main__.py +82 -0
- ads/opctl/operator/lowcode/recommender/cmd.py +33 -0
- ads/opctl/operator/lowcode/recommender/constant.py +30 -0
- ads/opctl/operator/lowcode/recommender/environment.yaml +11 -0
- ads/opctl/operator/lowcode/recommender/model/base_model.py +212 -0
- ads/opctl/operator/lowcode/recommender/model/factory.py +56 -0
- ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +25 -0
- ads/opctl/operator/lowcode/recommender/model/svd.py +106 -0
- ads/opctl/operator/lowcode/recommender/operator_config.py +81 -0
- ads/opctl/operator/lowcode/recommender/schema.yaml +265 -0
- ads/opctl/operator/lowcode/recommender/utils.py +13 -0
- ads/opctl/operator/runtime/__init__.py +5 -0
- ads/opctl/operator/runtime/const.py +17 -0
- ads/opctl/operator/runtime/container_runtime_schema.yaml +50 -0
- ads/opctl/operator/runtime/marketplace_runtime.py +50 -0
- ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +21 -0
- ads/opctl/operator/runtime/python_runtime_schema.yaml +21 -0
- ads/opctl/operator/runtime/runtime.py +115 -0
- ads/opctl/schema.yaml.yml +36 -0
- ads/opctl/script.py +40 -0
- ads/opctl/spark/__init__.py +5 -0
- ads/opctl/spark/cli.py +43 -0
- ads/opctl/spark/cmds.py +147 -0
- ads/opctl/templates/diagnostic_report_template.jinja2 +102 -0
- ads/opctl/utils.py +344 -0
- ads/oracledb/__init__.py +5 -0
- ads/oracledb/oracle_db.py +346 -0
- ads/pipeline/__init__.py +39 -0
- ads/pipeline/ads_pipeline.py +2279 -0
- ads/pipeline/ads_pipeline_run.py +772 -0
- ads/pipeline/ads_pipeline_step.py +605 -0
- ads/pipeline/builders/__init__.py +5 -0
- ads/pipeline/builders/infrastructure/__init__.py +5 -0
- ads/pipeline/builders/infrastructure/custom_script.py +32 -0
- ads/pipeline/cli.py +119 -0
- ads/pipeline/extension.py +291 -0
- ads/pipeline/schema/__init__.py +5 -0
- ads/pipeline/schema/cs_step_schema.json +35 -0
- ads/pipeline/schema/ml_step_schema.json +31 -0
- ads/pipeline/schema/pipeline_schema.json +71 -0
- ads/pipeline/visualizer/__init__.py +5 -0
- ads/pipeline/visualizer/base.py +570 -0
- ads/pipeline/visualizer/graph_renderer.py +272 -0
- ads/pipeline/visualizer/text_renderer.py +84 -0
- ads/secrets/__init__.py +11 -0
- ads/secrets/adb.py +386 -0
- ads/secrets/auth_token.py +86 -0
- ads/secrets/big_data_service.py +365 -0
- ads/secrets/mysqldb.py +149 -0
- ads/secrets/oracledb.py +160 -0
- ads/secrets/secrets.py +407 -0
- ads/telemetry/__init__.py +7 -0
- ads/telemetry/base.py +69 -0
- ads/telemetry/client.py +122 -0
- ads/telemetry/telemetry.py +257 -0
- ads/templates/dataflow_pyspark.jinja2 +13 -0
- ads/templates/dataflow_sparksql.jinja2 +22 -0
- ads/templates/func.jinja2 +20 -0
- ads/templates/schemas/openapi.json +1740 -0
- ads/templates/score-pkl.jinja2 +173 -0
- ads/templates/score.jinja2 +322 -0
- ads/templates/score_embedding_onnx.jinja2 +202 -0
- ads/templates/score_generic.jinja2 +165 -0
- ads/templates/score_huggingface_pipeline.jinja2 +217 -0
- ads/templates/score_lightgbm.jinja2 +185 -0
- ads/templates/score_onnx.jinja2 +407 -0
- ads/templates/score_onnx_new.jinja2 +473 -0
- ads/templates/score_oracle_automl.jinja2 +185 -0
- ads/templates/score_pyspark.jinja2 +154 -0
- ads/templates/score_pytorch.jinja2 +219 -0
- ads/templates/score_scikit-learn.jinja2 +184 -0
- ads/templates/score_tensorflow.jinja2 +184 -0
- ads/templates/score_xgboost.jinja2 +178 -0
- ads/text_dataset/__init__.py +5 -0
- ads/text_dataset/backends.py +211 -0
- ads/text_dataset/dataset.py +445 -0
- ads/text_dataset/extractor.py +207 -0
- ads/text_dataset/options.py +53 -0
- ads/text_dataset/udfs.py +22 -0
- ads/text_dataset/utils.py +49 -0
- ads/type_discovery/__init__.py +9 -0
- ads/type_discovery/abstract_detector.py +21 -0
- ads/type_discovery/constant_detector.py +41 -0
- ads/type_discovery/continuous_detector.py +54 -0
- ads/type_discovery/credit_card_detector.py +99 -0
- ads/type_discovery/datetime_detector.py +92 -0
- ads/type_discovery/discrete_detector.py +118 -0
- ads/type_discovery/document_detector.py +146 -0
- ads/type_discovery/ip_detector.py +68 -0
- ads/type_discovery/latlon_detector.py +90 -0
- ads/type_discovery/phone_number_detector.py +63 -0
- ads/type_discovery/type_discovery_driver.py +87 -0
- ads/type_discovery/typed_feature.py +594 -0
- ads/type_discovery/unknown_detector.py +41 -0
- ads/type_discovery/zipcode_detector.py +48 -0
- ads/vault/__init__.py +7 -0
- ads/vault/vault.py +237 -0
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/METADATA +150 -149
- oracle_ads-2.13.10.dist-info/RECORD +858 -0
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/WHEEL +1 -2
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/entry_points.txt +2 -1
- oracle_ads-2.13.9rc0.dist-info/RECORD +0 -9
- oracle_ads-2.13.9rc0.dist-info/top_level.txt +0 -1
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,403 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8; -*-
|
3
|
+
import copy
|
4
|
+
import os
|
5
|
+
|
6
|
+
# Copyright (c) 2023 Oracle and/or its affiliates.
|
7
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
8
|
+
|
9
|
+
from typing import Union, List
|
10
|
+
|
11
|
+
from great_expectations.core import ExpectationSuite
|
12
|
+
|
13
|
+
from ads.common.decorator.runtime_dependency import OptionalDependency
|
14
|
+
from ads.feature_store.common.utils.feature_schema_mapper import (
|
15
|
+
map_spark_type_to_feature_type,
|
16
|
+
map_feature_type_to_pandas,
|
17
|
+
)
|
18
|
+
from ads.feature_store.feature import Feature, DatasetFeature
|
19
|
+
from ads.feature_store.feature_group_expectation import Rule, Expectation
|
20
|
+
from ads.feature_store.input_feature_detail import FeatureDetail
|
21
|
+
from ads.feature_store.common.spark_session_singleton import SparkSessionSingleton
|
22
|
+
|
23
|
+
try:
|
24
|
+
from pyspark.pandas import DataFrame
|
25
|
+
except ModuleNotFoundError:
|
26
|
+
raise ModuleNotFoundError(
|
27
|
+
f"The `pyspark` module was not found. Please run `pip install "
|
28
|
+
f"{OptionalDependency.SPARK}`."
|
29
|
+
)
|
30
|
+
except Exception as e:
|
31
|
+
raise
|
32
|
+
import pandas as pd
|
33
|
+
|
34
|
+
from ads.feature_store.common.enums import (
|
35
|
+
ExecutionEngine,
|
36
|
+
FeatureType,
|
37
|
+
ExpectationType,
|
38
|
+
ValidationEngineType,
|
39
|
+
EntityType,
|
40
|
+
)
|
41
|
+
import logging
|
42
|
+
|
43
|
+
from ads.feature_engineering.feature_type import datetime
|
44
|
+
|
45
|
+
logger = logging.getLogger(__name__)
|
46
|
+
logger.setLevel(logging.INFO)
|
47
|
+
|
48
|
+
|
49
|
+
def get_execution_engine_type(
    data_frame: Union[DataFrame, pd.DataFrame]
) -> ExecutionEngine:
    """
    Pick the execution engine that matches the kind of DataFrame supplied.

    Args:
        data_frame (Union[DataFrame, pd.DataFrame]): The DataFrame to inspect.

    Returns:
        ExecutionEngine: ``ExecutionEngine.PANDAS`` when ``data_frame`` is a pandas
        DataFrame, ``ExecutionEngine.SPARK`` for every other input.
    """
    if isinstance(data_frame, pd.DataFrame):
        return ExecutionEngine.PANDAS

    # No explicit Spark type check is made: anything that is not pandas is
    # reported as Spark.
    return ExecutionEngine.SPARK
|
66
|
+
|
67
|
+
|
68
|
+
def get_metastore_id(feature_store_id: str):
    """
    Look up the metastore ID configured for a feature store.

    Args:
        feature_store_id (str): The ID of the feature store to load.

    Returns:
        The value stored under ``CONST_METASTORE_ID`` in the feature store's
        offline config, or None when the offline config is empty or missing.
    """
    # Function-scope import kept as in the original; presumably this avoids a
    # circular import between the modules -- TODO confirm.
    from ads.feature_store.feature_store import FeatureStore

    store = FeatureStore.from_id(feature_store_id)
    offline_config = store.offline_config

    if not offline_config:
        return None
    return offline_config.get(store.CONST_METASTORE_ID)
|
87
|
+
|
88
|
+
|
89
|
+
def validate_delta_format_parameters(
    timestamp: datetime = None, version_number: int = None, is_restore: bool = False
):
    """
    Check the time-travel selectors supplied to the preview / restore APIs.

    A caller may identify a commit either by its timestamp or by its version
    number, never by both. When restoring, one of the two is mandatory.

    Args:
        timestamp (datetime): Commit timestamp to select, or None.
        version_number (int): Commit version number to select, or None.
        is_restore (bool): When True, additionally require that one of the
            two selectors is present.

    Returns:
        None. The function only validates.

    Raises:
        Exception: If both selectors are given, if a restore is requested
            without any selector, or if ``version_number`` is negative.
    """
    has_timestamp = timestamp is not None
    has_version = version_number is not None

    # The two selectors are mutually exclusive.
    if has_timestamp and has_version:
        logger.error(
            f"timestamp {timestamp} and version number {version_number} both are present"
        )
        raise Exception(
            f"Timestamp and version number cannot be passed at the same time"
        )

    # A restore needs to know which commit to go back to.
    if is_restore and not (has_timestamp or has_version):
        missing_selector_message = (
            f"Either timestamp or version number must be provided for restore"
        )
        logger.error(missing_selector_message)
        raise Exception(missing_selector_message)

    if has_version and version_number < 0:
        logger.error(f"version number {version_number} cannot be negative")
        raise Exception(f"version number cannot be negative")
|
121
|
+
|
122
|
+
|
123
|
+
def show_ingestion_summary(
    entity_id: str,
    entity_type: EntityType = EntityType.FEATURE_GROUP,
    error_details: str = None,
):
    """
    Log a one-row table summarising the outcome of an ingestion.

    Args:
        entity_id (str): Identifier shown in the ``entity_id`` column.
        entity_type (EntityType, optional): Entity being ingested; its ``value``
            is shown in the table. Defaults to EntityType.FEATURE_GROUP.
        error_details (str, optional): Error text. Any truthy value marks the
            ingestion as "Failed"; otherwise it is reported as "Succeeded".
    """
    from tabulate import tabulate

    if error_details:
        status, reported_error = "Failed", error_details
    else:
        # The literal string "None" is printed when there is nothing to report.
        status, reported_error = "Succeeded", "None"

    summary_row = [entity_id, entity_type.value, status, reported_error]
    rendered_table = tabulate(
        [summary_row],
        headers=["entity_id", "entity_type", "ingestion_status", "error_details"],
        tablefmt="fancy_grid",
        numalign="center",
        stralign="center",
    )

    logger.info("Ingestion Summary \n" + rendered_table)
|
158
|
+
|
159
|
+
|
160
|
+
def show_validation_summary(ingestion_status: str, validation_output, expectation_type):
    """
    Log two tables describing a validation run.

    The first table has one row: the expectation type, every entry of
    ``validation_output["statistics"]`` and the ingestion status. The second
    table has one row per entry of ``validation_output["results"]`` showing the
    rule type, its arguments (without ``batch_id``) and the success flag.

    Args:
        ingestion_status (str): Status shown in the ``ingestion_status`` column.
        validation_output: Validation result; must support item access for
            ``"statistics"`` and ``"results"``.
        expectation_type: Value shown in the ``expectation_type`` column.
    """
    from tabulate import tabulate

    # Both tables share the same rendering options.
    grid_options = {
        "tablefmt": "fancy_grid",
        "numalign": "center",
        "stralign": "center",
    }

    statistics = validation_output["statistics"]
    summary_headers = ["expectation_type", *statistics.keys(), "ingestion_status"]
    summary_row = [expectation_type, *statistics.values(), ingestion_status]

    logger.info(
        "Validation Summary \n"
        + tabulate([summary_row], headers=summary_headers, **grid_options)
    )

    rule_rows = []
    for rule_output in validation_output["results"]:
        rule_config = rule_output["expectation_config"]
        # batch_id is left out of the displayed arguments.
        displayed_arguments = {
            name: argument
            for name, argument in rule_config["kwargs"].items()
            if name != "batch_id"
        }
        rule_rows.append(
            [
                rule_config.get("expectation_type"),
                displayed_arguments,
                rule_output.get("success"),
            ]
        )

    logger.info(
        "Validations Rules Summary \n"
        + tabulate(
            rule_rows,
            headers=["rule_type", "arguments", "status"],
            **grid_options,
        )
    )
|
207
|
+
|
208
|
+
|
209
|
+
def get_features(
    output_columns: List[dict],
    parent_id: str,
    entity_type: EntityType = EntityType.FEATURE_GROUP,
) -> List[Feature]:
    """
    Build one feature object per output column.

    Parameters:
        output_columns (List[dict]): Column descriptions; the "name" and
            "featureType" keys of each dict are read.
        parent_id (str): ID of the owning entity, passed to every feature.
        entity_type (EntityType): Selects the class to instantiate: ``Feature``
            for EntityType.FEATURE_GROUP, ``DatasetFeature`` for anything else.

    Returns:
        features (List[Feature]): The created objects, in the order of
        ``output_columns``.
    """
    # The entity type is the same for every column, so choose the class once.
    if entity_type == EntityType.FEATURE_GROUP:
        feature_class = Feature
    else:
        feature_class = DatasetFeature

    return [
        feature_class(column.get("name"), column.get("featureType"), parent_id)
        for column in output_columns
    ]
|
244
|
+
|
245
|
+
|
246
|
+
def get_schema_from_pandas_df(df: pd.DataFrame, feature_store_id: str):
    """
    Describe the schema of a pandas DataFrame.

    The frame is converted to a Spark DataFrame using the session obtained for
    the feature store's metastore, then described by
    ``get_schema_from_spark_df``.

    Args:
        df (pd.DataFrame): The pandas DataFrame to describe.
        feature_store_id (str): Feature store whose metastore ID is used to
            obtain the Spark session.

    Returns:
        The schema details produced by ``get_schema_from_spark_df``.
    """
    metastore_id = get_metastore_id(feature_store_id)
    spark_session = SparkSessionSingleton(metastore_id).get_spark_session()
    spark_df = spark_session.createDataFrame(df)
    return get_schema_from_spark_df(spark_df)
|
252
|
+
|
253
|
+
|
254
|
+
def get_schema_from_spark_df(df: DataFrame):
    """
    Describe every field of a Spark DataFrame's schema.

    Args:
        df (DataFrame): DataFrame whose ``schema.fields`` are read.

    Returns:
        A list with one dict per field, holding "name", "feature_type" (the
        field's data type mapped by ``map_spark_type_to_feature_type``) and
        "order_number" (position in the schema, counted from 1).
    """
    return [
        {
            "name": field.name,
            "feature_type": map_spark_type_to_feature_type(field.dataType),
            "order_number": position,
        }
        for position, field in enumerate(df.schema.fields, start=1)
    ]
|
266
|
+
|
267
|
+
|
268
|
+
def get_schema_from_df(
    data_frame: Union[DataFrame, pd.DataFrame], feature_store_id: str
) -> List[dict]:
    """
    Return a list of dictionaries describing the schema of a DataFrame.

    A pandas DataFrame is handed to ``get_schema_from_pandas_df`` (which needs
    ``feature_store_id``); every other input is handed directly to
    ``get_schema_from_spark_df``.
    """
    if not isinstance(data_frame, pd.DataFrame):
        # feature_store_id is not needed on this path.
        return get_schema_from_spark_df(data_frame)

    return get_schema_from_pandas_df(data_frame, feature_store_id)
|
280
|
+
|
281
|
+
|
282
|
+
def get_input_features_from_df(
    data_frame: Union[DataFrame, pd.DataFrame], feature_store_id: str
) -> List[FeatureDetail]:
    """
    Build the list of FeatureDetail objects for a DataFrame.

    The schema is obtained through ``get_schema_from_df``; each schema entry is
    expanded as keyword arguments into one ``FeatureDetail``.
    """
    return [
        FeatureDetail(**schema_entry)
        for schema_entry in get_schema_from_df(data_frame, feature_store_id)
    ]
|
297
|
+
|
298
|
+
|
299
|
+
def convert_expectation_suite_to_expectation(
    expectation_suite: ExpectationSuite, expectation_type: ExpectationType
):
    """
    Turn an ExpectationSuite into an Expectation carrying one Rule per entry.

    Args:
        expectation_suite (ExpectationSuite): Suite to convert; its name and
            its ``expectations`` are read.
        expectation_type (ExpectationType): Expectation type assigned to the
            resulting Expectation.

    Returns:
        An Expectation named after the suite, using the Great Expectations
        validation engine, whose rules are named "Rule-0", "Rule-1", ... in
        the order of the suite's expectations.
    """
    rules = [
        Rule(f"Rule-{position}")
        .with_rule_type(configuration.expectation_type)
        .with_arguments(configuration.kwargs)
        for position, configuration in enumerate(expectation_suite.expectations)
    ]

    expectation = Expectation(expectation_suite.expectation_suite_name)
    expectation = expectation.with_expectation_type(expectation_type)
    expectation = expectation.with_validation_engine_type(
        ValidationEngineType.GREAT_EXPECTATIONS
    )
    return expectation.with_rule_details(rules)
|
330
|
+
|
331
|
+
|
332
|
+
def largest_matching_subset_of_primary_keys(left_feature_group, right_feature_group):
    """
    Compute the primary key names shared by two feature groups.

    Args:
        left_feature_group: Object whose ``primary_keys.get("items")`` yields
            dicts with a "name" entry.
        right_feature_group: Same shape as ``left_feature_group``.

    Returns:
        The set of primary key names present in both feature groups.
    """

    def _primary_key_names(feature_group):
        # Collect the "name" of every primary key item of one feature group.
        return {item["name"] for item in feature_group.primary_keys.get("items")}

    left_names = _primary_key_names(left_feature_group)
    right_names = _primary_key_names(right_feature_group)
    return left_names & right_names
|
356
|
+
|
357
|
+
|
358
|
+
def convert_pandas_datatype_with_schema(
    raw_feature_details: List[dict], input_df: pd.DataFrame
) -> pd.DataFrame:
    """
    Cast the columns of a pandas DataFrame to the types declared for them and
    drop the columns that have no declaration.

    Args:
        raw_feature_details (List[dict]): Feature descriptions; the "name" and
            "featureType" keys of each dict are read.
        input_df (pd.DataFrame): Frame to convert. Note that converted columns
            are assigned back into this frame, so the caller's object is
            modified.

    Returns:
        pd.DataFrame: The result of dropping every undeclared column from
        ``input_df``.
    """
    feature_detail_map = {
        feature_details.get("name"): feature_details
        for feature_details in raw_feature_details
    }
    columns_to_remove = []

    for column in input_df.columns:
        if column not in feature_detail_map:
            # %-style arguments keep the words separated and also cope with
            # column labels that are not strings.
            logger.warning(
                "column %s doesn't exist in the input feature details", column
            )
            columns_to_remove.append(column)
            continue

        feature_type = feature_detail_map[column].get("featureType")
        pandas_type = map_feature_type_to_pandas(feature_type)
        # The null mask is taken from the column as it was before the cast;
        # positions that were null are set to None in the converted column.
        input_df[column] = (
            input_df[column]
            .astype(pandas_type)
            .where(pd.notnull(input_df[column]), None)
        )

    return input_df.drop(columns=columns_to_remove)
|
381
|
+
|
382
|
+
|
383
|
+
def convert_spark_dataframe_with_schema(
    raw_feature_details: List[dict], input_df: DataFrame
) -> DataFrame:
    """
    Drop from a Spark DataFrame every column that has no declared feature.

    Args:
        raw_feature_details (List[dict]): Feature descriptions; only the
            "name" key of each dict is read.
        input_df (DataFrame): Frame whose ``columns`` are checked.

    Returns:
        DataFrame: The result of ``input_df.drop`` for the undeclared columns.
    """
    # Only membership is needed here, so the names are kept in a set.
    declared_names = {
        feature_details.get("name") for feature_details in raw_feature_details
    }

    columns_to_remove = []
    for column in input_df.columns:
        if column not in declared_names:
            # %-style arguments keep the words of the message separated.
            logger.warning(
                "column %s doesn't exist in the input feature details", column
            )
            columns_to_remove.append(column)

    return input_df.drop(*columns_to_remove)
|
398
|
+
|
399
|
+
|
400
|
+
def validate_input_feature_details(input_feature_details, data_frame):
    """
    Align a DataFrame with the supplied input feature details.

    A pandas DataFrame goes through ``convert_pandas_datatype_with_schema``;
    every other input goes through ``convert_spark_dataframe_with_schema``.

    Args:
        input_feature_details: Feature descriptions passed to the converter.
        data_frame: The DataFrame to convert.

    Returns:
        Whatever the selected converter returns.
    """
    converter = (
        convert_pandas_datatype_with_schema
        if isinstance(data_frame, pd.DataFrame)
        else convert_spark_dataframe_with_schema
    )
    return converter(input_feature_details, data_frame)
|
File without changes
|
@@ -0,0 +1,129 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8; -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2023 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
import uuid
|
8
|
+
|
9
|
+
import pandas as pd
|
10
|
+
|
11
|
+
from ads.common.decorator.runtime_dependency import OptionalDependency
|
12
|
+
|
13
|
+
try:
|
14
|
+
from great_expectations.core import ExpectationConfiguration
|
15
|
+
from great_expectations.core import ExpectationSuite, IDDict
|
16
|
+
from great_expectations.core.batch import BatchDefinition, Batch
|
17
|
+
from great_expectations.execution_engine import (
|
18
|
+
SparkDFExecutionEngine,
|
19
|
+
PandasExecutionEngine,
|
20
|
+
)
|
21
|
+
from great_expectations.validator.validator import Validator
|
22
|
+
except ModuleNotFoundError:
|
23
|
+
raise ModuleNotFoundError(
|
24
|
+
f"The `feature-store` module was not found. Please run `pip install "
|
25
|
+
f"{OptionalDependency.FEATURE_STORE}`."
|
26
|
+
)
|
27
|
+
except Exception as e:
|
28
|
+
raise
|
29
|
+
from ads.feature_store.common.enums import ExpectationType
|
30
|
+
|
31
|
+
|
32
|
+
class ExpectationService:
    """Helpers for validating a dataframe with Great Expectations.

    Expectation rules supplied as a list of dicts are added to an expectation
    suite, and the dataframe is validated against that suite with the
    execution engine that matches its type (pandas or Spark).

    Methods
    -------
    apply_validations(expectation_details, expectation_suite_name, dataframe):
        Runs the rules found in ``expectation_details`` against ``dataframe``
        and returns the validation result, or None when there is nothing to
        validate.
    """

    @staticmethod
    def __add_validation_to_expectation_suite(
        expectation_suite: ExpectationSuite, dataframe, expectations_rules
    ):
        """Add the rules to the suite and validate the dataframe against it.

        Each rule contributes its lower-cased "ruleType" as the expectation
        type and its "arguments" as the expectation kwargs.
        """
        for rule in expectations_rules:
            configuration = ExpectationConfiguration(
                expectation_type=rule["ruleType"].lower(),
                kwargs=rule["arguments"],
            )
            expectation_suite.add_expectation(configuration)

        # pandas frames get the pandas engine; everything else is handed to
        # the Spark engine.
        if isinstance(dataframe, pd.DataFrame):
            execution_engine = PandasExecutionEngine()
        else:
            execution_engine = SparkDFExecutionEngine(force_reuse_spark_context=True)

        # One label is used for the datasource, connector and asset names; the
        # batch itself is identified by a freshly generated UUID.
        pipeline_label = "feature-ingestion-pipeline"
        batch_definition = BatchDefinition(
            datasource_name=pipeline_label,
            data_connector_name=pipeline_label,
            data_asset_name=pipeline_label,
            batch_identifiers=IDDict(ge_batch_id=str(uuid.uuid1())),
        )
        batch = Batch(data=dataframe, batch_definition=batch_definition)

        validator = Validator(
            execution_engine=execution_engine,
            expectation_suite=expectation_suite,
            batches=[batch],
        )
        return validator.validate()

    @staticmethod
    def __validate_expectation_details(
        expectation_details, feature_group, input_dataframe
    ):
        """Create an empty suite named ``feature_group`` and validate with the
        rules listed under ``expectation_details["createRuleDetails"]``."""
        suite = ExpectationSuite(expectation_suite_name=feature_group)
        suite.expectations = []

        rules = expectation_details["createRuleDetails"]
        return ExpectationService.__add_validation_to_expectation_suite(
            suite, input_dataframe, rules
        )

    @staticmethod
    def apply_validations(expectation_details, expectation_suite_name, dataframe):
        """Validate the dataframe against the expectations in expectation_details.

        Parameters
        ----------
        expectation_details : dict
            The details of the expectations; "expectationType" and
            "createRuleDetails" are read.
        expectation_suite_name : str
            The name of the expectation suite.
        dataframe : Union[pd.DataFrame, pyspark.sql.DataFrame]
            The data to validate.

        Returns
        -------
        The result of the validator's ``validate()`` call, or None when
        ``expectation_details`` is empty or its "expectationType" equals
        ``ExpectationType.NO_EXPECTATION``.
        """
        # NOTE(review): the stored value is compared directly with the enum
        # member -- confirm callers do not store the member's string value.
        validation_requested = bool(expectation_details) and (
            expectation_details.get("expectationType")
            != ExpectationType.NO_EXPECTATION
        )
        if not validation_requested:
            return None

        return ExpectationService.__validate_expectation_details(
            expectation_details, expectation_suite_name, dataframe
        )
|