oracle-ads 2.13.9rc0__py3-none-any.whl → 2.13.10rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/__init__.py +40 -0
- ads/aqua/app.py +507 -0
- ads/aqua/cli.py +96 -0
- ads/aqua/client/__init__.py +3 -0
- ads/aqua/client/client.py +836 -0
- ads/aqua/client/openai_client.py +305 -0
- ads/aqua/common/__init__.py +5 -0
- ads/aqua/common/decorator.py +125 -0
- ads/aqua/common/entities.py +274 -0
- ads/aqua/common/enums.py +134 -0
- ads/aqua/common/errors.py +109 -0
- ads/aqua/common/utils.py +1295 -0
- ads/aqua/config/__init__.py +4 -0
- ads/aqua/config/container_config.py +247 -0
- ads/aqua/config/evaluation/__init__.py +4 -0
- ads/aqua/config/evaluation/evaluation_service_config.py +147 -0
- ads/aqua/config/utils/__init__.py +4 -0
- ads/aqua/config/utils/serializer.py +339 -0
- ads/aqua/constants.py +116 -0
- ads/aqua/data.py +14 -0
- ads/aqua/dummy_data/icon.txt +1 -0
- ads/aqua/dummy_data/oci_model_deployments.json +56 -0
- ads/aqua/dummy_data/oci_models.json +1 -0
- ads/aqua/dummy_data/readme.md +26 -0
- ads/aqua/evaluation/__init__.py +8 -0
- ads/aqua/evaluation/constants.py +53 -0
- ads/aqua/evaluation/entities.py +186 -0
- ads/aqua/evaluation/errors.py +70 -0
- ads/aqua/evaluation/evaluation.py +1814 -0
- ads/aqua/extension/__init__.py +42 -0
- ads/aqua/extension/aqua_ws_msg_handler.py +76 -0
- ads/aqua/extension/base_handler.py +90 -0
- ads/aqua/extension/common_handler.py +121 -0
- ads/aqua/extension/common_ws_msg_handler.py +36 -0
- ads/aqua/extension/deployment_handler.py +381 -0
- ads/aqua/extension/deployment_ws_msg_handler.py +54 -0
- ads/aqua/extension/errors.py +30 -0
- ads/aqua/extension/evaluation_handler.py +129 -0
- ads/aqua/extension/evaluation_ws_msg_handler.py +61 -0
- ads/aqua/extension/finetune_handler.py +96 -0
- ads/aqua/extension/model_handler.py +390 -0
- ads/aqua/extension/models/__init__.py +0 -0
- ads/aqua/extension/models/ws_models.py +145 -0
- ads/aqua/extension/models_ws_msg_handler.py +50 -0
- ads/aqua/extension/ui_handler.py +300 -0
- ads/aqua/extension/ui_websocket_handler.py +130 -0
- ads/aqua/extension/utils.py +133 -0
- ads/aqua/finetuning/__init__.py +7 -0
- ads/aqua/finetuning/constants.py +23 -0
- ads/aqua/finetuning/entities.py +181 -0
- ads/aqua/finetuning/finetuning.py +749 -0
- ads/aqua/model/__init__.py +8 -0
- ads/aqua/model/constants.py +60 -0
- ads/aqua/model/entities.py +385 -0
- ads/aqua/model/enums.py +32 -0
- ads/aqua/model/model.py +2134 -0
- ads/aqua/model/utils.py +52 -0
- ads/aqua/modeldeployment/__init__.py +6 -0
- ads/aqua/modeldeployment/constants.py +10 -0
- ads/aqua/modeldeployment/deployment.py +1315 -0
- ads/aqua/modeldeployment/entities.py +653 -0
- ads/aqua/modeldeployment/utils.py +543 -0
- ads/aqua/resources/gpu_shapes_index.json +94 -0
- ads/aqua/server/__init__.py +4 -0
- ads/aqua/server/__main__.py +24 -0
- ads/aqua/server/app.py +47 -0
- ads/aqua/server/aqua_spec.yml +1291 -0
- ads/aqua/training/__init__.py +4 -0
- ads/aqua/training/exceptions.py +476 -0
- ads/aqua/ui.py +519 -0
- ads/automl/__init__.py +9 -0
- ads/automl/driver.py +330 -0
- ads/automl/provider.py +975 -0
- ads/bds/__init__.py +5 -0
- ads/bds/auth.py +127 -0
- ads/bds/big_data_service.py +255 -0
- ads/catalog/__init__.py +19 -0
- ads/catalog/model.py +1576 -0
- ads/catalog/notebook.py +461 -0
- ads/catalog/project.py +468 -0
- ads/catalog/summary.py +178 -0
- ads/common/__init__.py +11 -0
- ads/common/analyzer.py +65 -0
- ads/common/artifact/.model-ignore +63 -0
- ads/common/artifact/__init__.py +10 -0
- ads/common/auth.py +1122 -0
- ads/common/card_identifier.py +83 -0
- ads/common/config.py +647 -0
- ads/common/data.py +165 -0
- ads/common/decorator/__init__.py +9 -0
- ads/common/decorator/argument_to_case.py +88 -0
- ads/common/decorator/deprecate.py +69 -0
- ads/common/decorator/require_nonempty_arg.py +65 -0
- ads/common/decorator/runtime_dependency.py +178 -0
- ads/common/decorator/threaded.py +97 -0
- ads/common/decorator/utils.py +35 -0
- ads/common/dsc_file_system.py +303 -0
- ads/common/error.py +14 -0
- ads/common/extended_enum.py +81 -0
- ads/common/function/__init__.py +5 -0
- ads/common/function/fn_util.py +142 -0
- ads/common/function/func_conf.yaml +25 -0
- ads/common/ipython.py +76 -0
- ads/common/model.py +679 -0
- ads/common/model_artifact.py +1759 -0
- ads/common/model_artifact_schema.json +107 -0
- ads/common/model_export_util.py +664 -0
- ads/common/model_metadata.py +24 -0
- ads/common/object_storage_details.py +296 -0
- ads/common/oci_client.py +179 -0
- ads/common/oci_datascience.py +46 -0
- ads/common/oci_logging.py +1144 -0
- ads/common/oci_mixin.py +957 -0
- ads/common/oci_resource.py +136 -0
- ads/common/serializer.py +559 -0
- ads/common/utils.py +1852 -0
- ads/common/word_lists.py +1491 -0
- ads/common/work_request.py +189 -0
- ads/config.py +1 -0
- ads/data_labeling/__init__.py +13 -0
- ads/data_labeling/boundingbox.py +253 -0
- ads/data_labeling/constants.py +47 -0
- ads/data_labeling/data_labeling_service.py +244 -0
- ads/data_labeling/interface/__init__.py +5 -0
- ads/data_labeling/interface/loader.py +16 -0
- ads/data_labeling/interface/parser.py +16 -0
- ads/data_labeling/interface/reader.py +23 -0
- ads/data_labeling/loader/__init__.py +5 -0
- ads/data_labeling/loader/file_loader.py +241 -0
- ads/data_labeling/metadata.py +110 -0
- ads/data_labeling/mixin/__init__.py +5 -0
- ads/data_labeling/mixin/data_labeling.py +232 -0
- ads/data_labeling/ner.py +129 -0
- ads/data_labeling/parser/__init__.py +5 -0
- ads/data_labeling/parser/dls_record_parser.py +388 -0
- ads/data_labeling/parser/export_metadata_parser.py +94 -0
- ads/data_labeling/parser/export_record_parser.py +473 -0
- ads/data_labeling/reader/__init__.py +5 -0
- ads/data_labeling/reader/dataset_reader.py +574 -0
- ads/data_labeling/reader/dls_record_reader.py +121 -0
- ads/data_labeling/reader/export_record_reader.py +62 -0
- ads/data_labeling/reader/jsonl_reader.py +75 -0
- ads/data_labeling/reader/metadata_reader.py +203 -0
- ads/data_labeling/reader/record_reader.py +263 -0
- ads/data_labeling/record.py +52 -0
- ads/data_labeling/visualizer/__init__.py +5 -0
- ads/data_labeling/visualizer/image_visualizer.py +525 -0
- ads/data_labeling/visualizer/text_visualizer.py +357 -0
- ads/database/__init__.py +5 -0
- ads/database/connection.py +338 -0
- ads/dataset/__init__.py +10 -0
- ads/dataset/capabilities.md +51 -0
- ads/dataset/classification_dataset.py +339 -0
- ads/dataset/correlation.py +226 -0
- ads/dataset/correlation_plot.py +563 -0
- ads/dataset/dask_series.py +173 -0
- ads/dataset/dataframe_transformer.py +110 -0
- ads/dataset/dataset.py +1979 -0
- ads/dataset/dataset_browser.py +360 -0
- ads/dataset/dataset_with_target.py +995 -0
- ads/dataset/exception.py +25 -0
- ads/dataset/factory.py +987 -0
- ads/dataset/feature_engineering_transformer.py +35 -0
- ads/dataset/feature_selection.py +107 -0
- ads/dataset/forecasting_dataset.py +26 -0
- ads/dataset/helper.py +1450 -0
- ads/dataset/label_encoder.py +99 -0
- ads/dataset/mixin/__init__.py +5 -0
- ads/dataset/mixin/dataset_accessor.py +134 -0
- ads/dataset/pipeline.py +58 -0
- ads/dataset/plot.py +710 -0
- ads/dataset/progress.py +86 -0
- ads/dataset/recommendation.py +297 -0
- ads/dataset/recommendation_transformer.py +502 -0
- ads/dataset/regression_dataset.py +14 -0
- ads/dataset/sampled_dataset.py +1050 -0
- ads/dataset/target.py +98 -0
- ads/dataset/timeseries.py +18 -0
- ads/dbmixin/__init__.py +5 -0
- ads/dbmixin/db_pandas_accessor.py +153 -0
- ads/environment/__init__.py +9 -0
- ads/environment/ml_runtime.py +66 -0
- ads/evaluations/README.md +14 -0
- ads/evaluations/__init__.py +109 -0
- ads/evaluations/evaluation_plot.py +983 -0
- ads/evaluations/evaluator.py +1334 -0
- ads/evaluations/statistical_metrics.py +543 -0
- ads/experiments/__init__.py +9 -0
- ads/experiments/capabilities.md +0 -0
- ads/explanations/__init__.py +21 -0
- ads/explanations/base_explainer.py +142 -0
- ads/explanations/capabilities.md +83 -0
- ads/explanations/explainer.py +190 -0
- ads/explanations/mlx_global_explainer.py +1050 -0
- ads/explanations/mlx_interface.py +386 -0
- ads/explanations/mlx_local_explainer.py +287 -0
- ads/explanations/mlx_whatif_explainer.py +201 -0
- ads/feature_engineering/__init__.py +20 -0
- ads/feature_engineering/accessor/__init__.py +5 -0
- ads/feature_engineering/accessor/dataframe_accessor.py +535 -0
- ads/feature_engineering/accessor/mixin/__init__.py +5 -0
- ads/feature_engineering/accessor/mixin/correlation.py +166 -0
- ads/feature_engineering/accessor/mixin/eda_mixin.py +266 -0
- ads/feature_engineering/accessor/mixin/eda_mixin_series.py +85 -0
- ads/feature_engineering/accessor/mixin/feature_types_mixin.py +211 -0
- ads/feature_engineering/accessor/mixin/utils.py +65 -0
- ads/feature_engineering/accessor/series_accessor.py +431 -0
- ads/feature_engineering/adsimage/__init__.py +5 -0
- ads/feature_engineering/adsimage/image.py +192 -0
- ads/feature_engineering/adsimage/image_reader.py +170 -0
- ads/feature_engineering/adsimage/interface/__init__.py +5 -0
- ads/feature_engineering/adsimage/interface/reader.py +19 -0
- ads/feature_engineering/adsstring/__init__.py +7 -0
- ads/feature_engineering/adsstring/oci_language/__init__.py +8 -0
- ads/feature_engineering/adsstring/string/__init__.py +8 -0
- ads/feature_engineering/data_schema.json +57 -0
- ads/feature_engineering/dataset/__init__.py +5 -0
- ads/feature_engineering/dataset/zip_code_data.py +42062 -0
- ads/feature_engineering/exceptions.py +40 -0
- ads/feature_engineering/feature_type/__init__.py +133 -0
- ads/feature_engineering/feature_type/address.py +184 -0
- ads/feature_engineering/feature_type/adsstring/__init__.py +5 -0
- ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +164 -0
- ads/feature_engineering/feature_type/adsstring/oci_language.py +93 -0
- ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +5 -0
- ads/feature_engineering/feature_type/adsstring/parsers/base.py +47 -0
- ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +96 -0
- ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +221 -0
- ads/feature_engineering/feature_type/adsstring/string.py +258 -0
- ads/feature_engineering/feature_type/base.py +58 -0
- ads/feature_engineering/feature_type/boolean.py +183 -0
- ads/feature_engineering/feature_type/category.py +146 -0
- ads/feature_engineering/feature_type/constant.py +137 -0
- ads/feature_engineering/feature_type/continuous.py +151 -0
- ads/feature_engineering/feature_type/creditcard.py +314 -0
- ads/feature_engineering/feature_type/datetime.py +190 -0
- ads/feature_engineering/feature_type/discrete.py +134 -0
- ads/feature_engineering/feature_type/document.py +43 -0
- ads/feature_engineering/feature_type/gis.py +251 -0
- ads/feature_engineering/feature_type/handler/__init__.py +5 -0
- ads/feature_engineering/feature_type/handler/feature_validator.py +524 -0
- ads/feature_engineering/feature_type/handler/feature_warning.py +319 -0
- ads/feature_engineering/feature_type/handler/warnings.py +128 -0
- ads/feature_engineering/feature_type/integer.py +142 -0
- ads/feature_engineering/feature_type/ip_address.py +144 -0
- ads/feature_engineering/feature_type/ip_address_v4.py +138 -0
- ads/feature_engineering/feature_type/ip_address_v6.py +138 -0
- ads/feature_engineering/feature_type/lat_long.py +256 -0
- ads/feature_engineering/feature_type/object.py +43 -0
- ads/feature_engineering/feature_type/ordinal.py +132 -0
- ads/feature_engineering/feature_type/phone_number.py +135 -0
- ads/feature_engineering/feature_type/string.py +171 -0
- ads/feature_engineering/feature_type/text.py +93 -0
- ads/feature_engineering/feature_type/unknown.py +43 -0
- ads/feature_engineering/feature_type/zip_code.py +164 -0
- ads/feature_engineering/feature_type_manager.py +406 -0
- ads/feature_engineering/schema.py +795 -0
- ads/feature_engineering/utils.py +245 -0
- ads/feature_store/.readthedocs.yaml +19 -0
- ads/feature_store/README.md +65 -0
- ads/feature_store/__init__.py +9 -0
- ads/feature_store/common/__init__.py +0 -0
- ads/feature_store/common/enums.py +339 -0
- ads/feature_store/common/exceptions.py +18 -0
- ads/feature_store/common/spark_session_singleton.py +125 -0
- ads/feature_store/common/utils/__init__.py +0 -0
- ads/feature_store/common/utils/base64_encoder_decoder.py +72 -0
- ads/feature_store/common/utils/feature_schema_mapper.py +283 -0
- ads/feature_store/common/utils/transformation_utils.py +82 -0
- ads/feature_store/common/utils/utility.py +403 -0
- ads/feature_store/data_validation/__init__.py +0 -0
- ads/feature_store/data_validation/great_expectation.py +129 -0
- ads/feature_store/dataset.py +1230 -0
- ads/feature_store/dataset_job.py +530 -0
- ads/feature_store/docs/Dockerfile +7 -0
- ads/feature_store/docs/Makefile +44 -0
- ads/feature_store/docs/conf.py +28 -0
- ads/feature_store/docs/requirements.txt +14 -0
- ads/feature_store/docs/source/ads.feature_store.query.rst +20 -0
- ads/feature_store/docs/source/cicd.rst +137 -0
- ads/feature_store/docs/source/conf.py +86 -0
- ads/feature_store/docs/source/data_versioning.rst +33 -0
- ads/feature_store/docs/source/dataset.rst +388 -0
- ads/feature_store/docs/source/dataset_job.rst +27 -0
- ads/feature_store/docs/source/demo.rst +70 -0
- ads/feature_store/docs/source/entity.rst +78 -0
- ads/feature_store/docs/source/feature_group.rst +624 -0
- ads/feature_store/docs/source/feature_group_job.rst +29 -0
- ads/feature_store/docs/source/feature_store.rst +122 -0
- ads/feature_store/docs/source/feature_store_class.rst +123 -0
- ads/feature_store/docs/source/feature_validation.rst +66 -0
- ads/feature_store/docs/source/figures/cicd.png +0 -0
- ads/feature_store/docs/source/figures/data_validation.png +0 -0
- ads/feature_store/docs/source/figures/data_versioning.png +0 -0
- ads/feature_store/docs/source/figures/dataset.gif +0 -0
- ads/feature_store/docs/source/figures/dataset.png +0 -0
- ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
- ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
- ads/feature_store/docs/source/figures/entity.png +0 -0
- ads/feature_store/docs/source/figures/feature_group.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
- ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
- ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
- ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
- ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
- ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
- ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
- ads/feature_store/docs/source/figures/overview.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
- ads/feature_store/docs/source/figures/stats_1.png +0 -0
- ads/feature_store/docs/source/figures/stats_2.png +0 -0
- ads/feature_store/docs/source/figures/stats_d.png +0 -0
- ads/feature_store/docs/source/figures/stats_fg.png +0 -0
- ads/feature_store/docs/source/figures/transformation.png +0 -0
- ads/feature_store/docs/source/figures/transformations.gif +0 -0
- ads/feature_store/docs/source/figures/validation.png +0 -0
- ads/feature_store/docs/source/figures/validation_fg.png +0 -0
- ads/feature_store/docs/source/figures/validation_results.png +0 -0
- ads/feature_store/docs/source/figures/validation_summary.png +0 -0
- ads/feature_store/docs/source/index.rst +81 -0
- ads/feature_store/docs/source/module.rst +8 -0
- ads/feature_store/docs/source/notebook.rst +94 -0
- ads/feature_store/docs/source/overview.rst +47 -0
- ads/feature_store/docs/source/quickstart.rst +176 -0
- ads/feature_store/docs/source/release_notes.rst +194 -0
- ads/feature_store/docs/source/setup_feature_store.rst +81 -0
- ads/feature_store/docs/source/statistics.rst +58 -0
- ads/feature_store/docs/source/transformation.rst +199 -0
- ads/feature_store/docs/source/ui.rst +65 -0
- ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +66 -0
- ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +192 -0
- ads/feature_store/docs/source/user_guides.setup.terraform.rst +338 -0
- ads/feature_store/entity.py +718 -0
- ads/feature_store/execution_strategy/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +375 -0
- ads/feature_store/execution_strategy/engine/__init__.py +0 -0
- ads/feature_store/execution_strategy/engine/spark_engine.py +316 -0
- ads/feature_store/execution_strategy/execution_strategy.py +113 -0
- ads/feature_store/execution_strategy/execution_strategy_provider.py +47 -0
- ads/feature_store/execution_strategy/spark/__init__.py +0 -0
- ads/feature_store/execution_strategy/spark/spark_execution.py +618 -0
- ads/feature_store/feature.py +192 -0
- ads/feature_store/feature_group.py +1494 -0
- ads/feature_store/feature_group_expectation.py +346 -0
- ads/feature_store/feature_group_job.py +602 -0
- ads/feature_store/feature_lineage/__init__.py +0 -0
- ads/feature_store/feature_lineage/graphviz_service.py +180 -0
- ads/feature_store/feature_option_details.py +50 -0
- ads/feature_store/feature_statistics/__init__.py +0 -0
- ads/feature_store/feature_statistics/statistics_service.py +99 -0
- ads/feature_store/feature_store.py +699 -0
- ads/feature_store/feature_store_registrar.py +518 -0
- ads/feature_store/input_feature_detail.py +149 -0
- ads/feature_store/mixin/__init__.py +4 -0
- ads/feature_store/mixin/oci_feature_store.py +145 -0
- ads/feature_store/model_details.py +73 -0
- ads/feature_store/query/__init__.py +0 -0
- ads/feature_store/query/filter.py +266 -0
- ads/feature_store/query/generator/__init__.py +0 -0
- ads/feature_store/query/generator/query_generator.py +298 -0
- ads/feature_store/query/join.py +161 -0
- ads/feature_store/query/query.py +403 -0
- ads/feature_store/query/validator/__init__.py +0 -0
- ads/feature_store/query/validator/query_validator.py +57 -0
- ads/feature_store/response/__init__.py +0 -0
- ads/feature_store/response/response_builder.py +68 -0
- ads/feature_store/service/__init__.py +0 -0
- ads/feature_store/service/oci_dataset.py +139 -0
- ads/feature_store/service/oci_dataset_job.py +199 -0
- ads/feature_store/service/oci_entity.py +125 -0
- ads/feature_store/service/oci_feature_group.py +164 -0
- ads/feature_store/service/oci_feature_group_job.py +214 -0
- ads/feature_store/service/oci_feature_store.py +182 -0
- ads/feature_store/service/oci_lineage.py +87 -0
- ads/feature_store/service/oci_transformation.py +104 -0
- ads/feature_store/statistics/__init__.py +0 -0
- ads/feature_store/statistics/abs_feature_value.py +49 -0
- ads/feature_store/statistics/charts/__init__.py +0 -0
- ads/feature_store/statistics/charts/abstract_feature_plot.py +37 -0
- ads/feature_store/statistics/charts/box_plot.py +148 -0
- ads/feature_store/statistics/charts/frequency_distribution.py +65 -0
- ads/feature_store/statistics/charts/probability_distribution.py +68 -0
- ads/feature_store/statistics/charts/top_k_frequent_elements.py +98 -0
- ads/feature_store/statistics/feature_stat.py +126 -0
- ads/feature_store/statistics/generic_feature_value.py +33 -0
- ads/feature_store/statistics/statistics.py +41 -0
- ads/feature_store/statistics_config.py +101 -0
- ads/feature_store/templates/feature_store_template.yaml +45 -0
- ads/feature_store/transformation.py +499 -0
- ads/feature_store/validation_output.py +57 -0
- ads/hpo/__init__.py +9 -0
- ads/hpo/_imports.py +91 -0
- ads/hpo/ads_search_space.py +439 -0
- ads/hpo/distributions.py +325 -0
- ads/hpo/objective.py +280 -0
- ads/hpo/search_cv.py +1657 -0
- ads/hpo/stopping_criterion.py +75 -0
- ads/hpo/tuner_artifact.py +413 -0
- ads/hpo/utils.py +91 -0
- ads/hpo/validation.py +140 -0
- ads/hpo/visualization/__init__.py +5 -0
- ads/hpo/visualization/_contour.py +23 -0
- ads/hpo/visualization/_edf.py +20 -0
- ads/hpo/visualization/_intermediate_values.py +21 -0
- ads/hpo/visualization/_optimization_history.py +25 -0
- ads/hpo/visualization/_parallel_coordinate.py +169 -0
- ads/hpo/visualization/_param_importances.py +26 -0
- ads/jobs/__init__.py +53 -0
- ads/jobs/ads_job.py +663 -0
- ads/jobs/builders/__init__.py +5 -0
- ads/jobs/builders/base.py +156 -0
- ads/jobs/builders/infrastructure/__init__.py +6 -0
- ads/jobs/builders/infrastructure/base.py +165 -0
- ads/jobs/builders/infrastructure/dataflow.py +1252 -0
- ads/jobs/builders/infrastructure/dsc_job.py +1894 -0
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +1233 -0
- ads/jobs/builders/infrastructure/utils.py +65 -0
- ads/jobs/builders/runtimes/__init__.py +5 -0
- ads/jobs/builders/runtimes/artifact.py +338 -0
- ads/jobs/builders/runtimes/base.py +325 -0
- ads/jobs/builders/runtimes/container_runtime.py +242 -0
- ads/jobs/builders/runtimes/python_runtime.py +1016 -0
- ads/jobs/builders/runtimes/pytorch_runtime.py +204 -0
- ads/jobs/cli.py +104 -0
- ads/jobs/env_var_parser.py +131 -0
- ads/jobs/extension.py +160 -0
- ads/jobs/schema/__init__.py +5 -0
- ads/jobs/schema/infrastructure_schema.json +116 -0
- ads/jobs/schema/job_schema.json +42 -0
- ads/jobs/schema/runtime_schema.json +183 -0
- ads/jobs/schema/validator.py +141 -0
- ads/jobs/serializer.py +296 -0
- ads/jobs/templates/__init__.py +5 -0
- ads/jobs/templates/container.py +6 -0
- ads/jobs/templates/driver_notebook.py +177 -0
- ads/jobs/templates/driver_oci.py +500 -0
- ads/jobs/templates/driver_python.py +48 -0
- ads/jobs/templates/driver_pytorch.py +852 -0
- ads/jobs/templates/driver_utils.py +615 -0
- ads/jobs/templates/hostname_from_env.c +55 -0
- ads/jobs/templates/oci_metrics.py +181 -0
- ads/jobs/utils.py +104 -0
- ads/llm/__init__.py +28 -0
- ads/llm/autogen/__init__.py +2 -0
- ads/llm/autogen/constants.py +15 -0
- ads/llm/autogen/reports/__init__.py +2 -0
- ads/llm/autogen/reports/base.py +67 -0
- ads/llm/autogen/reports/data.py +103 -0
- ads/llm/autogen/reports/session.py +526 -0
- ads/llm/autogen/reports/templates/chat_box.html +13 -0
- ads/llm/autogen/reports/templates/chat_box_lt.html +5 -0
- ads/llm/autogen/reports/templates/chat_box_rt.html +6 -0
- ads/llm/autogen/reports/utils.py +56 -0
- ads/llm/autogen/v02/__init__.py +4 -0
- ads/llm/autogen/v02/client.py +295 -0
- ads/llm/autogen/v02/log_handlers/__init__.py +2 -0
- ads/llm/autogen/v02/log_handlers/oci_file_handler.py +83 -0
- ads/llm/autogen/v02/loggers/__init__.py +6 -0
- ads/llm/autogen/v02/loggers/metric_logger.py +320 -0
- ads/llm/autogen/v02/loggers/session_logger.py +580 -0
- ads/llm/autogen/v02/loggers/utils.py +86 -0
- ads/llm/autogen/v02/runtime_logging.py +163 -0
- ads/llm/chain.py +268 -0
- ads/llm/chat_template.py +31 -0
- ads/llm/deploy.py +63 -0
- ads/llm/guardrails/__init__.py +5 -0
- ads/llm/guardrails/base.py +442 -0
- ads/llm/guardrails/huggingface.py +44 -0
- ads/llm/langchain/__init__.py +5 -0
- ads/llm/langchain/plugins/__init__.py +5 -0
- ads/llm/langchain/plugins/chat_models/__init__.py +5 -0
- ads/llm/langchain/plugins/chat_models/oci_data_science.py +1027 -0
- ads/llm/langchain/plugins/embeddings/__init__.py +4 -0
- ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +184 -0
- ads/llm/langchain/plugins/llms/__init__.py +5 -0
- ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +979 -0
- ads/llm/requirements.txt +3 -0
- ads/llm/serialize.py +219 -0
- ads/llm/serializers/__init__.py +0 -0
- ads/llm/serializers/retrieval_qa.py +153 -0
- ads/llm/serializers/runnable_parallel.py +27 -0
- ads/llm/templates/score_chain.jinja2 +155 -0
- ads/llm/templates/tool_chat_template_hermes.jinja +130 -0
- ads/llm/templates/tool_chat_template_mistral_parallel.jinja +94 -0
- ads/model/__init__.py +52 -0
- ads/model/artifact.py +573 -0
- ads/model/artifact_downloader.py +254 -0
- ads/model/artifact_uploader.py +267 -0
- ads/model/base_properties.py +238 -0
- ads/model/common/.model-ignore +66 -0
- ads/model/common/__init__.py +5 -0
- ads/model/common/utils.py +142 -0
- ads/model/datascience_model.py +2635 -0
- ads/model/deployment/__init__.py +20 -0
- ads/model/deployment/common/__init__.py +5 -0
- ads/model/deployment/common/utils.py +308 -0
- ads/model/deployment/model_deployer.py +466 -0
- ads/model/deployment/model_deployment.py +1846 -0
- ads/model/deployment/model_deployment_infrastructure.py +671 -0
- ads/model/deployment/model_deployment_properties.py +493 -0
- ads/model/deployment/model_deployment_runtime.py +838 -0
- ads/model/extractor/__init__.py +5 -0
- ads/model/extractor/automl_extractor.py +74 -0
- ads/model/extractor/embedding_onnx_extractor.py +80 -0
- ads/model/extractor/huggingface_extractor.py +88 -0
- ads/model/extractor/keras_extractor.py +84 -0
- ads/model/extractor/lightgbm_extractor.py +93 -0
- ads/model/extractor/model_info_extractor.py +114 -0
- ads/model/extractor/model_info_extractor_factory.py +105 -0
- ads/model/extractor/pytorch_extractor.py +87 -0
- ads/model/extractor/sklearn_extractor.py +112 -0
- ads/model/extractor/spark_extractor.py +89 -0
- ads/model/extractor/tensorflow_extractor.py +85 -0
- ads/model/extractor/xgboost_extractor.py +94 -0
- ads/model/framework/__init__.py +5 -0
- ads/model/framework/automl_model.py +178 -0
- ads/model/framework/embedding_onnx_model.py +438 -0
- ads/model/framework/huggingface_model.py +399 -0
- ads/model/framework/lightgbm_model.py +266 -0
- ads/model/framework/pytorch_model.py +266 -0
- ads/model/framework/sklearn_model.py +250 -0
- ads/model/framework/spark_model.py +326 -0
- ads/model/framework/tensorflow_model.py +254 -0
- ads/model/framework/xgboost_model.py +258 -0
- ads/model/generic_model.py +3518 -0
- ads/model/model_artifact_boilerplate/README.md +381 -0
- ads/model/model_artifact_boilerplate/__init__.py +5 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +5 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +427 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +2 -0
- ads/model/model_artifact_boilerplate/runtime.yaml +7 -0
- ads/model/model_artifact_boilerplate/score.py +61 -0
- ads/model/model_file_description_schema.json +68 -0
- ads/model/model_introspect.py +331 -0
- ads/model/model_metadata.py +1810 -0
- ads/model/model_metadata_mixin.py +460 -0
- ads/model/model_properties.py +63 -0
- ads/model/model_version_set.py +739 -0
- ads/model/runtime/__init__.py +5 -0
- ads/model/runtime/env_info.py +306 -0
- ads/model/runtime/model_deployment_details.py +37 -0
- ads/model/runtime/model_provenance_details.py +58 -0
- ads/model/runtime/runtime_info.py +81 -0
- ads/model/runtime/schemas/inference_env_info_schema.yaml +16 -0
- ads/model/runtime/schemas/model_provenance_schema.yaml +36 -0
- ads/model/runtime/schemas/training_env_info_schema.yaml +16 -0
- ads/model/runtime/utils.py +201 -0
- ads/model/serde/__init__.py +5 -0
- ads/model/serde/common.py +40 -0
- ads/model/serde/model_input.py +547 -0
- ads/model/serde/model_serializer.py +1184 -0
- ads/model/service/__init__.py +5 -0
- ads/model/service/oci_datascience_model.py +1076 -0
- ads/model/service/oci_datascience_model_deployment.py +500 -0
- ads/model/service/oci_datascience_model_version_set.py +176 -0
- ads/model/transformer/__init__.py +5 -0
- ads/model/transformer/onnx_transformer.py +324 -0
- ads/mysqldb/__init__.py +5 -0
- ads/mysqldb/mysql_db.py +227 -0
- ads/opctl/__init__.py +18 -0
- ads/opctl/anomaly_detection.py +11 -0
- ads/opctl/backend/__init__.py +5 -0
- ads/opctl/backend/ads_dataflow.py +353 -0
- ads/opctl/backend/ads_ml_job.py +710 -0
- ads/opctl/backend/ads_ml_pipeline.py +164 -0
- ads/opctl/backend/ads_model_deployment.py +209 -0
- ads/opctl/backend/base.py +146 -0
- ads/opctl/backend/local.py +1053 -0
- ads/opctl/backend/marketplace/__init__.py +9 -0
- ads/opctl/backend/marketplace/helm_helper.py +173 -0
- ads/opctl/backend/marketplace/local_marketplace.py +271 -0
- ads/opctl/backend/marketplace/marketplace_backend_runner.py +71 -0
- ads/opctl/backend/marketplace/marketplace_operator_interface.py +44 -0
- ads/opctl/backend/marketplace/marketplace_operator_runner.py +24 -0
- ads/opctl/backend/marketplace/marketplace_utils.py +212 -0
- ads/opctl/backend/marketplace/models/__init__.py +5 -0
- ads/opctl/backend/marketplace/models/bearer_token.py +94 -0
- ads/opctl/backend/marketplace/models/marketplace_type.py +70 -0
- ads/opctl/backend/marketplace/models/ocir_details.py +56 -0
- ads/opctl/backend/marketplace/prerequisite_checker.py +238 -0
- ads/opctl/cli.py +707 -0
- ads/opctl/cmds.py +869 -0
- ads/opctl/conda/__init__.py +5 -0
- ads/opctl/conda/cli.py +193 -0
- ads/opctl/conda/cmds.py +749 -0
- ads/opctl/conda/config.yaml +34 -0
- ads/opctl/conda/manifest_template.yaml +13 -0
- ads/opctl/conda/multipart_uploader.py +188 -0
- ads/opctl/conda/pack.py +89 -0
- ads/opctl/config/__init__.py +5 -0
- ads/opctl/config/base.py +57 -0
- ads/opctl/config/diagnostics/__init__.py +5 -0
- ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +62 -0
- ads/opctl/config/merger.py +255 -0
- ads/opctl/config/resolver.py +297 -0
- ads/opctl/config/utils.py +79 -0
- ads/opctl/config/validator.py +17 -0
- ads/opctl/config/versioner.py +68 -0
- ads/opctl/config/yaml_parsers/__init__.py +7 -0
- ads/opctl/config/yaml_parsers/base.py +58 -0
- ads/opctl/config/yaml_parsers/distributed/__init__.py +7 -0
- ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +201 -0
- ads/opctl/constants.py +66 -0
- ads/opctl/decorator/__init__.py +5 -0
- ads/opctl/decorator/common.py +129 -0
- ads/opctl/diagnostics/__init__.py +5 -0
- ads/opctl/diagnostics/__main__.py +25 -0
- ads/opctl/diagnostics/check_distributed_job_requirements.py +212 -0
- ads/opctl/diagnostics/check_requirements.py +144 -0
- ads/opctl/diagnostics/requirement_exception.py +9 -0
- ads/opctl/distributed/README.md +109 -0
- ads/opctl/distributed/__init__.py +5 -0
- ads/opctl/distributed/certificates.py +32 -0
- ads/opctl/distributed/cli.py +207 -0
- ads/opctl/distributed/cmds.py +731 -0
- ads/opctl/distributed/common/__init__.py +5 -0
- ads/opctl/distributed/common/abstract_cluster_provider.py +449 -0
- ads/opctl/distributed/common/abstract_framework_spec_builder.py +88 -0
- ads/opctl/distributed/common/cluster_config_helper.py +103 -0
- ads/opctl/distributed/common/cluster_provider_factory.py +21 -0
- ads/opctl/distributed/common/cluster_runner.py +54 -0
- ads/opctl/distributed/common/framework_factory.py +29 -0
- ads/opctl/docker/Dockerfile.job +103 -0
- ads/opctl/docker/Dockerfile.job.arm +107 -0
- ads/opctl/docker/Dockerfile.job.gpu +175 -0
- ads/opctl/docker/base-env.yaml +13 -0
- ads/opctl/docker/cuda.repo +6 -0
- ads/opctl/docker/operator/.dockerignore +0 -0
- ads/opctl/docker/operator/Dockerfile +41 -0
- ads/opctl/docker/operator/Dockerfile.gpu +85 -0
- ads/opctl/docker/operator/cuda.repo +6 -0
- ads/opctl/docker/operator/environment.yaml +8 -0
- ads/opctl/forecast.py +11 -0
- ads/opctl/index.yaml +3 -0
- ads/opctl/model/__init__.py +5 -0
- ads/opctl/model/cli.py +65 -0
- ads/opctl/model/cmds.py +73 -0
- ads/opctl/operator/README.md +4 -0
- ads/opctl/operator/__init__.py +31 -0
- ads/opctl/operator/cli.py +344 -0
- ads/opctl/operator/cmd.py +596 -0
- ads/opctl/operator/common/__init__.py +5 -0
- ads/opctl/operator/common/backend_factory.py +460 -0
- ads/opctl/operator/common/const.py +27 -0
- ads/opctl/operator/common/data/synthetic.csv +16001 -0
- ads/opctl/operator/common/dictionary_merger.py +148 -0
- ads/opctl/operator/common/errors.py +42 -0
- ads/opctl/operator/common/operator_config.py +99 -0
- ads/opctl/operator/common/operator_loader.py +811 -0
- ads/opctl/operator/common/operator_schema.yaml +130 -0
- ads/opctl/operator/common/operator_yaml_generator.py +152 -0
- ads/opctl/operator/common/utils.py +208 -0
- ads/opctl/operator/lowcode/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/MLoperator +16 -0
- ads/opctl/operator/lowcode/anomaly/README.md +207 -0
- ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/__main__.py +103 -0
- ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
- ads/opctl/operator/lowcode/anomaly/const.py +167 -0
- ads/opctl/operator/lowcode/anomaly/environment.yaml +10 -0
- ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +146 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +162 -0
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +99 -0
- ads/opctl/operator/lowcode/anomaly/model/autots.py +115 -0
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +404 -0
- ads/opctl/operator/lowcode/anomaly/model/factory.py +110 -0
- ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +78 -0
- ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +78 -0
- ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +120 -0
- ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
- ads/opctl/operator/lowcode/anomaly/operator_config.py +127 -0
- ads/opctl/operator/lowcode/anomaly/schema.yaml +401 -0
- ads/opctl/operator/lowcode/anomaly/utils.py +88 -0
- ads/opctl/operator/lowcode/common/__init__.py +5 -0
- ads/opctl/operator/lowcode/common/const.py +10 -0
- ads/opctl/operator/lowcode/common/data.py +116 -0
- ads/opctl/operator/lowcode/common/errors.py +47 -0
- ads/opctl/operator/lowcode/common/transformations.py +296 -0
- ads/opctl/operator/lowcode/common/utils.py +384 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +13 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/README.md +30 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +5 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +116 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +85 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/const.py +15 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +4 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +32 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +43 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +120 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +34 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +386 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +160 -0
- ads/opctl/operator/lowcode/forecast/MLoperator +25 -0
- ads/opctl/operator/lowcode/forecast/README.md +209 -0
- ads/opctl/operator/lowcode/forecast/__init__.py +5 -0
- ads/opctl/operator/lowcode/forecast/__main__.py +89 -0
- ads/opctl/operator/lowcode/forecast/cmd.py +40 -0
- ads/opctl/operator/lowcode/forecast/const.py +92 -0
- ads/opctl/operator/lowcode/forecast/environment.yaml +20 -0
- ads/opctl/operator/lowcode/forecast/errors.py +26 -0
- ads/opctl/operator/lowcode/forecast/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/forecast/model/arima.py +279 -0
- ads/opctl/operator/lowcode/forecast/model/automlx.py +553 -0
- ads/opctl/operator/lowcode/forecast/model/autots.py +312 -0
- ads/opctl/operator/lowcode/forecast/model/base_model.py +875 -0
- ads/opctl/operator/lowcode/forecast/model/factory.py +106 -0
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +492 -0
- ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +243 -0
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +482 -0
- ads/opctl/operator/lowcode/forecast/model/prophet.py +450 -0
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +244 -0
- ads/opctl/operator/lowcode/forecast/operator_config.py +234 -0
- ads/opctl/operator/lowcode/forecast/schema.yaml +506 -0
- ads/opctl/operator/lowcode/forecast/utils.py +397 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +7 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +285 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/score.py +246 -0
- ads/opctl/operator/lowcode/pii/MLoperator +17 -0
- ads/opctl/operator/lowcode/pii/README.md +208 -0
- ads/opctl/operator/lowcode/pii/__init__.py +5 -0
- ads/opctl/operator/lowcode/pii/__main__.py +78 -0
- ads/opctl/operator/lowcode/pii/cmd.py +39 -0
- ads/opctl/operator/lowcode/pii/constant.py +84 -0
- ads/opctl/operator/lowcode/pii/environment.yaml +17 -0
- ads/opctl/operator/lowcode/pii/errors.py +27 -0
- ads/opctl/operator/lowcode/pii/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/pii/model/factory.py +82 -0
- ads/opctl/operator/lowcode/pii/model/guardrails.py +167 -0
- ads/opctl/operator/lowcode/pii/model/pii.py +145 -0
- ads/opctl/operator/lowcode/pii/model/processor/__init__.py +34 -0
- ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +34 -0
- ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +35 -0
- ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +225 -0
- ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +73 -0
- ads/opctl/operator/lowcode/pii/model/processor/remover.py +26 -0
- ads/opctl/operator/lowcode/pii/model/report.py +487 -0
- ads/opctl/operator/lowcode/pii/operator_config.py +95 -0
- ads/opctl/operator/lowcode/pii/schema.yaml +108 -0
- ads/opctl/operator/lowcode/pii/utils.py +43 -0
- ads/opctl/operator/lowcode/recommender/MLoperator +16 -0
- ads/opctl/operator/lowcode/recommender/README.md +206 -0
- ads/opctl/operator/lowcode/recommender/__init__.py +5 -0
- ads/opctl/operator/lowcode/recommender/__main__.py +82 -0
- ads/opctl/operator/lowcode/recommender/cmd.py +33 -0
- ads/opctl/operator/lowcode/recommender/constant.py +30 -0
- ads/opctl/operator/lowcode/recommender/environment.yaml +11 -0
- ads/opctl/operator/lowcode/recommender/model/base_model.py +212 -0
- ads/opctl/operator/lowcode/recommender/model/factory.py +56 -0
- ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +25 -0
- ads/opctl/operator/lowcode/recommender/model/svd.py +106 -0
- ads/opctl/operator/lowcode/recommender/operator_config.py +81 -0
- ads/opctl/operator/lowcode/recommender/schema.yaml +265 -0
- ads/opctl/operator/lowcode/recommender/utils.py +13 -0
- ads/opctl/operator/runtime/__init__.py +5 -0
- ads/opctl/operator/runtime/const.py +17 -0
- ads/opctl/operator/runtime/container_runtime_schema.yaml +50 -0
- ads/opctl/operator/runtime/marketplace_runtime.py +50 -0
- ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +21 -0
- ads/opctl/operator/runtime/python_runtime_schema.yaml +21 -0
- ads/opctl/operator/runtime/runtime.py +115 -0
- ads/opctl/schema.yaml.yml +36 -0
- ads/opctl/script.py +40 -0
- ads/opctl/spark/__init__.py +5 -0
- ads/opctl/spark/cli.py +43 -0
- ads/opctl/spark/cmds.py +147 -0
- ads/opctl/templates/diagnostic_report_template.jinja2 +102 -0
- ads/opctl/utils.py +344 -0
- ads/oracledb/__init__.py +5 -0
- ads/oracledb/oracle_db.py +346 -0
- ads/pipeline/__init__.py +39 -0
- ads/pipeline/ads_pipeline.py +2279 -0
- ads/pipeline/ads_pipeline_run.py +772 -0
- ads/pipeline/ads_pipeline_step.py +605 -0
- ads/pipeline/builders/__init__.py +5 -0
- ads/pipeline/builders/infrastructure/__init__.py +5 -0
- ads/pipeline/builders/infrastructure/custom_script.py +32 -0
- ads/pipeline/cli.py +119 -0
- ads/pipeline/extension.py +291 -0
- ads/pipeline/schema/__init__.py +5 -0
- ads/pipeline/schema/cs_step_schema.json +35 -0
- ads/pipeline/schema/ml_step_schema.json +31 -0
- ads/pipeline/schema/pipeline_schema.json +71 -0
- ads/pipeline/visualizer/__init__.py +5 -0
- ads/pipeline/visualizer/base.py +570 -0
- ads/pipeline/visualizer/graph_renderer.py +272 -0
- ads/pipeline/visualizer/text_renderer.py +84 -0
- ads/secrets/__init__.py +11 -0
- ads/secrets/adb.py +386 -0
- ads/secrets/auth_token.py +86 -0
- ads/secrets/big_data_service.py +365 -0
- ads/secrets/mysqldb.py +149 -0
- ads/secrets/oracledb.py +160 -0
- ads/secrets/secrets.py +407 -0
- ads/telemetry/__init__.py +7 -0
- ads/telemetry/base.py +69 -0
- ads/telemetry/client.py +122 -0
- ads/telemetry/telemetry.py +257 -0
- ads/templates/dataflow_pyspark.jinja2 +13 -0
- ads/templates/dataflow_sparksql.jinja2 +22 -0
- ads/templates/func.jinja2 +20 -0
- ads/templates/schemas/openapi.json +1740 -0
- ads/templates/score-pkl.jinja2 +173 -0
- ads/templates/score.jinja2 +322 -0
- ads/templates/score_embedding_onnx.jinja2 +202 -0
- ads/templates/score_generic.jinja2 +165 -0
- ads/templates/score_huggingface_pipeline.jinja2 +217 -0
- ads/templates/score_lightgbm.jinja2 +185 -0
- ads/templates/score_onnx.jinja2 +407 -0
- ads/templates/score_onnx_new.jinja2 +473 -0
- ads/templates/score_oracle_automl.jinja2 +185 -0
- ads/templates/score_pyspark.jinja2 +154 -0
- ads/templates/score_pytorch.jinja2 +219 -0
- ads/templates/score_scikit-learn.jinja2 +184 -0
- ads/templates/score_tensorflow.jinja2 +184 -0
- ads/templates/score_xgboost.jinja2 +178 -0
- ads/text_dataset/__init__.py +5 -0
- ads/text_dataset/backends.py +211 -0
- ads/text_dataset/dataset.py +445 -0
- ads/text_dataset/extractor.py +207 -0
- ads/text_dataset/options.py +53 -0
- ads/text_dataset/udfs.py +22 -0
- ads/text_dataset/utils.py +49 -0
- ads/type_discovery/__init__.py +9 -0
- ads/type_discovery/abstract_detector.py +21 -0
- ads/type_discovery/constant_detector.py +41 -0
- ads/type_discovery/continuous_detector.py +54 -0
- ads/type_discovery/credit_card_detector.py +99 -0
- ads/type_discovery/datetime_detector.py +92 -0
- ads/type_discovery/discrete_detector.py +118 -0
- ads/type_discovery/document_detector.py +146 -0
- ads/type_discovery/ip_detector.py +68 -0
- ads/type_discovery/latlon_detector.py +90 -0
- ads/type_discovery/phone_number_detector.py +63 -0
- ads/type_discovery/type_discovery_driver.py +87 -0
- ads/type_discovery/typed_feature.py +594 -0
- ads/type_discovery/unknown_detector.py +41 -0
- ads/type_discovery/zipcode_detector.py +48 -0
- ads/vault/__init__.py +7 -0
- ads/vault/vault.py +237 -0
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10rc0.dist-info}/METADATA +150 -149
- oracle_ads-2.13.10rc0.dist-info/RECORD +858 -0
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10rc0.dist-info}/WHEEL +1 -2
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10rc0.dist-info}/entry_points.txt +2 -1
- oracle_ads-2.13.9rc0.dist-info/RECORD +0 -9
- oracle_ads-2.13.9rc0.dist-info/top_level.txt +0 -1
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10rc0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,137 @@
|
|
1
|
+
CI/CD: Small and Large Data Use Cases
|
2
|
+
**************************************
|
3
|
+
|
4
|
+
Introduction
|
5
|
+
============
|
6
|
+
|
7
|
+
This section elaborates on different data ingestion modes (Append, Overwrite, Upsert) used within a feature store context. Understanding these modes is crucial for efficient data management and integration.
|
8
|
+
|
9
|
+
|
10
|
+
.. image:: figures/cicd.png
|
11
|
+
|
12
|
+
|
13
|
+
Feature Store with ML Jobs (Small Data Use Cases)
|
14
|
+
=================================================
|
15
|
+
|
16
|
+
Utilizing ML jobs within the Feature Store caters to scenarios dealing with small data volumes.
|
17
|
+
|
18
|
+
**Usage Scenario**:
|
19
|
+
|
20
|
+
- **ML Job Integration**: Integrate ML jobs into the CI/CD pipeline for feature extraction, transformation, and loading using small datasets.
|
21
|
+
- **Automation with CI/CD**: Automate the deployment and execution of ML job scripts through CI/CD pipelines for feature management in the Feature Store environment.
|
22
|
+
|
23
|
+
.. seealso::
|
24
|
+
`Feature Store Creation and Ingestion with Jobs <https://github.com/oracle-samples/oci-data-science-ai-samples/tree/main/feature_store/tutorials/feature_store_creation_ingestion_with_jobs>`_
|
25
|
+
|
26
|
+
Feature Store with Dataflow (Large Data Use Cases)
|
27
|
+
==================================================
|
28
|
+
|
29
|
+
Dataflow is a robust solution when handling large-scale data processing within the Feature Store.
|
30
|
+
|
31
|
+
**Usage Scenario**:
|
32
|
+
|
33
|
+
- **Dataflow Integration**: Incorporate Dataflow capabilities within the CI/CD pipeline for efficient feature extraction, transformation, and loading with large datasets.
|
34
|
+
- **Scalable Processing**: Leverage Dataflow's scalable processing power to handle and manage extensive data volumes effectively.
|
35
|
+
|
36
|
+
.. seealso::
|
37
|
+
`Feature Store Creation and Ingestion with Dataflow <https://github.com/oracle-samples/oci-data-science-ai-samples/tree/main/feature_store/tutorials/feature_store_creation_ingestion_with_dataflow>`_
|
38
|
+
|
39
|
+
Modes of ingestion
|
40
|
+
===================
|
41
|
+
|
42
|
+
Append Mode
|
43
|
+
###########
|
44
|
+
|
45
|
+
In Append mode, new data is added to the existing dataset or table. This process involves appending new records to the end of the existing dataset without altering or removing pre-existing data. It's suitable for continuous addition of new records while preserving the integrity of the existing dataset.
|
46
|
+
|
47
|
+
.. code-block:: python3
|
48
|
+
|
49
|
+
from ads.feature_store.feature_group_job import BatchIngestionMode
|
50
|
+
from ads.feature_store.feature_group import FeatureGroup
|
51
|
+
|
52
|
+
feature_group = FeatureGroup.from_id("<unique_id>")
|
53
|
+
feature_group.materialise(airports_df, ingestion_mode=BatchIngestionMode.APPEND)
|
54
|
+
|
55
|
+
Overwrite Mode
|
56
|
+
##############
|
57
|
+
|
58
|
+
Overwrite mode replaces the entire existing dataset with the new data being saved. If the target table or dataset already exists, it is completely dropped and recreated with the new data. This mode is beneficial for refreshing the dataset entirely with the most recent data, discarding all previous records.
|
59
|
+
|
60
|
+
.. code-block:: python3
|
61
|
+
|
62
|
+
from ads.feature_store.feature_group_job import BatchIngestionMode
|
63
|
+
from ads.feature_store.feature_group import FeatureGroup
|
64
|
+
|
65
|
+
feature_group = FeatureGroup.from_id("<unique_id>")
|
66
|
+
feature_group.materialise(airports_df, ingestion_mode=BatchIngestionMode.OVERWRITE)
|
67
|
+
|
68
|
+
Upsert Mode (Merge Mode)
|
69
|
+
########################
|
70
|
+
|
71
|
+
Upsert mode, also known as merge mode, updates existing records in the dataset based on a specified condition or primary key. If a record with the same key exists, it is updated with the new data; otherwise, a new record is inserted. This mode helps in maintaining data consistency and avoiding duplicates.
|
72
|
+
|
73
|
+
.. code-block:: python3
|
74
|
+
|
75
|
+
from ads.feature_store.feature_group_job import BatchIngestionMode
|
76
|
+
from ads.feature_store.feature_group import FeatureGroup
|
77
|
+
|
78
|
+
feature_group = FeatureGroup.from_id("<unique_id>")
|
79
|
+
feature_group.materialise(airports_df, ingestion_mode=BatchIngestionMode.UPSERT)
|
80
|
+
|
81
|
+
Complete Mode
|
82
|
+
#############
|
83
|
+
|
84
|
+
Complete Mode involves rewriting the full output for each batch interval. It provides the complete result set for the computation performed on that batch interval. This mode is suitable when you need a comprehensive, up-to-date result set for every batch interval, even though the entire output is rewritten each time.
|
85
|
+
|
86
|
+
.. code-block:: python3
|
87
|
+
|
88
|
+
from ads.feature_store.feature_group_job import StreamingIngestionMode
|
89
|
+
from ads.feature_store.feature_group import FeatureGroup
|
90
|
+
|
91
|
+
feature_group = FeatureGroup.from_id("<unique_id>")
|
92
|
+
feature_group.materialise_stream(df, ingestion_mode=StreamingIngestionMode.COMPLETE)
|
93
|
+
|
94
|
+
Update Mode
|
95
|
+
###########
|
96
|
+
|
97
|
+
Update Mode is specifically designed to capture only the changed records in the output. It outputs the rows that have been updated since the last batch, maintaining intermediate state and providing insights into the delta or changes between different batches. This mode is ideal for scenarios involving aggregations or incremental updates to the output data.
|
98
|
+
|
99
|
+
.. code-block:: python3
|
100
|
+
|
101
|
+
from ads.feature_store.feature_group_job import StreamingIngestionMode
|
102
|
+
from ads.feature_store.feature_group import FeatureGroup
|
103
|
+
|
104
|
+
feature_group = FeatureGroup.from_id("<unique_id>")
|
105
|
+
feature_group.materialise_stream(df, ingestion_mode=StreamingIngestionMode.UPDATE)
|
106
|
+
|
107
|
+
Append Mode
|
108
|
+
###########
|
109
|
+
|
110
|
+
Append Mode appends newly generated results from each micro-batch to the output sink. It works when the computation produces only new records and does not modify or update existing data in the output. This mode suits scenarios where the result set continuously grows, with each batch contributing new records without altering existing ones.
|
111
|
+
|
112
|
+
.. code-block:: python3
|
113
|
+
|
114
|
+
from ads.feature_store.feature_group_job import StreamingIngestionMode
|
115
|
+
from ads.feature_store.feature_group import FeatureGroup
|
116
|
+
|
117
|
+
feature_group = FeatureGroup.from_id("<unique_id>")
|
118
|
+
feature_group.materialise_stream(df, ingestion_mode=StreamingIngestionMode.APPEND)
|
119
|
+
|
120
|
+
|
121
|
+
Considerations for Usage
|
122
|
+
########################
|
123
|
+
|
124
|
+
- **Performance Implications:** Each mode has its performance considerations. Append tends to be faster as it merely adds new records, while Overwrite involves dropping and recreating the entire dataset, potentially impacting performance and resource usage.
|
125
|
+
- **Data Consistency:** Upsert mode helps in maintaining data consistency by updating existing records and inserting new ones, ensuring the integrity of the dataset.
|
126
|
+
|
127
|
+
Use Cases
|
128
|
+
###########
|
129
|
+
|
130
|
+
- **Append:** Real-time streaming data, continuous addition of new records without altering existing data.
|
131
|
+
- **Overwrite:** Periodic data refreshes or complete replacement of the dataset with the latest information.
|
132
|
+
- **Upsert:** Synchronizing datasets, updating existing records, and avoiding duplicates, commonly used in databases and data warehouses.
|
133
|
+
|
134
|
+
Conclusion
|
135
|
+
###########
|
136
|
+
|
137
|
+
Understanding the nuances of these ingestion modes is crucial for choosing the appropriate method based on specific requirements and use cases. It ensures efficient data management and integration within the feature store environment.
|
@@ -0,0 +1,86 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2022, 2023 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
import os
|
8
|
+
import sys
|
9
|
+
|
10
|
+
sys.path.insert(0, os.path.abspath("../../"))
|
11
|
+
|
12
|
+
version = "1.0.4"
|
13
|
+
release = version
|
14
|
+
|
15
|
+
|
16
|
+
# -- Project information -----------------------------------------------------
|
17
|
+
|
18
|
+
project = "oci_feature_store"
|
19
|
+
copyright = "Oracle and/or its affiliates 2022, 2023"
|
20
|
+
author = "Oracle Corporation"
|
21
|
+
|
22
|
+
# -- General configuration ---------------------------------------------------
|
23
|
+
|
24
|
+
# Add any Sphinx extension module names here, as strings. They can be
|
25
|
+
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
|
26
|
+
# ones.
|
27
|
+
extensions = [
|
28
|
+
"sphinx.ext.napoleon",
|
29
|
+
"sphinx.ext.autodoc",
|
30
|
+
"sphinx.ext.doctest",
|
31
|
+
"sphinx.ext.mathjax",
|
32
|
+
"sphinx.ext.ifconfig",
|
33
|
+
"sphinx.ext.autodoc",
|
34
|
+
"sphinx.ext.todo",
|
35
|
+
"sphinx.ext.extlinks",
|
36
|
+
"sphinx.ext.intersphinx",
|
37
|
+
"sphinx.ext.graphviz",
|
38
|
+
"nbsphinx",
|
39
|
+
"sphinx_code_tabs",
|
40
|
+
"sphinx_copybutton",
|
41
|
+
"sphinx.ext.duration",
|
42
|
+
"sphinx.ext.autosummary",
|
43
|
+
"sphinx.ext.intersphinx",
|
44
|
+
"sphinx.ext.viewcode",
|
45
|
+
"sphinx_autorun",
|
46
|
+
]
|
47
|
+
|
48
|
+
intersphinx_mapping = {
|
49
|
+
"python": ("https://docs.python.org/3/", None),
|
50
|
+
"sphinx": ("https://www.sphinx-doc.org/en/master/", None),
|
51
|
+
}
|
52
|
+
intersphinx_disabled_domains = ["std"]
|
53
|
+
|
54
|
+
|
55
|
+
# Add any paths that contain templates here, relative to this directory.
|
56
|
+
templates_path = ["_templates"]
|
57
|
+
|
58
|
+
# Unless we want to expose real buckets and namespaces
|
59
|
+
nbsphinx_allow_errors = True
|
60
|
+
|
61
|
+
# List of patterns, relative to source directory, that match files and
|
62
|
+
# directories to ignore when looking for source files.
|
63
|
+
# This pattern also affects html_static_path and html_extra_path.
|
64
|
+
exclude_patterns = ["build", "**.ipynb_checkpoints", "Thumbs.db", ".DS_Store"]
|
65
|
+
|
66
|
+
|
67
|
+
# -- Options for HTML output -------------------------------------------------
|
68
|
+
|
69
|
+
# The theme to use for HTML and HTML Help pages. See the documentation for
|
70
|
+
# a list of builtin themes.
|
71
|
+
#
|
72
|
+
html_theme = "furo"
|
73
|
+
language = "en"
|
74
|
+
|
75
|
+
html_theme_options = {
|
76
|
+
"light_logo": "logo-light-mode.png",
|
77
|
+
"dark_logo": "logo-dark-mode.png",
|
78
|
+
"collapse_navigation": True,
|
79
|
+
"sticky_navigation": True,
|
80
|
+
"navigation_depth": 4,
|
81
|
+
}
|
82
|
+
|
83
|
+
# Add any paths that contain custom static files (such as style sheets) here,
|
84
|
+
# relative to this directory. They are copied after the builtin static files,
|
85
|
+
# so a file named "default.css" will overwrite the builtin "default.css".
|
86
|
+
html_static_path = ["figures"]
|
@@ -0,0 +1,33 @@
|
|
1
|
+
Data Versioning
|
2
|
+
****************
|
3
|
+
|
4
|
+
Data versioning is a practice aimed at recording the various data commits integrated into a particular feature group and dataset. This involves tracking changes in data over time while maintaining consistent schema structures and feature definitions within a shared schema version. Note that, in Feature Store, data versioning features are available only for offline feature groups.
|
5
|
+
|
6
|
+
.. image:: figures/data_versioning.png
|
7
|
+
|
8
|
+
|
9
|
+
As Of
|
10
|
+
======
|
11
|
+
|
12
|
+
You can call the ``as_of()`` method of the ``FeatureGroup`` or ``Dataset`` instance to retrieve time-traveled data as of a specified point in time.
|
13
|
+
|
14
|
+
The ``.as_of()`` method takes the following optional parameters:
|
15
|
+
|
16
|
+
- ``commit_timestamp: date-time``. The commit timestamp for the feature group.
|
17
|
+
- ``version_number: int``. The version number for the feature group.
|
18
|
+
|
19
|
+
.. code-block:: python3
|
20
|
+
|
21
|
+
# as_of feature group
|
22
|
+
df = feature_group.as_of(version_number=1)
|
23
|
+
|
24
|
+
|
25
|
+
History
|
26
|
+
=======
|
27
|
+
|
28
|
+
You can call the ``history()`` method of the ``FeatureGroup`` or ``Dataset`` instance to show the history of the feature group.
|
29
|
+
|
30
|
+
.. code-block:: python3
|
31
|
+
|
32
|
+
# Show history of feature group
|
33
|
+
df = feature_group.history()
|
@@ -0,0 +1,388 @@
|
|
1
|
+
Dataset
|
2
|
+
********
|
3
|
+
|
4
|
+
A dataset is a collection of features that are used together to train a model or perform model inference.
|
5
|
+
|
6
|
+
.. image:: figures/dataset.png
|
7
|
+
|
8
|
+
Define
|
9
|
+
======
|
10
|
+
|
11
|
+
In an ADS Feature Store module, you can use the Python API or a yaml file to define a dataset.
|
12
|
+
|
13
|
+
|
14
|
+
The following example defines a dataset and gives it a name. A ``Dataset`` instance is created.
|
15
|
+
|
16
|
+
.. tabs::
|
17
|
+
|
18
|
+
.. code-tab:: Python3
|
19
|
+
:caption: Python
|
20
|
+
|
21
|
+
from ads.feature_store.dataset import Dataset
|
22
|
+
|
23
|
+
dataset = (
|
24
|
+
Dataset
|
25
|
+
.with_name("<dataset_name>")
|
26
|
+
.with_entity_id(<entity_id>)
|
27
|
+
.with_feature_store_id("<feature_store_id>")
|
28
|
+
.with_description("<dataset_description>")
|
29
|
+
.with_compartment_id("<compartment_id>")
|
30
|
+
.with_query('SELECT col FROM <entity_id>.<feature_group_name>')
|
31
|
+
)
|
32
|
+
|
33
|
+
.. code-tab:: Python3
|
34
|
+
:caption: YAML
|
35
|
+
|
36
|
+
from ads.feature_store.dataset import Dataset
|
37
|
+
|
38
|
+
yaml_string = """
|
39
|
+
kind: dataset
|
40
|
+
spec:
|
41
|
+
compartmentId: ocid1.compartment..<unique_id>
|
42
|
+
description: <dataset_description>
|
43
|
+
name: <dataset_name>
|
44
|
+
featureStoreId: <feature_store_id>
|
45
|
+
type: dataset
|
46
|
+
"""
|
47
|
+
|
48
|
+
dataset = Dataset.from_yaml(yaml_string)
|
49
|
+
|
50
|
+
|
51
|
+
Create
|
52
|
+
======
|
53
|
+
|
54
|
+
Use the ``create()`` method of the ``Dataset`` instance to create a dataset.
|
55
|
+
|
56
|
+
.. important::
|
57
|
+
|
58
|
+
This method does not persist any metadata or feature data in the Feature Store. To persist the dataset and save feature data including the metadata in the Feature Store, use the ``materialise()`` method with a dataframe. For simple queries with only one level of nesting, users do not need to define ``with_feature_groups``. However, in complex queries involving more than one level of nesting, users are required to define ``with_feature_groups``.
|
59
|
+
|
60
|
+
|
61
|
+
.. tabs::
|
62
|
+
|
63
|
+
.. code-tab:: Python3
|
64
|
+
:caption: Simple SQL
|
65
|
+
|
66
|
+
from ads.feature_store.dataset import Dataset
|
67
|
+
|
68
|
+
dataset = (
|
69
|
+
Dataset
|
70
|
+
.with_name("<dataset_name>")
|
71
|
+
.with_entity_id(<entity_id>)
|
72
|
+
.with_feature_store_id("<feature_store_id>")
|
73
|
+
.with_description("<dataset_description>")
|
74
|
+
.with_compartment_id("<compartment_id>")
|
75
|
+
.with_query('SELECT col FROM <entity_id>.<feature_group_name>')
|
76
|
+
)
|
77
|
+
|
78
|
+
dataset.create()
|
79
|
+
|
80
|
+
|
81
|
+
.. code-tab:: Python3
|
82
|
+
:caption: Complex SQL
|
83
|
+
|
84
|
+
from ads.feature_store.dataset import Dataset
|
85
|
+
from ads.feature_store.feature_group import FeatureGroup
|
86
|
+
|
87
|
+
feature_group = FeatureGroup.from_id("<unique_id>")
|
88
|
+
|
89
|
+
dataset = (
|
90
|
+
Dataset
|
91
|
+
.with_name("<dataset_name>")
|
92
|
+
.with_entity_id(<entity_id>)
|
93
|
+
.with_feature_store_id("<feature_store_id>")
|
94
|
+
.with_description("<dataset_description>")
|
95
|
+
.with_compartment_id("<compartment_id>")
|
96
|
+
.with_query("SELECT col FROM (SELECT col FROM <entity_id>.<feature_group_name> WHERE condition = 'some_condition') AS nested_table;")
|
97
|
+
.with_feature_groups([feature_group])
|
98
|
+
)
|
99
|
+
|
100
|
+
# Create a dataset
|
101
|
+
dataset.create()
|
102
|
+
|
103
|
+
|
104
|
+
Load
|
105
|
+
====
|
106
|
+
|
107
|
+
Use the ``from_id()`` method from the ``Dataset`` class to load an existing dataset by specifying the OCID. A ``Dataset`` instance is returned.
|
108
|
+
|
109
|
+
.. code-block:: python3
|
110
|
+
|
111
|
+
from ads.feature_store.dataset import Dataset
|
112
|
+
|
113
|
+
dataset = Dataset.from_id("<unique_id>")
|
114
|
+
|
115
|
+
Materialise
|
116
|
+
===========
|
117
|
+
|
118
|
+
Use the ``materialise() -> DatasetJob`` method of the ``Dataset`` instance to load the data into the dataset. To persist the dataset and save dataset data, including the metadata, in the Feature Store, use ``materialise()``.
|
119
|
+
|
120
|
+
The ``.materialise()`` method has the following parameters:
|
121
|
+
|
122
|
+
- ``input_dataframe: Union[DataFrame, pd.DataFrame]``. The Spark dataframe or Pandas dataframe.
|
123
|
+
- ``from_timestamp: str(Optional)``. The from timestamp of the dataset.
|
124
|
+
- ``to_timestamp: str(Optional)``. The to timestamp of the dataset.
|
125
|
+
- ``feature_option_details: FeatureOptionDetails(Optional)``. The feature option details for the materialise operation.
|
126
|
+
- ``write_config_details: (merge_schema: bool, overwrite_schema: bool)``. The write configuration details for the feature option details.
|
127
|
+
- ``read_config_details: (version_as_of: int, timestamp_as_of: datetime)``. The read configuration details for the feature option details.
|
128
|
+
|
129
|
+
.. code-block:: python3
|
130
|
+
|
131
|
+
from ads.feature_store.dataset_job import DatasetJob
|
132
|
+
|
133
|
+
dataset_job: DatasetJob = dataset.materialise(dataframe)
|
134
|
+
|
135
|
+
.. seealso::
|
136
|
+
:ref:`Dataset Job`
|
137
|
+
|
138
|
+
|
139
|
+
Delete
|
140
|
+
======
|
141
|
+
|
142
|
+
Use the ``.delete()`` method on the ``Dataset`` instance to delete a dataset. A dataset can only be deleted when its associated entities are all deleted.
|
143
|
+
|
144
|
+
.. code-block:: python3
|
145
|
+
|
146
|
+
dataset.delete()
|
147
|
+
|
148
|
+
Get Last Dataset Job
|
149
|
+
====================
|
150
|
+
A dataset job is the processing instance of a dataset. Each dataset job includes validation and statistics results.
|
151
|
+
|
152
|
+
With a ``Dataset`` instance, you can get the last dataset job details using ``get_last_job()``.
|
153
|
+
|
154
|
+
.. code-block:: python3
|
155
|
+
|
156
|
+
dataset_job = dataset.get_last_job()
|
157
|
+
|
158
|
+
Save Expectation Entity
|
159
|
+
=======================
|
160
|
+
Feature Store allows you to define expectations on data being materialised into a dataset instance. With a ``Dataset`` instance, save the expectation details using ``with_expectation_suite()`` with the following parameters:
|
161
|
+
|
162
|
+
- ``expectation_suite: ExpectationSuite``. An ``ExpectationSuite`` from the Great Expectations library.
|
163
|
+
- ``expectation_type: ExpectationType``. The type of expectation.
|
164
|
+
- ``ExpectationType.STRICT``: Fail the job if the expectation isn't met.
|
165
|
+
- ``ExpectationType.LENIENT``: Pass the job even if the expectation isn't met.
|
166
|
+
|
167
|
+
.. note::
|
168
|
+
|
169
|
+
`Great Expectations <https://docs.greatexpectations.io/docs/0.15.50/>`_ is an open source Python-based library that validates, documents, and profiles data. It automates testing, which is essential for managing complex code bases.
|
170
|
+
|
171
|
+
.. image:: figures/validation.png
|
172
|
+
|
173
|
+
.. code-block:: python3
|
174
|
+
|
175
|
+
from great_expectations.core import ExpectationSuite, ExpectationConfiguration
|
176
|
+
from ads.feature_store.common.enums import ExpectationType
|
177
|
+
from ads.feature_store.dataset import Dataset
|
178
|
+
|
179
|
+
dataset = Dataset.from_id("<unique_id>")
|
180
|
+
|
181
|
+
expectation_suite = ExpectationSuite(
|
182
|
+
expectation_suite_name="expectation_suite_name"
|
183
|
+
)
|
184
|
+
expectation_suite.add_expectation(
|
185
|
+
ExpectationConfiguration(
|
186
|
+
expectation_type="expect_column_values_to_not_be_null",
|
187
|
+
kwargs={"column": "<column>"},
|
188
|
+
))
|
189
|
+
|
190
|
+
dataset_resource = (
|
191
|
+
Dataset()
|
192
|
+
.with_description("dataset description")
|
193
|
+
.with_compartment_id(<compartment_id>)
|
194
|
+
.with_name(<name>)
|
195
|
+
.with_entity_id(entity_id)
|
196
|
+
.with_feature_store_id(feature_store_id)
|
197
|
+
.with_query(f"SELECT * FROM `{entity_id}`.{feature_group_name}")
|
198
|
+
.with_expectation_suite(
|
199
|
+
expectation_suite=expectation_suite,
|
200
|
+
expectation_type=ExpectationType.STRICT,
|
201
|
+
)
|
202
|
+
)
|
203
|
+
|
204
|
+
Use the ``get_validation_output()`` method of the dataset instance to fetch validation results for a specific ingestion job.
|
205
|
+
The ``get_validation_output()`` method has the following optional parameter:
|
206
|
+
|
207
|
+
- ``job_id: string``. The ID of the dataset job.
|
208
|
+
|
209
|
+
``get_validation_output().to_pandas()`` outputs the validation results for each expectation as a Pandas dataframe.
|
210
|
+
|
211
|
+
.. image:: figures/dataset_validation_results.png
|
212
|
+
|
213
|
+
``get_validation_output().to_summary()`` outputs the overall summary of the validation as a Pandas dataframe.
|
214
|
+
|
215
|
+
.. image:: figures/dataset_validation_summary.png
|
216
|
+
|
217
|
+
.. seealso::
|
218
|
+
|
219
|
+
:ref:`Feature Validation`
|
220
|
+
|
221
|
+
Statistics Computation
|
222
|
+
========================
|
223
|
+
During the materialisation, Feature Store performs computation of statistical metrics for all the features by default. The computation is configured using a ``StatisticsConfig`` object, which is passed at the creation of the dataset, or it can be updated later.
|
224
|
+
|
225
|
+
.. code-block:: python3
|
226
|
+
|
227
|
+
# Define statistics configuration for selected features
|
228
|
+
from ads.feature_store.statistics_config import StatisticsConfig
|
229
|
+
stats_config = StatisticsConfig().with_is_enabled(True).with_columns(["column1", "column2"])
|
230
|
+
|
231
|
+
This is used with a ``Dataset`` instance.
|
232
|
+
|
233
|
+
.. code-block:: python3
|
234
|
+
|
235
|
+
from ads.feature_store.dataset import Dataset
|
236
|
+
|
237
|
+
dataset = (
|
238
|
+
Dataset()
|
239
|
+
.with_name("<dataset_name>")
|
240
|
+
.with_entity_id(<entity_id>)
|
241
|
+
.with_feature_store_id("<feature_store_id>")
|
242
|
+
.with_description("<dataset_description>")
|
243
|
+
.with_compartment_id("<compartment_id>")
|
244
|
+
.with_dataset_ingestion_mode(DatasetIngestionMode.SQL)
|
245
|
+
.with_query('SELECT col FROM <entity_id>.<feature_group_name>')
|
246
|
+
.with_statistics_config(stats_config)
|
247
|
+
)
|
248
|
+
|
249
|
+
Use the ``get_statistics()`` method of the ``Dataset`` instance to fetch the metrics for a specific ingestion job.
|
250
|
+
|
251
|
+
The ``get_statistics()`` method has the following optional parameter:
|
252
|
+
|
253
|
+
- ``job_id: string``. The ID of the dataset job.
|
254
|
+
|
255
|
+
.. code-block:: python3
|
256
|
+
|
257
|
+
# Fetch stats results for a dataset job
|
258
|
+
df = dataset.get_statistics(job_id).to_pandas()
|
259
|
+
|
260
|
+
.. image:: figures/dataset_statistics.png
|
261
|
+
|
262
|
+
.. code-block:: python3
|
263
|
+
|
264
|
+
# Fetch and visualize stats for a dataset job
|
265
|
+
df = dataset.get_statistics(job_id).to_viz()
|
266
|
+
|
267
|
+
.. image:: figures/dataset_statistics_viz.png
|
268
|
+
|
269
|
+
|
270
|
+
.. seealso::
|
271
|
+
|
272
|
+
:ref:`Statistics`
|
273
|
+
|
274
|
+
|
275
|
+
Get features
|
276
|
+
============
|
277
|
+
Use the ``get_features_df()`` method of the ``Dataset`` instance to fetch features in a dataset.
|
278
|
+
|
279
|
+
.. code-block:: python3
|
280
|
+
|
281
|
+
# Fetch features for a dataset
|
282
|
+
df = dataset.get_features_df()
|
283
|
+
df.show()
|
284
|
+
|
285
|
+
|
286
|
+
Preview
|
287
|
+
========
|
288
|
+
|
289
|
+
.. deprecated:: 1.0.3
|
290
|
+
Use :func:`as_of` instead.
|
291
|
+
|
292
|
+
Use the ``preview()`` method of the ``Dataset`` instance to preview the dataset.
|
293
|
+
|
294
|
+
The ``.preview()`` method has the following optional parameters:
|
295
|
+
- ``timestamp: date-time``. The commit timestamp for the dataset.
|
296
|
+
- ``version_number: int``. The version number for the dataset.
|
297
|
+
- ``row_count: int``. Defaults to 10. The total number of rows to return.
|
298
|
+
|
299
|
+
.. code-block:: python3
|
300
|
+
|
301
|
+
# Preview dataset
|
302
|
+
df = dataset.preview(row_count=50)
|
303
|
+
df.show()
|
304
|
+
|
305
|
+
As Of
|
306
|
+
=======
|
307
|
+
|
308
|
+
Use the ``as_of()`` method of the ``Dataset`` instance to get time-traveled data as of a specified point in time.
|
309
|
+
|
310
|
+
The ``.as_of()`` method has the following optional parameters:
|
311
|
+
|
312
|
+
- ``commit_timestamp: date-time``. The commit timestamp for the dataset.
|
313
|
+
- ``version_number: int``. The version number for the dataset.
|
314
|
+
|
315
|
+
.. code-block:: python3
|
316
|
+
|
317
|
+
# as_of dataset
|
318
|
+
df = dataset.as_of(version_number=1)
|
319
|
+
|
320
|
+
|
321
|
+
Restore
|
322
|
+
=======
|
323
|
+
Use the ``restore()`` method of the ``Dataset`` instance to restore the dataset to a particular version and timestamp.
|
324
|
+
|
325
|
+
The ``.restore()`` method has the following optional parameters:
|
326
|
+
- ``timestamp: date-time``. The commit timestamp for the dataset.
|
327
|
+
- ``version_number: int``. The version number for the dataset.
|
328
|
+
|
329
|
+
.. code-block:: python3
|
330
|
+
|
331
|
+
# Restore the dataset to a particular version and timestamp
|
332
|
+
df = dataset.restore(version_number=2)
|
333
|
+
df.show()
|
334
|
+
|
335
|
+
|
336
|
+
Profile
|
337
|
+
=======
|
338
|
+
Use the ``profile()`` method of the ``Dataset`` instance to profile the dataset.
|
339
|
+
|
340
|
+
.. code-block:: python3
|
341
|
+
|
342
|
+
# Profile dataset
|
343
|
+
df = dataset.profile()
|
344
|
+
df.show()
|
345
|
+
|
346
|
+
|
347
|
+
History
|
348
|
+
=======
|
349
|
+
Use the ``history()`` method of the ``Dataset`` instance to show the history of the dataset.
|
350
|
+
|
351
|
+
.. code-block:: python3
|
352
|
+
|
353
|
+
# Show history of dataset
|
354
|
+
df = dataset.history()
|
355
|
+
df.show()
|
356
|
+
|
357
|
+
|
358
|
+
Visualize Lineage
|
359
|
+
=================
|
360
|
+
|
361
|
+
Use the ``show()`` method on the ``Dataset`` instance to visualize the lineage of the dataset.
|
362
|
+
|
363
|
+
The ``show()`` method has the following optional parameter:
|
364
|
+
|
365
|
+
- ``rankdir: (str, optional)``. Defaults to ``LR``. The allowed values are ``TB`` or ``LR``. This parameter is applicable only for ``graph`` mode. It renders the direction of the graph as either top to bottom (TB) or left to right (LR).
|
366
|
+
|
367
|
+
|
368
|
+
.. code-block:: python3
|
369
|
+
|
370
|
+
dataset.show()
|
371
|
+
|
372
|
+
The following is an example of the output:
|
373
|
+
|
374
|
+
.. figure:: figures/dataset_lineage.png
|
375
|
+
:width: 400
|
376
|
+
|
377
|
+
|
378
|
+
Add Model Details
|
379
|
+
=================
|
380
|
+
|
381
|
+
Use the ``add_models()`` method of the ``Dataset`` instance to add model IDs to the dataset.
|
382
|
+
The ``.add_models()`` method has the following parameter:
|
383
|
+
|
384
|
+
- ``model_details: ModelDetails``. Provide ``items: List[str]`` as parameter and model IDs are passed as items.
|
385
|
+
|
386
|
+
.. code-block:: python3
|
387
|
+
|
388
|
+
dataset.add_models(ModelDetails().with_items(["ocid1.datasciencemodel..<unique_id>"]))
|
@@ -0,0 +1,27 @@
|
|
1
|
+
.. _Dataset Job:
|
2
|
+
|
3
|
+
Dataset Job
|
4
|
+
***********
|
5
|
+
|
6
|
+
A dataset job is the processing instance of a dataset. Each dataset job includes validation and statistics results.
|
7
|
+
|
8
|
+
|
9
|
+
Load
|
10
|
+
====
|
11
|
+
|
12
|
+
Use the ``from_id()`` method from the ``DatasetJob`` class to load an existing dataset job by specifying its OCID. A ``DatasetJob`` instance is returned.
|
13
|
+
|
14
|
+
.. code-block:: python3
|
15
|
+
|
16
|
+
from ads.feature_store.dataset_job import DatasetJob
|
17
|
+
|
18
|
+
dataset_job = DatasetJob.from_id("<unique_id>")
|
19
|
+
|
20
|
+
Delete
|
21
|
+
======
|
22
|
+
|
23
|
+
Use the ``.delete()`` method on the ``DatasetJob`` instance to delete a dataset job. A dataset job can only be deleted when its associated entities are all deleted.
|
24
|
+
|
25
|
+
.. code-block:: python3
|
26
|
+
|
27
|
+
dataset_job.delete()
|