PyPI - oracle-ads - Versions diffs - 2.13.9rc0__py3-none-any.whl → 2.13.9rc1__py3-none-any.whl - Mend

oracle-ads 2.13.9rc0-py3-none-any.whl → 2.13.9rc1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (857) hide show

ads/aqua/__init__.py +40 -0
ads/aqua/app.py +506 -0
ads/aqua/cli.py +96 -0
ads/aqua/client/__init__.py +3 -0
ads/aqua/client/client.py +836 -0
ads/aqua/client/openai_client.py +305 -0
ads/aqua/common/__init__.py +5 -0
ads/aqua/common/decorator.py +125 -0
ads/aqua/common/entities.py +269 -0
ads/aqua/common/enums.py +122 -0
ads/aqua/common/errors.py +109 -0
ads/aqua/common/utils.py +1285 -0
ads/aqua/config/__init__.py +4 -0
ads/aqua/config/container_config.py +248 -0
ads/aqua/config/evaluation/__init__.py +4 -0
ads/aqua/config/evaluation/evaluation_service_config.py +147 -0
ads/aqua/config/utils/__init__.py +4 -0
ads/aqua/config/utils/serializer.py +339 -0
ads/aqua/constants.py +116 -0
ads/aqua/data.py +14 -0
ads/aqua/dummy_data/icon.txt +1 -0
ads/aqua/dummy_data/oci_model_deployments.json +56 -0
ads/aqua/dummy_data/oci_models.json +1 -0
ads/aqua/dummy_data/readme.md +26 -0
ads/aqua/evaluation/__init__.py +8 -0
ads/aqua/evaluation/constants.py +53 -0
ads/aqua/evaluation/entities.py +186 -0
ads/aqua/evaluation/errors.py +70 -0
ads/aqua/evaluation/evaluation.py +1814 -0
ads/aqua/extension/__init__.py +42 -0
ads/aqua/extension/aqua_ws_msg_handler.py +76 -0
ads/aqua/extension/base_handler.py +90 -0
ads/aqua/extension/common_handler.py +121 -0
ads/aqua/extension/common_ws_msg_handler.py +36 -0
ads/aqua/extension/deployment_handler.py +298 -0
ads/aqua/extension/deployment_ws_msg_handler.py +54 -0
ads/aqua/extension/errors.py +30 -0
ads/aqua/extension/evaluation_handler.py +129 -0
ads/aqua/extension/evaluation_ws_msg_handler.py +61 -0
ads/aqua/extension/finetune_handler.py +96 -0
ads/aqua/extension/model_handler.py +390 -0
ads/aqua/extension/models/__init__.py +0 -0
ads/aqua/extension/models/ws_models.py +145 -0
ads/aqua/extension/models_ws_msg_handler.py +50 -0
ads/aqua/extension/ui_handler.py +282 -0
ads/aqua/extension/ui_websocket_handler.py +130 -0
ads/aqua/extension/utils.py +133 -0
ads/aqua/finetuning/__init__.py +7 -0
ads/aqua/finetuning/constants.py +23 -0
ads/aqua/finetuning/entities.py +181 -0
ads/aqua/finetuning/finetuning.py +749 -0
ads/aqua/model/__init__.py +8 -0
ads/aqua/model/constants.py +60 -0
ads/aqua/model/entities.py +385 -0
ads/aqua/model/enums.py +32 -0
ads/aqua/model/model.py +2114 -0
ads/aqua/modeldeployment/__init__.py +8 -0
ads/aqua/modeldeployment/constants.py +10 -0
ads/aqua/modeldeployment/deployment.py +1326 -0
ads/aqua/modeldeployment/entities.py +653 -0
ads/aqua/modeldeployment/inference.py +74 -0
ads/aqua/modeldeployment/utils.py +543 -0
ads/aqua/resources/gpu_shapes_index.json +94 -0
ads/aqua/server/__init__.py +4 -0
ads/aqua/server/__main__.py +24 -0
ads/aqua/server/app.py +47 -0
ads/aqua/server/aqua_spec.yml +1291 -0
ads/aqua/training/__init__.py +4 -0
ads/aqua/training/exceptions.py +476 -0
ads/aqua/ui.py +499 -0
ads/automl/__init__.py +9 -0
ads/automl/driver.py +330 -0
ads/automl/provider.py +975 -0
ads/bds/__init__.py +5 -0
ads/bds/auth.py +127 -0
ads/bds/big_data_service.py +255 -0
ads/catalog/__init__.py +19 -0
ads/catalog/model.py +1576 -0
ads/catalog/notebook.py +461 -0
ads/catalog/project.py +468 -0
ads/catalog/summary.py +178 -0
ads/common/__init__.py +11 -0
ads/common/analyzer.py +65 -0
ads/common/artifact/.model-ignore +63 -0
ads/common/artifact/__init__.py +10 -0
ads/common/auth.py +1122 -0
ads/common/card_identifier.py +83 -0
ads/common/config.py +647 -0
ads/common/data.py +165 -0
ads/common/decorator/__init__.py +9 -0
ads/common/decorator/argument_to_case.py +88 -0
ads/common/decorator/deprecate.py +69 -0
ads/common/decorator/require_nonempty_arg.py +65 -0
ads/common/decorator/runtime_dependency.py +178 -0
ads/common/decorator/threaded.py +97 -0
ads/common/decorator/utils.py +35 -0
ads/common/dsc_file_system.py +303 -0
ads/common/error.py +14 -0
ads/common/extended_enum.py +81 -0
ads/common/function/__init__.py +5 -0
ads/common/function/fn_util.py +142 -0
ads/common/function/func_conf.yaml +25 -0
ads/common/ipython.py +76 -0
ads/common/model.py +679 -0
ads/common/model_artifact.py +1759 -0
ads/common/model_artifact_schema.json +107 -0
ads/common/model_export_util.py +664 -0
ads/common/model_metadata.py +24 -0
ads/common/object_storage_details.py +296 -0
ads/common/oci_client.py +175 -0
ads/common/oci_datascience.py +46 -0
ads/common/oci_logging.py +1144 -0
ads/common/oci_mixin.py +957 -0
ads/common/oci_resource.py +136 -0
ads/common/serializer.py +559 -0
ads/common/utils.py +1852 -0
ads/common/word_lists.py +1491 -0
ads/common/work_request.py +189 -0
ads/data_labeling/__init__.py +13 -0
ads/data_labeling/boundingbox.py +253 -0
ads/data_labeling/constants.py +47 -0
ads/data_labeling/data_labeling_service.py +244 -0
ads/data_labeling/interface/__init__.py +5 -0
ads/data_labeling/interface/loader.py +16 -0
ads/data_labeling/interface/parser.py +16 -0
ads/data_labeling/interface/reader.py +23 -0
ads/data_labeling/loader/__init__.py +5 -0
ads/data_labeling/loader/file_loader.py +241 -0
ads/data_labeling/metadata.py +110 -0
ads/data_labeling/mixin/__init__.py +5 -0
ads/data_labeling/mixin/data_labeling.py +232 -0
ads/data_labeling/ner.py +129 -0
ads/data_labeling/parser/__init__.py +5 -0
ads/data_labeling/parser/dls_record_parser.py +388 -0
ads/data_labeling/parser/export_metadata_parser.py +94 -0
ads/data_labeling/parser/export_record_parser.py +473 -0
ads/data_labeling/reader/__init__.py +5 -0
ads/data_labeling/reader/dataset_reader.py +574 -0
ads/data_labeling/reader/dls_record_reader.py +121 -0
ads/data_labeling/reader/export_record_reader.py +62 -0
ads/data_labeling/reader/jsonl_reader.py +75 -0
ads/data_labeling/reader/metadata_reader.py +203 -0
ads/data_labeling/reader/record_reader.py +263 -0
ads/data_labeling/record.py +52 -0
ads/data_labeling/visualizer/__init__.py +5 -0
ads/data_labeling/visualizer/image_visualizer.py +525 -0
ads/data_labeling/visualizer/text_visualizer.py +357 -0
ads/database/__init__.py +5 -0
ads/database/connection.py +338 -0
ads/dataset/__init__.py +10 -0
ads/dataset/capabilities.md +51 -0
ads/dataset/classification_dataset.py +339 -0
ads/dataset/correlation.py +226 -0
ads/dataset/correlation_plot.py +563 -0
ads/dataset/dask_series.py +173 -0
ads/dataset/dataframe_transformer.py +110 -0
ads/dataset/dataset.py +1979 -0
ads/dataset/dataset_browser.py +360 -0
ads/dataset/dataset_with_target.py +995 -0
ads/dataset/exception.py +25 -0
ads/dataset/factory.py +987 -0
ads/dataset/feature_engineering_transformer.py +35 -0
ads/dataset/feature_selection.py +107 -0
ads/dataset/forecasting_dataset.py +26 -0
ads/dataset/helper.py +1450 -0
ads/dataset/label_encoder.py +99 -0
ads/dataset/mixin/__init__.py +5 -0
ads/dataset/mixin/dataset_accessor.py +134 -0
ads/dataset/pipeline.py +58 -0
ads/dataset/plot.py +710 -0
ads/dataset/progress.py +86 -0
ads/dataset/recommendation.py +297 -0
ads/dataset/recommendation_transformer.py +502 -0
ads/dataset/regression_dataset.py +14 -0
ads/dataset/sampled_dataset.py +1050 -0
ads/dataset/target.py +98 -0
ads/dataset/timeseries.py +18 -0
ads/dbmixin/__init__.py +5 -0
ads/dbmixin/db_pandas_accessor.py +153 -0
ads/environment/__init__.py +9 -0
ads/environment/ml_runtime.py +66 -0
ads/evaluations/README.md +14 -0
ads/evaluations/__init__.py +109 -0
ads/evaluations/evaluation_plot.py +983 -0
ads/evaluations/evaluator.py +1334 -0
ads/evaluations/statistical_metrics.py +543 -0
ads/experiments/__init__.py +9 -0
ads/experiments/capabilities.md +0 -0
ads/explanations/__init__.py +21 -0
ads/explanations/base_explainer.py +142 -0
ads/explanations/capabilities.md +83 -0
ads/explanations/explainer.py +190 -0
ads/explanations/mlx_global_explainer.py +1050 -0
ads/explanations/mlx_interface.py +386 -0
ads/explanations/mlx_local_explainer.py +287 -0
ads/explanations/mlx_whatif_explainer.py +201 -0
ads/feature_engineering/__init__.py +20 -0
ads/feature_engineering/accessor/__init__.py +5 -0
ads/feature_engineering/accessor/dataframe_accessor.py +535 -0
ads/feature_engineering/accessor/mixin/__init__.py +5 -0
ads/feature_engineering/accessor/mixin/correlation.py +166 -0
ads/feature_engineering/accessor/mixin/eda_mixin.py +266 -0
ads/feature_engineering/accessor/mixin/eda_mixin_series.py +85 -0
ads/feature_engineering/accessor/mixin/feature_types_mixin.py +211 -0
ads/feature_engineering/accessor/mixin/utils.py +65 -0
ads/feature_engineering/accessor/series_accessor.py +431 -0
ads/feature_engineering/adsimage/__init__.py +5 -0
ads/feature_engineering/adsimage/image.py +192 -0
ads/feature_engineering/adsimage/image_reader.py +170 -0
ads/feature_engineering/adsimage/interface/__init__.py +5 -0
ads/feature_engineering/adsimage/interface/reader.py +19 -0
ads/feature_engineering/adsstring/__init__.py +7 -0
ads/feature_engineering/adsstring/oci_language/__init__.py +8 -0
ads/feature_engineering/adsstring/string/__init__.py +8 -0
ads/feature_engineering/data_schema.json +57 -0
ads/feature_engineering/dataset/__init__.py +5 -0
ads/feature_engineering/dataset/zip_code_data.py +42062 -0
ads/feature_engineering/exceptions.py +40 -0
ads/feature_engineering/feature_type/__init__.py +133 -0
ads/feature_engineering/feature_type/address.py +184 -0
ads/feature_engineering/feature_type/adsstring/__init__.py +5 -0
ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +164 -0
ads/feature_engineering/feature_type/adsstring/oci_language.py +93 -0
ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +5 -0
ads/feature_engineering/feature_type/adsstring/parsers/base.py +47 -0
ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +96 -0
ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +221 -0
ads/feature_engineering/feature_type/adsstring/string.py +258 -0
ads/feature_engineering/feature_type/base.py +58 -0
ads/feature_engineering/feature_type/boolean.py +183 -0
ads/feature_engineering/feature_type/category.py +146 -0
ads/feature_engineering/feature_type/constant.py +137 -0
ads/feature_engineering/feature_type/continuous.py +151 -0
ads/feature_engineering/feature_type/creditcard.py +314 -0
ads/feature_engineering/feature_type/datetime.py +190 -0
ads/feature_engineering/feature_type/discrete.py +134 -0
ads/feature_engineering/feature_type/document.py +43 -0
ads/feature_engineering/feature_type/gis.py +251 -0
ads/feature_engineering/feature_type/handler/__init__.py +5 -0
ads/feature_engineering/feature_type/handler/feature_validator.py +524 -0
ads/feature_engineering/feature_type/handler/feature_warning.py +319 -0
ads/feature_engineering/feature_type/handler/warnings.py +128 -0
ads/feature_engineering/feature_type/integer.py +142 -0
ads/feature_engineering/feature_type/ip_address.py +144 -0
ads/feature_engineering/feature_type/ip_address_v4.py +138 -0
ads/feature_engineering/feature_type/ip_address_v6.py +138 -0
ads/feature_engineering/feature_type/lat_long.py +256 -0
ads/feature_engineering/feature_type/object.py +43 -0
ads/feature_engineering/feature_type/ordinal.py +132 -0
ads/feature_engineering/feature_type/phone_number.py +135 -0
ads/feature_engineering/feature_type/string.py +171 -0
ads/feature_engineering/feature_type/text.py +93 -0
ads/feature_engineering/feature_type/unknown.py +43 -0
ads/feature_engineering/feature_type/zip_code.py +164 -0
ads/feature_engineering/feature_type_manager.py +406 -0
ads/feature_engineering/schema.py +795 -0
ads/feature_engineering/utils.py +245 -0
ads/feature_store/.readthedocs.yaml +19 -0
ads/feature_store/README.md +65 -0
ads/feature_store/__init__.py +9 -0
ads/feature_store/common/__init__.py +0 -0
ads/feature_store/common/enums.py +339 -0
ads/feature_store/common/exceptions.py +18 -0
ads/feature_store/common/spark_session_singleton.py +125 -0
ads/feature_store/common/utils/__init__.py +0 -0
ads/feature_store/common/utils/base64_encoder_decoder.py +72 -0
ads/feature_store/common/utils/feature_schema_mapper.py +283 -0
ads/feature_store/common/utils/transformation_utils.py +82 -0
ads/feature_store/common/utils/utility.py +403 -0
ads/feature_store/data_validation/__init__.py +0 -0
ads/feature_store/data_validation/great_expectation.py +129 -0
ads/feature_store/dataset.py +1230 -0
ads/feature_store/dataset_job.py +530 -0
ads/feature_store/docs/Dockerfile +7 -0
ads/feature_store/docs/Makefile +44 -0
ads/feature_store/docs/conf.py +28 -0
ads/feature_store/docs/requirements.txt +14 -0
ads/feature_store/docs/source/ads.feature_store.query.rst +20 -0
ads/feature_store/docs/source/cicd.rst +137 -0
ads/feature_store/docs/source/conf.py +86 -0
ads/feature_store/docs/source/data_versioning.rst +33 -0
ads/feature_store/docs/source/dataset.rst +388 -0
ads/feature_store/docs/source/dataset_job.rst +27 -0
ads/feature_store/docs/source/demo.rst +70 -0
ads/feature_store/docs/source/entity.rst +78 -0
ads/feature_store/docs/source/feature_group.rst +624 -0
ads/feature_store/docs/source/feature_group_job.rst +29 -0
ads/feature_store/docs/source/feature_store.rst +122 -0
ads/feature_store/docs/source/feature_store_class.rst +123 -0
ads/feature_store/docs/source/feature_validation.rst +66 -0
ads/feature_store/docs/source/figures/cicd.png +0 -0
ads/feature_store/docs/source/figures/data_validation.png +0 -0
ads/feature_store/docs/source/figures/data_versioning.png +0 -0
ads/feature_store/docs/source/figures/dataset.gif +0 -0
ads/feature_store/docs/source/figures/dataset.png +0 -0
ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
ads/feature_store/docs/source/figures/entity.png +0 -0
ads/feature_store/docs/source/figures/feature_group.png +0 -0
ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
ads/feature_store/docs/source/figures/overview.png +0 -0
ads/feature_store/docs/source/figures/resource_manager.png +0 -0
ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
ads/feature_store/docs/source/figures/stats_1.png +0 -0
ads/feature_store/docs/source/figures/stats_2.png +0 -0
ads/feature_store/docs/source/figures/stats_d.png +0 -0
ads/feature_store/docs/source/figures/stats_fg.png +0 -0
ads/feature_store/docs/source/figures/transformation.png +0 -0
ads/feature_store/docs/source/figures/transformations.gif +0 -0
ads/feature_store/docs/source/figures/validation.png +0 -0
ads/feature_store/docs/source/figures/validation_fg.png +0 -0
ads/feature_store/docs/source/figures/validation_results.png +0 -0
ads/feature_store/docs/source/figures/validation_summary.png +0 -0
ads/feature_store/docs/source/index.rst +81 -0
ads/feature_store/docs/source/module.rst +8 -0
ads/feature_store/docs/source/notebook.rst +94 -0
ads/feature_store/docs/source/overview.rst +47 -0
ads/feature_store/docs/source/quickstart.rst +176 -0
ads/feature_store/docs/source/release_notes.rst +194 -0
ads/feature_store/docs/source/setup_feature_store.rst +81 -0
ads/feature_store/docs/source/statistics.rst +58 -0
ads/feature_store/docs/source/transformation.rst +199 -0
ads/feature_store/docs/source/ui.rst +65 -0
ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +66 -0
ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +192 -0
ads/feature_store/docs/source/user_guides.setup.terraform.rst +338 -0
ads/feature_store/entity.py +718 -0
ads/feature_store/execution_strategy/__init__.py +0 -0
ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +375 -0
ads/feature_store/execution_strategy/engine/__init__.py +0 -0
ads/feature_store/execution_strategy/engine/spark_engine.py +316 -0
ads/feature_store/execution_strategy/execution_strategy.py +113 -0
ads/feature_store/execution_strategy/execution_strategy_provider.py +47 -0
ads/feature_store/execution_strategy/spark/__init__.py +0 -0
ads/feature_store/execution_strategy/spark/spark_execution.py +618 -0
ads/feature_store/feature.py +192 -0
ads/feature_store/feature_group.py +1494 -0
ads/feature_store/feature_group_expectation.py +346 -0
ads/feature_store/feature_group_job.py +602 -0
ads/feature_store/feature_lineage/__init__.py +0 -0
ads/feature_store/feature_lineage/graphviz_service.py +180 -0
ads/feature_store/feature_option_details.py +50 -0
ads/feature_store/feature_statistics/__init__.py +0 -0
ads/feature_store/feature_statistics/statistics_service.py +99 -0
ads/feature_store/feature_store.py +699 -0
ads/feature_store/feature_store_registrar.py +518 -0
ads/feature_store/input_feature_detail.py +149 -0
ads/feature_store/mixin/__init__.py +4 -0
ads/feature_store/mixin/oci_feature_store.py +145 -0
ads/feature_store/model_details.py +73 -0
ads/feature_store/query/__init__.py +0 -0
ads/feature_store/query/filter.py +266 -0
ads/feature_store/query/generator/__init__.py +0 -0
ads/feature_store/query/generator/query_generator.py +298 -0
ads/feature_store/query/join.py +161 -0
ads/feature_store/query/query.py +403 -0
ads/feature_store/query/validator/__init__.py +0 -0
ads/feature_store/query/validator/query_validator.py +57 -0
ads/feature_store/response/__init__.py +0 -0
ads/feature_store/response/response_builder.py +68 -0
ads/feature_store/service/__init__.py +0 -0
ads/feature_store/service/oci_dataset.py +139 -0
ads/feature_store/service/oci_dataset_job.py +199 -0
ads/feature_store/service/oci_entity.py +125 -0
ads/feature_store/service/oci_feature_group.py +164 -0
ads/feature_store/service/oci_feature_group_job.py +214 -0
ads/feature_store/service/oci_feature_store.py +182 -0
ads/feature_store/service/oci_lineage.py +87 -0
ads/feature_store/service/oci_transformation.py +104 -0
ads/feature_store/statistics/__init__.py +0 -0
ads/feature_store/statistics/abs_feature_value.py +49 -0
ads/feature_store/statistics/charts/__init__.py +0 -0
ads/feature_store/statistics/charts/abstract_feature_plot.py +37 -0
ads/feature_store/statistics/charts/box_plot.py +148 -0
ads/feature_store/statistics/charts/frequency_distribution.py +65 -0
ads/feature_store/statistics/charts/probability_distribution.py +68 -0
ads/feature_store/statistics/charts/top_k_frequent_elements.py +98 -0
ads/feature_store/statistics/feature_stat.py +126 -0
ads/feature_store/statistics/generic_feature_value.py +33 -0
ads/feature_store/statistics/statistics.py +41 -0
ads/feature_store/statistics_config.py +101 -0
ads/feature_store/templates/feature_store_template.yaml +45 -0
ads/feature_store/transformation.py +499 -0
ads/feature_store/validation_output.py +57 -0
ads/hpo/__init__.py +9 -0
ads/hpo/_imports.py +91 -0
ads/hpo/ads_search_space.py +439 -0
ads/hpo/distributions.py +325 -0
ads/hpo/objective.py +280 -0
ads/hpo/search_cv.py +1657 -0
ads/hpo/stopping_criterion.py +75 -0
ads/hpo/tuner_artifact.py +413 -0
ads/hpo/utils.py +91 -0
ads/hpo/validation.py +140 -0
ads/hpo/visualization/__init__.py +5 -0
ads/hpo/visualization/_contour.py +23 -0
ads/hpo/visualization/_edf.py +20 -0
ads/hpo/visualization/_intermediate_values.py +21 -0
ads/hpo/visualization/_optimization_history.py +25 -0
ads/hpo/visualization/_parallel_coordinate.py +169 -0
ads/hpo/visualization/_param_importances.py +26 -0
ads/jobs/__init__.py +53 -0
ads/jobs/ads_job.py +663 -0
ads/jobs/builders/__init__.py +5 -0
ads/jobs/builders/base.py +156 -0
ads/jobs/builders/infrastructure/__init__.py +6 -0
ads/jobs/builders/infrastructure/base.py +165 -0
ads/jobs/builders/infrastructure/dataflow.py +1252 -0
ads/jobs/builders/infrastructure/dsc_job.py +1894 -0
ads/jobs/builders/infrastructure/dsc_job_runtime.py +1233 -0
ads/jobs/builders/infrastructure/utils.py +65 -0
ads/jobs/builders/runtimes/__init__.py +5 -0
ads/jobs/builders/runtimes/artifact.py +338 -0
ads/jobs/builders/runtimes/base.py +325 -0
ads/jobs/builders/runtimes/container_runtime.py +242 -0
ads/jobs/builders/runtimes/python_runtime.py +1016 -0
ads/jobs/builders/runtimes/pytorch_runtime.py +204 -0
ads/jobs/cli.py +104 -0
ads/jobs/env_var_parser.py +131 -0
ads/jobs/extension.py +160 -0
ads/jobs/schema/__init__.py +5 -0
ads/jobs/schema/infrastructure_schema.json +116 -0
ads/jobs/schema/job_schema.json +42 -0
ads/jobs/schema/runtime_schema.json +183 -0
ads/jobs/schema/validator.py +141 -0
ads/jobs/serializer.py +296 -0
ads/jobs/templates/__init__.py +5 -0
ads/jobs/templates/container.py +6 -0
ads/jobs/templates/driver_notebook.py +177 -0
ads/jobs/templates/driver_oci.py +500 -0
ads/jobs/templates/driver_python.py +48 -0
ads/jobs/templates/driver_pytorch.py +852 -0
ads/jobs/templates/driver_utils.py +615 -0
ads/jobs/templates/hostname_from_env.c +55 -0
ads/jobs/templates/oci_metrics.py +181 -0
ads/jobs/utils.py +104 -0
ads/llm/__init__.py +28 -0
ads/llm/autogen/__init__.py +2 -0
ads/llm/autogen/constants.py +15 -0
ads/llm/autogen/reports/__init__.py +2 -0
ads/llm/autogen/reports/base.py +67 -0
ads/llm/autogen/reports/data.py +103 -0
ads/llm/autogen/reports/session.py +526 -0
ads/llm/autogen/reports/templates/chat_box.html +13 -0
ads/llm/autogen/reports/templates/chat_box_lt.html +5 -0
ads/llm/autogen/reports/templates/chat_box_rt.html +6 -0
ads/llm/autogen/reports/utils.py +56 -0
ads/llm/autogen/v02/__init__.py +4 -0
ads/llm/autogen/v02/client.py +295 -0
ads/llm/autogen/v02/log_handlers/__init__.py +2 -0
ads/llm/autogen/v02/log_handlers/oci_file_handler.py +83 -0
ads/llm/autogen/v02/loggers/__init__.py +6 -0
ads/llm/autogen/v02/loggers/metric_logger.py +320 -0
ads/llm/autogen/v02/loggers/session_logger.py +580 -0
ads/llm/autogen/v02/loggers/utils.py +86 -0
ads/llm/autogen/v02/runtime_logging.py +163 -0
ads/llm/chain.py +268 -0
ads/llm/chat_template.py +31 -0
ads/llm/deploy.py +63 -0
ads/llm/guardrails/__init__.py +5 -0
ads/llm/guardrails/base.py +442 -0
ads/llm/guardrails/huggingface.py +44 -0
ads/llm/langchain/__init__.py +5 -0
ads/llm/langchain/plugins/__init__.py +5 -0
ads/llm/langchain/plugins/chat_models/__init__.py +5 -0
ads/llm/langchain/plugins/chat_models/oci_data_science.py +1027 -0
ads/llm/langchain/plugins/embeddings/__init__.py +4 -0
ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +184 -0
ads/llm/langchain/plugins/llms/__init__.py +5 -0
ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +979 -0
ads/llm/requirements.txt +3 -0
ads/llm/serialize.py +219 -0
ads/llm/serializers/__init__.py +0 -0
ads/llm/serializers/retrieval_qa.py +153 -0
ads/llm/serializers/runnable_parallel.py +27 -0
ads/llm/templates/score_chain.jinja2 +155 -0
ads/llm/templates/tool_chat_template_hermes.jinja +130 -0
ads/llm/templates/tool_chat_template_mistral_parallel.jinja +94 -0
ads/model/__init__.py +52 -0
ads/model/artifact.py +573 -0
ads/model/artifact_downloader.py +254 -0
ads/model/artifact_uploader.py +267 -0
ads/model/base_properties.py +238 -0
ads/model/common/.model-ignore +66 -0
ads/model/common/__init__.py +5 -0
ads/model/common/utils.py +142 -0
ads/model/datascience_model.py +2635 -0
ads/model/deployment/__init__.py +20 -0
ads/model/deployment/common/__init__.py +5 -0
ads/model/deployment/common/utils.py +308 -0
ads/model/deployment/model_deployer.py +466 -0
ads/model/deployment/model_deployment.py +1846 -0
ads/model/deployment/model_deployment_infrastructure.py +671 -0
ads/model/deployment/model_deployment_properties.py +493 -0
ads/model/deployment/model_deployment_runtime.py +838 -0
ads/model/extractor/__init__.py +5 -0
ads/model/extractor/automl_extractor.py +74 -0
ads/model/extractor/embedding_onnx_extractor.py +80 -0
ads/model/extractor/huggingface_extractor.py +88 -0
ads/model/extractor/keras_extractor.py +84 -0
ads/model/extractor/lightgbm_extractor.py +93 -0
ads/model/extractor/model_info_extractor.py +114 -0
ads/model/extractor/model_info_extractor_factory.py +105 -0
ads/model/extractor/pytorch_extractor.py +87 -0
ads/model/extractor/sklearn_extractor.py +112 -0
ads/model/extractor/spark_extractor.py +89 -0
ads/model/extractor/tensorflow_extractor.py +85 -0
ads/model/extractor/xgboost_extractor.py +94 -0
ads/model/framework/__init__.py +5 -0
ads/model/framework/automl_model.py +178 -0
ads/model/framework/embedding_onnx_model.py +438 -0
ads/model/framework/huggingface_model.py +399 -0
ads/model/framework/lightgbm_model.py +266 -0
ads/model/framework/pytorch_model.py +266 -0
ads/model/framework/sklearn_model.py +250 -0
ads/model/framework/spark_model.py +326 -0
ads/model/framework/tensorflow_model.py +254 -0
ads/model/framework/xgboost_model.py +258 -0
ads/model/generic_model.py +3518 -0
ads/model/model_artifact_boilerplate/README.md +381 -0
ads/model/model_artifact_boilerplate/__init__.py +5 -0
ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +5 -0
ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +427 -0
ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +2 -0
ads/model/model_artifact_boilerplate/runtime.yaml +7 -0
ads/model/model_artifact_boilerplate/score.py +61 -0
ads/model/model_file_description_schema.json +68 -0
ads/model/model_introspect.py +331 -0
ads/model/model_metadata.py +1810 -0
ads/model/model_metadata_mixin.py +460 -0
ads/model/model_properties.py +63 -0
ads/model/model_version_set.py +739 -0
ads/model/runtime/__init__.py +5 -0
ads/model/runtime/env_info.py +306 -0
ads/model/runtime/model_deployment_details.py +37 -0
ads/model/runtime/model_provenance_details.py +58 -0
ads/model/runtime/runtime_info.py +81 -0
ads/model/runtime/schemas/inference_env_info_schema.yaml +16 -0
ads/model/runtime/schemas/model_provenance_schema.yaml +36 -0
ads/model/runtime/schemas/training_env_info_schema.yaml +16 -0
ads/model/runtime/utils.py +201 -0
ads/model/serde/__init__.py +5 -0
ads/model/serde/common.py +40 -0
ads/model/serde/model_input.py +547 -0
ads/model/serde/model_serializer.py +1184 -0
ads/model/service/__init__.py +5 -0
ads/model/service/oci_datascience_model.py +1076 -0
ads/model/service/oci_datascience_model_deployment.py +500 -0
ads/model/service/oci_datascience_model_version_set.py +176 -0
ads/model/transformer/__init__.py +5 -0
ads/model/transformer/onnx_transformer.py +324 -0
ads/mysqldb/__init__.py +5 -0
ads/mysqldb/mysql_db.py +227 -0
ads/opctl/__init__.py +18 -0
ads/opctl/anomaly_detection.py +11 -0
ads/opctl/backend/__init__.py +5 -0
ads/opctl/backend/ads_dataflow.py +353 -0
ads/opctl/backend/ads_ml_job.py +710 -0
ads/opctl/backend/ads_ml_pipeline.py +164 -0
ads/opctl/backend/ads_model_deployment.py +209 -0
ads/opctl/backend/base.py +146 -0
ads/opctl/backend/local.py +1053 -0
ads/opctl/backend/marketplace/__init__.py +9 -0
ads/opctl/backend/marketplace/helm_helper.py +173 -0
ads/opctl/backend/marketplace/local_marketplace.py +271 -0
ads/opctl/backend/marketplace/marketplace_backend_runner.py +71 -0
ads/opctl/backend/marketplace/marketplace_operator_interface.py +44 -0
ads/opctl/backend/marketplace/marketplace_operator_runner.py +24 -0
ads/opctl/backend/marketplace/marketplace_utils.py +212 -0
ads/opctl/backend/marketplace/models/__init__.py +5 -0
ads/opctl/backend/marketplace/models/bearer_token.py +94 -0
ads/opctl/backend/marketplace/models/marketplace_type.py +70 -0
ads/opctl/backend/marketplace/models/ocir_details.py +56 -0
ads/opctl/backend/marketplace/prerequisite_checker.py +238 -0
ads/opctl/cli.py +707 -0
ads/opctl/cmds.py +869 -0
ads/opctl/conda/__init__.py +5 -0
ads/opctl/conda/cli.py +193 -0
ads/opctl/conda/cmds.py +749 -0
ads/opctl/conda/config.yaml +34 -0
ads/opctl/conda/manifest_template.yaml +13 -0
ads/opctl/conda/multipart_uploader.py +188 -0
ads/opctl/conda/pack.py +89 -0
ads/opctl/config/__init__.py +5 -0
ads/opctl/config/base.py +57 -0
ads/opctl/config/diagnostics/__init__.py +5 -0
ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +62 -0
ads/opctl/config/merger.py +255 -0
ads/opctl/config/resolver.py +297 -0
ads/opctl/config/utils.py +79 -0
ads/opctl/config/validator.py +17 -0
ads/opctl/config/versioner.py +68 -0
ads/opctl/config/yaml_parsers/__init__.py +7 -0
ads/opctl/config/yaml_parsers/base.py +58 -0
ads/opctl/config/yaml_parsers/distributed/__init__.py +7 -0
ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +201 -0
ads/opctl/constants.py +66 -0
ads/opctl/decorator/__init__.py +5 -0
ads/opctl/decorator/common.py +129 -0
ads/opctl/diagnostics/__init__.py +5 -0
ads/opctl/diagnostics/__main__.py +25 -0
ads/opctl/diagnostics/check_distributed_job_requirements.py +212 -0
ads/opctl/diagnostics/check_requirements.py +144 -0
ads/opctl/diagnostics/requirement_exception.py +9 -0
ads/opctl/distributed/README.md +109 -0
ads/opctl/distributed/__init__.py +5 -0
ads/opctl/distributed/certificates.py +32 -0
ads/opctl/distributed/cli.py +207 -0
ads/opctl/distributed/cmds.py +731 -0
ads/opctl/distributed/common/__init__.py +5 -0
ads/opctl/distributed/common/abstract_cluster_provider.py +449 -0
ads/opctl/distributed/common/abstract_framework_spec_builder.py +88 -0
ads/opctl/distributed/common/cluster_config_helper.py +103 -0
ads/opctl/distributed/common/cluster_provider_factory.py +21 -0
ads/opctl/distributed/common/cluster_runner.py +54 -0
ads/opctl/distributed/common/framework_factory.py +29 -0
ads/opctl/docker/Dockerfile.job +103 -0
ads/opctl/docker/Dockerfile.job.arm +107 -0
ads/opctl/docker/Dockerfile.job.gpu +175 -0
ads/opctl/docker/base-env.yaml +13 -0
ads/opctl/docker/cuda.repo +6 -0
ads/opctl/docker/operator/.dockerignore +0 -0
ads/opctl/docker/operator/Dockerfile +41 -0
ads/opctl/docker/operator/Dockerfile.gpu +85 -0
ads/opctl/docker/operator/cuda.repo +6 -0
ads/opctl/docker/operator/environment.yaml +8 -0
ads/opctl/forecast.py +11 -0
ads/opctl/index.yaml +3 -0
ads/opctl/model/__init__.py +5 -0
ads/opctl/model/cli.py +65 -0
ads/opctl/model/cmds.py +73 -0
ads/opctl/operator/README.md +4 -0
ads/opctl/operator/__init__.py +31 -0
ads/opctl/operator/cli.py +344 -0
ads/opctl/operator/cmd.py +596 -0
ads/opctl/operator/common/__init__.py +5 -0
ads/opctl/operator/common/backend_factory.py +460 -0
ads/opctl/operator/common/const.py +27 -0
ads/opctl/operator/common/data/synthetic.csv +16001 -0
ads/opctl/operator/common/dictionary_merger.py +148 -0
ads/opctl/operator/common/errors.py +42 -0
ads/opctl/operator/common/operator_config.py +99 -0
ads/opctl/operator/common/operator_loader.py +811 -0
ads/opctl/operator/common/operator_schema.yaml +130 -0
ads/opctl/operator/common/operator_yaml_generator.py +152 -0
ads/opctl/operator/common/utils.py +208 -0
ads/opctl/operator/lowcode/__init__.py +5 -0
ads/opctl/operator/lowcode/anomaly/MLoperator +16 -0
ads/opctl/operator/lowcode/anomaly/README.md +207 -0
ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
ads/opctl/operator/lowcode/anomaly/__main__.py +103 -0
ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
ads/opctl/operator/lowcode/anomaly/const.py +167 -0
ads/opctl/operator/lowcode/anomaly/environment.yaml +10 -0
ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +146 -0
ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +162 -0
ads/opctl/operator/lowcode/anomaly/model/automlx.py +99 -0
ads/opctl/operator/lowcode/anomaly/model/autots.py +115 -0
ads/opctl/operator/lowcode/anomaly/model/base_model.py +404 -0
ads/opctl/operator/lowcode/anomaly/model/factory.py +110 -0
ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +78 -0
ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +78 -0
ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +120 -0
ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
ads/opctl/operator/lowcode/anomaly/operator_config.py +127 -0
ads/opctl/operator/lowcode/anomaly/schema.yaml +401 -0
ads/opctl/operator/lowcode/anomaly/utils.py +88 -0
ads/opctl/operator/lowcode/common/__init__.py +5 -0
ads/opctl/operator/lowcode/common/const.py +10 -0
ads/opctl/operator/lowcode/common/data.py +116 -0
ads/opctl/operator/lowcode/common/errors.py +47 -0
ads/opctl/operator/lowcode/common/transformations.py +296 -0
ads/opctl/operator/lowcode/common/utils.py +384 -0
ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +13 -0
ads/opctl/operator/lowcode/feature_store_marketplace/README.md +30 -0
ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +5 -0
ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +116 -0
ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +85 -0
ads/opctl/operator/lowcode/feature_store_marketplace/const.py +15 -0
ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +4 -0
ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +32 -0
ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +43 -0
ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +120 -0
ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +34 -0
ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +386 -0
ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +160 -0
ads/opctl/operator/lowcode/forecast/MLoperator +25 -0
ads/opctl/operator/lowcode/forecast/README.md +209 -0
ads/opctl/operator/lowcode/forecast/__init__.py +5 -0
ads/opctl/operator/lowcode/forecast/__main__.py +89 -0
ads/opctl/operator/lowcode/forecast/cmd.py +40 -0
ads/opctl/operator/lowcode/forecast/const.py +92 -0
ads/opctl/operator/lowcode/forecast/environment.yaml +20 -0
ads/opctl/operator/lowcode/forecast/errors.py +26 -0
ads/opctl/operator/lowcode/forecast/model/__init__.py +5 -0
ads/opctl/operator/lowcode/forecast/model/arima.py +279 -0
ads/opctl/operator/lowcode/forecast/model/automlx.py +553 -0
ads/opctl/operator/lowcode/forecast/model/autots.py +312 -0
ads/opctl/operator/lowcode/forecast/model/base_model.py +875 -0
ads/opctl/operator/lowcode/forecast/model/factory.py +106 -0
ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +492 -0
ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +243 -0
ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +482 -0
ads/opctl/operator/lowcode/forecast/model/prophet.py +445 -0
ads/opctl/operator/lowcode/forecast/model_evaluator.py +244 -0
ads/opctl/operator/lowcode/forecast/operator_config.py +234 -0
ads/opctl/operator/lowcode/forecast/schema.yaml +506 -0
ads/opctl/operator/lowcode/forecast/utils.py +397 -0
ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +7 -0
ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +285 -0
ads/opctl/operator/lowcode/forecast/whatifserve/score.py +246 -0
ads/opctl/operator/lowcode/pii/MLoperator +17 -0
ads/opctl/operator/lowcode/pii/README.md +208 -0
ads/opctl/operator/lowcode/pii/__init__.py +5 -0
ads/opctl/operator/lowcode/pii/__main__.py +78 -0
ads/opctl/operator/lowcode/pii/cmd.py +39 -0
ads/opctl/operator/lowcode/pii/constant.py +84 -0
ads/opctl/operator/lowcode/pii/environment.yaml +17 -0
ads/opctl/operator/lowcode/pii/errors.py +27 -0
ads/opctl/operator/lowcode/pii/model/__init__.py +5 -0
ads/opctl/operator/lowcode/pii/model/factory.py +82 -0
ads/opctl/operator/lowcode/pii/model/guardrails.py +167 -0
ads/opctl/operator/lowcode/pii/model/pii.py +145 -0
ads/opctl/operator/lowcode/pii/model/processor/__init__.py +34 -0
ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +34 -0
ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +35 -0
ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +225 -0
ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +73 -0
ads/opctl/operator/lowcode/pii/model/processor/remover.py +26 -0
ads/opctl/operator/lowcode/pii/model/report.py +487 -0
ads/opctl/operator/lowcode/pii/operator_config.py +95 -0
ads/opctl/operator/lowcode/pii/schema.yaml +108 -0
ads/opctl/operator/lowcode/pii/utils.py +43 -0
ads/opctl/operator/lowcode/recommender/MLoperator +16 -0
ads/opctl/operator/lowcode/recommender/README.md +206 -0
ads/opctl/operator/lowcode/recommender/__init__.py +5 -0
ads/opctl/operator/lowcode/recommender/__main__.py +82 -0
ads/opctl/operator/lowcode/recommender/cmd.py +33 -0
ads/opctl/operator/lowcode/recommender/constant.py +30 -0
ads/opctl/operator/lowcode/recommender/environment.yaml +11 -0
ads/opctl/operator/lowcode/recommender/model/base_model.py +212 -0
ads/opctl/operator/lowcode/recommender/model/factory.py +56 -0
ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +25 -0
ads/opctl/operator/lowcode/recommender/model/svd.py +106 -0
ads/opctl/operator/lowcode/recommender/operator_config.py +81 -0
ads/opctl/operator/lowcode/recommender/schema.yaml +265 -0
ads/opctl/operator/lowcode/recommender/utils.py +13 -0
ads/opctl/operator/runtime/__init__.py +5 -0
ads/opctl/operator/runtime/const.py +17 -0
ads/opctl/operator/runtime/container_runtime_schema.yaml +50 -0
ads/opctl/operator/runtime/marketplace_runtime.py +50 -0
ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +21 -0
ads/opctl/operator/runtime/python_runtime_schema.yaml +21 -0
ads/opctl/operator/runtime/runtime.py +115 -0
ads/opctl/schema.yaml.yml +36 -0
ads/opctl/script.py +40 -0
ads/opctl/spark/__init__.py +5 -0
ads/opctl/spark/cli.py +43 -0
ads/opctl/spark/cmds.py +147 -0
ads/opctl/templates/diagnostic_report_template.jinja2 +102 -0
ads/opctl/utils.py +344 -0
ads/oracledb/__init__.py +5 -0
ads/oracledb/oracle_db.py +346 -0
ads/pipeline/__init__.py +39 -0
ads/pipeline/ads_pipeline.py +2279 -0
ads/pipeline/ads_pipeline_run.py +772 -0
ads/pipeline/ads_pipeline_step.py +605 -0
ads/pipeline/builders/__init__.py +5 -0
ads/pipeline/builders/infrastructure/__init__.py +5 -0
ads/pipeline/builders/infrastructure/custom_script.py +32 -0
ads/pipeline/cli.py +119 -0
ads/pipeline/extension.py +291 -0
ads/pipeline/schema/__init__.py +5 -0
ads/pipeline/schema/cs_step_schema.json +35 -0
ads/pipeline/schema/ml_step_schema.json +31 -0
ads/pipeline/schema/pipeline_schema.json +71 -0
ads/pipeline/visualizer/__init__.py +5 -0
ads/pipeline/visualizer/base.py +570 -0
ads/pipeline/visualizer/graph_renderer.py +272 -0
ads/pipeline/visualizer/text_renderer.py +84 -0
ads/secrets/__init__.py +11 -0
ads/secrets/adb.py +386 -0
ads/secrets/auth_token.py +86 -0
ads/secrets/big_data_service.py +365 -0
ads/secrets/mysqldb.py +149 -0
ads/secrets/oracledb.py +160 -0
ads/secrets/secrets.py +407 -0
ads/telemetry/__init__.py +7 -0
ads/telemetry/base.py +69 -0
ads/telemetry/client.py +125 -0
ads/telemetry/telemetry.py +257 -0
ads/templates/dataflow_pyspark.jinja2 +13 -0
ads/templates/dataflow_sparksql.jinja2 +22 -0
ads/templates/func.jinja2 +20 -0
ads/templates/schemas/openapi.json +1740 -0
ads/templates/score-pkl.jinja2 +173 -0
ads/templates/score.jinja2 +322 -0
ads/templates/score_embedding_onnx.jinja2 +202 -0
ads/templates/score_generic.jinja2 +165 -0
ads/templates/score_huggingface_pipeline.jinja2 +217 -0
ads/templates/score_lightgbm.jinja2 +185 -0
ads/templates/score_onnx.jinja2 +407 -0
ads/templates/score_onnx_new.jinja2 +473 -0
ads/templates/score_oracle_automl.jinja2 +185 -0
ads/templates/score_pyspark.jinja2 +154 -0
ads/templates/score_pytorch.jinja2 +219 -0
ads/templates/score_scikit-learn.jinja2 +184 -0
ads/templates/score_tensorflow.jinja2 +184 -0
ads/templates/score_xgboost.jinja2 +178 -0
ads/text_dataset/__init__.py +5 -0
ads/text_dataset/backends.py +211 -0
ads/text_dataset/dataset.py +445 -0
ads/text_dataset/extractor.py +207 -0
ads/text_dataset/options.py +53 -0
ads/text_dataset/udfs.py +22 -0
ads/text_dataset/utils.py +49 -0
ads/type_discovery/__init__.py +9 -0
ads/type_discovery/abstract_detector.py +21 -0
ads/type_discovery/constant_detector.py +41 -0
ads/type_discovery/continuous_detector.py +54 -0
ads/type_discovery/credit_card_detector.py +99 -0
ads/type_discovery/datetime_detector.py +92 -0
ads/type_discovery/discrete_detector.py +118 -0
ads/type_discovery/document_detector.py +146 -0
ads/type_discovery/ip_detector.py +68 -0
ads/type_discovery/latlon_detector.py +90 -0
ads/type_discovery/phone_number_detector.py +63 -0
ads/type_discovery/type_discovery_driver.py +87 -0
ads/type_discovery/typed_feature.py +594 -0
ads/type_discovery/unknown_detector.py +41 -0
ads/type_discovery/zipcode_detector.py +48 -0
ads/vault/__init__.py +7 -0
ads/vault/vault.py +237 -0
{oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.9rc1.dist-info}/METADATA +150 -150
oracle_ads-2.13.9rc1.dist-info/RECORD +858 -0
{oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.9rc1.dist-info}/WHEEL +1 -2
{oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.9rc1.dist-info}/entry_points.txt +2 -1
oracle_ads-2.13.9rc0.dist-info/RECORD +0 -9
oracle_ads-2.13.9rc0.dist-info/top_level.txt +0 -1
{oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.9rc1.dist-info}/licenses/LICENSE.txt +0 -0

ads/dataset/dataset_with_target.py ADDED Viewed

@@ -0,0 +1,995 @@
+#!/usr/bin/env python
+# -*- coding: utf-8; -*-
+# Copyright (c) 2020, 2023 Oracle and/or its affiliates.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
+from __future__ import absolute_import, print_function
+import abc
+import importlib
+from collections import defaultdict
+from numbers import Number
+from typing import Tuple, Union
+import pandas as pd
+from ads.common import utils, logger
+from ads.common.data import ADSData
+from ads.common.decorator.runtime_dependency import (
+    runtime_dependency,
+    OptionalDependency,
+)
+from ads.dataset import helper
+from ads.dataset.dataset import ADSDataset
+from ads.dataset.feature_engineering_transformer import FeatureEngineeringTransformer
+from ads.dataset.feature_selection import FeatureImportance
+from ads.dataset.helper import (
+    DatasetDefaults,
+    deprecate_default_value,
+    deprecate_variable,
+    generate_sample,
+    get_target_type,
+    is_text_data,
+)
+from ads.dataset.label_encoder import DataFrameLabelEncoder
+from ads.dataset.pipeline import TransformerPipeline
+from ads.dataset.progress import DummyProgressBar
+from ads.dataset.recommendation import Recommendation
+from ads.dataset.recommendation_transformer import RecommendationTransformer
+from ads.dataset.target import TargetVariable
+from ads.type_discovery.typed_feature import (
+    CategoricalTypedFeature,
+    ContinuousTypedFeature,
+    DocumentTypedFeature,
+    GISTypedFeature,
+    OrdinalTypedFeature,
+    TypedFeature,
+    DateTimeTypedFeature,
+    TypedFeature
+)
+from sklearn.model_selection import train_test_split
+from pandas.io.formats.printing import pprint_thing
+from sklearn.preprocessing import FunctionTransformer
+from abc import ABCMeta
+class ADSDatasetWithTarget(ADSDataset, metaclass=ABCMeta):
+    """
+    This class provides APIs for preparing dataset for modeling.
+    """
+    def __init__(
+        self,
+        df,
+        target,
+        sampled_df=None,
+        shape=None,
+        target_type=None,
+        sample_max_rows=-1,
+        type_discovery=True,
+        types={},
+        parent=None,
+        name="",
+        metadata=None,
+        transformer_pipeline=None,
+        description=None,
+        progress=DummyProgressBar(),
+        **kwargs,
+    ):
+        self.recommendation_transformer = None
+        if shape is None:
+            shape = df.shape
+        if sampled_df is None:
+            sampled_df = generate_sample(
+                df,
+                shape[0],
+                DatasetDefaults.sampling_confidence_level,
+                DatasetDefaults.sampling_confidence_interval,
+                **kwargs,
+            )
+        if parent is None:
+            cols = sampled_df.columns.tolist()
+            cols.insert(0, cols.pop(cols.index(target)))
+            ADSDataset.__init__(
+                self,
+                df,
+                sampled_df[[*cols]],
+                shape,
+                name=name,
+                description=description,
+                type_discovery=type_discovery,
+                types=types,
+                progress=progress,
+                metadata=metadata,
+                transformer_pipeline=transformer_pipeline,
+                sample_max_rows=sample_max_rows,
+            )
+        else:
+            self.__dict__ = parent.__dict__.copy()
+            cols = self.sampled_df.columns.tolist()
+            cols.insert(0, cols.pop(cols.index(target)))
+            self.sampled_df = parent.sampled_df[[*cols]]
+            # if parent has already been built, just reorder the columns to display the plot for target at beginning
+            if parent.correlation is None:
+                self.corr_futures = parent.corr_futures
+            else:
+                corr_cols = parent.sampled_df.select_dtypes(
+                    exclude=["object"]
+                ).columns.values.tolist()
+                corr_cols.insert(0, corr_cols.pop(corr_cols.index(target)))
+                self.correlation = parent.correlation.reindex(corr_cols)[[corr_cols]]
+            self.feature_types = parent.feature_types
+            self.feature_dist_html_dict = {}
+            if len(parent.feature_dist_html_dict) > 0:
+                parent_feature_dist_html_dict = parent.feature_dist_html_dict.copy()
+                self.feature_dist_html_dict = {
+                    target: parent_feature_dist_html_dict.pop(target)
+                }
+                self.feature_dist_html_dict.update(parent_feature_dist_html_dict)
+        # drop all rows where target is nan
+        target = target.strip().replace(" ", "_")
+        #
+        # as an optimization only dropna and regenerate sample when the target
+        # has na values
+        #
+        if self.df[target].isna().sum():
+            #
+            # remove rows for which the target is null
+            #
+            self.df = self.df.dropna(subset=[target])
+            #
+            # we cannot simply drop null values from the sampled_df after a change
+            # to the df - we must rebuild the sample from the new df
+            #
+            self.sampled_df = helper.generate_sample(
+                self.df,
+                sampled_df.shape[0],
+                helper.DatasetDefaults.sampling_confidence_level,
+                helper.DatasetDefaults.sampling_confidence_interval,
+            )
+            #
+            # after regenerating the sample we need to move the target back to the head
+            #
+            cols = self.sampled_df.columns.tolist()
+            cols.insert(0, cols.pop(cols.index(target)))
+            self.sampled_df = self.sampled_df[[*cols]]
+        if target_type is None:
+            target_type = get_target_type(target, sampled_df, **kwargs)
+        self.target = TargetVariable(self, target, target_type)
+        # remove target from type discovery conversion
+        for step in self.transformer_pipeline.steps:
+            if (
+                step[0] == "type_discovery"
+                and self.target.name in step[1].kw_args["dtypes"]
+            ):
+                step[1].kw_args["dtypes"].pop(self.target.name)
+    @staticmethod
+    def from_dataframe(
+        df: pd.DataFrame,
+        target: str,
+        sampled_df: pd.DataFrame = None,
+        shape: Tuple[int, int] = None,
+        target_type: TypedFeature = None,
+        positive_class=None,
+        **init_kwargs,
+    ):
+        from ads.dataset.classification_dataset import (
+            BinaryClassificationDataset,
+            BinaryTextClassificationDataset,
+            MultiClassClassificationDataset,
+            MultiClassTextClassificationDataset
+        )
+        from ads.dataset.forecasting_dataset import ForecastingDataset
+        from ads.dataset.regression_dataset import RegressionDataset
+        if sampled_df is None:
+            sampled_df = generate_sample(
+                df,
+                (shape or df.shape)[0],
+                DatasetDefaults.sampling_confidence_level,
+                DatasetDefaults.sampling_confidence_interval,
+                **init_kwargs,
+            )
+        if target not in df:
+            raise ValueError(
+                f"{target} column doesn't exist in data frame. Specify a valid one instead."
+            )
+        if target_type is None:
+            target_type = get_target_type(target, sampled_df, **init_kwargs)
+        if len(df[target].dropna()) == 0:
+            logger.warning(
+                "It is not recommended to use an empty column as the target variable."
+            )
+            raise ValueError(
+                f"We do not support using empty columns as the chosen target"
+            )
+        if utils.is_same_class(target_type, ContinuousTypedFeature):
+            return RegressionDataset(
+                df=df,
+                sampled_df=sampled_df,
+                target=target,
+                target_type=target_type,
+                shape=shape,
+                **init_kwargs,
+            )
+        elif utils.is_same_class(
+            target_type, DateTimeTypedFeature
+        ) or df.index.dtype.name.startswith("datetime"):
+            return ForecastingDataset(
+                df=df,
+                sampled_df=sampled_df,
+                target=target,
+                target_type=target_type,
+                shape=shape,
+                **init_kwargs,
+            )
+        # Adding ordinal typed feature, but ultimately we should rethink how we want to model this type
+        elif utils.is_same_class(target_type, CategoricalTypedFeature) or utils.is_same_class(
+            target_type, OrdinalTypedFeature
+        ):
+            if target_type.meta_data["internal"]["unique"] == 2:
+                if is_text_data(sampled_df, target):
+                    return BinaryTextClassificationDataset(
+                        df=df,
+                        sampled_df=sampled_df,
+                        target=target,
+                        shape=shape,
+                        target_type=target_type,
+                        positive_class=positive_class,
+                        **init_kwargs,
+                    )
+                return BinaryClassificationDataset(
+                    df=df,
+                    sampled_df=sampled_df,
+                    target=target,
+                    shape=shape,
+                    target_type=target_type,
+                    positive_class=positive_class,
+                    **init_kwargs,
+                )
+            else:
+                if is_text_data(sampled_df, target):
+                    return MultiClassTextClassificationDataset(
+                        df=df,
+                        sampled_df=sampled_df,
+                        target=target,
+                        target_type=target_type,
+                        shape=shape,
+                        **init_kwargs,
+                    )
+                return MultiClassClassificationDataset(
+                    df=df,
+                    sampled_df=sampled_df,
+                    target=target,
+                    target_type=target_type,
+                    shape=shape,
+                    **init_kwargs,
+                )
+        elif (
+            utils.is_same_class(target, DocumentTypedFeature)
+            or "text" in target_type["type"]
+            or "text" in target
+        ):
+            raise ValueError(
+                f"The column {target} cannot be used as the target column."
+            )
+        elif (
+            utils.is_same_class(target_type, GISTypedFeature)
+            or "coord" in target_type["type"]
+            or "coord" in target
+        ):
+            raise ValueError(
+                f"The column {target} cannot be used as the target column."
+            )
+        # This is to catch constant columns that are boolean. Added as a fix for pd.isnull(), and datasets with a
+        #   binary target, but only data on one instance
+        elif target_type and target_type["low_level_type"] == "bool":
+            return BinaryClassificationDataset(
+                df=df,
+                sampled_df=sampled_df,
+                target=target,
+                shape=shape,
+                target_type=target_type,
+                positive_class=positive_class,
+                **init_kwargs,
+            )
+        raise ValueError(
+            f"Unable to identify problem type. Specify the data type of {target} using 'types'. "
+            f"For example, types = {{{target}: 'category'}}"
+        )
+    def rename_columns(self, columns):
+        """
+        Returns a dataset with columns renamed.
+        """
+        if isinstance(columns, list):
+            assert len(columns) == len(
+                self.columns.values
+            ), "columns length do not match the dataset"
+            columns = dict(zip(self.columns.values, columns))
+        assert isinstance(columns, dict)
+        new_target = None
+        if self.target.name in columns:
+            new_target = columns[self.target.name]
+        return self.rename(columns=columns, _new_target=new_target)
+    def select_best_features(self, score_func=None, k=12):
+        """
+        Return new dataset containing only the top k features.
+        Parameters
+        ----------
+        k: int, default 12
+            The top 'k' features to select.
+        score_func: function
+            Scoring function to use to rank the features. This scoring function should take a 2d array X(features)
+            and an array like y(target) and return a numeric score for each feature in the same order as X.
+        Notes
+        -----
+        See also https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.f_regression.html
+        and https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.f_classif.html
+        Examples
+        --------
+        >>> ds = DatasetBrowser("sklearn").open("iris")
+        >>> ds_small = ds.select_best_features(k=2)
+        """
+        tf = self._get_best_features_transformer(score_func=score_func, k=k)
+        return self._build_new_dataset(
+            tf[1].transform(self.df), tf[1].transform(self.sampled_df), transformers=tf
+        )
+    def auto_transform(
+        self,
+        correlation_threshold: float = 0.7,
+        frac: float = 1.0,
+        sample_size=1.0,
+        correlation_methods: Union[str, list] = "pearson",
+    ):
+        """
+        Return transformed dataset with several optimizations applied automatically.
+        The optimizations include:
+        - Dropping constant and primary key columns, which has no predictive quality,
+        - Imputation, to fill in missing values in noisy data:
+            - For continuous variables, fill with mean if less than 40% is missing, else drop,
+            - For categorical variables, fill with most frequent if less than 40% is missing, else drop,
+        - Dropping strongly co-correlated columns that tend to produce less generalizable models.
+        Parameters
+        ----------
+        correlation_threshold: float, defaults to 0.7. It must be between 0 and 1, inclusive
+            the correlation threshold where columns with correlation higher than the threshold will
+            be considered as strongly co-correlated and recommended to be taken care of.
+        frac: Is superseded by sample_size
+        sample_size: float, defaults to 1.0. Float, Range -> (0, 1]
+            What fraction of the data should be used in the calculation?
+        correlation_methods: Union[list, str], defaults to 'pearson'
+            - 'pearson': Use Pearson's Correlation between continuous features,
+            - 'cramers v': Use Cramer's V correlations between categorical features,
+            - 'correlation ratio': Use Correlation Ratio Correlation between categorical and continuous features,
+            - 'all': Is equivalent to ['pearson', 'cramers v', 'correlation ratio'].
+            Or a list containing any combination of these methods, for example, ['pearson', 'cramers v'].
+        Returns
+        -------
+        transformed_dataset : ADSDatasetWithTarget
+        Examples
+        --------
+        >>> ds_clean = ds.auto_transform()
+        """
+        frac = deprecate_default_value(
+            frac,
+            None,
+            1,
+            "<code>frac=None</code> is deprecated. Use <code>sample_size=1.0</code> instead.",
+            FutureWarning,
+        )
+        if frac != 1.0:
+            deprecate_frac = deprecate_variable(
+                frac,
+                sample_size,
+                "<code>frac</code> is superseded by <code>sample_size</code>.",
+                DeprecationWarning,
+            )
+            if sample_size == 1.0:
+                sample_size = deprecate_frac
+        if correlation_threshold > 1 or correlation_threshold < 0:
+            raise AssertionError("correlation_threshold has to be between 0 and 1.")
+        with utils.get_progress_bar(5) as progress:
+            df, sampled_df, transformer_pipeline = self._transform(
+                progress=progress,
+                correlation_threshold=correlation_threshold,
+                frac=sample_size,
+                correlation_methods=correlation_methods,
+            )
+            return self._build_new_dataset(
+                df,
+                sampled_df=sampled_df,
+                transformers=transformer_pipeline.steps,
+                progress=progress,
+            )
+    def visualize_transforms(self):
+        """
+        Render a representation of the dataset's transform DAG.
+        """
+        helper.visualize_transformation(
+            self.transformer_pipeline,
+            text="- rows: {}\\l- columns: {}\\l".format(*self.shape),
+        )
+    def _suggested_code(self, action, recommendation_type, variable):
+        if action == "Drop":
+            return ".drop_columns([{}])".format('"' + variable + '"')
+        if action == "Do nothing":
+            return ""
+        if "Drop " in action:
+            return ".drop_columns([{}])".format('"' + action.split(" ")[1] + '"')
+        if action == "Down-sample":
+            return ".down_sample()"
+        if action == "Up-sample":
+            if importlib.util.find_spec("imblearn") is None:
+                return ".up_sample(sampler='default') \\n `pip install imbalanced-learn` to use default up-sampler."
+            else:
+                return ".up_sample(sampler='default')"
+        if recommendation_type == "positive_class" and action != "Do nothing":
+            return ".set_positive_class({}, missing_value=False)".format(
+                '"' + action + '"'
+            )
+        if recommendation_type == "imputation":
+            fill_val = helper.get_fill_val(
+                self.feature_types, variable, action, constant="constant"
+            )
+            fill_val = (
+                fill_val if isinstance(fill_val, Number) else '"' + fill_val + '"'
+            )
+            return ".fillna({}{}: {}{})".format(
+                "{", '"' + variable + '"', fill_val, "}"
+            )
+        else:
+            return ""
+    def suggest_recommendations(
+        self,
+        correlation_methods: Union[str, list] = "pearson",
+        print_code: bool = True,
+        correlation_threshold: float = 0.7,
+        overwrite: bool = None,
+        force_recompute: bool = False,
+        frac: float = 1.0,
+        sample_size: float = 1.0,
+        **kwargs,
+    ):
+        """
+        Returns a pandas dataframe with suggestions for dataset optimization. This includes:
+        - Identifying constant and primary key columns, which has no predictive quality,
+        - Imputation, to fill in missing values in noisy data:
+            - For continuous variables, fill with mean if less than 40% is missing, else drop,
+            - For categorical variables, fill with most frequent if less than 40% is missing, else drop,
+        - Identifying strongly co-correlated columns that tend to produce less generalizable models,
+        - Automatically balancing dataset for classification problems using up or down sampling.
+        Parameters
+        ----------
+        correlation_methods: Union[list, str], default to 'pearson'
+            - 'pearson': Use Pearson's Correlation between continuous features,
+            - 'cramers v': Use Cramer's V correlations between categorical features,
+            - 'correlation ratio': Use Correlation Ratio Correlation between categorical and continuous features,
+            - 'all': Is equivalent to ['pearson', 'cramers v', 'correlation ratio'].
+            Or a list containing any combination of these methods, for example, ['pearson', 'cramers v']
+        print_code: bool, Defaults to True
+            Print Python code for the suggested actions.
+        correlation_threshold: float. Defaults to 0.7. It must be between 0 and 1, inclusive
+            the correlation threshold where columns with correlation higher than the threshold will
+            be considered as strongly co-correated and recommended to be taken care of.
+        frac: Is superseded by sample_size
+        sample_size: float, defaults to 1.0. Float, Range -> (0, 1]
+            What fraction of the data should be used in the calculation?
+        overwrite:
+            Is deprecated and replaced by force_recompute.
+        force_recompute: bool, default to be False
+            - If False, it calculates the correlation matrix if there is no cached correlation matrix. Otherwise,
+              it returns the cached correlation matrix.
+            - If True, it calculates the correlation matrix regardless whether there is cached result or not.
+        Returns
+        -------
+        suggestion dataframe : pandas.DataFrame
+        Examples
+        --------
+        >>> suggestion_df = ds.suggest_recommendations(correlation_threshold=0.7)
+        """
+        frac = deprecate_default_value(
+            frac,
+            None,
+            1,
+            "<code>frac=None</code> is deprecated. Use <code>sample_size=1.0</code>.",
+            FutureWarning,
+        )
+        if frac != 1.0:
+            deprecate_frac = deprecate_variable(
+                frac,
+                sample_size,
+                "<code>frac</code> is superseded by <code>sample_size</code>.",
+                DeprecationWarning,
+            )
+            if sample_size == 1.0:
+                sample_size = deprecate_frac
+        force_recompute = deprecate_variable(
+            overwrite,
+            force_recompute,
+            f"<code>overwrite=None</code> is deprecated. Use <code>force_recompute</code> instead.",
+            DeprecationWarning,
+        )
+        recommended = self._get_recommendations_transformer(
+            auto_transform=False,
+            correlation_threshold=correlation_threshold,
+            correlation_methods=correlation_methods,
+            force_recompute=force_recompute,
+            frac=sample_size,
+            **kwargs,
+        ).fit(self.sampled_df)
+        if len(recommended.reco_dict_) == 0:
+            logger.info("No recommendations.")
+            return pd.DataFrame()
+        column_names = [
+            "Message",
+            "Variables",
+            "Action",
+            "Selected Action",
+            "Recommendation Type",
+        ]
+        df_dict = defaultdict(list)
+        for recommendation_type, column_dict in recommended.reco_dict_.items():
+            if recommendation_type == "constant_column":
+                n_constant = len(column_dict)
+                df_dict["Recommendation Type"].extend(
+                    [recommendation_type] * n_constant
+                )
+                df_dict["Variables"].extend(column_dict)
+                df_dict["Message"].extend(["Constant Column"] * n_constant)
+                df_dict["Action"].extend(["Drop"] * n_constant)
+                df_dict["Selected Action"].extend(["Drop"] * n_constant)
+                continue
+            for column, details_dict in column_dict.items():
+                max_length = len(details_dict["Action"])
+                for key, value in details_dict.items():
+                    if isinstance(value, list):
+                        df_dict[key].extend(value)
+                    else:
+                        df_dict[key].extend([value] * max_length)
+                df_dict["Recommendation Type"].extend(
+                    [recommendation_type] * max_length
+                )
+                df_dict["Variables"].extend([column] * max_length)
+        suggestions_df = pd.DataFrame.from_dict(df_dict)[column_names]
+        suggestions_df["Code"] = suggestions_df.apply(
+            lambda x: self._suggested_code(
+                x["Action"], x["Recommendation Type"], x["Variables"]
+            ),
+            axis=1,
+        )
+        suggestion_df = (
+            suggestions_df.drop(columns=["Recommendation Type"])
+            .rename(columns={"Selected Action": "Suggested"})
+            .set_index(["Message", "Variables", "Suggested", "Action"])
+            .fillna("")
+        )
+        if print_code:
+            columns_to_impute = {}
+            columns_to_drop = []
+            consolidated_code = ""
+            suggestion_df_ = suggestion_df.reset_index()
+            suggested_code = suggestion_df_.loc[
+                suggestion_df_.Suggested == suggestion_df_.Action
+            ].Code.unique()
+            for code in suggested_code:
+                if ".drop_columns" in code:
+                    columns_to_drop.append(code.split("[")[1].split("]")[0][1:-1])
+                elif ".fillna" in code:
+                    impute_pair = code.split("{")[1].split("}")[0]
+                    columns_to_impute[impute_pair.split(":")[0].replace('"', "")] = (
+                        float(impute_pair.split(":")[1].strip())
+                        if impute_pair.split(":")[1].strip().replace(".", "").isdigit()
+                        else impute_pair.split(":")[1].strip().replace('"', "")
+                    )
+                else:
+                    consolidated_code += code
+            consolidated_code = (
+                "No more!" if len(consolidated_code) == 0 else consolidated_code
+            )
+            logger.info(f"Suggested columns to drop: {columns_to_drop}.")
+            logger.info(f"Suggested columns to impute: {columns_to_impute}.")
+            logger.info(f"Others: {consolidated_code}.")
+        return suggestion_df
+    @runtime_dependency(module="IPython", install_from=OptionalDependency.NOTEBOOK)
+    def get_recommendations(
+        self,
+        correlation_methods: str = "pearson",
+        correlation_threshold: float = 0.7,
+        frac: float = 1.0,
+        sample_size: float = 1.0,
+        overwrite: bool = None,
+        force_recompute: bool = False,
+        display_format: str = "widget",
+    ):
+        """
+        Generate recommendations for dataset optimization. This includes:
+        - Identifying constant and primary key columns, which has no predictive quality,
+        - Imputation, to fill in missing values in noisy data:
+            - For continuous variables, fill with mean if less than 40% is missing, else drop,
+            - For categorical variables, fill with most frequent if less than 40% is missing, else drop,
+        - Identifying strongly co-correlated columns that tend to produce less generalizable models,
+        - Automatically balancing dataset for classification problems using up or down sampling.
+        Parameters
+        ----------
+        correlation_methods: Union[list, str], default to 'pearson'
+            - 'pearson': Use Pearson's Correlation between continuous features,
+            - 'cramers v': Use Cramer's V correlations between categorical features,
+            - 'correlation ratio': Use Correlation Ratio Correlation between categorical and continuous features,
+            - 'all': Is equivalent to ['pearson', 'cramers v', 'correlation ratio'].
+            Or a list containing any combination of these methods, for example, ['pearson', 'cramers v'].
+        correlation_threshold: float, defaults to 0.7. It must be between 0 and 1, inclusive
+            The correlation threshold where columns with correlation higher than the threshold will
+            be considered as strongly co-correlated and recommended to be taken care of.
+        frac: Is superseded by sample_size
+        sample_size: float, defaults to 1.0. Float, Range -> (0, 1]
+            What fraction of the data should be used in the calculation?
+        overwrite:
+            Is deprecated and replaced by force_recompute.
+        force_recompute: bool, default to be False
+            - If False, it calculates the correlation matrix if there is no cached correlation matrix. Otherwise,
+              it returns the cached correlation matrix.
+            - If True, it calculates the correlation matrix regardless whether there is cached result or not.
+        display_format: string, defaults to 'widget'.
+            Should be either 'widget' or 'table'. If 'widget',
+            a GUI style interface is popped out; if 'table', a table of suggestions is shown.
+        """
+        frac = deprecate_default_value(
+            frac,
+            None,
+            1,
+            "<code>frac=None</code> is superseded by <code>sample_size=1.0</code>.",
+            FutureWarning,
+        )
+        if frac != 1.0:
+            deprecate_frac = deprecate_variable(
+                frac,
+                sample_size,
+                "<code>frac</code> is superseded by <code>sample_size</code>.",
+                DeprecationWarning,
+            )
+            if sample_size == 1.0:
+                sample_size = deprecate_frac
+        force_recompute = deprecate_variable(
+            overwrite,
+            force_recompute,
+            f"<code>overwrite=None</code> is deprecated. Use <code>force_recompute</code> instead.",
+            DeprecationWarning,
+        )
+        if display_format == "widget":
+            recommended = Recommendation(
+                self,
+                self._get_recommendations_transformer(
+                    auto_transform=False,
+                    correlation_threshold=correlation_threshold,
+                    correlation_methods=correlation_methods,
+                    frac=sample_size,
+                    force_recompute=force_recompute,
+                ).fit(self.sampled_df),
+            )
+            if len(recommended.reco_dict) == 0:
+                logger.info("No recommendations.")
+            return recommended
+        elif display_format == "table":
+            df_suggestion = self.suggest_recommendations(
+                correlation_threshold=correlation_threshold,
+                frac=sample_size,
+                force_recompute=force_recompute,
+            )
+            from IPython.display import HTML, display
+            display(
+                HTML(
+                    df_suggestion.to_html()
+                    .replace(" `", "<code>")
+                    .replace("` ", "</code>")
+                    .replace("\\n", "<br>")
+                )
+            )
+    def get_transformed_dataset(self):
+        """
+        Return the transformed dataset with the recommendations applied.
+        This method should be called after applying the recommendations using the Recommendation#show_in_notebook() API.
+        """
+        if hasattr(self, "new_ds"):
+            return self.new_ds
+        logger.info(
+            "Use `get_recommendations()` to view or update recommendation or `auto_tranform()` first."
+        )
+        logger.warning(
+            "`get_transformed_dataset` is deprecated and will be removed in a future release."
+        )
+        return
+    def type_of_target(self):
+        """
+        Return the target type for the dataset.
+        Returns
+        -------
+        target_type: TypedFeature
+            an object of TypedFeature
+        Examples
+        --------
+        >>> ds = ds.set_target('target_class')
+        >>> assert(ds.type_of_target() == 'categorical')
+        """
+        return self.target.type
+    def train_test_split(self, test_size=0.1, random_state=utils.random_state):
+        """
+        Splits  dataset to train and test data.
+        Parameters
+        ----------
+        test_size: Union[float, int], optional, default=0.1
+        random_state: Union[int, RandomState], optional, default=None
+                - If int, random_state is the seed used by the random number generator;
+                - If RandomState instance, random_state is the random number generator;
+                - If None, the random number generator is the RandomState instance used by np.random.
+        Returns
+        -------
+        train_data, test_data: tuple
+            tuple of ADSData instances
+        Examples
+        --------
+        >>> ds = DatasetFactory.open("data.csv")
+        >>> train, test = ds.train_test_split()
+        """
+        X_train, X_test, y_train, y_test = train_test_split(
+            self.df.drop(self.target.name, axis=1),
+            self.df[self.target.name],
+            test_size=test_size,
+            train_size=1 - test_size,
+            random_state=random_state,
+        )
+        train = ADSData.build(
+            X=X_train, y=y_train, name="Train Data", dataset_type=self.__class__
+        )
+        train.transformer_pipeline = self.transformer_pipeline
+        test = ADSData.build(
+            X=X_test, y=y_test, name="Test Data", dataset_type=self.__class__
+        )
+        return train, test
+    def train_validation_test_split(
+        self, test_size=0.1, validation_size=0.1, random_state=utils.random_state
+    ):
+        """
+        Splits  dataset to train, validation and test data.
+        Parameters
+        ----------
+        test_size: Union[float, int], optional, default=0.1
+        validation_size: Union[float, int], optional, default=0.1
+        random_state: Union[int, RandomState], optional, default=None
+                - If int, random_state is the seed used by the random number generator;
+                - If RandomState instance, random_state is the random number generator;
+                - If None, the random number generator is the RandomState instance used by np.random.
+        Returns
+        -------
+        train_data, validation_data, test_data: tuple
+            tuple of ADSData instances
+        Examples
+        --------
+        >>> ds = DatasetFactory.open("data.csv")
+        >>> train, valid, test = ds.train_validation_test_split()
+        """
+        train, test = self.train_test_split(
+            test_size=test_size, random_state=random_state
+        )
+        X_train, X_valid, y_train, y_valid = train_test_split(
+            train.X, train.y, test_size=validation_size, random_state=random_state
+        )
+        train.X = X_train
+        train.y = y_train
+        valid = ADSData.build(
+            X=X_valid, y=y_valid, name="Validation Data", dataset_type=self.__class__
+        )
+        return train, valid, test
+    """
+    Internal methods
+    """
+    def __repr__(self):
+        rows, cols = self.shape
+        return f"{self.__class__.__name__}(target: {self.target.name}) {rows:,} rows, {cols:,} columns"
+    def _transform(
+        self,
+        progress=DummyProgressBar(),
+        fix_imbalance=True,
+        correlation_threshold=0.7,
+        frac=None,
+        correlation_methods="pearson",
+    ):
+        progress.update("Building the transformer pipeline")
+        if self.recommendation_transformer is None:
+            transformer_pipeline = TransformerPipeline(
+                steps=[
+                    (
+                        "recommendations",
+                        self._get_recommendations_transformer(
+                            fix_imbalance=fix_imbalance,
+                            correlation_threshold=correlation_threshold,
+                            frac=frac,
+                            correlation_methods=correlation_methods,
+                        ),
+                    ),
+                    (
+                        "feature_engineering",
+                        FeatureEngineeringTransformer(
+                            feature_metadata=self.feature_types
+                        ),
+                    ),
+                ]
+            )
+        else:
+            # recommendations are already generated using get_recommendations().show_in_notebook() API
+            transformer_pipeline = TransformerPipeline(
+                steps=[
+                    (
+                        "feature_engineering",
+                        FeatureEngineeringTransformer(
+                            feature_metadata=self.feature_types
+                        ),
+                    )
+                ]
+            )
+            transformer_pipeline.steps = [
+                ("recommendations", self.recommendation_transformer)
+            ] + transformer_pipeline.steps
+        sampled_df = self.sampled_df.copy()
+        self.recommendation_transformer = None
+        df = self.df.copy()
+        for step in transformer_pipeline.steps:
+            progress.update("Applying transformation for %s" % step[0])
+            sampled_df = step[1].fit_transform(sampled_df)
+            df = step[1].transform(df, progress=progress, fit_transform=True)
+        return df, sampled_df, transformer_pipeline
+    def _get_best_features(self, score_func=None, k=12):
+        if isinstance(self.target.type, DateTimeTypedFeature):
+            return FeatureImportance._get_feature_ranking(
+                self.sampled_df.copy(),
+                self.target.name,
+                self.type_of_target(),
+                score_func=score_func,
+                k=k,
+            )
+        else:
+            return FeatureImportance._get_feature_ranking(
+                self.sampled_df.copy(),
+                self.target.name,
+                self.type_of_target(),
+                score_func=score_func,
+                k=k,
+            )
+    def _get_best_features_transformer(self, score_func=None, k=12):
+        feature_set = self._get_best_features(k=k, score_func=score_func)[
+            "features"
+        ].tolist()
+        def _select_features(df, feature_names, target):
+            if target in df.columns:
+                feature_names = feature_names + [target]
+            return df[feature_names]
+        return (
+            "select_{0}_best_features".format(k),
+            FunctionTransformer(
+                func=_select_features,
+                validate=False,
+                kw_args={"feature_names": feature_set, "target": self.target.name},
+            ).fit(self.sampled_df),
+        )
+    def _get_recommendations_transformer(
+        self,
+        fix_imbalance=True,
+        auto_transform=True,
+        correlation_threshold=0.7,
+        **kwargs,
+    ):
+        force_recompute = kwargs.pop("force_recompute", False)
+        frac = kwargs.pop("frac", 1)
+        correlation_methods = kwargs.pop("correlation_methods", "pearson")
+        return RecommendationTransformer(
+            feature_metadata=self.feature_types,
+            correlation=self.corr(
+                force_recompute=force_recompute,
+                frac=frac,
+                correlation_methods=correlation_methods,
+                **kwargs,
+            ),
+            target=self.target.name,
+            target_type=self.target.type,
+            is_balanced=self.target.is_balanced(),
+            feature_ranking=self._get_best_features(k=len(self.sampled_df)),
+            fix_imbalance=fix_imbalance,
+            len=self.__len__(),
+            auto_transform=auto_transform,
+            correlation_threshold=correlation_threshold,
+        )

oracle-ads 2.13.9rc0__py3-none-any.whl → 2.13.9rc1__py3-none-any.whl

oracle-ads 2.13.9rc0__py3-none-any.whl → 2.13.9rc1__py3-none-any.whl