oracle-ads 2.13.8__py3-none-any.whl → 2.13.9rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {oracle_ads-2.13.8.dist-info → oracle_ads-2.13.9rc0.dist-info}/METADATA +151 -151
- oracle_ads-2.13.9rc0.dist-info/RECORD +9 -0
- {oracle_ads-2.13.8.dist-info → oracle_ads-2.13.9rc0.dist-info}/WHEEL +2 -1
- {oracle_ads-2.13.8.dist-info → oracle_ads-2.13.9rc0.dist-info}/entry_points.txt +1 -2
- oracle_ads-2.13.9rc0.dist-info/top_level.txt +1 -0
- ads/aqua/__init__.py +0 -40
- ads/aqua/app.py +0 -506
- ads/aqua/cli.py +0 -96
- ads/aqua/client/__init__.py +0 -3
- ads/aqua/client/client.py +0 -836
- ads/aqua/client/openai_client.py +0 -305
- ads/aqua/common/__init__.py +0 -5
- ads/aqua/common/decorator.py +0 -125
- ads/aqua/common/entities.py +0 -266
- ads/aqua/common/enums.py +0 -122
- ads/aqua/common/errors.py +0 -109
- ads/aqua/common/utils.py +0 -1285
- ads/aqua/config/__init__.py +0 -4
- ads/aqua/config/container_config.py +0 -248
- ads/aqua/config/evaluation/__init__.py +0 -4
- ads/aqua/config/evaluation/evaluation_service_config.py +0 -147
- ads/aqua/config/utils/__init__.py +0 -4
- ads/aqua/config/utils/serializer.py +0 -339
- ads/aqua/constants.py +0 -114
- ads/aqua/data.py +0 -14
- ads/aqua/dummy_data/icon.txt +0 -1
- ads/aqua/dummy_data/oci_model_deployments.json +0 -56
- ads/aqua/dummy_data/oci_models.json +0 -1
- ads/aqua/dummy_data/readme.md +0 -26
- ads/aqua/evaluation/__init__.py +0 -8
- ads/aqua/evaluation/constants.py +0 -53
- ads/aqua/evaluation/entities.py +0 -186
- ads/aqua/evaluation/errors.py +0 -70
- ads/aqua/evaluation/evaluation.py +0 -1814
- ads/aqua/extension/__init__.py +0 -42
- ads/aqua/extension/aqua_ws_msg_handler.py +0 -76
- ads/aqua/extension/base_handler.py +0 -90
- ads/aqua/extension/common_handler.py +0 -121
- ads/aqua/extension/common_ws_msg_handler.py +0 -36
- ads/aqua/extension/deployment_handler.py +0 -298
- ads/aqua/extension/deployment_ws_msg_handler.py +0 -54
- ads/aqua/extension/errors.py +0 -30
- ads/aqua/extension/evaluation_handler.py +0 -129
- ads/aqua/extension/evaluation_ws_msg_handler.py +0 -61
- ads/aqua/extension/finetune_handler.py +0 -96
- ads/aqua/extension/model_handler.py +0 -390
- ads/aqua/extension/models/__init__.py +0 -0
- ads/aqua/extension/models/ws_models.py +0 -145
- ads/aqua/extension/models_ws_msg_handler.py +0 -50
- ads/aqua/extension/ui_handler.py +0 -282
- ads/aqua/extension/ui_websocket_handler.py +0 -130
- ads/aqua/extension/utils.py +0 -133
- ads/aqua/finetuning/__init__.py +0 -7
- ads/aqua/finetuning/constants.py +0 -23
- ads/aqua/finetuning/entities.py +0 -181
- ads/aqua/finetuning/finetuning.py +0 -731
- ads/aqua/model/__init__.py +0 -8
- ads/aqua/model/constants.py +0 -60
- ads/aqua/model/entities.py +0 -306
- ads/aqua/model/enums.py +0 -30
- ads/aqua/model/model.py +0 -2080
- ads/aqua/modeldeployment/__init__.py +0 -8
- ads/aqua/modeldeployment/constants.py +0 -10
- ads/aqua/modeldeployment/deployment.py +0 -1324
- ads/aqua/modeldeployment/entities.py +0 -653
- ads/aqua/modeldeployment/inference.py +0 -74
- ads/aqua/modeldeployment/utils.py +0 -543
- ads/aqua/resources/gpu_shapes_index.json +0 -94
- ads/aqua/server/__init__.py +0 -4
- ads/aqua/server/__main__.py +0 -24
- ads/aqua/server/app.py +0 -47
- ads/aqua/server/aqua_spec.yml +0 -1291
- ads/aqua/training/__init__.py +0 -4
- ads/aqua/training/exceptions.py +0 -476
- ads/aqua/ui.py +0 -499
- ads/automl/__init__.py +0 -9
- ads/automl/driver.py +0 -330
- ads/automl/provider.py +0 -975
- ads/bds/__init__.py +0 -5
- ads/bds/auth.py +0 -127
- ads/bds/big_data_service.py +0 -255
- ads/catalog/__init__.py +0 -19
- ads/catalog/model.py +0 -1576
- ads/catalog/notebook.py +0 -461
- ads/catalog/project.py +0 -468
- ads/catalog/summary.py +0 -178
- ads/common/__init__.py +0 -11
- ads/common/analyzer.py +0 -65
- ads/common/artifact/.model-ignore +0 -63
- ads/common/artifact/__init__.py +0 -10
- ads/common/auth.py +0 -1122
- ads/common/card_identifier.py +0 -83
- ads/common/config.py +0 -647
- ads/common/data.py +0 -165
- ads/common/decorator/__init__.py +0 -9
- ads/common/decorator/argument_to_case.py +0 -88
- ads/common/decorator/deprecate.py +0 -69
- ads/common/decorator/require_nonempty_arg.py +0 -65
- ads/common/decorator/runtime_dependency.py +0 -178
- ads/common/decorator/threaded.py +0 -97
- ads/common/decorator/utils.py +0 -35
- ads/common/dsc_file_system.py +0 -303
- ads/common/error.py +0 -14
- ads/common/extended_enum.py +0 -81
- ads/common/function/__init__.py +0 -5
- ads/common/function/fn_util.py +0 -142
- ads/common/function/func_conf.yaml +0 -25
- ads/common/ipython.py +0 -76
- ads/common/model.py +0 -679
- ads/common/model_artifact.py +0 -1759
- ads/common/model_artifact_schema.json +0 -107
- ads/common/model_export_util.py +0 -664
- ads/common/model_metadata.py +0 -24
- ads/common/object_storage_details.py +0 -296
- ads/common/oci_client.py +0 -175
- ads/common/oci_datascience.py +0 -46
- ads/common/oci_logging.py +0 -1144
- ads/common/oci_mixin.py +0 -957
- ads/common/oci_resource.py +0 -136
- ads/common/serializer.py +0 -559
- ads/common/utils.py +0 -1852
- ads/common/word_lists.py +0 -1491
- ads/common/work_request.py +0 -189
- ads/data_labeling/__init__.py +0 -13
- ads/data_labeling/boundingbox.py +0 -253
- ads/data_labeling/constants.py +0 -47
- ads/data_labeling/data_labeling_service.py +0 -244
- ads/data_labeling/interface/__init__.py +0 -5
- ads/data_labeling/interface/loader.py +0 -16
- ads/data_labeling/interface/parser.py +0 -16
- ads/data_labeling/interface/reader.py +0 -23
- ads/data_labeling/loader/__init__.py +0 -5
- ads/data_labeling/loader/file_loader.py +0 -241
- ads/data_labeling/metadata.py +0 -110
- ads/data_labeling/mixin/__init__.py +0 -5
- ads/data_labeling/mixin/data_labeling.py +0 -232
- ads/data_labeling/ner.py +0 -129
- ads/data_labeling/parser/__init__.py +0 -5
- ads/data_labeling/parser/dls_record_parser.py +0 -388
- ads/data_labeling/parser/export_metadata_parser.py +0 -94
- ads/data_labeling/parser/export_record_parser.py +0 -473
- ads/data_labeling/reader/__init__.py +0 -5
- ads/data_labeling/reader/dataset_reader.py +0 -574
- ads/data_labeling/reader/dls_record_reader.py +0 -121
- ads/data_labeling/reader/export_record_reader.py +0 -62
- ads/data_labeling/reader/jsonl_reader.py +0 -75
- ads/data_labeling/reader/metadata_reader.py +0 -203
- ads/data_labeling/reader/record_reader.py +0 -263
- ads/data_labeling/record.py +0 -52
- ads/data_labeling/visualizer/__init__.py +0 -5
- ads/data_labeling/visualizer/image_visualizer.py +0 -525
- ads/data_labeling/visualizer/text_visualizer.py +0 -357
- ads/database/__init__.py +0 -5
- ads/database/connection.py +0 -338
- ads/dataset/__init__.py +0 -10
- ads/dataset/capabilities.md +0 -51
- ads/dataset/classification_dataset.py +0 -339
- ads/dataset/correlation.py +0 -226
- ads/dataset/correlation_plot.py +0 -563
- ads/dataset/dask_series.py +0 -173
- ads/dataset/dataframe_transformer.py +0 -110
- ads/dataset/dataset.py +0 -1979
- ads/dataset/dataset_browser.py +0 -360
- ads/dataset/dataset_with_target.py +0 -995
- ads/dataset/exception.py +0 -25
- ads/dataset/factory.py +0 -987
- ads/dataset/feature_engineering_transformer.py +0 -35
- ads/dataset/feature_selection.py +0 -107
- ads/dataset/forecasting_dataset.py +0 -26
- ads/dataset/helper.py +0 -1450
- ads/dataset/label_encoder.py +0 -99
- ads/dataset/mixin/__init__.py +0 -5
- ads/dataset/mixin/dataset_accessor.py +0 -134
- ads/dataset/pipeline.py +0 -58
- ads/dataset/plot.py +0 -710
- ads/dataset/progress.py +0 -86
- ads/dataset/recommendation.py +0 -297
- ads/dataset/recommendation_transformer.py +0 -502
- ads/dataset/regression_dataset.py +0 -14
- ads/dataset/sampled_dataset.py +0 -1050
- ads/dataset/target.py +0 -98
- ads/dataset/timeseries.py +0 -18
- ads/dbmixin/__init__.py +0 -5
- ads/dbmixin/db_pandas_accessor.py +0 -153
- ads/environment/__init__.py +0 -9
- ads/environment/ml_runtime.py +0 -66
- ads/evaluations/README.md +0 -14
- ads/evaluations/__init__.py +0 -109
- ads/evaluations/evaluation_plot.py +0 -983
- ads/evaluations/evaluator.py +0 -1334
- ads/evaluations/statistical_metrics.py +0 -543
- ads/experiments/__init__.py +0 -9
- ads/experiments/capabilities.md +0 -0
- ads/explanations/__init__.py +0 -21
- ads/explanations/base_explainer.py +0 -142
- ads/explanations/capabilities.md +0 -83
- ads/explanations/explainer.py +0 -190
- ads/explanations/mlx_global_explainer.py +0 -1050
- ads/explanations/mlx_interface.py +0 -386
- ads/explanations/mlx_local_explainer.py +0 -287
- ads/explanations/mlx_whatif_explainer.py +0 -201
- ads/feature_engineering/__init__.py +0 -20
- ads/feature_engineering/accessor/__init__.py +0 -5
- ads/feature_engineering/accessor/dataframe_accessor.py +0 -535
- ads/feature_engineering/accessor/mixin/__init__.py +0 -5
- ads/feature_engineering/accessor/mixin/correlation.py +0 -166
- ads/feature_engineering/accessor/mixin/eda_mixin.py +0 -266
- ads/feature_engineering/accessor/mixin/eda_mixin_series.py +0 -85
- ads/feature_engineering/accessor/mixin/feature_types_mixin.py +0 -211
- ads/feature_engineering/accessor/mixin/utils.py +0 -65
- ads/feature_engineering/accessor/series_accessor.py +0 -431
- ads/feature_engineering/adsimage/__init__.py +0 -5
- ads/feature_engineering/adsimage/image.py +0 -192
- ads/feature_engineering/adsimage/image_reader.py +0 -170
- ads/feature_engineering/adsimage/interface/__init__.py +0 -5
- ads/feature_engineering/adsimage/interface/reader.py +0 -19
- ads/feature_engineering/adsstring/__init__.py +0 -7
- ads/feature_engineering/adsstring/oci_language/__init__.py +0 -8
- ads/feature_engineering/adsstring/string/__init__.py +0 -8
- ads/feature_engineering/data_schema.json +0 -57
- ads/feature_engineering/dataset/__init__.py +0 -5
- ads/feature_engineering/dataset/zip_code_data.py +0 -42062
- ads/feature_engineering/exceptions.py +0 -40
- ads/feature_engineering/feature_type/__init__.py +0 -133
- ads/feature_engineering/feature_type/address.py +0 -184
- ads/feature_engineering/feature_type/adsstring/__init__.py +0 -5
- ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +0 -164
- ads/feature_engineering/feature_type/adsstring/oci_language.py +0 -93
- ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +0 -5
- ads/feature_engineering/feature_type/adsstring/parsers/base.py +0 -47
- ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +0 -96
- ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +0 -221
- ads/feature_engineering/feature_type/adsstring/string.py +0 -258
- ads/feature_engineering/feature_type/base.py +0 -58
- ads/feature_engineering/feature_type/boolean.py +0 -183
- ads/feature_engineering/feature_type/category.py +0 -146
- ads/feature_engineering/feature_type/constant.py +0 -137
- ads/feature_engineering/feature_type/continuous.py +0 -151
- ads/feature_engineering/feature_type/creditcard.py +0 -314
- ads/feature_engineering/feature_type/datetime.py +0 -190
- ads/feature_engineering/feature_type/discrete.py +0 -134
- ads/feature_engineering/feature_type/document.py +0 -43
- ads/feature_engineering/feature_type/gis.py +0 -251
- ads/feature_engineering/feature_type/handler/__init__.py +0 -5
- ads/feature_engineering/feature_type/handler/feature_validator.py +0 -524
- ads/feature_engineering/feature_type/handler/feature_warning.py +0 -319
- ads/feature_engineering/feature_type/handler/warnings.py +0 -128
- ads/feature_engineering/feature_type/integer.py +0 -142
- ads/feature_engineering/feature_type/ip_address.py +0 -144
- ads/feature_engineering/feature_type/ip_address_v4.py +0 -138
- ads/feature_engineering/feature_type/ip_address_v6.py +0 -138
- ads/feature_engineering/feature_type/lat_long.py +0 -256
- ads/feature_engineering/feature_type/object.py +0 -43
- ads/feature_engineering/feature_type/ordinal.py +0 -132
- ads/feature_engineering/feature_type/phone_number.py +0 -135
- ads/feature_engineering/feature_type/string.py +0 -171
- ads/feature_engineering/feature_type/text.py +0 -93
- ads/feature_engineering/feature_type/unknown.py +0 -43
- ads/feature_engineering/feature_type/zip_code.py +0 -164
- ads/feature_engineering/feature_type_manager.py +0 -406
- ads/feature_engineering/schema.py +0 -795
- ads/feature_engineering/utils.py +0 -245
- ads/feature_store/.readthedocs.yaml +0 -19
- ads/feature_store/README.md +0 -65
- ads/feature_store/__init__.py +0 -9
- ads/feature_store/common/__init__.py +0 -0
- ads/feature_store/common/enums.py +0 -339
- ads/feature_store/common/exceptions.py +0 -18
- ads/feature_store/common/spark_session_singleton.py +0 -125
- ads/feature_store/common/utils/__init__.py +0 -0
- ads/feature_store/common/utils/base64_encoder_decoder.py +0 -72
- ads/feature_store/common/utils/feature_schema_mapper.py +0 -283
- ads/feature_store/common/utils/transformation_utils.py +0 -82
- ads/feature_store/common/utils/utility.py +0 -403
- ads/feature_store/data_validation/__init__.py +0 -0
- ads/feature_store/data_validation/great_expectation.py +0 -129
- ads/feature_store/dataset.py +0 -1230
- ads/feature_store/dataset_job.py +0 -530
- ads/feature_store/docs/Dockerfile +0 -7
- ads/feature_store/docs/Makefile +0 -44
- ads/feature_store/docs/conf.py +0 -28
- ads/feature_store/docs/requirements.txt +0 -14
- ads/feature_store/docs/source/ads.feature_store.query.rst +0 -20
- ads/feature_store/docs/source/cicd.rst +0 -137
- ads/feature_store/docs/source/conf.py +0 -86
- ads/feature_store/docs/source/data_versioning.rst +0 -33
- ads/feature_store/docs/source/dataset.rst +0 -388
- ads/feature_store/docs/source/dataset_job.rst +0 -27
- ads/feature_store/docs/source/demo.rst +0 -70
- ads/feature_store/docs/source/entity.rst +0 -78
- ads/feature_store/docs/source/feature_group.rst +0 -624
- ads/feature_store/docs/source/feature_group_job.rst +0 -29
- ads/feature_store/docs/source/feature_store.rst +0 -122
- ads/feature_store/docs/source/feature_store_class.rst +0 -123
- ads/feature_store/docs/source/feature_validation.rst +0 -66
- ads/feature_store/docs/source/figures/cicd.png +0 -0
- ads/feature_store/docs/source/figures/data_validation.png +0 -0
- ads/feature_store/docs/source/figures/data_versioning.png +0 -0
- ads/feature_store/docs/source/figures/dataset.gif +0 -0
- ads/feature_store/docs/source/figures/dataset.png +0 -0
- ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
- ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
- ads/feature_store/docs/source/figures/entity.png +0 -0
- ads/feature_store/docs/source/figures/feature_group.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
- ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
- ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
- ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
- ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
- ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
- ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
- ads/feature_store/docs/source/figures/overview.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
- ads/feature_store/docs/source/figures/stats_1.png +0 -0
- ads/feature_store/docs/source/figures/stats_2.png +0 -0
- ads/feature_store/docs/source/figures/stats_d.png +0 -0
- ads/feature_store/docs/source/figures/stats_fg.png +0 -0
- ads/feature_store/docs/source/figures/transformation.png +0 -0
- ads/feature_store/docs/source/figures/transformations.gif +0 -0
- ads/feature_store/docs/source/figures/validation.png +0 -0
- ads/feature_store/docs/source/figures/validation_fg.png +0 -0
- ads/feature_store/docs/source/figures/validation_results.png +0 -0
- ads/feature_store/docs/source/figures/validation_summary.png +0 -0
- ads/feature_store/docs/source/index.rst +0 -81
- ads/feature_store/docs/source/module.rst +0 -8
- ads/feature_store/docs/source/notebook.rst +0 -94
- ads/feature_store/docs/source/overview.rst +0 -47
- ads/feature_store/docs/source/quickstart.rst +0 -176
- ads/feature_store/docs/source/release_notes.rst +0 -194
- ads/feature_store/docs/source/setup_feature_store.rst +0 -81
- ads/feature_store/docs/source/statistics.rst +0 -58
- ads/feature_store/docs/source/transformation.rst +0 -199
- ads/feature_store/docs/source/ui.rst +0 -65
- ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +0 -66
- ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +0 -192
- ads/feature_store/docs/source/user_guides.setup.terraform.rst +0 -338
- ads/feature_store/entity.py +0 -718
- ads/feature_store/execution_strategy/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +0 -375
- ads/feature_store/execution_strategy/engine/__init__.py +0 -0
- ads/feature_store/execution_strategy/engine/spark_engine.py +0 -316
- ads/feature_store/execution_strategy/execution_strategy.py +0 -113
- ads/feature_store/execution_strategy/execution_strategy_provider.py +0 -47
- ads/feature_store/execution_strategy/spark/__init__.py +0 -0
- ads/feature_store/execution_strategy/spark/spark_execution.py +0 -618
- ads/feature_store/feature.py +0 -192
- ads/feature_store/feature_group.py +0 -1494
- ads/feature_store/feature_group_expectation.py +0 -346
- ads/feature_store/feature_group_job.py +0 -602
- ads/feature_store/feature_lineage/__init__.py +0 -0
- ads/feature_store/feature_lineage/graphviz_service.py +0 -180
- ads/feature_store/feature_option_details.py +0 -50
- ads/feature_store/feature_statistics/__init__.py +0 -0
- ads/feature_store/feature_statistics/statistics_service.py +0 -99
- ads/feature_store/feature_store.py +0 -699
- ads/feature_store/feature_store_registrar.py +0 -518
- ads/feature_store/input_feature_detail.py +0 -149
- ads/feature_store/mixin/__init__.py +0 -4
- ads/feature_store/mixin/oci_feature_store.py +0 -145
- ads/feature_store/model_details.py +0 -73
- ads/feature_store/query/__init__.py +0 -0
- ads/feature_store/query/filter.py +0 -266
- ads/feature_store/query/generator/__init__.py +0 -0
- ads/feature_store/query/generator/query_generator.py +0 -298
- ads/feature_store/query/join.py +0 -161
- ads/feature_store/query/query.py +0 -403
- ads/feature_store/query/validator/__init__.py +0 -0
- ads/feature_store/query/validator/query_validator.py +0 -57
- ads/feature_store/response/__init__.py +0 -0
- ads/feature_store/response/response_builder.py +0 -68
- ads/feature_store/service/__init__.py +0 -0
- ads/feature_store/service/oci_dataset.py +0 -139
- ads/feature_store/service/oci_dataset_job.py +0 -199
- ads/feature_store/service/oci_entity.py +0 -125
- ads/feature_store/service/oci_feature_group.py +0 -164
- ads/feature_store/service/oci_feature_group_job.py +0 -214
- ads/feature_store/service/oci_feature_store.py +0 -182
- ads/feature_store/service/oci_lineage.py +0 -87
- ads/feature_store/service/oci_transformation.py +0 -104
- ads/feature_store/statistics/__init__.py +0 -0
- ads/feature_store/statistics/abs_feature_value.py +0 -49
- ads/feature_store/statistics/charts/__init__.py +0 -0
- ads/feature_store/statistics/charts/abstract_feature_plot.py +0 -37
- ads/feature_store/statistics/charts/box_plot.py +0 -148
- ads/feature_store/statistics/charts/frequency_distribution.py +0 -65
- ads/feature_store/statistics/charts/probability_distribution.py +0 -68
- ads/feature_store/statistics/charts/top_k_frequent_elements.py +0 -98
- ads/feature_store/statistics/feature_stat.py +0 -126
- ads/feature_store/statistics/generic_feature_value.py +0 -33
- ads/feature_store/statistics/statistics.py +0 -41
- ads/feature_store/statistics_config.py +0 -101
- ads/feature_store/templates/feature_store_template.yaml +0 -45
- ads/feature_store/transformation.py +0 -499
- ads/feature_store/validation_output.py +0 -57
- ads/hpo/__init__.py +0 -9
- ads/hpo/_imports.py +0 -91
- ads/hpo/ads_search_space.py +0 -439
- ads/hpo/distributions.py +0 -325
- ads/hpo/objective.py +0 -280
- ads/hpo/search_cv.py +0 -1657
- ads/hpo/stopping_criterion.py +0 -75
- ads/hpo/tuner_artifact.py +0 -413
- ads/hpo/utils.py +0 -91
- ads/hpo/validation.py +0 -140
- ads/hpo/visualization/__init__.py +0 -5
- ads/hpo/visualization/_contour.py +0 -23
- ads/hpo/visualization/_edf.py +0 -20
- ads/hpo/visualization/_intermediate_values.py +0 -21
- ads/hpo/visualization/_optimization_history.py +0 -25
- ads/hpo/visualization/_parallel_coordinate.py +0 -169
- ads/hpo/visualization/_param_importances.py +0 -26
- ads/jobs/__init__.py +0 -53
- ads/jobs/ads_job.py +0 -663
- ads/jobs/builders/__init__.py +0 -5
- ads/jobs/builders/base.py +0 -156
- ads/jobs/builders/infrastructure/__init__.py +0 -6
- ads/jobs/builders/infrastructure/base.py +0 -165
- ads/jobs/builders/infrastructure/dataflow.py +0 -1252
- ads/jobs/builders/infrastructure/dsc_job.py +0 -1894
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +0 -1233
- ads/jobs/builders/infrastructure/utils.py +0 -65
- ads/jobs/builders/runtimes/__init__.py +0 -5
- ads/jobs/builders/runtimes/artifact.py +0 -338
- ads/jobs/builders/runtimes/base.py +0 -325
- ads/jobs/builders/runtimes/container_runtime.py +0 -242
- ads/jobs/builders/runtimes/python_runtime.py +0 -1016
- ads/jobs/builders/runtimes/pytorch_runtime.py +0 -204
- ads/jobs/cli.py +0 -104
- ads/jobs/env_var_parser.py +0 -131
- ads/jobs/extension.py +0 -160
- ads/jobs/schema/__init__.py +0 -5
- ads/jobs/schema/infrastructure_schema.json +0 -116
- ads/jobs/schema/job_schema.json +0 -42
- ads/jobs/schema/runtime_schema.json +0 -183
- ads/jobs/schema/validator.py +0 -141
- ads/jobs/serializer.py +0 -296
- ads/jobs/templates/__init__.py +0 -5
- ads/jobs/templates/container.py +0 -6
- ads/jobs/templates/driver_notebook.py +0 -177
- ads/jobs/templates/driver_oci.py +0 -500
- ads/jobs/templates/driver_python.py +0 -48
- ads/jobs/templates/driver_pytorch.py +0 -852
- ads/jobs/templates/driver_utils.py +0 -615
- ads/jobs/templates/hostname_from_env.c +0 -55
- ads/jobs/templates/oci_metrics.py +0 -181
- ads/jobs/utils.py +0 -104
- ads/llm/__init__.py +0 -28
- ads/llm/autogen/__init__.py +0 -2
- ads/llm/autogen/constants.py +0 -15
- ads/llm/autogen/reports/__init__.py +0 -2
- ads/llm/autogen/reports/base.py +0 -67
- ads/llm/autogen/reports/data.py +0 -103
- ads/llm/autogen/reports/session.py +0 -526
- ads/llm/autogen/reports/templates/chat_box.html +0 -13
- ads/llm/autogen/reports/templates/chat_box_lt.html +0 -5
- ads/llm/autogen/reports/templates/chat_box_rt.html +0 -6
- ads/llm/autogen/reports/utils.py +0 -56
- ads/llm/autogen/v02/__init__.py +0 -4
- ads/llm/autogen/v02/client.py +0 -295
- ads/llm/autogen/v02/log_handlers/__init__.py +0 -2
- ads/llm/autogen/v02/log_handlers/oci_file_handler.py +0 -83
- ads/llm/autogen/v02/loggers/__init__.py +0 -6
- ads/llm/autogen/v02/loggers/metric_logger.py +0 -320
- ads/llm/autogen/v02/loggers/session_logger.py +0 -580
- ads/llm/autogen/v02/loggers/utils.py +0 -86
- ads/llm/autogen/v02/runtime_logging.py +0 -163
- ads/llm/chain.py +0 -268
- ads/llm/chat_template.py +0 -31
- ads/llm/deploy.py +0 -63
- ads/llm/guardrails/__init__.py +0 -5
- ads/llm/guardrails/base.py +0 -442
- ads/llm/guardrails/huggingface.py +0 -44
- ads/llm/langchain/__init__.py +0 -5
- ads/llm/langchain/plugins/__init__.py +0 -5
- ads/llm/langchain/plugins/chat_models/__init__.py +0 -5
- ads/llm/langchain/plugins/chat_models/oci_data_science.py +0 -1027
- ads/llm/langchain/plugins/embeddings/__init__.py +0 -4
- ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +0 -184
- ads/llm/langchain/plugins/llms/__init__.py +0 -5
- ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +0 -979
- ads/llm/requirements.txt +0 -3
- ads/llm/serialize.py +0 -219
- ads/llm/serializers/__init__.py +0 -0
- ads/llm/serializers/retrieval_qa.py +0 -153
- ads/llm/serializers/runnable_parallel.py +0 -27
- ads/llm/templates/score_chain.jinja2 +0 -155
- ads/llm/templates/tool_chat_template_hermes.jinja +0 -130
- ads/llm/templates/tool_chat_template_mistral_parallel.jinja +0 -94
- ads/model/__init__.py +0 -52
- ads/model/artifact.py +0 -573
- ads/model/artifact_downloader.py +0 -254
- ads/model/artifact_uploader.py +0 -267
- ads/model/base_properties.py +0 -238
- ads/model/common/.model-ignore +0 -66
- ads/model/common/__init__.py +0 -5
- ads/model/common/utils.py +0 -142
- ads/model/datascience_model.py +0 -2635
- ads/model/deployment/__init__.py +0 -20
- ads/model/deployment/common/__init__.py +0 -5
- ads/model/deployment/common/utils.py +0 -308
- ads/model/deployment/model_deployer.py +0 -466
- ads/model/deployment/model_deployment.py +0 -1846
- ads/model/deployment/model_deployment_infrastructure.py +0 -671
- ads/model/deployment/model_deployment_properties.py +0 -493
- ads/model/deployment/model_deployment_runtime.py +0 -838
- ads/model/extractor/__init__.py +0 -5
- ads/model/extractor/automl_extractor.py +0 -74
- ads/model/extractor/embedding_onnx_extractor.py +0 -80
- ads/model/extractor/huggingface_extractor.py +0 -88
- ads/model/extractor/keras_extractor.py +0 -84
- ads/model/extractor/lightgbm_extractor.py +0 -93
- ads/model/extractor/model_info_extractor.py +0 -114
- ads/model/extractor/model_info_extractor_factory.py +0 -105
- ads/model/extractor/pytorch_extractor.py +0 -87
- ads/model/extractor/sklearn_extractor.py +0 -112
- ads/model/extractor/spark_extractor.py +0 -89
- ads/model/extractor/tensorflow_extractor.py +0 -85
- ads/model/extractor/xgboost_extractor.py +0 -94
- ads/model/framework/__init__.py +0 -5
- ads/model/framework/automl_model.py +0 -178
- ads/model/framework/embedding_onnx_model.py +0 -438
- ads/model/framework/huggingface_model.py +0 -399
- ads/model/framework/lightgbm_model.py +0 -266
- ads/model/framework/pytorch_model.py +0 -266
- ads/model/framework/sklearn_model.py +0 -250
- ads/model/framework/spark_model.py +0 -326
- ads/model/framework/tensorflow_model.py +0 -254
- ads/model/framework/xgboost_model.py +0 -258
- ads/model/generic_model.py +0 -3518
- ads/model/model_artifact_boilerplate/README.md +0 -381
- ads/model/model_artifact_boilerplate/__init__.py +0 -5
- ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +0 -5
- ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +0 -427
- ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +0 -2
- ads/model/model_artifact_boilerplate/runtime.yaml +0 -7
- ads/model/model_artifact_boilerplate/score.py +0 -61
- ads/model/model_file_description_schema.json +0 -68
- ads/model/model_introspect.py +0 -331
- ads/model/model_metadata.py +0 -1810
- ads/model/model_metadata_mixin.py +0 -460
- ads/model/model_properties.py +0 -63
- ads/model/model_version_set.py +0 -739
- ads/model/runtime/__init__.py +0 -5
- ads/model/runtime/env_info.py +0 -306
- ads/model/runtime/model_deployment_details.py +0 -37
- ads/model/runtime/model_provenance_details.py +0 -58
- ads/model/runtime/runtime_info.py +0 -81
- ads/model/runtime/schemas/inference_env_info_schema.yaml +0 -16
- ads/model/runtime/schemas/model_provenance_schema.yaml +0 -36
- ads/model/runtime/schemas/training_env_info_schema.yaml +0 -16
- ads/model/runtime/utils.py +0 -201
- ads/model/serde/__init__.py +0 -5
- ads/model/serde/common.py +0 -40
- ads/model/serde/model_input.py +0 -547
- ads/model/serde/model_serializer.py +0 -1184
- ads/model/service/__init__.py +0 -5
- ads/model/service/oci_datascience_model.py +0 -1076
- ads/model/service/oci_datascience_model_deployment.py +0 -500
- ads/model/service/oci_datascience_model_version_set.py +0 -176
- ads/model/transformer/__init__.py +0 -5
- ads/model/transformer/onnx_transformer.py +0 -324
- ads/mysqldb/__init__.py +0 -5
- ads/mysqldb/mysql_db.py +0 -227
- ads/opctl/__init__.py +0 -18
- ads/opctl/anomaly_detection.py +0 -11
- ads/opctl/backend/__init__.py +0 -5
- ads/opctl/backend/ads_dataflow.py +0 -353
- ads/opctl/backend/ads_ml_job.py +0 -710
- ads/opctl/backend/ads_ml_pipeline.py +0 -164
- ads/opctl/backend/ads_model_deployment.py +0 -209
- ads/opctl/backend/base.py +0 -146
- ads/opctl/backend/local.py +0 -1053
- ads/opctl/backend/marketplace/__init__.py +0 -9
- ads/opctl/backend/marketplace/helm_helper.py +0 -173
- ads/opctl/backend/marketplace/local_marketplace.py +0 -271
- ads/opctl/backend/marketplace/marketplace_backend_runner.py +0 -71
- ads/opctl/backend/marketplace/marketplace_operator_interface.py +0 -44
- ads/opctl/backend/marketplace/marketplace_operator_runner.py +0 -24
- ads/opctl/backend/marketplace/marketplace_utils.py +0 -212
- ads/opctl/backend/marketplace/models/__init__.py +0 -5
- ads/opctl/backend/marketplace/models/bearer_token.py +0 -94
- ads/opctl/backend/marketplace/models/marketplace_type.py +0 -70
- ads/opctl/backend/marketplace/models/ocir_details.py +0 -56
- ads/opctl/backend/marketplace/prerequisite_checker.py +0 -238
- ads/opctl/cli.py +0 -707
- ads/opctl/cmds.py +0 -869
- ads/opctl/conda/__init__.py +0 -5
- ads/opctl/conda/cli.py +0 -193
- ads/opctl/conda/cmds.py +0 -749
- ads/opctl/conda/config.yaml +0 -34
- ads/opctl/conda/manifest_template.yaml +0 -13
- ads/opctl/conda/multipart_uploader.py +0 -188
- ads/opctl/conda/pack.py +0 -89
- ads/opctl/config/__init__.py +0 -5
- ads/opctl/config/base.py +0 -57
- ads/opctl/config/diagnostics/__init__.py +0 -5
- ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +0 -62
- ads/opctl/config/merger.py +0 -255
- ads/opctl/config/resolver.py +0 -297
- ads/opctl/config/utils.py +0 -79
- ads/opctl/config/validator.py +0 -17
- ads/opctl/config/versioner.py +0 -68
- ads/opctl/config/yaml_parsers/__init__.py +0 -7
- ads/opctl/config/yaml_parsers/base.py +0 -58
- ads/opctl/config/yaml_parsers/distributed/__init__.py +0 -7
- ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +0 -201
- ads/opctl/constants.py +0 -66
- ads/opctl/decorator/__init__.py +0 -5
- ads/opctl/decorator/common.py +0 -129
- ads/opctl/diagnostics/__init__.py +0 -5
- ads/opctl/diagnostics/__main__.py +0 -25
- ads/opctl/diagnostics/check_distributed_job_requirements.py +0 -212
- ads/opctl/diagnostics/check_requirements.py +0 -144
- ads/opctl/diagnostics/requirement_exception.py +0 -9
- ads/opctl/distributed/README.md +0 -109
- ads/opctl/distributed/__init__.py +0 -5
- ads/opctl/distributed/certificates.py +0 -32
- ads/opctl/distributed/cli.py +0 -207
- ads/opctl/distributed/cmds.py +0 -731
- ads/opctl/distributed/common/__init__.py +0 -5
- ads/opctl/distributed/common/abstract_cluster_provider.py +0 -449
- ads/opctl/distributed/common/abstract_framework_spec_builder.py +0 -88
- ads/opctl/distributed/common/cluster_config_helper.py +0 -103
- ads/opctl/distributed/common/cluster_provider_factory.py +0 -21
- ads/opctl/distributed/common/cluster_runner.py +0 -54
- ads/opctl/distributed/common/framework_factory.py +0 -29
- ads/opctl/docker/Dockerfile.job +0 -103
- ads/opctl/docker/Dockerfile.job.arm +0 -107
- ads/opctl/docker/Dockerfile.job.gpu +0 -175
- ads/opctl/docker/base-env.yaml +0 -13
- ads/opctl/docker/cuda.repo +0 -6
- ads/opctl/docker/operator/.dockerignore +0 -0
- ads/opctl/docker/operator/Dockerfile +0 -41
- ads/opctl/docker/operator/Dockerfile.gpu +0 -85
- ads/opctl/docker/operator/cuda.repo +0 -6
- ads/opctl/docker/operator/environment.yaml +0 -8
- ads/opctl/forecast.py +0 -11
- ads/opctl/index.yaml +0 -3
- ads/opctl/model/__init__.py +0 -5
- ads/opctl/model/cli.py +0 -65
- ads/opctl/model/cmds.py +0 -73
- ads/opctl/operator/README.md +0 -4
- ads/opctl/operator/__init__.py +0 -31
- ads/opctl/operator/cli.py +0 -344
- ads/opctl/operator/cmd.py +0 -596
- ads/opctl/operator/common/__init__.py +0 -5
- ads/opctl/operator/common/backend_factory.py +0 -460
- ads/opctl/operator/common/const.py +0 -27
- ads/opctl/operator/common/data/synthetic.csv +0 -16001
- ads/opctl/operator/common/dictionary_merger.py +0 -148
- ads/opctl/operator/common/errors.py +0 -42
- ads/opctl/operator/common/operator_config.py +0 -99
- ads/opctl/operator/common/operator_loader.py +0 -811
- ads/opctl/operator/common/operator_schema.yaml +0 -130
- ads/opctl/operator/common/operator_yaml_generator.py +0 -152
- ads/opctl/operator/common/utils.py +0 -208
- ads/opctl/operator/lowcode/__init__.py +0 -5
- ads/opctl/operator/lowcode/anomaly/MLoperator +0 -16
- ads/opctl/operator/lowcode/anomaly/README.md +0 -207
- ads/opctl/operator/lowcode/anomaly/__init__.py +0 -5
- ads/opctl/operator/lowcode/anomaly/__main__.py +0 -103
- ads/opctl/operator/lowcode/anomaly/cmd.py +0 -35
- ads/opctl/operator/lowcode/anomaly/const.py +0 -167
- ads/opctl/operator/lowcode/anomaly/environment.yaml +0 -10
- ads/opctl/operator/lowcode/anomaly/model/__init__.py +0 -5
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +0 -146
- ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +0 -162
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +0 -99
- ads/opctl/operator/lowcode/anomaly/model/autots.py +0 -115
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +0 -404
- ads/opctl/operator/lowcode/anomaly/model/factory.py +0 -110
- ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +0 -78
- ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +0 -78
- ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +0 -120
- ads/opctl/operator/lowcode/anomaly/model/tods.py +0 -119
- ads/opctl/operator/lowcode/anomaly/operator_config.py +0 -127
- ads/opctl/operator/lowcode/anomaly/schema.yaml +0 -401
- ads/opctl/operator/lowcode/anomaly/utils.py +0 -88
- ads/opctl/operator/lowcode/common/__init__.py +0 -5
- ads/opctl/operator/lowcode/common/const.py +0 -10
- ads/opctl/operator/lowcode/common/data.py +0 -116
- ads/opctl/operator/lowcode/common/errors.py +0 -47
- ads/opctl/operator/lowcode/common/transformations.py +0 -296
- ads/opctl/operator/lowcode/common/utils.py +0 -293
- ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +0 -13
- ads/opctl/operator/lowcode/feature_store_marketplace/README.md +0 -30
- ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +0 -5
- ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +0 -116
- ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +0 -85
- ads/opctl/operator/lowcode/feature_store_marketplace/const.py +0 -15
- ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +0 -4
- ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +0 -32
- ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +0 -43
- ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +0 -120
- ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +0 -34
- ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +0 -386
- ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +0 -160
- ads/opctl/operator/lowcode/forecast/MLoperator +0 -25
- ads/opctl/operator/lowcode/forecast/README.md +0 -209
- ads/opctl/operator/lowcode/forecast/__init__.py +0 -5
- ads/opctl/operator/lowcode/forecast/__main__.py +0 -89
- ads/opctl/operator/lowcode/forecast/cmd.py +0 -40
- ads/opctl/operator/lowcode/forecast/const.py +0 -92
- ads/opctl/operator/lowcode/forecast/environment.yaml +0 -20
- ads/opctl/operator/lowcode/forecast/errors.py +0 -26
- ads/opctl/operator/lowcode/forecast/model/__init__.py +0 -5
- ads/opctl/operator/lowcode/forecast/model/arima.py +0 -279
- ads/opctl/operator/lowcode/forecast/model/automlx.py +0 -542
- ads/opctl/operator/lowcode/forecast/model/autots.py +0 -312
- ads/opctl/operator/lowcode/forecast/model/base_model.py +0 -863
- ads/opctl/operator/lowcode/forecast/model/factory.py +0 -106
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +0 -492
- ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +0 -243
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +0 -486
- ads/opctl/operator/lowcode/forecast/model/prophet.py +0 -445
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +0 -244
- ads/opctl/operator/lowcode/forecast/operator_config.py +0 -234
- ads/opctl/operator/lowcode/forecast/schema.yaml +0 -506
- ads/opctl/operator/lowcode/forecast/utils.py +0 -413
- ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +0 -7
- ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +0 -285
- ads/opctl/operator/lowcode/forecast/whatifserve/score.py +0 -246
- ads/opctl/operator/lowcode/pii/MLoperator +0 -17
- ads/opctl/operator/lowcode/pii/README.md +0 -208
- ads/opctl/operator/lowcode/pii/__init__.py +0 -5
- ads/opctl/operator/lowcode/pii/__main__.py +0 -78
- ads/opctl/operator/lowcode/pii/cmd.py +0 -39
- ads/opctl/operator/lowcode/pii/constant.py +0 -84
- ads/opctl/operator/lowcode/pii/environment.yaml +0 -17
- ads/opctl/operator/lowcode/pii/errors.py +0 -27
- ads/opctl/operator/lowcode/pii/model/__init__.py +0 -5
- ads/opctl/operator/lowcode/pii/model/factory.py +0 -82
- ads/opctl/operator/lowcode/pii/model/guardrails.py +0 -167
- ads/opctl/operator/lowcode/pii/model/pii.py +0 -145
- ads/opctl/operator/lowcode/pii/model/processor/__init__.py +0 -34
- ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +0 -34
- ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +0 -35
- ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +0 -225
- ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +0 -73
- ads/opctl/operator/lowcode/pii/model/processor/remover.py +0 -26
- ads/opctl/operator/lowcode/pii/model/report.py +0 -487
- ads/opctl/operator/lowcode/pii/operator_config.py +0 -95
- ads/opctl/operator/lowcode/pii/schema.yaml +0 -108
- ads/opctl/operator/lowcode/pii/utils.py +0 -43
- ads/opctl/operator/lowcode/recommender/MLoperator +0 -16
- ads/opctl/operator/lowcode/recommender/README.md +0 -206
- ads/opctl/operator/lowcode/recommender/__init__.py +0 -5
- ads/opctl/operator/lowcode/recommender/__main__.py +0 -82
- ads/opctl/operator/lowcode/recommender/cmd.py +0 -33
- ads/opctl/operator/lowcode/recommender/constant.py +0 -30
- ads/opctl/operator/lowcode/recommender/environment.yaml +0 -11
- ads/opctl/operator/lowcode/recommender/model/base_model.py +0 -212
- ads/opctl/operator/lowcode/recommender/model/factory.py +0 -56
- ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +0 -25
- ads/opctl/operator/lowcode/recommender/model/svd.py +0 -106
- ads/opctl/operator/lowcode/recommender/operator_config.py +0 -81
- ads/opctl/operator/lowcode/recommender/schema.yaml +0 -265
- ads/opctl/operator/lowcode/recommender/utils.py +0 -13
- ads/opctl/operator/runtime/__init__.py +0 -5
- ads/opctl/operator/runtime/const.py +0 -17
- ads/opctl/operator/runtime/container_runtime_schema.yaml +0 -50
- ads/opctl/operator/runtime/marketplace_runtime.py +0 -50
- ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +0 -21
- ads/opctl/operator/runtime/python_runtime_schema.yaml +0 -21
- ads/opctl/operator/runtime/runtime.py +0 -115
- ads/opctl/schema.yaml.yml +0 -36
- ads/opctl/script.py +0 -40
- ads/opctl/spark/__init__.py +0 -5
- ads/opctl/spark/cli.py +0 -43
- ads/opctl/spark/cmds.py +0 -147
- ads/opctl/templates/diagnostic_report_template.jinja2 +0 -102
- ads/opctl/utils.py +0 -344
- ads/oracledb/__init__.py +0 -5
- ads/oracledb/oracle_db.py +0 -346
- ads/pipeline/__init__.py +0 -39
- ads/pipeline/ads_pipeline.py +0 -2279
- ads/pipeline/ads_pipeline_run.py +0 -772
- ads/pipeline/ads_pipeline_step.py +0 -605
- ads/pipeline/builders/__init__.py +0 -5
- ads/pipeline/builders/infrastructure/__init__.py +0 -5
- ads/pipeline/builders/infrastructure/custom_script.py +0 -32
- ads/pipeline/cli.py +0 -119
- ads/pipeline/extension.py +0 -291
- ads/pipeline/schema/__init__.py +0 -5
- ads/pipeline/schema/cs_step_schema.json +0 -35
- ads/pipeline/schema/ml_step_schema.json +0 -31
- ads/pipeline/schema/pipeline_schema.json +0 -71
- ads/pipeline/visualizer/__init__.py +0 -5
- ads/pipeline/visualizer/base.py +0 -570
- ads/pipeline/visualizer/graph_renderer.py +0 -272
- ads/pipeline/visualizer/text_renderer.py +0 -84
- ads/secrets/__init__.py +0 -11
- ads/secrets/adb.py +0 -386
- ads/secrets/auth_token.py +0 -86
- ads/secrets/big_data_service.py +0 -365
- ads/secrets/mysqldb.py +0 -149
- ads/secrets/oracledb.py +0 -160
- ads/secrets/secrets.py +0 -407
- ads/telemetry/__init__.py +0 -7
- ads/telemetry/base.py +0 -69
- ads/telemetry/client.py +0 -125
- ads/telemetry/telemetry.py +0 -257
- ads/templates/dataflow_pyspark.jinja2 +0 -13
- ads/templates/dataflow_sparksql.jinja2 +0 -22
- ads/templates/func.jinja2 +0 -20
- ads/templates/schemas/openapi.json +0 -1740
- ads/templates/score-pkl.jinja2 +0 -173
- ads/templates/score.jinja2 +0 -322
- ads/templates/score_embedding_onnx.jinja2 +0 -202
- ads/templates/score_generic.jinja2 +0 -165
- ads/templates/score_huggingface_pipeline.jinja2 +0 -217
- ads/templates/score_lightgbm.jinja2 +0 -185
- ads/templates/score_onnx.jinja2 +0 -407
- ads/templates/score_onnx_new.jinja2 +0 -473
- ads/templates/score_oracle_automl.jinja2 +0 -185
- ads/templates/score_pyspark.jinja2 +0 -154
- ads/templates/score_pytorch.jinja2 +0 -219
- ads/templates/score_scikit-learn.jinja2 +0 -184
- ads/templates/score_tensorflow.jinja2 +0 -184
- ads/templates/score_xgboost.jinja2 +0 -178
- ads/text_dataset/__init__.py +0 -5
- ads/text_dataset/backends.py +0 -211
- ads/text_dataset/dataset.py +0 -445
- ads/text_dataset/extractor.py +0 -207
- ads/text_dataset/options.py +0 -53
- ads/text_dataset/udfs.py +0 -22
- ads/text_dataset/utils.py +0 -49
- ads/type_discovery/__init__.py +0 -9
- ads/type_discovery/abstract_detector.py +0 -21
- ads/type_discovery/constant_detector.py +0 -41
- ads/type_discovery/continuous_detector.py +0 -54
- ads/type_discovery/credit_card_detector.py +0 -99
- ads/type_discovery/datetime_detector.py +0 -92
- ads/type_discovery/discrete_detector.py +0 -118
- ads/type_discovery/document_detector.py +0 -146
- ads/type_discovery/ip_detector.py +0 -68
- ads/type_discovery/latlon_detector.py +0 -90
- ads/type_discovery/phone_number_detector.py +0 -63
- ads/type_discovery/type_discovery_driver.py +0 -87
- ads/type_discovery/typed_feature.py +0 -594
- ads/type_discovery/unknown_detector.py +0 -41
- ads/type_discovery/zipcode_detector.py +0 -48
- ads/vault/__init__.py +0 -7
- ads/vault/vault.py +0 -237
- oracle_ads-2.13.8.dist-info/RECORD +0 -858
- {oracle_ads-2.13.8.dist-info → oracle_ads-2.13.9rc0.dist-info}/licenses/LICENSE.txt +0 -0
ads/evaluations/evaluator.py
DELETED
@@ -1,1334 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8; -*-
|
3
|
-
|
4
|
-
# Copyright (c) 2020, 2023 Oracle and/or its affiliates.
|
5
|
-
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
-
|
7
|
-
from cycler import cycler
|
8
|
-
import logging
|
9
|
-
import matplotlib as mpl
|
10
|
-
import numpy as np
|
11
|
-
from numpy.typing import ArrayLike
|
12
|
-
import pandas as pd
|
13
|
-
import re
|
14
|
-
from sklearn.preprocessing import LabelEncoder
|
15
|
-
import tempfile
|
16
|
-
from typing import List, Any
|
17
|
-
|
18
|
-
logging.getLogger("matplotlib").setLevel(logging.WARNING)
|
19
|
-
mpl.rcParams["image.cmap"] = "BuGn"
|
20
|
-
mpl.rcParams["axes.prop_cycle"] = cycler(
|
21
|
-
color=["teal", "blueviolet", "forestgreen", "peru", "y", "dodgerblue", "r"]
|
22
|
-
)
|
23
|
-
|
24
|
-
from ads.common.data import ADSData
|
25
|
-
from ads.common.decorator.runtime_dependency import (
|
26
|
-
runtime_dependency,
|
27
|
-
OptionalDependency,
|
28
|
-
)
|
29
|
-
from ads.common.decorator.deprecate import deprecated
|
30
|
-
from ads.common import logger
|
31
|
-
from ads.common.model import ADSModel
|
32
|
-
from ads.common.model_metadata import UseCaseType
|
33
|
-
from ads.dataset.dataset_with_target import ADSDatasetWithTarget
|
34
|
-
from ads.evaluations.evaluation_plot import EvaluationPlot
|
35
|
-
from ads.evaluations.statistical_metrics import (
|
36
|
-
ModelEvaluator,
|
37
|
-
DEFAULT_BIN_CLASS_METRICS,
|
38
|
-
DEFAULT_MULTI_CLASS_METRICS,
|
39
|
-
DEFAULT_REG_METRICS,
|
40
|
-
DEFAULT_BIN_CLASS_LABELS_MAP,
|
41
|
-
DEFAULT_MULTI_CLASS_LABELS_MAP,
|
42
|
-
DEFAULT_REG_LABELS_MAP,
|
43
|
-
)
|
44
|
-
from ads.model.generic_model import GenericModel, VERIFY_STATUS_NAME
|
45
|
-
|
46
|
-
METRICS_TO_MINIMIZE = ["hamming_loss", "hinge_loss", "mse", "mae"]
|
47
|
-
POSITIVE_CLASS_NAMES = ["yes", "y", "t", "true", "1"]
|
48
|
-
|
49
|
-
|
50
|
-
class Evaluator(object):
|
51
|
-
"""
|
52
|
-
BETA FEATURE
|
53
|
-
Evaluator is the new and preferred way to evaluate a model of list of models.
|
54
|
-
It contains a superset of the features of the soon-to-be-deprecated ADSEvaluator.
|
55
|
-
|
56
|
-
Methods
|
57
|
-
-------
|
58
|
-
display()
|
59
|
-
Shows all plots and metrics within the jupyter notebook.
|
60
|
-
html()
|
61
|
-
Returns the raw string of the html report
|
62
|
-
save(filename)
|
63
|
-
Saves the html report to the provided file location.
|
64
|
-
add_model(model)
|
65
|
-
Adds a model to the existsing report. See documentation for more details.
|
66
|
-
add_metric(metric_fn)
|
67
|
-
Adds a metric to the existsing report. See documentation for more details.
|
68
|
-
add_plot(plotting_fn)
|
69
|
-
Adds a plot to the existing report. See documentation for more details.
|
70
|
-
|
71
|
-
"""
|
72
|
-
|
73
|
-
def __init__(
|
74
|
-
self,
|
75
|
-
models: List[GenericModel],
|
76
|
-
X: ArrayLike,
|
77
|
-
y: ArrayLike,
|
78
|
-
y_preds: List[ArrayLike] = None,
|
79
|
-
y_scores: List[ArrayLike] = None,
|
80
|
-
X_train: ArrayLike = None,
|
81
|
-
y_train: ArrayLike = None,
|
82
|
-
classes: List = None,
|
83
|
-
positive_class: str = None,
|
84
|
-
legend_labels: dict = None,
|
85
|
-
use_case_type: UseCaseType = None,
|
86
|
-
):
|
87
|
-
"""Creates an ads evaluator object.
|
88
|
-
|
89
|
-
Parameters
|
90
|
-
----------
|
91
|
-
models : ads.model.GenericModel instance
|
92
|
-
Test data to evaluate model on.
|
93
|
-
The object can be built using from one of the framworks supported in `ads.model.framework`
|
94
|
-
X : DataFrame-like
|
95
|
-
The data used to make a prediction.
|
96
|
-
Can be set to None if `y_preds` is given. (And `y_scores` for more thorough analysis).
|
97
|
-
y : array-like
|
98
|
-
The true values corresponding to the input data
|
99
|
-
y_preds : list of array-like, optional
|
100
|
-
The predictions from each model in the same order as the models
|
101
|
-
y_scores : list of array-like, optional
|
102
|
-
The predict_probas from each model in the same order as the models
|
103
|
-
X_train : DataFrame-like, optional
|
104
|
-
The data used to train the model
|
105
|
-
y_train : array-like, optional
|
106
|
-
The true values corresponding to the input training data
|
107
|
-
positive_class : str or int, optional
|
108
|
-
The class to report metrics for binary dataset. If the target classes is True or False,
|
109
|
-
positive_class will be set to True by default. If the dataset is multiclass or multilabel,
|
110
|
-
this will be ignored.
|
111
|
-
legend_labels : dict, optional
|
112
|
-
List of legend labels. Defaults to `None`.
|
113
|
-
If legend_labels not specified class names will be used for plots.
|
114
|
-
classes : List or None, optional
|
115
|
-
A List of the possible labels for y, when evaluating a classification use case
|
116
|
-
use_case_type : str, optional
|
117
|
-
The type of problem this model is solving. This can be set during `prepare()`.
|
118
|
-
Examples: "binary_classification", "regression", "multinomial_classification"
|
119
|
-
Full list of supported types can be found here: `ads.common.model_metadata.UseCaseType`
|
120
|
-
|
121
|
-
Examples
|
122
|
-
--------
|
123
|
-
>>> import tempfile
|
124
|
-
>>> from ads.evaluations.evaluator import Evaluator
|
125
|
-
>>> from sklearn.tree import DecisionTreeClassifier
|
126
|
-
>>> from sklearn.datasets import make_classification
|
127
|
-
>>> from sklearn.model_selection import train_test_split
|
128
|
-
>>> from ads.model.framework.sklearn_model import SklearnModel
|
129
|
-
>>> from ads.common.model_metadata import UseCaseType
|
130
|
-
>>>
|
131
|
-
>>> X, y = make_classification(n_samples=1000)
|
132
|
-
>>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
|
133
|
-
>>> est = DecisionTreeClassifier().fit(X_train, y_train)
|
134
|
-
>>> model = SklearnModel(estimator=est, artifact_dir=tempfile.mkdtemp())
|
135
|
-
>>> model.prepare(
|
136
|
-
inference_conda_env="generalml_p38_cpu_v1",
|
137
|
-
training_conda_env="generalml_p38_cpu_v1",
|
138
|
-
X_sample=X_test,
|
139
|
-
y_sample=y_test,
|
140
|
-
use_case_type=UseCaseType.BINARY_CLASSIFICATION,
|
141
|
-
)
|
142
|
-
>>> report = Evaluator([my_model], X=X_test, y=y_test)
|
143
|
-
>>> report.display()
|
144
|
-
|
145
|
-
"""
|
146
|
-
self._verify_models(models)
|
147
|
-
self.X, self.y, self.X_train, self.y_train = X, y, X_train, y_train
|
148
|
-
self.legend_labels = legend_labels
|
149
|
-
self.positive_class = positive_class
|
150
|
-
|
151
|
-
self._determine_problem_type(models, use_case_type)
|
152
|
-
self._determine_classes(classes)
|
153
|
-
|
154
|
-
self.model_names = []
|
155
|
-
self.evaluation = pd.DataFrame()
|
156
|
-
self.add_models(models, y_preds=y_preds, y_scores=y_scores)
|
157
|
-
|
158
|
-
def _verify_models(self, models):
|
159
|
-
assert isinstance(
|
160
|
-
models, list
|
161
|
-
), f"The `models` argument must be a list of models, instead got: {models}"
|
162
|
-
for m in models:
|
163
|
-
if not isinstance(m, GenericModel):
|
164
|
-
raise ValueError(
|
165
|
-
f"Please register and prepare model {m} with ads. More information here: https://accelerated-data-science.readthedocs.io/en/latest/user_guide/model_registration/introduction.html#register"
|
166
|
-
)
|
167
|
-
sum_stat = m.summary_status().reset_index()
|
168
|
-
if (
|
169
|
-
sum_stat.loc[sum_stat["Step"] == VERIFY_STATUS_NAME, "Status"]
|
170
|
-
== "Not Available"
|
171
|
-
).any():
|
172
|
-
raise ValueError(
|
173
|
-
f"Model {m} has not been prepared, and `verify` cannot be run (including the pre and post processing from the score.py). This may cause issues. Prepare the model in accordance with the documentation: https://accelerated-data-science.readthedocs.io/en/latest/user_guide/model_registration/model_artifact.html#prepare-the-model-artifact"
|
174
|
-
)
|
175
|
-
|
176
|
-
def _determine_problem_type(self, models, use_case_type):
|
177
|
-
if use_case_type is not None:
|
178
|
-
self.problem_type = use_case_type
|
179
|
-
problem_type = models[0].metadata_taxonomy["UseCaseType"].value
|
180
|
-
if problem_type is not None:
|
181
|
-
for m in models:
|
182
|
-
assert (
|
183
|
-
problem_type == m.metadata_taxonomy["UseCaseType"].value
|
184
|
-
), f"Cannot compare models of different Use Case types. The first model is of type {problem_type}, while model: {m} is of Use Case type: {m.metadata_taxonomy['UseCaseType'].value}"
|
185
|
-
self.problem_type = problem_type
|
186
|
-
else:
|
187
|
-
if not models[0].schema_output.keys:
|
188
|
-
raise ValueError(
|
189
|
-
f"The Use Case Type of this model, {models[0]}, is ambigious. Please re-run Evaluator with `use_case_type` set to a valid type (full list found here: ads.common.model_metadata.UseCaseType). To avoid setting this in the future, set the `use_case_type` when preparing the model. Or update your model's use_case_type attribute here: `model.metadata_taxonomy['UseCaseType'].value` More information here: https://accelerated-data-science.readthedocs.io/en/latest/user_guide/model_registration/model_metadata.html#taxonomy-metadata"
|
190
|
-
)
|
191
|
-
logger.warn(
|
192
|
-
f"The Use Case Type of this model, {models[0]}, is ambigious. Please set the `model.metadata_taxonomy['UseCaseType'].value` attribute to one of the options in ads.common.model_metadata.UseCaseType"
|
193
|
-
)
|
194
|
-
|
195
|
-
output_col = models[0].schema_output.keys[0]
|
196
|
-
if models[0].schema_output[output_col].feature_type != "Continuous":
|
197
|
-
if len(np.unique(self.y)) == 2:
|
198
|
-
self.problem_type = UseCaseType.BINARY_CLASSIFICATION
|
199
|
-
else:
|
200
|
-
self.problem_type = UseCaseType.MULTINOMIAL_CLASSIFICATION
|
201
|
-
else:
|
202
|
-
self.problem_type = UseCaseType.REGRESSION
|
203
|
-
logger.info(f"Set Use Case Type to: {self.problem_type}")
|
204
|
-
|
205
|
-
def _determine_classes(self, classes):
|
206
|
-
if self.problem_type in [UseCaseType.REGRESSION]:
|
207
|
-
self.classes = []
|
208
|
-
self.metrics_to_show = DEFAULT_REG_METRICS
|
209
|
-
self.is_classifier = False
|
210
|
-
else:
|
211
|
-
self.is_classifier = True
|
212
|
-
self.classes = (
|
213
|
-
classes or np.unique(self.y_train)
|
214
|
-
if self.y_train is not None
|
215
|
-
else np.unique(self.y)
|
216
|
-
)
|
217
|
-
self.num_classes = len(self.classes)
|
218
|
-
if len(self.classes) == 2:
|
219
|
-
self.metrics_to_show = DEFAULT_BIN_CLASS_METRICS
|
220
|
-
if (
|
221
|
-
self.positive_class is None
|
222
|
-
or self.positive_class not in self.classes
|
223
|
-
):
|
224
|
-
self.positive_class = next(
|
225
|
-
(
|
226
|
-
x
|
227
|
-
for x in list(self.classes)
|
228
|
-
if str(x).lower() in POSITIVE_CLASS_NAMES
|
229
|
-
),
|
230
|
-
self.classes[0],
|
231
|
-
)
|
232
|
-
logger.info(
|
233
|
-
f"Using {self.positive_class} as the positive class. Use `positive_class` to set this value."
|
234
|
-
)
|
235
|
-
else:
|
236
|
-
self.metrics_to_show = DEFAULT_MULTI_CLASS_METRICS
|
237
|
-
|
238
|
-
def _get_model_name(self, model):
|
239
|
-
name = str(model.algorithm) + "_" + str(model.framework)
|
240
|
-
name_edit = re.sub(r" ?\([^)]+\)", "", name)
|
241
|
-
if name_edit in self.model_names:
|
242
|
-
name_edit += "_1"
|
243
|
-
num_tries = 1
|
244
|
-
while name_edit in self.model_names:
|
245
|
-
num_tries += 1
|
246
|
-
name_edit = name_edit[:-1] + str(num_tries)
|
247
|
-
self.model_names.append(name_edit)
|
248
|
-
return name_edit
|
249
|
-
|
250
|
-
def _score_data(self, model, X):
|
251
|
-
y_pred = model.verify(X)["prediction"]
|
252
|
-
|
253
|
-
y_score = None
|
254
|
-
# we will compute y_score only for binary classification cases because only for binary classification can
|
255
|
-
# we use it for ROC Curves and AUC
|
256
|
-
if self.is_classifier and hasattr(model.estimator, "predict_proba"):
|
257
|
-
if len(self.classes) == 2:
|
258
|
-
# positive label index is assumed to be 0 if the ADSModel does not have a positive class defined
|
259
|
-
positive_class_index = 0
|
260
|
-
# For prediction probability, we only consider the positive class.
|
261
|
-
if self.positive_class is not None:
|
262
|
-
if self.positive_class not in list(self.classes):
|
263
|
-
raise ValueError(
|
264
|
-
"Invalid positive class '%s' for model %s. Positive class should be one of %s."
|
265
|
-
% (
|
266
|
-
self.positive_class,
|
267
|
-
model.estimator.__class__.__name__,
|
268
|
-
list(self.classes),
|
269
|
-
)
|
270
|
-
)
|
271
|
-
positive_class_index = list(self.classes).index(self.positive_class)
|
272
|
-
y_score = model.estimator.predict_proba(X)[:, positive_class_index]
|
273
|
-
else:
|
274
|
-
y_score = model.estimator.predict_proba(X)
|
275
|
-
return y_pred, y_score
|
276
|
-
|
277
|
-
def add_models(
|
278
|
-
self,
|
279
|
-
models: List[GenericModel],
|
280
|
-
y_preds: List[Any] = None,
|
281
|
-
y_scores: List[Any] = None,
|
282
|
-
):
|
283
|
-
"""Add a model to an existing Evaluator to avoid re-calculating the values.
|
284
|
-
|
285
|
-
Parameters
|
286
|
-
----------
|
287
|
-
models : List[ads.model.GenericModel]
|
288
|
-
Test data to evaluate model on.
|
289
|
-
The object can be built using from one of the framworks supported in `ads.model.framework`
|
290
|
-
y_preds : list of array-like, optional
|
291
|
-
The predictions from each model in the same order as the models
|
292
|
-
y_scores : list of array-like, optional
|
293
|
-
The predict_probas from each model in the same order as the models
|
294
|
-
|
295
|
-
Returns
|
296
|
-
-------
|
297
|
-
self
|
298
|
-
|
299
|
-
Examples
|
300
|
-
--------
|
301
|
-
>>> evaluator = Evaluator(models = [model1, model2], X=X, y=y)
|
302
|
-
>>> evaluator.add_models(models = [model3])
|
303
|
-
"""
|
304
|
-
|
305
|
-
assert isinstance(models, List), "The `models` parameter must be of type list."
|
306
|
-
if self.is_classifier:
|
307
|
-
self._le = LabelEncoder().fit(self.y)
|
308
|
-
for i, m in enumerate(models):
|
309
|
-
m_name = self._get_model_name(m)
|
310
|
-
|
311
|
-
if y_preds is None:
|
312
|
-
y_pred, y_score = self._score_data(m, self.X)
|
313
|
-
else:
|
314
|
-
y_pred = y_preds[i]
|
315
|
-
y_score = y_scores[i] if isinstance(y_scores, list) else None
|
316
|
-
if self.is_classifier:
|
317
|
-
y_true, y_pred = self._le.transform(self.y), self._le.transform(y_pred)
|
318
|
-
classes = self._le.transform(self.classes)
|
319
|
-
pos_class = None
|
320
|
-
if len(self.classes) == 2:
|
321
|
-
pos_class = self._le.transform([self.positive_class])[0]
|
322
|
-
else:
|
323
|
-
y_true, y_pred, classes, pos_class = self.y, y_pred, None, None
|
324
|
-
new_model_metrics = ModelEvaluator(
|
325
|
-
y_true=y_true,
|
326
|
-
y_pred=y_pred,
|
327
|
-
model_name=m_name,
|
328
|
-
classes=classes,
|
329
|
-
y_score=y_score,
|
330
|
-
positive_class=pos_class,
|
331
|
-
).get_metrics()
|
332
|
-
self.evaluation = pd.concat(
|
333
|
-
[self.evaluation, new_model_metrics], axis=1, sort=False
|
334
|
-
)
|
335
|
-
return self
|
336
|
-
|
337
|
-
@runtime_dependency(module="IPython", install_from=OptionalDependency.NOTEBOOK)
|
338
|
-
@runtime_dependency(
|
339
|
-
module="ipywidgets",
|
340
|
-
object="HTML",
|
341
|
-
install_from=OptionalDependency.NOTEBOOK,
|
342
|
-
)
|
343
|
-
def display(
|
344
|
-
self,
|
345
|
-
plots=None,
|
346
|
-
perfect=False,
|
347
|
-
baseline=True,
|
348
|
-
legend_labels=None,
|
349
|
-
precision=4,
|
350
|
-
metrics_labels=None,
|
351
|
-
):
|
352
|
-
"""Visualize evaluation report.
|
353
|
-
|
354
|
-
Parameters
|
355
|
-
----------
|
356
|
-
plots : list, optional
|
357
|
-
Filter the plots that are displayed. Defaults to None. The name of the plots are as below:
|
358
|
-
|
359
|
-
- regression - residuals_qq, residuals_vs_fitted
|
360
|
-
- binary classification - normalized_confusion_matrix, roc_curve, pr_curve
|
361
|
-
- multi class classification - normalized_confusion_matrix, precision_by_label, recall_by_label, f1_by_label
|
362
|
-
perfect: bool, optional (default False)
|
363
|
-
If True, will show how a perfect classifier would perform.
|
364
|
-
baseline: bool, optional (default True)
|
365
|
-
If True, will show how a random classifier would perform.
|
366
|
-
legend_labels : dict, optional
|
367
|
-
Rename legend labels, that used for multi class classification plots. Defaults to None.
|
368
|
-
legend_labels dict keys are the same as class names. legend_labels dict values are strings.
|
369
|
-
If legend_labels not specified class names will be used for plots.
|
370
|
-
precision: int, optional (default 4)
|
371
|
-
The number of decimal points to show for each score/loss value
|
372
|
-
metrics_labels: List, optional
|
373
|
-
The metrics that should be included in the html table.
|
374
|
-
|
375
|
-
Returns
|
376
|
-
-------
|
377
|
-
None
|
378
|
-
Nothing. Outputs several evaluation plots as specified by `plots`.
|
379
|
-
|
380
|
-
Examples
|
381
|
-
--------
|
382
|
-
|
383
|
-
>>> evaluator = Evaluator(models=[model1, model2], X=X, y=y)
|
384
|
-
>>> evaluator.display()
|
385
|
-
|
386
|
-
>>> legend_labels={'class_0': 'green', 'class_1': 'yellow', 'class_2': 'red'}
|
387
|
-
>>> multi_evaluator = Evaluator(models=[model1, model2], X=X, y=y, legend_labels=legend_labels)
|
388
|
-
>>> multi_evaluator.display(plots=["normalized_confusion_matrix",
|
389
|
-
... "precision_by_label", "recall_by_label", "f1_by_label"])
|
390
|
-
"""
|
391
|
-
from IPython.core.display import display, HTML
|
392
|
-
|
393
|
-
legend_labels = (
|
394
|
-
legend_labels if legend_labels is not None else self.legend_labels
|
395
|
-
)
|
396
|
-
if legend_labels is None and self.is_classifier:
|
397
|
-
legend_labels = dict(
|
398
|
-
zip([str(x) for x in self._le.transform(self.classes)], self.classes)
|
399
|
-
)
|
400
|
-
# pass to plotting class
|
401
|
-
self._get_plots_html(
|
402
|
-
plots=plots, perfect=perfect, baseline=baseline, legend_labels=legend_labels
|
403
|
-
)
|
404
|
-
display(
|
405
|
-
HTML(self._get_metrics_html(precision=precision, labels=metrics_labels))
|
406
|
-
)
|
407
|
-
|
408
|
-
def html(
|
409
|
-
self,
|
410
|
-
plots=None,
|
411
|
-
perfect=False,
|
412
|
-
baseline=True,
|
413
|
-
legend_labels=None,
|
414
|
-
precision=4,
|
415
|
-
metrics_labels=None,
|
416
|
-
):
|
417
|
-
"""Get raw HTML report.
|
418
|
-
|
419
|
-
Parameters
|
420
|
-
----------
|
421
|
-
plots : list, optional
|
422
|
-
Filter the plots that are displayed. Defaults to None. The name of the plots are as below:
|
423
|
-
|
424
|
-
- regression - residuals_qq, residuals_vs_fitted
|
425
|
-
- binary classification - normalized_confusion_matrix, roc_curve, pr_curve
|
426
|
-
- multi class classification - normalized_confusion_matrix, precision_by_label, recall_by_label, f1_by_label
|
427
|
-
perfect: bool, optional (default False)
|
428
|
-
If True, will show how a perfect classifier would perform.
|
429
|
-
baseline: bool, optional (default True)
|
430
|
-
If True, will show how a random classifier would perform.
|
431
|
-
legend_labels : dict, optional
|
432
|
-
Rename legend labels, that used for multi class classification plots. Defaults to None.
|
433
|
-
legend_labels dict keys are the same as class names. legend_labels dict values are strings.
|
434
|
-
If legend_labels not specified class names will be used for plots.
|
435
|
-
precision: int, optional (default 4)
|
436
|
-
The number of decimal points to show for each score/loss value
|
437
|
-
metrics_labels: List, optional
|
438
|
-
The metrics that should be included in the html table.
|
439
|
-
|
440
|
-
Returns
|
441
|
-
-------
|
442
|
-
None
|
443
|
-
Nothing. Outputs several evaluation plots as specified by `plots`.
|
444
|
-
|
445
|
-
Examples
|
446
|
-
--------
|
447
|
-
|
448
|
-
>>> evaluator = Evaluator(models=[model1, model2], X=X, y=y)
|
449
|
-
>>> raw_html = evaluator.html()
|
450
|
-
"""
|
451
|
-
html_plots = self._get_plots_html(
|
452
|
-
plots=plots, perfect=perfect, baseline=baseline, legend_labels=legend_labels
|
453
|
-
)
|
454
|
-
html_metrics = self._get_metrics_html(
|
455
|
-
precision=precision, labels=metrics_labels
|
456
|
-
)
|
457
|
-
html_raw = (
|
458
|
-
"<h1>Evaluation Report</h1> \
|
459
|
-
<h2>Evaluation Plots</h2> "
|
460
|
-
+ " \
|
461
|
-
".join(
|
462
|
-
html_plots
|
463
|
-
)
|
464
|
-
+ f"<h2>Evaluation Metrics</h2> \
|
465
|
-
<p> {html_metrics} </p>"
|
466
|
-
)
|
467
|
-
return html_raw
|
468
|
-
|
469
|
-
def save(self, filename: str, **kwargs):
    """Render the evaluation report and write it to disk as HTML.

    Parameters
    ----------
    filename: str
        The name and path of where to save the html report.
    **kwargs
        Forwarded unchanged to ``html()`` (e.g. ``plots``, ``perfect``,
        ``baseline``, ``legend_labels``, ``precision``, ``metrics_labels``).

    Returns
    -------
    None
        Nothing. The rendered report is written to ``filename``.

    Examples
    --------
    >>> evaluator = Evaluator(models=[model1, model2], X=X, y=y)
    >>> evaluator.save("report.html")
    """
    # Build the full report first so a rendering failure never leaves a
    # truncated file behind.
    report = self.html(**kwargs)
    with open(filename, "w") as out_file:
        out_file.write(report)
|
509
|
-
|
510
|
-
def _get_plots_html(
    self,
    plots=None,
    perfect=False,
    baseline=True,
    legend_labels=None,
):
    """Delegate plot rendering to ``EvaluationPlot.plot`` for this
    evaluator's stored evaluation table and return its result."""
    n_classes = len(self.classes)
    return EvaluationPlot.plot(
        self.evaluation, plots, n_classes, perfect, baseline, legend_labels
    )
|
520
|
-
|
521
|
-
def _get_metrics_html(self, precision=4, labels=None):
    """Render the evaluation metrics table as an HTML string.

    Parameters
    ----------
    precision : int, optional (default 4)
        Number of decimal places used when formatting metric values.
    labels : dict, optional
        Mapping of metric index names to display labels. When ``None`` a
        default map is chosen based on the problem type (binary / multi-class
        classification or regression).

    Returns
    -------
    str
        HTML for a styled table of metrics, one column per model.
    """

    def highlight_max(s):
        """Highlight the best value in a Series with a light-green background.

        Parameters
        ----------
        s : series object
            the series being evaluated

        Returns
        -------
        list
            containing background color data or empty if not best
        """
        # For "lower is better" metrics the minimum is highlighted instead.
        if s.name not in METRICS_TO_MINIMIZE:
            is_max = s == s.max()
        else:
            is_max = s == s.min()
        return ["background-color: lightgreen" if v else "" for v in is_max]

    def _pretty_label(df, labels, copy=True):
        """
        Output specified labels in proper format.
        If the labels are provided then use them. Otherwise, use default.

        Parameters
        ----------
        labels : dictionary
            map printing specific labels for metrics display

        Returns
        -------
        dataframe
            dataframe with index names modified according to input labels
        """
        # Restrict the table to the metrics named in `labels` (in label order).
        df_display = df.loc[list(labels.keys())]

        if copy:
            df_display = df_display.copy()
        for k, v in labels.items():
            df_display.rename(index={k: v}, inplace=True)
        return df_display

    # Pick the default label map from the problem type when none was given.
    if labels is None:
        if self.is_classifier:
            if len(self.classes) == 2:
                labels = DEFAULT_BIN_CLASS_LABELS_MAP
            else:
                labels = DEFAULT_MULTI_CLASS_LABELS_MAP
        else:
            labels = DEFAULT_REG_LABELS_MAP
    html_raw = (
        _pretty_label(self.evaluation, labels)
        .style.apply(highlight_max, axis=1)
        .format(precision=precision)
        .set_properties(**{"text-align": "center"})
        .set_table_attributes("class=table")
        .set_caption(
            '<div align="left"><b style="font-size:20px;">'
            + "Evaluation Metrics:</b></div>"
        )
        .to_html()
    )
    return html_raw
|
585
|
-
|
586
|
-
|
587
|
-
class ADSEvaluator(object):
    """ADS Evaluator class. This class holds field and methods for creating and using
    ADS evaluator objects.

    Attributes
    ----------
    evaluations : list[DataFrame]
        list of evaluations.
    is_classifier : bool
        Whether the dataset looks like a classification problem (versus regression).
    legend_labels : dict
        List of legend labels. Defaults to `None`.
    metrics_to_show : list[str]
        Names of metrics to show.
    models : list[ads.common.model.ADSModel]
        The object built using `ADSModel.from_estimator()`.
    positive_class : str or int
        The class to report metrics for binary dataset, assumed to be true.
    show_full_name : bool
        Whether to show the name of the evaluator in relevant contexts.
    test_data : ads.common.data.ADSData
        Test data to evaluate model on.
    training_data : ads.common.data.ADSData
        Training data to evaluate model.

    Positive_Class_Names : list
        Class attribute listing the ways to represent positive classes

    Methods
    -------
    add_metrics(func, names)
        Adds the listed metrics to the evaluator it is called on
    del_metrics(names)
        Removes listed metrics from the evaluator object it is called on
    add_models(models, show_full_name)
        Adds the listed models to the evaluator object
    del_models(names)
        Removes the listed models from the evaluator object
    show_in_notebook(plots, use_training_data, perfect, baseline, legend_labels)
        Visualize evaluation plots in the notebook
    calculate_cost(tn_weight, fp_weight, fn_weight, tp_weight, use_training_data)
        Returns a cost associated with the input weights
    """

    # Lower-cased spellings treated as the "positive" class when the caller
    # does not specify `positive_class` explicitly (see __init__).
    Positive_Class_Names = ["yes", "y", "t", "true", "1"]
|
632
|
-
|
633
|
-
def __init__(
    self,
    test_data,
    models,
    training_data=None,
    positive_class=None,
    legend_labels=None,
    show_full_name=False,
    classes=None,
    classification_threshold=50,
):
    """Creates an ads evaluator object.

    Parameters
    ----------
    test_data : ads.common.data.ADSData instance
        Test data to evaluate model on.
        The object can be built using `ADSData.build()`.
    models : list[ads.common.model.ADSModel]
        The object can be built using `ADSModel.from_estimator()`.
        Maximum length of the list is 3
    training_data : ads.common.data.ADSData instance, optional
        Training data to evaluate model on and compare metrics against test data.
        The object can be built using `ADSData.build()`
    positive_class : str or int, optional
        The class to report metrics for binary dataset. If the target classes is True or False,
        positive_class will be set to True by default. If the dataset is multiclass or multilabel,
        this will be ignored.
    legend_labels : dict, optional
        List of legend labels. Defaults to `None`.
        If legend_labels not specified class names will be used for plots.
    show_full_name : bool, optional
        Show the name of the evaluator object. Defaults to `False`.
    classes : List or None, optional
        A List of the possible labels for y, when evaluating a classification use case
    classification_threshold : int, defaults to 50
        The maximum number of unique values that y must have to qualify as classification.
        If this threshold is exceeded, Evaluator assumes the model is regression.

    Examples
    --------

    >>> train, test = ds.train_test_split()
    >>> model1 = MyModelClass1.train(train)
    >>> model2 = MyModelClass2.train(train)
    >>> evaluator = ADSEvaluator(test, [model1, model2])

    >>> legend_labels={'class_0': 'one', 'class_1': 'two', 'class_2': 'three'}
    >>> multi_evaluator = ADSEvaluator(test, models=[model1, model2],
    ...             legend_labels=legend_labels)

    """
    # NOTE(review): `classes` and `classification_threshold` are accepted but
    # not referenced anywhere in this constructor body — confirm whether they
    # were meant to feed `self.classes` / problem-type detection.
    if any(isinstance(m, ADSModel) for m in models):
        logger.warn(
            f"ADSModel is being deprecated. Users should instead use GenericModel or one of its subclasses. More information here: https://accelerated-data-science.readthedocs.io/en/latest/user_guide/model_registration/introduction.html#register"
        )
    self.evaluations = []
    # Datasets with a target may be passed directly; convert them to ADSData
    # by taking a 100%-train / 0%-test split.
    if isinstance(training_data, ADSDatasetWithTarget):
        training_data, _ = training_data.train_test_split(test_size=0.0)
    if isinstance(test_data, ADSDatasetWithTarget):
        test_data, _ = test_data.train_test_split(test_size=0.0)

    if not isinstance(test_data, ADSData):
        raise ValueError(
            "Expected test_data to be of type ADSData. More information here: https://accelerated-data-science.readthedocs.io/en/latest/ads.common.html#ads.common.data.ADSData"
        )
    if training_data and not isinstance(training_data, ADSData):
        raise ValueError(
            "Expected training_data to be of type ADSData. More information here: https://accelerated-data-science.readthedocs.io/en/latest/ads.common.html#ads.common.data.ADSData"
        )
    assert isinstance(
        models, list
    ), "The `models` argument should be a list of GenericModels. More information here: https://accelerated-data-science.readthedocs.io/en/latest/ads.common.html#ads.common.data.ADSData"

    self.test_data = test_data
    self.training_data = training_data
    self.classes = []
    # Problem type is inferred from the first model only.
    self.is_classifier = (
        hasattr(models[0], "classes_") and models[0].classes_ is not None
    )
    pclass = positive_class
    if self.is_classifier:
        self.classes = list(models[0].classes_)
        if len(self.classes) == 2:
            # Binary classification
            self.metrics_to_show = [
                "accuracy",
                "hamming_loss",
                "precision",
                "recall",
                "f1",
                "auc",
            ]
            if positive_class is None or positive_class not in self.classes:
                # Fall back to the first class whose string form looks
                # "positive", else the first class overall.
                pclass = next(
                    (
                        x
                        for x in list(self.classes)
                        if str(x).lower() in ADSEvaluator.Positive_Class_Names
                    ),
                    self.classes[0],
                )
                logger.info(
                    f"Using {pclass} as the positive class. Use `positive_class` to set this value."
                )
        else:
            # Multi-class
            self.metrics_to_show = [
                "accuracy",
                "hamming_loss",
                "precision_weighted",
                "precision_micro",
                "recall_weighted",
                "recall_micro",
                "f1_weighted",
                "f1_micro",
            ]
    else:
        # Regression
        self.metrics_to_show = ["r2_score", "mse", "mae"]
    self.positive_class = pclass
    self.legend_labels = legend_labels

    # NOTE(review): rebinding the loop variable `m` discards the converted
    # model — the `models` list passed to add_models below is unchanged.
    # Confirm whether the conversion result was meant to replace the entry.
    for m in models:
        if not (isinstance(m, ADSModel)):
            try:
                m = ADSModel.from_estimator(m.est)
            # NOTE(review): bare except silently swallows all conversion errors.
            except:
                logger.info("This model cannot be converted to an ADS Model.")
    # evaluations[0] holds training metrics, evaluations[1] test metrics.
    self.evaluations = [pd.DataFrame(), pd.DataFrame()]
    self.model_names = []
    self.add_models(models, show_full_name=show_full_name)
|
764
|
-
|
765
|
-
def add_metrics(self, funcs, names):
    """Adds the listed metrics to the evaluator object it is called on.

    Parameters
    ----------
    funcs : list
        The list of metrics to be added. Each function will be provided
        `y_true` and `y_pred`, the true and predicted values for each model.
    names : list[str]
        The list of metric names corresponding to the functions.

    Returns
    -------
    Nothing

    Raises
    ------
    ValueError
        If `funcs` and `names` do not have the same length.

    Examples
    --------
    >>> def f1(y_true, y_pred):
    ...    return np.max(y_true - y_pred)
    >>> evaluator = ADSEvaluator(test, [model1, model2])
    >>> evaluator.add_metrics([f1], ['Max Residual'])
    >>> evaluator.metrics
    Output table will include the desired metric
    """

    if len(funcs) != len(names):
        raise ValueError("Could not find 1 unique name for each function")

    def _append_metric_row(ev_df, func, metric_name):
        # Evaluate `func` on every model column of `ev_df` and return the
        # frame with a new row labelled `metric_name` appended.
        values = [
            func(ev_df[model]["y_true"], ev_df[model]["y_pred"])
            for model in ev_df.columns
        ]
        row = pd.DataFrame([values], columns=ev_df.columns, index=[metric_name])
        return pd.concat([ev_df, row])

    for name, f in zip(names, funcs):
        # evaluations[1] is the test table; evaluations[0] is the training
        # table, present only when training data was supplied.
        self.evaluations[1] = _append_metric_row(self.evaluations[1], f, name)
        if self.evaluations[0].shape != (0, 0):
            self.evaluations[0] = _append_metric_row(self.evaluations[0], f, name)
        if name not in self.metrics_to_show:
            self.metrics_to_show.append(name)
    # Keep the aliased attributes pointing at the rebuilt frames.
    setattr(self, "train_evaluations", self.evaluations[0])
    setattr(self, "test_evaluations", self.evaluations[1])
|
819
|
-
|
820
|
-
def del_metrics(self, names):
    """Removes the listed metrics from the evaluator object it is called on.

    Parameters
    ----------
    names : list[str]
        The list of names of metrics to be deleted. Names can be found by calling
        `evaluator.test_evaluations.index`.

    Returns
    -------
    None
        `None`

    Examples
    --------
    >>> evaluator = ADSEvaluator(test, [model1, model2])
    >>> evaluator.del_metrics(['mse'])
    >>> evaluator.metrics
    Output table will exclude the desired metric
    """
    train_ev, test_ev = self.evaluations[0], self.evaluations[1]
    # Always drop from the test table; the training table exists only when
    # training data was supplied (otherwise it is an empty (0, 0) frame).
    test_ev.drop(index=names, inplace=True)
    if train_ev.shape != (0, 0):
        train_ev.drop(index=names, inplace=True)
    removed = set(names)
    self.metrics_to_show = [metric for metric in self.metrics_to_show if metric not in removed]
|
845
|
-
|
846
|
-
def add_models(self, models, show_full_name=False):
    """Adds the listed models to the evaluator object it is called on.

    Parameters
    ----------
    models : list[ADSModel]
        The list of models to be added
    show_full_name : bool, optional
        Whether to keep the full model name (including any parenthesised
        suffix) when resolving display names. Defaults to False.

    Returns
    -------
    Nothing

    Examples
    --------
    >>> evaluator = ADSEvaluator(test, [model1, model2])
    >>> evaluator.add_models([model3])
    """

    if type(models) is list:
        total_train_metrics = self.evaluations[0]
        total_test_metrics = self.evaluations[1]
        for i, m in enumerate(models):
            # if hasattr(m, 'classes_') != self.is_classifier:
            #     raise ValueError("All models should belong to same problem type.")
            # calculate evaluations on testing and training data (if X_train is not None)
            # FIX: forward `show_full_name` — it was previously accepted (and
            # passed down from __init__) but silently ignored here.
            m_name = self._get_model_name(m.name, show_full_name)

            # Score on training data first (when provided) so the train and
            # test tables gain columns for the same models in the same order.
            if self.training_data is not None:
                y_pred, y_score = self._score_data(m, self.training_data.X)
                train_metrics = ModelEvaluator(
                    y_true=self.training_data.y,
                    y_pred=y_pred,
                    model_name=m_name,
                    classes=m.classes_ if self.is_classifier else None,
                    y_score=y_score,
                    positive_class=self.positive_class,
                ).get_metrics()
                total_train_metrics = pd.concat(
                    [total_train_metrics, train_metrics], axis=1
                )

            y_pred, y_score = self._score_data(m, self.test_data.X)
            test_metrics = ModelEvaluator(
                y_true=self.test_data.y,
                y_pred=y_pred,
                model_name=m_name,
                classes=m.classes_ if self.is_classifier else None,
                y_score=y_score,
                positive_class=self.positive_class,
            ).get_metrics()
            total_test_metrics = pd.concat(
                [total_test_metrics, test_metrics], axis=1, sort=False
            )

        self.evaluations = [total_train_metrics, total_test_metrics]
        setattr(self, "train_evaluations", self.evaluations[0])
        setattr(self, "test_evaluations", self.evaluations[1])
|
906
|
-
|
907
|
-
def del_models(self, names):
    """Removes the listed models from the evaluator object it is called on.

    Parameters
    ----------
    names : list[str]
        the list of models to be deleted. Names are the model names by default, and
        assigned internally when conflicts exist. Actual names can be found using
        `evaluator.test_evaluations.columns`
    Returns
    -------
    Nothing

    Examples
    --------
    >>> model3.rename("model3")
    >>> evaluator = ADSEvaluator(test, [model1, model2, model3])
    >>> evaluator.del_models(["model3"])
    """

    # A non-list argument is silently ignored, matching the add_models guard.
    if type(names) is list:
        removed = set(names)
        self.model_names = [n for n in self.model_names if n not in removed]
        train_ev, test_ev = self.evaluations
        test_ev.drop(columns=names, inplace=True)
        # The training table is an empty (0, 0) frame when no training data
        # was supplied; skip it in that case.
        if train_ev.shape != (0, 0):
            train_ev.drop(columns=names, inplace=True)
|
933
|
-
|
934
|
-
def show_in_notebook(
    self,
    plots=None,
    use_training_data=False,
    perfect=False,
    baseline=True,
    legend_labels=None,
):
    """Visualize evaluation plots.

    Parameters
    ----------
    plots : list, optional
        Filter the plots that are displayed. Defaults to None. The name of the plots are as below:

        - regression - residuals_qq, residuals_vs_fitted
        - binary classification - normalized_confusion_matrix, roc_curve, pr_curve
        - multi class classification - normalized_confusion_matrix, precision_by_label, recall_by_label, f1_by_label

    use_training_data : bool, optional
        Use training data to generate plots. Defaults to `False`.
        By default, this method uses test data to generate plots
    legend_labels : dict, optional
        Rename legend labels, that used for multi class classification plots. Defaults to None.
        legend_labels dict keys are the same as class names. legend_labels dict values are strings.
        If legend_labels not specified class names will be used for plots.

    Returns
    -------
    None
        Nothing. Outputs several evaluation plots as specified by `plots`.

    Examples
    --------

    >>> evaluator = ADSEvaluator(test, [model1, model2])
    >>> evaluator.show_in_notebook()

    >>> legend_labels={'class_0': 'green', 'class_1': 'yellow', 'class_2': 'red'}
    >>> multi_evaluator = ADSEvaluator(test, [model1, model2],
    ...             legend_labels=legend_labels)
    >>> multi_evaluator.show_in_notebook(plots=["normalized_confusion_matrix",
    ...             "precision_by_label", "recall_by_label", "f1_by_label"])
    """

    # Pick the evaluation table to plot from: index 0 holds training
    # metrics, index 1 holds test metrics.
    if not use_training_data:
        model_evaluation = self.evaluations[1]
    else:
        if self.training_data is None:
            raise ValueError(
                "Training data is not provided. Re-build ADSData with training and test data"
            )
        model_evaluation = self.evaluations[0]

    # A call-site override takes precedence over labels stored on the evaluator.
    if legend_labels is None:
        legend_labels = self.legend_labels

    # Hand off to the plotting class.
    EvaluationPlot.plot(
        model_evaluation, plots, len(self.classes), perfect, baseline, legend_labels
    )
|
995
|
-
|
996
|
-
def calculate_cost(
    self, tn_weight, fp_weight, fn_weight, tp_weight, use_training_data=False
):
    """Returns a cost associated with the input weights.

    Parameters
    ----------
    tn_weight : int, float
        The weight to assign true negatives in calculating the cost
    fp_weight : int, float
        The weight to assign false positives in calculating the cost
    fn_weight : int, float
        The weight to assign false negatives in calculating the cost
    tp_weight : int, float
        The weight to assign true positives in calculating the cost
    use_training_data : bool, optional
        Use training data to pull the metrics. Defaults to False

    Returns
    -------
    :class:`pandas.DataFrame`
        DataFrame with the cost calculated for each model

    Raises
    ------
    ValueError
        If the dataset is not binary, or training data was requested but
        not provided.

    Examples
    --------
    >>> evaluator = ADSEvaluator(test, [model1, model2])
    >>> costs_table = evaluator.calculate_cost(0, 10, 1000, 0)
    """

    # Costs are only defined for a 2x2 confusion matrix.
    if len(self.classes) != 2:
        raise ValueError(
            "The calculate_cost api is not supported for non-binary classification datasets."
        )
    if use_training_data:
        if self.training_data is None:
            raise ValueError(
                "Training data is not provided. Re-build ADSData with training and test data."
            )
        ev = self.evaluations[0]
    else:
        ev = self.evaluations[1]
    # raveled confusion matrix order is (tn, fp, fn, tp); pair it with the
    # weights in the same order and take the weighted sum per model.
    weights = (tn_weight, fp_weight, fn_weight, tp_weight)
    list_of_model = ev.columns
    cost_per_model = [
        sum(
            count * weight
            for count, weight in zip(ev[m]["raw_confusion_matrix"].ravel(), weights)
        )
        for m in list_of_model
    ]
    return pd.DataFrame({"model": list_of_model, "cost": cost_per_model})
|
1046
|
-
|
1047
|
-
class EvaluationMetrics(object):
    """Class holding evaluation metrics.

    Attributes
    ----------
    ev_test : list
        evaluation test metrics
    ev_train : list
        evaluation training metrics
    use_training : bool
        use training data
    less_is_more : list
        metrics for which a smaller value is better
    precision : int
        number of decimal places used when rendering metric values

    Methods
    -------
    show_in_notebook()
        Shows visualization metrics as a color coded table

    """

    # Human-friendly display names for the default classification metrics.
    DEFAULT_LABELS_MAP = {
        "accuracy": "Accuracy",
        "hamming_loss": "Hamming distance",
        "kappa_score_": "Cohen's kappa coefficient",
        "precision": "Precision",
        "recall": "Recall",
        "f1": "F1",
        "auc": "ROC AUC",
    }

    def __init__(
        self, ev_test, ev_train, use_training=False, less_is_more=None, precision=4
    ):
        # ev_test / ev_train: metric tables (rows = metric names,
        # columns = model names) for test and training data respectively.
        self.ev_test = ev_test
        self.ev_train = ev_train
        self.use_training = use_training
        self.precision = precision
        # "Lower is better" metrics; the table highlighter uses min instead
        # of max for these. A caller-supplied list extends the defaults.
        if isinstance(less_is_more, list):
            self.less_is_more = [
                "hamming_loss",
                "hinge_loss",
                "mse",
                "mae",
            ] + less_is_more
        else:
            self.less_is_more = ["hamming_loss", "hinge_loss", "mse", "mae"]

    def __repr__(self):
        # Rendering happens as a display side effect; the repr string itself
        # is intentionally empty.
        self.show_in_notebook()
        return ""

    @property
    def precision(self):
        # Number of decimal places used for metric display.
        return self._precision

    @precision.setter
    def precision(self, value):
        """
        Set precision to @property of the class.

        Accepts a non-negative int, or a float with an integral value
        (converted to int). Anything else raises TypeError/ValueError.
        """
        if not isinstance(value, int):
            if not (isinstance(value, float) and value.is_integer()):
                raise TypeError("'value' must be integer")
            value = int(value)
        if value < 0:
            raise ValueError("'value' must be non-negative")
        self._precision = value

    # NOTE(review): the default binds the class-level dict at definition time;
    # mutating the passed `labels` would alter DEFAULT_LABELS_MAP for all
    # callers — confirm callers never mutate it.
    def show_in_notebook(self, labels=DEFAULT_LABELS_MAP):
        """
        Visualizes evaluation metrics as a color coded table.

        Parameters
        ----------
        labels : dictionary
            map printing specific labels for metrics display

        Returns
        -------
        Nothing
        """

        def highlight_max(s):
            """Highlight the best value in a Series light green.

            Parameters
            ----------
            s : series object
                the series being evaluated

            Returns
            -------
            list
                containing background color data or empty if not best
            """
            # For "lower is better" metrics the minimum is highlighted.
            if s.name not in self.less_is_more:
                is_max = s == s.max()
            else:
                is_max = s == s.min()
            return ["background-color: lightgreen" if v else "" for v in is_max]

        # NOTE(review): table_styles appears unused in this method body —
        # confirm whether it was meant to be passed to the Styler.
        table_styles = [
            dict(props=[("text-align", "right")]),
            dict(selector="caption", props=[("caption-side", "top")]),
        ]

        def _pretty_label(df, labels, copy=False):
            """
            Output specified labels in proper format.
            If the labels are provided then use them. Otherwise, use default.

            Parameters
            ----------
            labels : dictionary
                map printing specific labels for metrics display

            Returns
            -------
            dataframe
                dataframe with index names modified according to input labels
            """
            # NOTE(review): with the default copy=False the inplace renames
            # mutate the caller's frame — confirm this is intended.
            if copy:
                df = df.copy()
            for k, v in labels.items():
                df.rename(index={k: v}, inplace=True)
            return df

        @runtime_dependency(
            module="IPython", install_from=OptionalDependency.NOTEBOOK
        )
        @runtime_dependency(
            module="ipywidgets",
            object="HTML",
            install_from=OptionalDependency.NOTEBOOK,
        )
        def _display_metrics(df, data_name, labels, precision):
            """
            display metrics on web page

            Parameters
            ----------
            df : dataframe
                metrics in dataframe format
            data_name : string
                name of data given metrics df describe
            labels : dictionary
                map printing specific labels for metrics display
            precision : int
                precision for metrics display

            Returns
            -------
            Nothing
            """
            # NOTE(review): IPython.core.display is deprecated in newer
            # IPython releases in favor of IPython.display — confirm the
            # supported IPython version range.
            from IPython.core.display import display, HTML

            display(
                HTML(
                    _pretty_label(df, labels)
                    .style.apply(highlight_max, axis=1)
                    .format(precision=precision)
                    .set_properties(**{"text-align": "center"})
                    .set_table_attributes("class=table")
                    .set_caption(
                        '<div align="left"><b style="font-size:20px;">'
                        + "Evaluation Metrics ("
                        + data_name
                        + "):</b></div>"
                    )
                    .to_html()
                )
            )

        _display_metrics(self.ev_test, "testing data", labels, self.precision)
        if self.use_training:
            _display_metrics(self.ev_train, "training data", labels, self.precision)
|
1224
|
-
|
1225
|
-
@property
def raw_metrics(self, metrics=None, use_training_data=False):
    """Returns the raw metric numbers

    Parameters
    ----------
    metrics : list, optional
        Request metrics to pull. Defaults to all.
    use_training_data : bool, optional
        Use training data to pull metrics. Defaults to False

    Returns
    -------
    dict
        The requested raw metrics for each model. If `metrics` is `None` return all.

    Examples
    --------
    >>> evaluator = ADSEvaluator(test, [model1, model2])
    >>> raw_metrics_dictionary = evaluator.raw_metrics()
    """
    # NOTE(review): this is decorated with @property, so `metrics` and
    # `use_training_data` can never be supplied by callers — attribute access
    # invokes the getter with the defaults only, and the docstring example
    # `evaluator.raw_metrics()` would actually fail (the dict returned by the
    # property is not callable). `metrics` is also unused in the body.
    # Confirm intended API before changing the signature.

    [train_met, test_met] = self.evaluations
    test_d = test_met.to_dict()
    if use_training_data and train_met is not None:
        # Training metrics are merged in under "<model>_train" keys.
        train_d = train_met.add_suffix("_train").to_dict()
        test_d.update(train_d)
    # Convert numpy arrays to plain lists so the result is JSON-friendly.
    for m, data in test_d.items():
        ret = dict()
        for k, v in data.items():
            if isinstance(v, np.ndarray):
                ret[k] = v.tolist()
            else:
                ret[k] = v
        test_d[m] = ret
    return test_d
|
1261
|
-
|
1262
|
-
@property
def metrics(self):
    """Returns evaluation metrics

    Returns
    -------
    metrics
        HTML representation of a table comparing relevant metrics.

    Examples
    --------
    >>> evaluator = ADSEvaluator(test, [model1, model2])
    >>> evaluator.metrics
    Outputs table displaying metrics.
    """

    train_df, test_df = self.evaluations
    # The training table is an empty (0, 0) frame when no training data
    # was supplied.
    has_training = train_df.shape != (0, 0)
    ev_test = test_df.loc[self.metrics_to_show]
    ev_train = train_df.loc[self.metrics_to_show] if has_training else None
    return ADSEvaluator.EvaluationMetrics(ev_test, ev_train, has_training)
|
1284
|
-
|
1285
|
-
"""
|
1286
|
-
Internal methods
|
1287
|
-
"""
|
1288
|
-
|
1289
|
-
def _get_model_name(self, name, show_full_name=False):
|
1290
|
-
name_edit = re.sub(r" ?\([^)]+\)", "", name)
|
1291
|
-
## if name only has '(' without ')', the code above wouldnt remove the argument followed by '('.
|
1292
|
-
if "(" in name_edit and not show_full_name:
|
1293
|
-
name_edit = name.split("(")[0]
|
1294
|
-
logger.info("Use `show_full_name=True` to show the full model name.")
|
1295
|
-
if name_edit in self.model_names:
|
1296
|
-
name_edit += "_1"
|
1297
|
-
num_tries = 1
|
1298
|
-
while name_edit in self.model_names:
|
1299
|
-
num_tries += 1
|
1300
|
-
name_edit = name_edit[:-1] + str(num_tries)
|
1301
|
-
if num_tries == 1:
|
1302
|
-
logger.info(
|
1303
|
-
f"The name '{name_edit[:-2]}' is used by multiple models. "
|
1304
|
-
f"Use the `rename()` method to change the name."
|
1305
|
-
)
|
1306
|
-
self.model_names.append(name_edit)
|
1307
|
-
return name_edit
|
1308
|
-
|
1309
|
-
def _score_data(self, est, X):
    """Run prediction on X and, where supported, compute prediction scores.

    Returns
    -------
    tuple
        ``(y_pred, y_score)`` where ``y_score`` is None for regression or
        models without ``predict_proba``; the positive-class probability
        column for binary classifiers; and the full probability matrix for
        multi-class classifiers.
    """
    y_pred = est.predict(X)
    y_score = None

    # we will compute y_score only for binary classification cases because only for binary classification can
    # we use it for ROC Curves and AUC etc
    if self.is_classifier and hasattr(est.est, "predict_proba"):
        if len(est.classes_) == 2:
            # positive label index is assumed to be 0 if the ADSModel does not have a positive class defined
            positive_class_index = 0
            # For prediction probability, we only consider the positive class.
            if self.positive_class is not None:
                if self.positive_class not in list(est.classes_):
                    raise ValueError(
                        "Invalid positive class '%s' for model %s. Positive class should be one of %s."
                        % (
                            self.positive_class,
                            est.est.__class__.__name__,
                            list(est.classes_),
                        )
                    )
                positive_class_index = list(est.classes_).index(self.positive_class)
            # Keep only the probability column of the positive class.
            y_score = est.predict_proba(X)[:, positive_class_index]
        else:
            # Multi-class: despite the comment above, the full probability
            # matrix is returned here (used by per-label plots downstream).
            y_score = est.predict_proba(X)
    return y_pred, y_score