oracle-ads 2.13.8__py3-none-any.whl → 2.13.9rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {oracle_ads-2.13.8.dist-info → oracle_ads-2.13.9rc0.dist-info}/METADATA +151 -151
- oracle_ads-2.13.9rc0.dist-info/RECORD +9 -0
- {oracle_ads-2.13.8.dist-info → oracle_ads-2.13.9rc0.dist-info}/WHEEL +2 -1
- {oracle_ads-2.13.8.dist-info → oracle_ads-2.13.9rc0.dist-info}/entry_points.txt +1 -2
- oracle_ads-2.13.9rc0.dist-info/top_level.txt +1 -0
- ads/aqua/__init__.py +0 -40
- ads/aqua/app.py +0 -506
- ads/aqua/cli.py +0 -96
- ads/aqua/client/__init__.py +0 -3
- ads/aqua/client/client.py +0 -836
- ads/aqua/client/openai_client.py +0 -305
- ads/aqua/common/__init__.py +0 -5
- ads/aqua/common/decorator.py +0 -125
- ads/aqua/common/entities.py +0 -266
- ads/aqua/common/enums.py +0 -122
- ads/aqua/common/errors.py +0 -109
- ads/aqua/common/utils.py +0 -1285
- ads/aqua/config/__init__.py +0 -4
- ads/aqua/config/container_config.py +0 -248
- ads/aqua/config/evaluation/__init__.py +0 -4
- ads/aqua/config/evaluation/evaluation_service_config.py +0 -147
- ads/aqua/config/utils/__init__.py +0 -4
- ads/aqua/config/utils/serializer.py +0 -339
- ads/aqua/constants.py +0 -114
- ads/aqua/data.py +0 -14
- ads/aqua/dummy_data/icon.txt +0 -1
- ads/aqua/dummy_data/oci_model_deployments.json +0 -56
- ads/aqua/dummy_data/oci_models.json +0 -1
- ads/aqua/dummy_data/readme.md +0 -26
- ads/aqua/evaluation/__init__.py +0 -8
- ads/aqua/evaluation/constants.py +0 -53
- ads/aqua/evaluation/entities.py +0 -186
- ads/aqua/evaluation/errors.py +0 -70
- ads/aqua/evaluation/evaluation.py +0 -1814
- ads/aqua/extension/__init__.py +0 -42
- ads/aqua/extension/aqua_ws_msg_handler.py +0 -76
- ads/aqua/extension/base_handler.py +0 -90
- ads/aqua/extension/common_handler.py +0 -121
- ads/aqua/extension/common_ws_msg_handler.py +0 -36
- ads/aqua/extension/deployment_handler.py +0 -298
- ads/aqua/extension/deployment_ws_msg_handler.py +0 -54
- ads/aqua/extension/errors.py +0 -30
- ads/aqua/extension/evaluation_handler.py +0 -129
- ads/aqua/extension/evaluation_ws_msg_handler.py +0 -61
- ads/aqua/extension/finetune_handler.py +0 -96
- ads/aqua/extension/model_handler.py +0 -390
- ads/aqua/extension/models/__init__.py +0 -0
- ads/aqua/extension/models/ws_models.py +0 -145
- ads/aqua/extension/models_ws_msg_handler.py +0 -50
- ads/aqua/extension/ui_handler.py +0 -282
- ads/aqua/extension/ui_websocket_handler.py +0 -130
- ads/aqua/extension/utils.py +0 -133
- ads/aqua/finetuning/__init__.py +0 -7
- ads/aqua/finetuning/constants.py +0 -23
- ads/aqua/finetuning/entities.py +0 -181
- ads/aqua/finetuning/finetuning.py +0 -731
- ads/aqua/model/__init__.py +0 -8
- ads/aqua/model/constants.py +0 -60
- ads/aqua/model/entities.py +0 -306
- ads/aqua/model/enums.py +0 -30
- ads/aqua/model/model.py +0 -2080
- ads/aqua/modeldeployment/__init__.py +0 -8
- ads/aqua/modeldeployment/constants.py +0 -10
- ads/aqua/modeldeployment/deployment.py +0 -1324
- ads/aqua/modeldeployment/entities.py +0 -653
- ads/aqua/modeldeployment/inference.py +0 -74
- ads/aqua/modeldeployment/utils.py +0 -543
- ads/aqua/resources/gpu_shapes_index.json +0 -94
- ads/aqua/server/__init__.py +0 -4
- ads/aqua/server/__main__.py +0 -24
- ads/aqua/server/app.py +0 -47
- ads/aqua/server/aqua_spec.yml +0 -1291
- ads/aqua/training/__init__.py +0 -4
- ads/aqua/training/exceptions.py +0 -476
- ads/aqua/ui.py +0 -499
- ads/automl/__init__.py +0 -9
- ads/automl/driver.py +0 -330
- ads/automl/provider.py +0 -975
- ads/bds/__init__.py +0 -5
- ads/bds/auth.py +0 -127
- ads/bds/big_data_service.py +0 -255
- ads/catalog/__init__.py +0 -19
- ads/catalog/model.py +0 -1576
- ads/catalog/notebook.py +0 -461
- ads/catalog/project.py +0 -468
- ads/catalog/summary.py +0 -178
- ads/common/__init__.py +0 -11
- ads/common/analyzer.py +0 -65
- ads/common/artifact/.model-ignore +0 -63
- ads/common/artifact/__init__.py +0 -10
- ads/common/auth.py +0 -1122
- ads/common/card_identifier.py +0 -83
- ads/common/config.py +0 -647
- ads/common/data.py +0 -165
- ads/common/decorator/__init__.py +0 -9
- ads/common/decorator/argument_to_case.py +0 -88
- ads/common/decorator/deprecate.py +0 -69
- ads/common/decorator/require_nonempty_arg.py +0 -65
- ads/common/decorator/runtime_dependency.py +0 -178
- ads/common/decorator/threaded.py +0 -97
- ads/common/decorator/utils.py +0 -35
- ads/common/dsc_file_system.py +0 -303
- ads/common/error.py +0 -14
- ads/common/extended_enum.py +0 -81
- ads/common/function/__init__.py +0 -5
- ads/common/function/fn_util.py +0 -142
- ads/common/function/func_conf.yaml +0 -25
- ads/common/ipython.py +0 -76
- ads/common/model.py +0 -679
- ads/common/model_artifact.py +0 -1759
- ads/common/model_artifact_schema.json +0 -107
- ads/common/model_export_util.py +0 -664
- ads/common/model_metadata.py +0 -24
- ads/common/object_storage_details.py +0 -296
- ads/common/oci_client.py +0 -175
- ads/common/oci_datascience.py +0 -46
- ads/common/oci_logging.py +0 -1144
- ads/common/oci_mixin.py +0 -957
- ads/common/oci_resource.py +0 -136
- ads/common/serializer.py +0 -559
- ads/common/utils.py +0 -1852
- ads/common/word_lists.py +0 -1491
- ads/common/work_request.py +0 -189
- ads/data_labeling/__init__.py +0 -13
- ads/data_labeling/boundingbox.py +0 -253
- ads/data_labeling/constants.py +0 -47
- ads/data_labeling/data_labeling_service.py +0 -244
- ads/data_labeling/interface/__init__.py +0 -5
- ads/data_labeling/interface/loader.py +0 -16
- ads/data_labeling/interface/parser.py +0 -16
- ads/data_labeling/interface/reader.py +0 -23
- ads/data_labeling/loader/__init__.py +0 -5
- ads/data_labeling/loader/file_loader.py +0 -241
- ads/data_labeling/metadata.py +0 -110
- ads/data_labeling/mixin/__init__.py +0 -5
- ads/data_labeling/mixin/data_labeling.py +0 -232
- ads/data_labeling/ner.py +0 -129
- ads/data_labeling/parser/__init__.py +0 -5
- ads/data_labeling/parser/dls_record_parser.py +0 -388
- ads/data_labeling/parser/export_metadata_parser.py +0 -94
- ads/data_labeling/parser/export_record_parser.py +0 -473
- ads/data_labeling/reader/__init__.py +0 -5
- ads/data_labeling/reader/dataset_reader.py +0 -574
- ads/data_labeling/reader/dls_record_reader.py +0 -121
- ads/data_labeling/reader/export_record_reader.py +0 -62
- ads/data_labeling/reader/jsonl_reader.py +0 -75
- ads/data_labeling/reader/metadata_reader.py +0 -203
- ads/data_labeling/reader/record_reader.py +0 -263
- ads/data_labeling/record.py +0 -52
- ads/data_labeling/visualizer/__init__.py +0 -5
- ads/data_labeling/visualizer/image_visualizer.py +0 -525
- ads/data_labeling/visualizer/text_visualizer.py +0 -357
- ads/database/__init__.py +0 -5
- ads/database/connection.py +0 -338
- ads/dataset/__init__.py +0 -10
- ads/dataset/capabilities.md +0 -51
- ads/dataset/classification_dataset.py +0 -339
- ads/dataset/correlation.py +0 -226
- ads/dataset/correlation_plot.py +0 -563
- ads/dataset/dask_series.py +0 -173
- ads/dataset/dataframe_transformer.py +0 -110
- ads/dataset/dataset.py +0 -1979
- ads/dataset/dataset_browser.py +0 -360
- ads/dataset/dataset_with_target.py +0 -995
- ads/dataset/exception.py +0 -25
- ads/dataset/factory.py +0 -987
- ads/dataset/feature_engineering_transformer.py +0 -35
- ads/dataset/feature_selection.py +0 -107
- ads/dataset/forecasting_dataset.py +0 -26
- ads/dataset/helper.py +0 -1450
- ads/dataset/label_encoder.py +0 -99
- ads/dataset/mixin/__init__.py +0 -5
- ads/dataset/mixin/dataset_accessor.py +0 -134
- ads/dataset/pipeline.py +0 -58
- ads/dataset/plot.py +0 -710
- ads/dataset/progress.py +0 -86
- ads/dataset/recommendation.py +0 -297
- ads/dataset/recommendation_transformer.py +0 -502
- ads/dataset/regression_dataset.py +0 -14
- ads/dataset/sampled_dataset.py +0 -1050
- ads/dataset/target.py +0 -98
- ads/dataset/timeseries.py +0 -18
- ads/dbmixin/__init__.py +0 -5
- ads/dbmixin/db_pandas_accessor.py +0 -153
- ads/environment/__init__.py +0 -9
- ads/environment/ml_runtime.py +0 -66
- ads/evaluations/README.md +0 -14
- ads/evaluations/__init__.py +0 -109
- ads/evaluations/evaluation_plot.py +0 -983
- ads/evaluations/evaluator.py +0 -1334
- ads/evaluations/statistical_metrics.py +0 -543
- ads/experiments/__init__.py +0 -9
- ads/experiments/capabilities.md +0 -0
- ads/explanations/__init__.py +0 -21
- ads/explanations/base_explainer.py +0 -142
- ads/explanations/capabilities.md +0 -83
- ads/explanations/explainer.py +0 -190
- ads/explanations/mlx_global_explainer.py +0 -1050
- ads/explanations/mlx_interface.py +0 -386
- ads/explanations/mlx_local_explainer.py +0 -287
- ads/explanations/mlx_whatif_explainer.py +0 -201
- ads/feature_engineering/__init__.py +0 -20
- ads/feature_engineering/accessor/__init__.py +0 -5
- ads/feature_engineering/accessor/dataframe_accessor.py +0 -535
- ads/feature_engineering/accessor/mixin/__init__.py +0 -5
- ads/feature_engineering/accessor/mixin/correlation.py +0 -166
- ads/feature_engineering/accessor/mixin/eda_mixin.py +0 -266
- ads/feature_engineering/accessor/mixin/eda_mixin_series.py +0 -85
- ads/feature_engineering/accessor/mixin/feature_types_mixin.py +0 -211
- ads/feature_engineering/accessor/mixin/utils.py +0 -65
- ads/feature_engineering/accessor/series_accessor.py +0 -431
- ads/feature_engineering/adsimage/__init__.py +0 -5
- ads/feature_engineering/adsimage/image.py +0 -192
- ads/feature_engineering/adsimage/image_reader.py +0 -170
- ads/feature_engineering/adsimage/interface/__init__.py +0 -5
- ads/feature_engineering/adsimage/interface/reader.py +0 -19
- ads/feature_engineering/adsstring/__init__.py +0 -7
- ads/feature_engineering/adsstring/oci_language/__init__.py +0 -8
- ads/feature_engineering/adsstring/string/__init__.py +0 -8
- ads/feature_engineering/data_schema.json +0 -57
- ads/feature_engineering/dataset/__init__.py +0 -5
- ads/feature_engineering/dataset/zip_code_data.py +0 -42062
- ads/feature_engineering/exceptions.py +0 -40
- ads/feature_engineering/feature_type/__init__.py +0 -133
- ads/feature_engineering/feature_type/address.py +0 -184
- ads/feature_engineering/feature_type/adsstring/__init__.py +0 -5
- ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +0 -164
- ads/feature_engineering/feature_type/adsstring/oci_language.py +0 -93
- ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +0 -5
- ads/feature_engineering/feature_type/adsstring/parsers/base.py +0 -47
- ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +0 -96
- ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +0 -221
- ads/feature_engineering/feature_type/adsstring/string.py +0 -258
- ads/feature_engineering/feature_type/base.py +0 -58
- ads/feature_engineering/feature_type/boolean.py +0 -183
- ads/feature_engineering/feature_type/category.py +0 -146
- ads/feature_engineering/feature_type/constant.py +0 -137
- ads/feature_engineering/feature_type/continuous.py +0 -151
- ads/feature_engineering/feature_type/creditcard.py +0 -314
- ads/feature_engineering/feature_type/datetime.py +0 -190
- ads/feature_engineering/feature_type/discrete.py +0 -134
- ads/feature_engineering/feature_type/document.py +0 -43
- ads/feature_engineering/feature_type/gis.py +0 -251
- ads/feature_engineering/feature_type/handler/__init__.py +0 -5
- ads/feature_engineering/feature_type/handler/feature_validator.py +0 -524
- ads/feature_engineering/feature_type/handler/feature_warning.py +0 -319
- ads/feature_engineering/feature_type/handler/warnings.py +0 -128
- ads/feature_engineering/feature_type/integer.py +0 -142
- ads/feature_engineering/feature_type/ip_address.py +0 -144
- ads/feature_engineering/feature_type/ip_address_v4.py +0 -138
- ads/feature_engineering/feature_type/ip_address_v6.py +0 -138
- ads/feature_engineering/feature_type/lat_long.py +0 -256
- ads/feature_engineering/feature_type/object.py +0 -43
- ads/feature_engineering/feature_type/ordinal.py +0 -132
- ads/feature_engineering/feature_type/phone_number.py +0 -135
- ads/feature_engineering/feature_type/string.py +0 -171
- ads/feature_engineering/feature_type/text.py +0 -93
- ads/feature_engineering/feature_type/unknown.py +0 -43
- ads/feature_engineering/feature_type/zip_code.py +0 -164
- ads/feature_engineering/feature_type_manager.py +0 -406
- ads/feature_engineering/schema.py +0 -795
- ads/feature_engineering/utils.py +0 -245
- ads/feature_store/.readthedocs.yaml +0 -19
- ads/feature_store/README.md +0 -65
- ads/feature_store/__init__.py +0 -9
- ads/feature_store/common/__init__.py +0 -0
- ads/feature_store/common/enums.py +0 -339
- ads/feature_store/common/exceptions.py +0 -18
- ads/feature_store/common/spark_session_singleton.py +0 -125
- ads/feature_store/common/utils/__init__.py +0 -0
- ads/feature_store/common/utils/base64_encoder_decoder.py +0 -72
- ads/feature_store/common/utils/feature_schema_mapper.py +0 -283
- ads/feature_store/common/utils/transformation_utils.py +0 -82
- ads/feature_store/common/utils/utility.py +0 -403
- ads/feature_store/data_validation/__init__.py +0 -0
- ads/feature_store/data_validation/great_expectation.py +0 -129
- ads/feature_store/dataset.py +0 -1230
- ads/feature_store/dataset_job.py +0 -530
- ads/feature_store/docs/Dockerfile +0 -7
- ads/feature_store/docs/Makefile +0 -44
- ads/feature_store/docs/conf.py +0 -28
- ads/feature_store/docs/requirements.txt +0 -14
- ads/feature_store/docs/source/ads.feature_store.query.rst +0 -20
- ads/feature_store/docs/source/cicd.rst +0 -137
- ads/feature_store/docs/source/conf.py +0 -86
- ads/feature_store/docs/source/data_versioning.rst +0 -33
- ads/feature_store/docs/source/dataset.rst +0 -388
- ads/feature_store/docs/source/dataset_job.rst +0 -27
- ads/feature_store/docs/source/demo.rst +0 -70
- ads/feature_store/docs/source/entity.rst +0 -78
- ads/feature_store/docs/source/feature_group.rst +0 -624
- ads/feature_store/docs/source/feature_group_job.rst +0 -29
- ads/feature_store/docs/source/feature_store.rst +0 -122
- ads/feature_store/docs/source/feature_store_class.rst +0 -123
- ads/feature_store/docs/source/feature_validation.rst +0 -66
- ads/feature_store/docs/source/figures/cicd.png +0 -0
- ads/feature_store/docs/source/figures/data_validation.png +0 -0
- ads/feature_store/docs/source/figures/data_versioning.png +0 -0
- ads/feature_store/docs/source/figures/dataset.gif +0 -0
- ads/feature_store/docs/source/figures/dataset.png +0 -0
- ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
- ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
- ads/feature_store/docs/source/figures/entity.png +0 -0
- ads/feature_store/docs/source/figures/feature_group.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
- ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
- ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
- ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
- ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
- ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
- ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
- ads/feature_store/docs/source/figures/overview.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
- ads/feature_store/docs/source/figures/stats_1.png +0 -0
- ads/feature_store/docs/source/figures/stats_2.png +0 -0
- ads/feature_store/docs/source/figures/stats_d.png +0 -0
- ads/feature_store/docs/source/figures/stats_fg.png +0 -0
- ads/feature_store/docs/source/figures/transformation.png +0 -0
- ads/feature_store/docs/source/figures/transformations.gif +0 -0
- ads/feature_store/docs/source/figures/validation.png +0 -0
- ads/feature_store/docs/source/figures/validation_fg.png +0 -0
- ads/feature_store/docs/source/figures/validation_results.png +0 -0
- ads/feature_store/docs/source/figures/validation_summary.png +0 -0
- ads/feature_store/docs/source/index.rst +0 -81
- ads/feature_store/docs/source/module.rst +0 -8
- ads/feature_store/docs/source/notebook.rst +0 -94
- ads/feature_store/docs/source/overview.rst +0 -47
- ads/feature_store/docs/source/quickstart.rst +0 -176
- ads/feature_store/docs/source/release_notes.rst +0 -194
- ads/feature_store/docs/source/setup_feature_store.rst +0 -81
- ads/feature_store/docs/source/statistics.rst +0 -58
- ads/feature_store/docs/source/transformation.rst +0 -199
- ads/feature_store/docs/source/ui.rst +0 -65
- ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +0 -66
- ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +0 -192
- ads/feature_store/docs/source/user_guides.setup.terraform.rst +0 -338
- ads/feature_store/entity.py +0 -718
- ads/feature_store/execution_strategy/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +0 -375
- ads/feature_store/execution_strategy/engine/__init__.py +0 -0
- ads/feature_store/execution_strategy/engine/spark_engine.py +0 -316
- ads/feature_store/execution_strategy/execution_strategy.py +0 -113
- ads/feature_store/execution_strategy/execution_strategy_provider.py +0 -47
- ads/feature_store/execution_strategy/spark/__init__.py +0 -0
- ads/feature_store/execution_strategy/spark/spark_execution.py +0 -618
- ads/feature_store/feature.py +0 -192
- ads/feature_store/feature_group.py +0 -1494
- ads/feature_store/feature_group_expectation.py +0 -346
- ads/feature_store/feature_group_job.py +0 -602
- ads/feature_store/feature_lineage/__init__.py +0 -0
- ads/feature_store/feature_lineage/graphviz_service.py +0 -180
- ads/feature_store/feature_option_details.py +0 -50
- ads/feature_store/feature_statistics/__init__.py +0 -0
- ads/feature_store/feature_statistics/statistics_service.py +0 -99
- ads/feature_store/feature_store.py +0 -699
- ads/feature_store/feature_store_registrar.py +0 -518
- ads/feature_store/input_feature_detail.py +0 -149
- ads/feature_store/mixin/__init__.py +0 -4
- ads/feature_store/mixin/oci_feature_store.py +0 -145
- ads/feature_store/model_details.py +0 -73
- ads/feature_store/query/__init__.py +0 -0
- ads/feature_store/query/filter.py +0 -266
- ads/feature_store/query/generator/__init__.py +0 -0
- ads/feature_store/query/generator/query_generator.py +0 -298
- ads/feature_store/query/join.py +0 -161
- ads/feature_store/query/query.py +0 -403
- ads/feature_store/query/validator/__init__.py +0 -0
- ads/feature_store/query/validator/query_validator.py +0 -57
- ads/feature_store/response/__init__.py +0 -0
- ads/feature_store/response/response_builder.py +0 -68
- ads/feature_store/service/__init__.py +0 -0
- ads/feature_store/service/oci_dataset.py +0 -139
- ads/feature_store/service/oci_dataset_job.py +0 -199
- ads/feature_store/service/oci_entity.py +0 -125
- ads/feature_store/service/oci_feature_group.py +0 -164
- ads/feature_store/service/oci_feature_group_job.py +0 -214
- ads/feature_store/service/oci_feature_store.py +0 -182
- ads/feature_store/service/oci_lineage.py +0 -87
- ads/feature_store/service/oci_transformation.py +0 -104
- ads/feature_store/statistics/__init__.py +0 -0
- ads/feature_store/statistics/abs_feature_value.py +0 -49
- ads/feature_store/statistics/charts/__init__.py +0 -0
- ads/feature_store/statistics/charts/abstract_feature_plot.py +0 -37
- ads/feature_store/statistics/charts/box_plot.py +0 -148
- ads/feature_store/statistics/charts/frequency_distribution.py +0 -65
- ads/feature_store/statistics/charts/probability_distribution.py +0 -68
- ads/feature_store/statistics/charts/top_k_frequent_elements.py +0 -98
- ads/feature_store/statistics/feature_stat.py +0 -126
- ads/feature_store/statistics/generic_feature_value.py +0 -33
- ads/feature_store/statistics/statistics.py +0 -41
- ads/feature_store/statistics_config.py +0 -101
- ads/feature_store/templates/feature_store_template.yaml +0 -45
- ads/feature_store/transformation.py +0 -499
- ads/feature_store/validation_output.py +0 -57
- ads/hpo/__init__.py +0 -9
- ads/hpo/_imports.py +0 -91
- ads/hpo/ads_search_space.py +0 -439
- ads/hpo/distributions.py +0 -325
- ads/hpo/objective.py +0 -280
- ads/hpo/search_cv.py +0 -1657
- ads/hpo/stopping_criterion.py +0 -75
- ads/hpo/tuner_artifact.py +0 -413
- ads/hpo/utils.py +0 -91
- ads/hpo/validation.py +0 -140
- ads/hpo/visualization/__init__.py +0 -5
- ads/hpo/visualization/_contour.py +0 -23
- ads/hpo/visualization/_edf.py +0 -20
- ads/hpo/visualization/_intermediate_values.py +0 -21
- ads/hpo/visualization/_optimization_history.py +0 -25
- ads/hpo/visualization/_parallel_coordinate.py +0 -169
- ads/hpo/visualization/_param_importances.py +0 -26
- ads/jobs/__init__.py +0 -53
- ads/jobs/ads_job.py +0 -663
- ads/jobs/builders/__init__.py +0 -5
- ads/jobs/builders/base.py +0 -156
- ads/jobs/builders/infrastructure/__init__.py +0 -6
- ads/jobs/builders/infrastructure/base.py +0 -165
- ads/jobs/builders/infrastructure/dataflow.py +0 -1252
- ads/jobs/builders/infrastructure/dsc_job.py +0 -1894
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +0 -1233
- ads/jobs/builders/infrastructure/utils.py +0 -65
- ads/jobs/builders/runtimes/__init__.py +0 -5
- ads/jobs/builders/runtimes/artifact.py +0 -338
- ads/jobs/builders/runtimes/base.py +0 -325
- ads/jobs/builders/runtimes/container_runtime.py +0 -242
- ads/jobs/builders/runtimes/python_runtime.py +0 -1016
- ads/jobs/builders/runtimes/pytorch_runtime.py +0 -204
- ads/jobs/cli.py +0 -104
- ads/jobs/env_var_parser.py +0 -131
- ads/jobs/extension.py +0 -160
- ads/jobs/schema/__init__.py +0 -5
- ads/jobs/schema/infrastructure_schema.json +0 -116
- ads/jobs/schema/job_schema.json +0 -42
- ads/jobs/schema/runtime_schema.json +0 -183
- ads/jobs/schema/validator.py +0 -141
- ads/jobs/serializer.py +0 -296
- ads/jobs/templates/__init__.py +0 -5
- ads/jobs/templates/container.py +0 -6
- ads/jobs/templates/driver_notebook.py +0 -177
- ads/jobs/templates/driver_oci.py +0 -500
- ads/jobs/templates/driver_python.py +0 -48
- ads/jobs/templates/driver_pytorch.py +0 -852
- ads/jobs/templates/driver_utils.py +0 -615
- ads/jobs/templates/hostname_from_env.c +0 -55
- ads/jobs/templates/oci_metrics.py +0 -181
- ads/jobs/utils.py +0 -104
- ads/llm/__init__.py +0 -28
- ads/llm/autogen/__init__.py +0 -2
- ads/llm/autogen/constants.py +0 -15
- ads/llm/autogen/reports/__init__.py +0 -2
- ads/llm/autogen/reports/base.py +0 -67
- ads/llm/autogen/reports/data.py +0 -103
- ads/llm/autogen/reports/session.py +0 -526
- ads/llm/autogen/reports/templates/chat_box.html +0 -13
- ads/llm/autogen/reports/templates/chat_box_lt.html +0 -5
- ads/llm/autogen/reports/templates/chat_box_rt.html +0 -6
- ads/llm/autogen/reports/utils.py +0 -56
- ads/llm/autogen/v02/__init__.py +0 -4
- ads/llm/autogen/v02/client.py +0 -295
- ads/llm/autogen/v02/log_handlers/__init__.py +0 -2
- ads/llm/autogen/v02/log_handlers/oci_file_handler.py +0 -83
- ads/llm/autogen/v02/loggers/__init__.py +0 -6
- ads/llm/autogen/v02/loggers/metric_logger.py +0 -320
- ads/llm/autogen/v02/loggers/session_logger.py +0 -580
- ads/llm/autogen/v02/loggers/utils.py +0 -86
- ads/llm/autogen/v02/runtime_logging.py +0 -163
- ads/llm/chain.py +0 -268
- ads/llm/chat_template.py +0 -31
- ads/llm/deploy.py +0 -63
- ads/llm/guardrails/__init__.py +0 -5
- ads/llm/guardrails/base.py +0 -442
- ads/llm/guardrails/huggingface.py +0 -44
- ads/llm/langchain/__init__.py +0 -5
- ads/llm/langchain/plugins/__init__.py +0 -5
- ads/llm/langchain/plugins/chat_models/__init__.py +0 -5
- ads/llm/langchain/plugins/chat_models/oci_data_science.py +0 -1027
- ads/llm/langchain/plugins/embeddings/__init__.py +0 -4
- ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +0 -184
- ads/llm/langchain/plugins/llms/__init__.py +0 -5
- ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +0 -979
- ads/llm/requirements.txt +0 -3
- ads/llm/serialize.py +0 -219
- ads/llm/serializers/__init__.py +0 -0
- ads/llm/serializers/retrieval_qa.py +0 -153
- ads/llm/serializers/runnable_parallel.py +0 -27
- ads/llm/templates/score_chain.jinja2 +0 -155
- ads/llm/templates/tool_chat_template_hermes.jinja +0 -130
- ads/llm/templates/tool_chat_template_mistral_parallel.jinja +0 -94
- ads/model/__init__.py +0 -52
- ads/model/artifact.py +0 -573
- ads/model/artifact_downloader.py +0 -254
- ads/model/artifact_uploader.py +0 -267
- ads/model/base_properties.py +0 -238
- ads/model/common/.model-ignore +0 -66
- ads/model/common/__init__.py +0 -5
- ads/model/common/utils.py +0 -142
- ads/model/datascience_model.py +0 -2635
- ads/model/deployment/__init__.py +0 -20
- ads/model/deployment/common/__init__.py +0 -5
- ads/model/deployment/common/utils.py +0 -308
- ads/model/deployment/model_deployer.py +0 -466
- ads/model/deployment/model_deployment.py +0 -1846
- ads/model/deployment/model_deployment_infrastructure.py +0 -671
- ads/model/deployment/model_deployment_properties.py +0 -493
- ads/model/deployment/model_deployment_runtime.py +0 -838
- ads/model/extractor/__init__.py +0 -5
- ads/model/extractor/automl_extractor.py +0 -74
- ads/model/extractor/embedding_onnx_extractor.py +0 -80
- ads/model/extractor/huggingface_extractor.py +0 -88
- ads/model/extractor/keras_extractor.py +0 -84
- ads/model/extractor/lightgbm_extractor.py +0 -93
- ads/model/extractor/model_info_extractor.py +0 -114
- ads/model/extractor/model_info_extractor_factory.py +0 -105
- ads/model/extractor/pytorch_extractor.py +0 -87
- ads/model/extractor/sklearn_extractor.py +0 -112
- ads/model/extractor/spark_extractor.py +0 -89
- ads/model/extractor/tensorflow_extractor.py +0 -85
- ads/model/extractor/xgboost_extractor.py +0 -94
- ads/model/framework/__init__.py +0 -5
- ads/model/framework/automl_model.py +0 -178
- ads/model/framework/embedding_onnx_model.py +0 -438
- ads/model/framework/huggingface_model.py +0 -399
- ads/model/framework/lightgbm_model.py +0 -266
- ads/model/framework/pytorch_model.py +0 -266
- ads/model/framework/sklearn_model.py +0 -250
- ads/model/framework/spark_model.py +0 -326
- ads/model/framework/tensorflow_model.py +0 -254
- ads/model/framework/xgboost_model.py +0 -258
- ads/model/generic_model.py +0 -3518
- ads/model/model_artifact_boilerplate/README.md +0 -381
- ads/model/model_artifact_boilerplate/__init__.py +0 -5
- ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +0 -5
- ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +0 -427
- ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +0 -2
- ads/model/model_artifact_boilerplate/runtime.yaml +0 -7
- ads/model/model_artifact_boilerplate/score.py +0 -61
- ads/model/model_file_description_schema.json +0 -68
- ads/model/model_introspect.py +0 -331
- ads/model/model_metadata.py +0 -1810
- ads/model/model_metadata_mixin.py +0 -460
- ads/model/model_properties.py +0 -63
- ads/model/model_version_set.py +0 -739
- ads/model/runtime/__init__.py +0 -5
- ads/model/runtime/env_info.py +0 -306
- ads/model/runtime/model_deployment_details.py +0 -37
- ads/model/runtime/model_provenance_details.py +0 -58
- ads/model/runtime/runtime_info.py +0 -81
- ads/model/runtime/schemas/inference_env_info_schema.yaml +0 -16
- ads/model/runtime/schemas/model_provenance_schema.yaml +0 -36
- ads/model/runtime/schemas/training_env_info_schema.yaml +0 -16
- ads/model/runtime/utils.py +0 -201
- ads/model/serde/__init__.py +0 -5
- ads/model/serde/common.py +0 -40
- ads/model/serde/model_input.py +0 -547
- ads/model/serde/model_serializer.py +0 -1184
- ads/model/service/__init__.py +0 -5
- ads/model/service/oci_datascience_model.py +0 -1076
- ads/model/service/oci_datascience_model_deployment.py +0 -500
- ads/model/service/oci_datascience_model_version_set.py +0 -176
- ads/model/transformer/__init__.py +0 -5
- ads/model/transformer/onnx_transformer.py +0 -324
- ads/mysqldb/__init__.py +0 -5
- ads/mysqldb/mysql_db.py +0 -227
- ads/opctl/__init__.py +0 -18
- ads/opctl/anomaly_detection.py +0 -11
- ads/opctl/backend/__init__.py +0 -5
- ads/opctl/backend/ads_dataflow.py +0 -353
- ads/opctl/backend/ads_ml_job.py +0 -710
- ads/opctl/backend/ads_ml_pipeline.py +0 -164
- ads/opctl/backend/ads_model_deployment.py +0 -209
- ads/opctl/backend/base.py +0 -146
- ads/opctl/backend/local.py +0 -1053
- ads/opctl/backend/marketplace/__init__.py +0 -9
- ads/opctl/backend/marketplace/helm_helper.py +0 -173
- ads/opctl/backend/marketplace/local_marketplace.py +0 -271
- ads/opctl/backend/marketplace/marketplace_backend_runner.py +0 -71
- ads/opctl/backend/marketplace/marketplace_operator_interface.py +0 -44
- ads/opctl/backend/marketplace/marketplace_operator_runner.py +0 -24
- ads/opctl/backend/marketplace/marketplace_utils.py +0 -212
- ads/opctl/backend/marketplace/models/__init__.py +0 -5
- ads/opctl/backend/marketplace/models/bearer_token.py +0 -94
- ads/opctl/backend/marketplace/models/marketplace_type.py +0 -70
- ads/opctl/backend/marketplace/models/ocir_details.py +0 -56
- ads/opctl/backend/marketplace/prerequisite_checker.py +0 -238
- ads/opctl/cli.py +0 -707
- ads/opctl/cmds.py +0 -869
- ads/opctl/conda/__init__.py +0 -5
- ads/opctl/conda/cli.py +0 -193
- ads/opctl/conda/cmds.py +0 -749
- ads/opctl/conda/config.yaml +0 -34
- ads/opctl/conda/manifest_template.yaml +0 -13
- ads/opctl/conda/multipart_uploader.py +0 -188
- ads/opctl/conda/pack.py +0 -89
- ads/opctl/config/__init__.py +0 -5
- ads/opctl/config/base.py +0 -57
- ads/opctl/config/diagnostics/__init__.py +0 -5
- ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +0 -62
- ads/opctl/config/merger.py +0 -255
- ads/opctl/config/resolver.py +0 -297
- ads/opctl/config/utils.py +0 -79
- ads/opctl/config/validator.py +0 -17
- ads/opctl/config/versioner.py +0 -68
- ads/opctl/config/yaml_parsers/__init__.py +0 -7
- ads/opctl/config/yaml_parsers/base.py +0 -58
- ads/opctl/config/yaml_parsers/distributed/__init__.py +0 -7
- ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +0 -201
- ads/opctl/constants.py +0 -66
- ads/opctl/decorator/__init__.py +0 -5
- ads/opctl/decorator/common.py +0 -129
- ads/opctl/diagnostics/__init__.py +0 -5
- ads/opctl/diagnostics/__main__.py +0 -25
- ads/opctl/diagnostics/check_distributed_job_requirements.py +0 -212
- ads/opctl/diagnostics/check_requirements.py +0 -144
- ads/opctl/diagnostics/requirement_exception.py +0 -9
- ads/opctl/distributed/README.md +0 -109
- ads/opctl/distributed/__init__.py +0 -5
- ads/opctl/distributed/certificates.py +0 -32
- ads/opctl/distributed/cli.py +0 -207
- ads/opctl/distributed/cmds.py +0 -731
- ads/opctl/distributed/common/__init__.py +0 -5
- ads/opctl/distributed/common/abstract_cluster_provider.py +0 -449
- ads/opctl/distributed/common/abstract_framework_spec_builder.py +0 -88
- ads/opctl/distributed/common/cluster_config_helper.py +0 -103
- ads/opctl/distributed/common/cluster_provider_factory.py +0 -21
- ads/opctl/distributed/common/cluster_runner.py +0 -54
- ads/opctl/distributed/common/framework_factory.py +0 -29
- ads/opctl/docker/Dockerfile.job +0 -103
- ads/opctl/docker/Dockerfile.job.arm +0 -107
- ads/opctl/docker/Dockerfile.job.gpu +0 -175
- ads/opctl/docker/base-env.yaml +0 -13
- ads/opctl/docker/cuda.repo +0 -6
- ads/opctl/docker/operator/.dockerignore +0 -0
- ads/opctl/docker/operator/Dockerfile +0 -41
- ads/opctl/docker/operator/Dockerfile.gpu +0 -85
- ads/opctl/docker/operator/cuda.repo +0 -6
- ads/opctl/docker/operator/environment.yaml +0 -8
- ads/opctl/forecast.py +0 -11
- ads/opctl/index.yaml +0 -3
- ads/opctl/model/__init__.py +0 -5
- ads/opctl/model/cli.py +0 -65
- ads/opctl/model/cmds.py +0 -73
- ads/opctl/operator/README.md +0 -4
- ads/opctl/operator/__init__.py +0 -31
- ads/opctl/operator/cli.py +0 -344
- ads/opctl/operator/cmd.py +0 -596
- ads/opctl/operator/common/__init__.py +0 -5
- ads/opctl/operator/common/backend_factory.py +0 -460
- ads/opctl/operator/common/const.py +0 -27
- ads/opctl/operator/common/data/synthetic.csv +0 -16001
- ads/opctl/operator/common/dictionary_merger.py +0 -148
- ads/opctl/operator/common/errors.py +0 -42
- ads/opctl/operator/common/operator_config.py +0 -99
- ads/opctl/operator/common/operator_loader.py +0 -811
- ads/opctl/operator/common/operator_schema.yaml +0 -130
- ads/opctl/operator/common/operator_yaml_generator.py +0 -152
- ads/opctl/operator/common/utils.py +0 -208
- ads/opctl/operator/lowcode/__init__.py +0 -5
- ads/opctl/operator/lowcode/anomaly/MLoperator +0 -16
- ads/opctl/operator/lowcode/anomaly/README.md +0 -207
- ads/opctl/operator/lowcode/anomaly/__init__.py +0 -5
- ads/opctl/operator/lowcode/anomaly/__main__.py +0 -103
- ads/opctl/operator/lowcode/anomaly/cmd.py +0 -35
- ads/opctl/operator/lowcode/anomaly/const.py +0 -167
- ads/opctl/operator/lowcode/anomaly/environment.yaml +0 -10
- ads/opctl/operator/lowcode/anomaly/model/__init__.py +0 -5
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +0 -146
- ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +0 -162
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +0 -99
- ads/opctl/operator/lowcode/anomaly/model/autots.py +0 -115
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +0 -404
- ads/opctl/operator/lowcode/anomaly/model/factory.py +0 -110
- ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +0 -78
- ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +0 -78
- ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +0 -120
- ads/opctl/operator/lowcode/anomaly/model/tods.py +0 -119
- ads/opctl/operator/lowcode/anomaly/operator_config.py +0 -127
- ads/opctl/operator/lowcode/anomaly/schema.yaml +0 -401
- ads/opctl/operator/lowcode/anomaly/utils.py +0 -88
- ads/opctl/operator/lowcode/common/__init__.py +0 -5
- ads/opctl/operator/lowcode/common/const.py +0 -10
- ads/opctl/operator/lowcode/common/data.py +0 -116
- ads/opctl/operator/lowcode/common/errors.py +0 -47
- ads/opctl/operator/lowcode/common/transformations.py +0 -296
- ads/opctl/operator/lowcode/common/utils.py +0 -293
- ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +0 -13
- ads/opctl/operator/lowcode/feature_store_marketplace/README.md +0 -30
- ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +0 -5
- ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +0 -116
- ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +0 -85
- ads/opctl/operator/lowcode/feature_store_marketplace/const.py +0 -15
- ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +0 -4
- ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +0 -32
- ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +0 -43
- ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +0 -120
- ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +0 -34
- ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +0 -386
- ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +0 -160
- ads/opctl/operator/lowcode/forecast/MLoperator +0 -25
- ads/opctl/operator/lowcode/forecast/README.md +0 -209
- ads/opctl/operator/lowcode/forecast/__init__.py +0 -5
- ads/opctl/operator/lowcode/forecast/__main__.py +0 -89
- ads/opctl/operator/lowcode/forecast/cmd.py +0 -40
- ads/opctl/operator/lowcode/forecast/const.py +0 -92
- ads/opctl/operator/lowcode/forecast/environment.yaml +0 -20
- ads/opctl/operator/lowcode/forecast/errors.py +0 -26
- ads/opctl/operator/lowcode/forecast/model/__init__.py +0 -5
- ads/opctl/operator/lowcode/forecast/model/arima.py +0 -279
- ads/opctl/operator/lowcode/forecast/model/automlx.py +0 -542
- ads/opctl/operator/lowcode/forecast/model/autots.py +0 -312
- ads/opctl/operator/lowcode/forecast/model/base_model.py +0 -863
- ads/opctl/operator/lowcode/forecast/model/factory.py +0 -106
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +0 -492
- ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +0 -243
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +0 -486
- ads/opctl/operator/lowcode/forecast/model/prophet.py +0 -445
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +0 -244
- ads/opctl/operator/lowcode/forecast/operator_config.py +0 -234
- ads/opctl/operator/lowcode/forecast/schema.yaml +0 -506
- ads/opctl/operator/lowcode/forecast/utils.py +0 -413
- ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +0 -7
- ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +0 -285
- ads/opctl/operator/lowcode/forecast/whatifserve/score.py +0 -246
- ads/opctl/operator/lowcode/pii/MLoperator +0 -17
- ads/opctl/operator/lowcode/pii/README.md +0 -208
- ads/opctl/operator/lowcode/pii/__init__.py +0 -5
- ads/opctl/operator/lowcode/pii/__main__.py +0 -78
- ads/opctl/operator/lowcode/pii/cmd.py +0 -39
- ads/opctl/operator/lowcode/pii/constant.py +0 -84
- ads/opctl/operator/lowcode/pii/environment.yaml +0 -17
- ads/opctl/operator/lowcode/pii/errors.py +0 -27
- ads/opctl/operator/lowcode/pii/model/__init__.py +0 -5
- ads/opctl/operator/lowcode/pii/model/factory.py +0 -82
- ads/opctl/operator/lowcode/pii/model/guardrails.py +0 -167
- ads/opctl/operator/lowcode/pii/model/pii.py +0 -145
- ads/opctl/operator/lowcode/pii/model/processor/__init__.py +0 -34
- ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +0 -34
- ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +0 -35
- ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +0 -225
- ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +0 -73
- ads/opctl/operator/lowcode/pii/model/processor/remover.py +0 -26
- ads/opctl/operator/lowcode/pii/model/report.py +0 -487
- ads/opctl/operator/lowcode/pii/operator_config.py +0 -95
- ads/opctl/operator/lowcode/pii/schema.yaml +0 -108
- ads/opctl/operator/lowcode/pii/utils.py +0 -43
- ads/opctl/operator/lowcode/recommender/MLoperator +0 -16
- ads/opctl/operator/lowcode/recommender/README.md +0 -206
- ads/opctl/operator/lowcode/recommender/__init__.py +0 -5
- ads/opctl/operator/lowcode/recommender/__main__.py +0 -82
- ads/opctl/operator/lowcode/recommender/cmd.py +0 -33
- ads/opctl/operator/lowcode/recommender/constant.py +0 -30
- ads/opctl/operator/lowcode/recommender/environment.yaml +0 -11
- ads/opctl/operator/lowcode/recommender/model/base_model.py +0 -212
- ads/opctl/operator/lowcode/recommender/model/factory.py +0 -56
- ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +0 -25
- ads/opctl/operator/lowcode/recommender/model/svd.py +0 -106
- ads/opctl/operator/lowcode/recommender/operator_config.py +0 -81
- ads/opctl/operator/lowcode/recommender/schema.yaml +0 -265
- ads/opctl/operator/lowcode/recommender/utils.py +0 -13
- ads/opctl/operator/runtime/__init__.py +0 -5
- ads/opctl/operator/runtime/const.py +0 -17
- ads/opctl/operator/runtime/container_runtime_schema.yaml +0 -50
- ads/opctl/operator/runtime/marketplace_runtime.py +0 -50
- ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +0 -21
- ads/opctl/operator/runtime/python_runtime_schema.yaml +0 -21
- ads/opctl/operator/runtime/runtime.py +0 -115
- ads/opctl/schema.yaml.yml +0 -36
- ads/opctl/script.py +0 -40
- ads/opctl/spark/__init__.py +0 -5
- ads/opctl/spark/cli.py +0 -43
- ads/opctl/spark/cmds.py +0 -147
- ads/opctl/templates/diagnostic_report_template.jinja2 +0 -102
- ads/opctl/utils.py +0 -344
- ads/oracledb/__init__.py +0 -5
- ads/oracledb/oracle_db.py +0 -346
- ads/pipeline/__init__.py +0 -39
- ads/pipeline/ads_pipeline.py +0 -2279
- ads/pipeline/ads_pipeline_run.py +0 -772
- ads/pipeline/ads_pipeline_step.py +0 -605
- ads/pipeline/builders/__init__.py +0 -5
- ads/pipeline/builders/infrastructure/__init__.py +0 -5
- ads/pipeline/builders/infrastructure/custom_script.py +0 -32
- ads/pipeline/cli.py +0 -119
- ads/pipeline/extension.py +0 -291
- ads/pipeline/schema/__init__.py +0 -5
- ads/pipeline/schema/cs_step_schema.json +0 -35
- ads/pipeline/schema/ml_step_schema.json +0 -31
- ads/pipeline/schema/pipeline_schema.json +0 -71
- ads/pipeline/visualizer/__init__.py +0 -5
- ads/pipeline/visualizer/base.py +0 -570
- ads/pipeline/visualizer/graph_renderer.py +0 -272
- ads/pipeline/visualizer/text_renderer.py +0 -84
- ads/secrets/__init__.py +0 -11
- ads/secrets/adb.py +0 -386
- ads/secrets/auth_token.py +0 -86
- ads/secrets/big_data_service.py +0 -365
- ads/secrets/mysqldb.py +0 -149
- ads/secrets/oracledb.py +0 -160
- ads/secrets/secrets.py +0 -407
- ads/telemetry/__init__.py +0 -7
- ads/telemetry/base.py +0 -69
- ads/telemetry/client.py +0 -125
- ads/telemetry/telemetry.py +0 -257
- ads/templates/dataflow_pyspark.jinja2 +0 -13
- ads/templates/dataflow_sparksql.jinja2 +0 -22
- ads/templates/func.jinja2 +0 -20
- ads/templates/schemas/openapi.json +0 -1740
- ads/templates/score-pkl.jinja2 +0 -173
- ads/templates/score.jinja2 +0 -322
- ads/templates/score_embedding_onnx.jinja2 +0 -202
- ads/templates/score_generic.jinja2 +0 -165
- ads/templates/score_huggingface_pipeline.jinja2 +0 -217
- ads/templates/score_lightgbm.jinja2 +0 -185
- ads/templates/score_onnx.jinja2 +0 -407
- ads/templates/score_onnx_new.jinja2 +0 -473
- ads/templates/score_oracle_automl.jinja2 +0 -185
- ads/templates/score_pyspark.jinja2 +0 -154
- ads/templates/score_pytorch.jinja2 +0 -219
- ads/templates/score_scikit-learn.jinja2 +0 -184
- ads/templates/score_tensorflow.jinja2 +0 -184
- ads/templates/score_xgboost.jinja2 +0 -178
- ads/text_dataset/__init__.py +0 -5
- ads/text_dataset/backends.py +0 -211
- ads/text_dataset/dataset.py +0 -445
- ads/text_dataset/extractor.py +0 -207
- ads/text_dataset/options.py +0 -53
- ads/text_dataset/udfs.py +0 -22
- ads/text_dataset/utils.py +0 -49
- ads/type_discovery/__init__.py +0 -9
- ads/type_discovery/abstract_detector.py +0 -21
- ads/type_discovery/constant_detector.py +0 -41
- ads/type_discovery/continuous_detector.py +0 -54
- ads/type_discovery/credit_card_detector.py +0 -99
- ads/type_discovery/datetime_detector.py +0 -92
- ads/type_discovery/discrete_detector.py +0 -118
- ads/type_discovery/document_detector.py +0 -146
- ads/type_discovery/ip_detector.py +0 -68
- ads/type_discovery/latlon_detector.py +0 -90
- ads/type_discovery/phone_number_detector.py +0 -63
- ads/type_discovery/type_discovery_driver.py +0 -87
- ads/type_discovery/typed_feature.py +0 -594
- ads/type_discovery/unknown_detector.py +0 -41
- ads/type_discovery/zipcode_detector.py +0 -48
- ads/vault/__init__.py +0 -7
- ads/vault/vault.py +0 -237
- oracle_ads-2.13.8.dist-info/RECORD +0 -858
- {oracle_ads-2.13.8.dist-info → oracle_ads-2.13.9rc0.dist-info}/licenses/LICENSE.txt +0 -0
ads/dataset/sampled_dataset.py
DELETED
@@ -1,1050 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8; -*-
|
3
|
-
|
4
|
-
# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
|
5
|
-
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
-
|
7
|
-
import matplotlib
|
8
|
-
import matplotlib.pyplot as plt
|
9
|
-
import numpy as np
|
10
|
-
import pandas as pd
|
11
|
-
import itertools
|
12
|
-
|
13
|
-
from matplotlib.patches import BoxStyle
|
14
|
-
|
15
|
-
from ads.dataset.label_encoder import DataFrameLabelEncoder
|
16
|
-
from sklearn.feature_selection import SelectKBest
|
17
|
-
from sklearn.feature_selection import chi2
|
18
|
-
from sklearn.preprocessing import MinMaxScaler
|
19
|
-
|
20
|
-
import matplotlib.font_manager
|
21
|
-
from ads.common import utils, logger
|
22
|
-
from ads.dataset.helper import (
|
23
|
-
fix_column_names,
|
24
|
-
convert_columns,
|
25
|
-
get_feature_type,
|
26
|
-
convert_to_html,
|
27
|
-
)
|
28
|
-
from ads.dataset.plot import Plotting
|
29
|
-
from ads.dataset.progress import DummyProgressBar
|
30
|
-
from ads.dataset.timeseries import Timeseries
|
31
|
-
from ads.type_discovery.type_discovery_driver import TypeDiscoveryDriver
|
32
|
-
from ads.type_discovery.typed_feature import (
|
33
|
-
DateTimeTypedFeature,
|
34
|
-
ContinuousTypedFeature,
|
35
|
-
GISTypedFeature,
|
36
|
-
ConstantTypedFeature,
|
37
|
-
CreditCardTypedFeature,
|
38
|
-
ZipcodeTypedFeature,
|
39
|
-
PhoneNumberTypedFeature,
|
40
|
-
OrdinalTypedFeature,
|
41
|
-
CategoricalTypedFeature,
|
42
|
-
DocumentTypedFeature,
|
43
|
-
AddressTypedFeature,
|
44
|
-
)
|
45
|
-
from ads.common.decorator.runtime_dependency import (
|
46
|
-
runtime_dependency,
|
47
|
-
OptionalDependency,
|
48
|
-
)
|
49
|
-
|
50
|
-
NATURAL_EARTH_DATASET = "naturalearth_lowres"
|
51
|
-
|
52
|
-
|
53
|
-
class PandasDataset(object):
|
54
|
-
"""
|
55
|
-
This class provides APIs that can work on a sampled dataset.
|
56
|
-
"""
|
57
|
-
|
58
|
-
def __init__(
    self,
    sampled_df,
    type_discovery=True,
    types=None,
    metadata=None,
    progress=None,
):
    """
    Wrap a sampled dataframe and determine a feature type for each column.

    Parameters
    ----------
    sampled_df: pandas.DataFrame
        The sampled data. Its column names are passed through ``fix_column_names``.
    type_discovery: bool, default True
        Run ``TypeDiscoveryDriver`` on the columns that are not listed in ``types``.
    types: dict, optional
        Columns whose type is supplied by the caller. Only the keys are read
        here; those columns are excluded from type discovery.
    metadata: dict, optional
        Already known feature types. When it is non-empty it is used as is and
        neither type discovery nor type inference runs.
    progress: optional
        Object exposing ``update(message)``. A new ``DummyProgressBar`` is used
        when omitted.
    """
    # None sentinels replace the former `{}` / `DummyProgressBar()` defaults,
    # which are evaluated once at definition time and shared by every call.
    types = {} if types is None else types
    progress = DummyProgressBar() if progress is None else progress

    self.client = None
    self.sampled_df = fix_column_names(sampled_df)
    self.correlation = None
    self.feature_dist_html_dict = {}
    self.feature_types = metadata if metadata is not None else {}
    self.world = None

    self.numeric_columns = self.sampled_df.select_dtypes(
        utils.numeric_pandas_dtypes()
    ).columns.values

    # Feature types are only computed when the caller did not supply metadata.
    if not self.feature_types:
        if types:
            # take the types given by the user as they are
            self._update_feature_types(types.keys())

        if type_discovery:
            try:
                progress.update("Running data type discovery")
                tdd = TypeDiscoveryDriver()
                # NOTE: this rebinds `self.feature_types`; the columns named in
                # `types` are added back by the inference pass further down.
                self.feature_types = {
                    col_name: tdd.discover(col_name, col_vals)
                    for col_name, col_vals in self.sampled_df.items()
                    if col_name not in types
                }
            except Exception:
                print(
                    f"An error occured while performing typed discovery on this dataset. Try running again with "
                    f"`type_discovery=False`"
                )
                # bare raise keeps the original traceback
                raise

            # convert dataframe columns to the data types discovered.
            self.sampled_df = convert_columns(self.sampled_df, self.feature_types)

        # every column that still has no feature type gets its inferred type
        self._update_feature_types(
            set(self.sampled_df.columns.values) - set(self.feature_types.keys())
        )

    self.sampled_df = self.sampled_df.reset_index(drop=True)
|
109
|
-
|
110
|
-
def _find_feature_subset(self, df, target_name, include_n_features=32):
    """
    Reduce the sampled dataframe to the target plus its best-scoring features.

    Parameters
    ----------
    df: pandas.DataFrame
        Frame used to score the features; must contain ``target_name``.
    target_name: str
        Name of the target column.
    include_n_features: int, default 32
        Number of features to keep (``k`` of ``SelectKBest`` with ``chi2``).

    Returns
    -------
    pandas.DataFrame
        ``self.sampled_df`` unchanged when ``df`` has at most
        ``include_n_features`` columns; otherwise ``self.sampled_df`` filtered
        to the target and the selected features. An empty dataframe is
        returned when the selection fails for any reason.
    """
    if len(df.columns) <= include_n_features:
        return self.sampled_df

    try:
        from sklearn.impute import SimpleImputer

        y = df[target_name]
        X = df.drop(columns=[target_name])
        X_columns = X.columns

        # fill gaps with the per-column mode, encode labels, scale to [0, 1]
        X = X.fillna(X.mode().iloc[0])
        X = DataFrameLabelEncoder().fit_transform(X)
        X = MinMaxScaler().fit_transform(X)
        X = SimpleImputer(strategy="most_frequent").fit_transform(X)

        est = SelectKBest(score_func=chi2, k=include_n_features)
        # only the fitted support mask is needed, not the transformed matrix
        est.fit(X, y)

        # Use the `target_name` argument: `self.target` is not an attribute
        # set by this class, and `y` above is already keyed by `target_name`.
        subset_features = [target_name] + list(
            itertools.compress(X_columns, est.get_support())
        )
        return self.sampled_df.filter(subset_features, axis=1)

    except Exception as e:
        # best effort: callers receive an empty frame instead of an exception
        print("_find_feature_subset failed: ", str(e))
        return pd.DataFrame()
|
140
|
-
|
141
|
-
def _update_multiple_outputs(self, out, msg):
    """
    Assign ``msg`` to the ``value`` attribute of one or several output objects.

    Parameters
    ----------
    out: object, list or tuple
        A single object with a ``value`` attribute, or a list/tuple of them.
    msg: str
        The message to assign.
    """
    # a lone object is handled as a one-element list
    targets = out if isinstance(out, (list, tuple)) else [out]
    for target in targets:
        target.value = msg
|
147
|
-
|
148
|
-
def _calculate_dataset_statistics(self, is_wide_dataset, out):
    """
    Compute descriptive statistics for ``self.df``, one row per column.

    Parameters
    ----------
    is_wide_dataset: bool
        When true, the per-column missing-value counts and skew are not
        computed and the result has no ``missing`` / ``skew`` columns.
    out: object, list or tuple
        Output object(s) whose ``value`` is updated with progress messages.

    Returns
    -------
    pandas.DataFrame
        Transposed ``describe(include="all")`` rounded to 2 decimals, joined
        with the ``missing`` and ``skew`` columns when they were computed.
        Cells without a value are filled with an empty string.

    Notes
    -----
    For non-wide datasets, columns of ``self.df`` that consist only of missing
    values are converted to ``object`` dtype in place.
    """
    # both stay empty for wide datasets, where they are not calculated
    df_missing = pd.DataFrame()
    df_skew = pd.DataFrame()

    if not is_wide_dataset:
        #
        # count missing values
        #
        missing_counts = {}
        for column_name in self.df.columns:
            self._update_multiple_outputs(
                out, f"calculating missing values (<code>{column_name}</code>)..."
            )
            missing_counts[column_name] = self.df[column_name].isna().sum()
        df_missing = pd.DataFrame.from_dict(
            missing_counts, orient="index", columns=["missing"]
        )

        #
        # calculate skew; columns that are not numeric or have no values keep NaN
        #
        skews = {column_name: np.nan for column_name in self.df.columns}
        for column_name in self.numeric_columns:
            self._update_multiple_outputs(
                out, f"calculating skew (<code>{column_name}</code>)..."
            )
            present = self.df[column_name].dropna()
            if len(present) > 0:
                skews[column_name] = present.skew()
        df_skew = pd.DataFrame.from_dict(skews, orient="index", columns=["skew"])

        # Columns holding nothing but missing values are cast to object. This
        # needs the missing counts, so it is tied to the non-wide branch
        # (there is no "missing" column to read for a wide dataset).
        n_rows = len(self.df)
        for col in self.df.columns:
            if missing_counts[col] == n_rows:
                self.df[col] = self.df[col].astype("object")

    #
    # compute descriptive dataset statistics
    #
    self._update_multiple_outputs(out, "calculating dataset statistics...")
    df_stats = self.df.describe(include="all").round(2)

    self._update_multiple_outputs(out, "transpose dataset statistics...")
    df_stats = df_stats.transpose()

    self._update_multiple_outputs(out, "finalizing dataset statistics...")
    df_stats = df_stats.fillna("")

    #
    # we join the stats with missing/skew if we computed them
    #
    self._update_multiple_outputs(
        out, "Assembling statistics into single result..."
    )
    if not df_missing.empty:
        df_stats = df_stats.join(df_missing).fillna("")
    if not df_skew.empty:
        df_stats = df_stats.join(df_skew).fillna("")

    return df_stats
|
211
|
-
|
212
|
-
def _generate_features_html(
    self, is_wide_dataset, n_features, df_stats, visualizations_follow
):
    """
    Render the dataset statistics table as an HTML fragment.

    Parameters
    ----------
    is_wide_dataset: bool
        When true, a paragraph explaining that only the statistics table is
        shown is placed before the table.
    n_features: int
        Number of columns quoted in that paragraph.
    df_stats: pandas.DataFrame
        The statistics table to render.
    visualizations_follow: bool
        When true, a "Feature Visualizations..." heading is appended.

    Returns
    -------
    The markup, starting with the output of ``utils.get_bootstrap_styles()``.
    """

    def _as_text(cell):
        # values whose type name mentions "float" or "int" are shown with up
        # to 8 significant digits; anything else passes through untouched
        kind = str(type(cell))
        if ("float" in kind) or ("int" in kind):
            return "{:.8g}".format(cell)
        return cell

    html = utils.get_bootstrap_styles()

    if is_wide_dataset:
        wide_notice = """<p>The dataset has too many columns ({:,}) to
            efficiently show feature visualizations, instead only showing table of
            statistics</p>"""
        html += wide_notice.format(n_features)

    html += "<p><b>• Note</b> these are computed on the entire dataset.</p>"
    html += "<hr>"

    # NaN cells are blanked before styling
    styler = df_stats.replace(np.nan, "").style
    styler = styler.set_table_styles(utils.get_dataframe_styles(max_width=125))
    styler = styler.set_table_attributes("class=table")
    styler = styler.format(_as_text)
    html += styler.to_html()

    if visualizations_follow:
        html += "<br><hr><h2>Feature Visualizations...</h2>"

    return html
|
242
|
-
|
243
|
-
def _generate_warnings_html(
    self, is_wide_dataset, n_rows, n_features, df_stats, out, accordion
):
    """
    Build the HTML for the "Warnings" accordion section.

    The following checks are made; features of type "document" are ignored:

    - 5% or more missing values (plus a second warning at 100%),
    - absolute skew of 20 or more,
    - high cardinality (every value distinct, or more than 15 distinct values),
    - 10% or more zeros (not for wide datasets - slow to compute).

    Parameters
    ----------
    is_wide_dataset: bool
        Skips the zeros check when true.
    n_rows: int
        Number of rows used as denominator for the percentages.
    n_features: int
        Not used by this method.
    df_stats: pandas.DataFrame
        Statistics table, one row per column. The ``missing``, ``skew``,
        ``unique`` and ``min`` columns are read when present; blank (``""``)
        cells are skipped.
    out: object
        Output object whose ``value`` is updated with progress messages.
    accordion: object
        Its ``set_title(3, ...)`` is called with the warning count when at
        least one warning was found.

    Returns
    -------
    The warnings markup, or ``"<h3>No Feature warnings found</h3>"``.
    """
    ignored_feature_types = ["document"]
    accum = []

    def _is_checked(column_name):
        # document features never produce warnings
        return self.feature_types[column_name]["type"] not in ignored_feature_types

    def _computed(stat_name):
        # (column name, value) pairs for the rows whose statistic is not blank.
        # Iterating the Series directly avoids `row[0]`, an integer lookup on a
        # label-indexed Series that pandas has deprecated.
        series = df_stats[stat_name]
        return series[series != ""].items()

    # 5% or more missing is a warning
    if "missing" in df_stats.columns:
        for column_name, value in _computed("missing"):
            if not _is_checked(column_name):
                continue
            missing_values = float(value)
            missing_pct = 100 * missing_values / n_rows
            if missing_pct >= 5:
                accum.append(
                    {
                        "label": "missing",
                        "message": f"<code>{column_name}</code> has {missing_values} ({missing_pct:.1f}%) missing values. Consider remove the column or replace null values.",
                    }
                )
            if missing_pct == 100:
                accum.append(
                    {
                        "label": "missing",
                        "message": f"<code>{column_name}</code> is excluded from correlation computation due to {missing_values} ({missing_pct:.1f}%) missing values.",
                    }
                )

    # abs skew >= 20 is a warning
    if "skew" in df_stats.columns:
        for column_name, value in _computed("skew"):
            if not _is_checked(column_name):
                continue
            skewness = float(value)
            if abs(skewness) >= 20:
                accum.append(
                    {
                        "label": "skew",
                        "message": f"<code>{column_name}</code> has skew of {skewness:.3f}",
                    }
                )

    # high cardinality (> 15 distinct values) is a warning
    if "unique" in df_stats.columns:
        for column_name, _ in _computed("unique"):
            if not _is_checked(column_name):
                continue
            out.value = f"Analyzing cadinalities (<code>{column_name}</code>)..."
            # recounted from the data; unlike the statistic, this counts NaN
            unique = self.df[column_name].unique().shape[0]
            if unique == n_rows:
                accum.append(
                    {
                        "label": "high-cardinality",
                        "message": f"<code>{column_name}</code> has a high cardinality: every value is distinct",
                    }
                )
            elif unique > 15:
                accum.append(
                    {
                        "label": "high-cardinality",
                        "message": f"<code>{column_name}</code> has a high cardinality: {unique} distinct values",
                    }
                )

    # 10% or more zeros is a warning; only columns that have a `min`
    # statistic are scanned
    if not is_wide_dataset and "min" in df_stats.columns:
        for column_name, _ in _computed("min"):
            if not _is_checked(column_name):
                continue
            out.value = f"Analyzing zeros (<code>{column_name}</code>)..."
            # count on the boolean mask instead of materialising a filtered frame
            zeros = int((self.df[column_name] == 0).sum())
            zeros_pct = 100 * zeros / n_rows
            if zeros_pct >= 10:
                accum.append(
                    {
                        "label": "zeros",
                        "message": f"<code>{column_name}</code> has {zeros} ({zeros_pct:.2f}%) zeros)",
                    }
                )

    #
    # collect the warnings into an HTML presentation
    #
    out.value = "Assembling results..."

    if not accum:
        return "<h3>No Feature warnings found</h3>"

    html = utils.get_bootstrap_styles()
    html += utils.highlight_text(f"{len(accum)} WARNING(S) found")

    accordion.set_title(3, f"Warnings ({len(accum)})")

    html += "<hr>"

    tr_rows = [
        f"""
            <tr style="border-top:0">
                <td>
                    {feature_warning['message']}
                <td>
                    <span class="label label-{feature_warning['label']}">{feature_warning['label']}</span>
                </td>
            </tr>
        """.strip()
        for feature_warning in accum
    ]

    html += """
        <table style="width: 100%; max-width: 100%;">
            <tbody>
                {}
            </tbody>
        </table>""".format(
        "\n".join(tr_rows)
    )

    return html
|
399
|
-
|
400
|
-
def summary(self, feature_name=None):
    """
    Display list of features & their datatypes.
    Shows the feature's meta_data if given one or more specific feature names.

    Parameters
    ----------
    feature_name: str, list, tuple or pandas.Index, optional
        The name(s) of the feature(s) to describe. When omitted, every column
        of the sampled dataset is listed with its datatype.

    Returns
    -------
    pandas.DataFrame
        Without ``feature_name``: one row per column with the columns
        ``Feature`` and ``Datatype`` (``<type>/<low_level_type>``).
        With ``feature_name``: one row per requested feature holding its
        flattened meta_data, rows grouped by ``type``, with ``type`` and
        ``low_level_type`` as the leading columns and ``"-"`` for absent values.

    Raises
    ------
    ValueError
        If none of the requested features is a column of the sampled dataset.
    """
    if feature_name is None:
        feature_n_datatype = {
            col: self.feature_types[col].type
            + "/"
            + self.feature_types[col].low_level_type
            for col in self.sampled_df.columns
        }
        return pd.DataFrame(
            list(feature_n_datatype.items()), columns=["Feature", "Datatype"]
        )

    if isinstance(feature_name, (list, tuple, pd.Index)):
        feature_names = list(feature_name)
    else:
        feature_names = [feature_name]

    list_of_dfs = []
    for col in feature_names:
        if col not in self.sampled_df.columns:
            # unknown names are skipped
            continue
        meta = {
            k: v
            for k, v in self.feature_types[col].meta_data.items()
            if k not in ("internal", "feature_name")
        }
        # `col` itself is the column label; looking it up again through
        # `str(col)` failed for column names that are not strings.
        list_of_dfs.append(
            pd.DataFrame.from_dict(
                utils.flatten(meta), orient="index", columns=[col]
            )
        )

    if not list_of_dfs:
        raise ValueError(
            f"None of the requested features {feature_names} is a column of the dataset."
        )

    df = pd.concat(list_of_dfs, axis=1).transpose().fillna("-")

    # keep features of the same type together, in order of first appearance
    df = pd.concat([df[df["type"] == t] for t in df["type"].unique()])

    # move `type` and `low_level_type` to the front
    cols = list(df)
    cols.insert(0, cols.pop(cols.index("low_level_type")))
    cols.insert(0, cols.pop(cols.index("type")))
    return df.loc[:, cols]
|
470
|
-
|
471
|
-
def timeseries(self, date_col):
    """
    Supports any plotting operations where x=datetime.

    Parameters
    ----------
    date_col: str
        The name of the datetime feature to plot against.

    Returns
    -------
    Timeseries
        A plotting object built from the date column name and the sampled
        dataframe.

    Raises
    ------
    ValueError
        If ``date_col`` has no feature type or its feature type is not a
        ``DateTimeTypedFeature``.
    """
    is_datetime = date_col in self.feature_types and isinstance(
        self.feature_types[date_col], DateTimeTypedFeature
    )
    if not is_datetime:
        raise ValueError("Not a date time column.")
    return Timeseries(date_col, self.sampled_df)
|
492
|
-
|
493
|
-
def plot(
    self, x, y=None, plot_type="infer", yscale=None, verbose=True, sample_size=0
):
    """
    Supports plotting feature distribution, and relationship between features.

    Parameters
    ----------
    x: str
        The name of the feature to plot
    y: str, optional
        Name of the feature to plot against x
    plot_type: str, default: infer
        Override the inferred plot type for certain combinations of the data types of x and y.
        By default, the best plot type is inferred based on x and y data types.
        Valid values:

        - box_plot - discrete feature vs continuous feature. Draw a box plot to show
          distributions with respect to categories,
        - scatter - continuous feature vs continuous feature. Draw a scatter plot
          with possibility of several semantic groupings.

    yscale : str, optional
        One of {"linear", "log", "symlog", "logit"}.
        The y axis scale type to apply. Can be used when either x or y is an ordinal feature.
    verbose: bool, default True
        Displays Note/Tips if True
    sample_size: int, default 0
        Number of rows to plot. ``0`` plots the whole sampled dataframe. Any
        other value is raised to at least 10000 and capped at the number of
        available rows before a random sample of that size is drawn.

    Returns
    -------
    Plotting
        The plotting object for the requested feature(s).
    """
    sample_size = int(sample_size)
    min_sample_size = 10000
    n_available = len(self.sampled_df)

    if sample_size == 0:
        sub_samp_size = n_available
        sub_samp_df = self.sampled_df
    else:
        # Apply the floor first and the cap last: capping first and then
        # raising to the floor asked `sample` for more rows than exist
        # whenever the dataframe holds fewer than `min_sample_size` rows.
        sub_samp_size = min(max(sample_size, min_sample_size), n_available)
        sub_samp_df = self.sampled_df.sample(n=sub_samp_size)

    plot = Plotting(
        sub_samp_df, self.feature_types, x, y=y, plot_type=plot_type, yscale=yscale
    )
    if verbose and len(self.df) != sub_samp_size:
        logger.info("Downsampling from dataset for graphing.")
    return plot
|
536
|
-
|
537
|
-
@runtime_dependency(module="geopandas", install_from=OptionalDependency.GEO)
|
538
|
-
def plot_gis_scatter(self, lon="longitude", lat="latitude", ax=None):
    """
    Plot the rows of the sampled dataframe as points on a world map.

    Parameters
    ----------
    lon: str, default "longitude"
        The name of the column holding the longitude
    lat: str, default "latitude"
        The name of the column holding the latitude
    ax: optional
        Axes to draw on. When omitted a new 10 x 10 figure is created.

    Notes
    -----
    When either column is missing, nothing is plotted and the available
    column names are logged instead.
    """
    columns = self.sampled_df.columns

    if lon in columns and lat in columns:
        if ax is None:
            _, ax = plt.subplots(1, figsize=(10, 10))
        gdf = geopandas.GeoDataFrame(
            self.sampled_df,
            geometry=geopandas.points_from_xy(
                self.sampled_df[lon], self.sampled_df[lat]
            ),
        )
        world = geopandas.read_file(
            geopandas.datasets.get_path(NATURAL_EARTH_DATASET)
        )
        # countries as the light grey backdrop, data points on top in blue
        ax1 = world.plot(ax=ax, color="lightgrey", linewidth=0.5, edgecolor="white")
        gdf.plot(ax=ax1, color="blue", markersize=10)
    elif len(columns) > 0:
        logger.info(
            "The available latitude and longitude columns are: "
            # column labels are not necessarily strings
            + ", ".join(map(str, columns))
            + "."
        )
    else:
        logger.info("There are no latitude and longitude columns available.")
|
575
|
-
|
576
|
-
"""
|
577
|
-
Internal methods
|
578
|
-
"""
|
579
|
-
|
580
|
-
def _update_feature_types(self, columns):
|
581
|
-
# Build feature types for columns which are not type discovered, by using the inferred type as it is
|
582
|
-
for column in columns:
|
583
|
-
self.feature_types[column] = get_feature_type(
|
584
|
-
column, self.sampled_df[column]
|
585
|
-
)
|
586
|
-
|
587
|
-
@runtime_dependency(module="geopandas", install_from=OptionalDependency.GEO)
|
588
|
-
def _visualize_feature_distribution(self, html_widget):
|
589
|
-
"""
|
590
|
-
This function is called once per dataset to generate html for feature distribution plots.
|
591
|
-
"""
|
592
|
-
if len(self.feature_dist_html_dict) > 0:
|
593
|
-
return self.feature_dist_html_dict
|
594
|
-
|
595
|
-
feature_dist_html = ""
|
596
|
-
figsize = (6.5, 2)
|
597
|
-
|
598
|
-
props = {
|
599
|
-
"boxstyle": BoxStyle("Round", pad=0),
|
600
|
-
"facecolor": "white",
|
601
|
-
"linestyle": "solid",
|
602
|
-
"linewidth": 0,
|
603
|
-
"edgecolor": "white",
|
604
|
-
}
|
605
|
-
|
606
|
-
font = {"size": 10}
|
607
|
-
|
608
|
-
matplotlib.rc("font", **font)
|
609
|
-
|
610
|
-
red_square = dict(markerfacecolor="r", marker="s")
|
611
|
-
|
612
|
-
blues = [
|
613
|
-
"#AED6F1",
|
614
|
-
"#85C1E9",
|
615
|
-
"#5DADE2",
|
616
|
-
"#3498DB",
|
617
|
-
"#2E86C1",
|
618
|
-
"#2874A6",
|
619
|
-
"#1B4F72",
|
620
|
-
]
|
621
|
-
bright_colors = ["red", "green", "blue", "yellow", "green"]
|
622
|
-
|
623
|
-
for col_index, col in enumerate(self.sampled_df.columns):
|
624
|
-
feature_metadata = self.feature_types[col].meta_data
|
625
|
-
text = "\n".join(
|
626
|
-
[
|
627
|
-
f"{col}\n",
|
628
|
-
" - type: {} ({})".format(
|
629
|
-
self.feature_types[col].type,
|
630
|
-
self.feature_types[col].low_level_type,
|
631
|
-
),
|
632
|
-
" - missing_percentage: {:.1f}%".format(
|
633
|
-
self.feature_types[col].missing_percentage
|
634
|
-
),
|
635
|
-
]
|
636
|
-
)
|
637
|
-
|
638
|
-
fig, ax = PandasDataset._init_fig_ax(figsize)
|
639
|
-
|
640
|
-
if isinstance(self.feature_types[col], ContinuousTypedFeature):
|
641
|
-
text += PandasDataset._format_stats(
|
642
|
-
self.feature_types[col].type, feature_metadata["stats"]
|
643
|
-
)
|
644
|
-
ax.text(
|
645
|
-
-1.1,
|
646
|
-
1,
|
647
|
-
text,
|
648
|
-
transform=ax.transAxes,
|
649
|
-
verticalalignment="top",
|
650
|
-
bbox=props,
|
651
|
-
)
|
652
|
-
self.sampled_df[col].plot(
|
653
|
-
kind="box",
|
654
|
-
vert=False,
|
655
|
-
flierprops=red_square,
|
656
|
-
ax=ax,
|
657
|
-
figsize=figsize,
|
658
|
-
)
|
659
|
-
|
660
|
-
elif isinstance(self.feature_types[col], DateTimeTypedFeature):
|
661
|
-
text += PandasDataset._format_stats(
|
662
|
-
self.feature_types[col].type, feature_metadata["stats"]
|
663
|
-
)
|
664
|
-
ax.text(
|
665
|
-
-1.1,
|
666
|
-
1,
|
667
|
-
text,
|
668
|
-
transform=ax.transAxes,
|
669
|
-
verticalalignment="top",
|
670
|
-
bbox=props,
|
671
|
-
)
|
672
|
-
from matplotlib.dates import AutoDateFormatter, AutoDateLocator
|
673
|
-
|
674
|
-
loc = AutoDateLocator(interval_multiples=False)
|
675
|
-
ax.xaxis.set_major_locator(loc)
|
676
|
-
ax.xaxis.set_major_formatter(AutoDateFormatter(loc))
|
677
|
-
self.sampled_df[col].hist(
|
678
|
-
bins=50,
|
679
|
-
grid=False,
|
680
|
-
xrot=45,
|
681
|
-
ax=ax,
|
682
|
-
rwidth=0.95,
|
683
|
-
color=blues[-1],
|
684
|
-
figsize=figsize,
|
685
|
-
)
|
686
|
-
|
687
|
-
elif isinstance(self.feature_types[col], GISTypedFeature):
|
688
|
-
text += PandasDataset._format_stats(
|
689
|
-
self.feature_types[col].type, feature_metadata["stats"]
|
690
|
-
)
|
691
|
-
ax.text(
|
692
|
-
-1.1,
|
693
|
-
1,
|
694
|
-
text,
|
695
|
-
transform=ax.transAxes,
|
696
|
-
verticalalignment="top",
|
697
|
-
bbox=props,
|
698
|
-
)
|
699
|
-
df = pd.DataFrame(
|
700
|
-
feature_metadata["internal"]["sample"], columns=["lat", "lon"]
|
701
|
-
)
|
702
|
-
gdf = geopandas.GeoDataFrame(
|
703
|
-
df, geometry=geopandas.points_from_xy(df["lon"], df["lat"])
|
704
|
-
)
|
705
|
-
|
706
|
-
if not self.world:
|
707
|
-
self.world = geopandas.read_file(
|
708
|
-
geopandas.datasets.get_path(NATURAL_EARTH_DATASET)
|
709
|
-
)
|
710
|
-
|
711
|
-
self.world.plot(
|
712
|
-
ax=ax, color="lightgrey", linewidth=0.5, edgecolor="white"
|
713
|
-
)
|
714
|
-
gdf.plot(ax=ax, color="blue", markersize=10)
|
715
|
-
ax.set_aspect("auto")
|
716
|
-
|
717
|
-
elif (
|
718
|
-
isinstance(self.feature_types[col], ConstantTypedFeature)
|
719
|
-
and feature_metadata["missing_percentage"] < 100
|
720
|
-
):
|
721
|
-
text += PandasDataset._format_stats(
|
722
|
-
self.feature_types[col].type, feature_metadata["stats"]
|
723
|
-
)
|
724
|
-
ax.text(
|
725
|
-
-1.1,
|
726
|
-
1,
|
727
|
-
text,
|
728
|
-
transform=ax.transAxes,
|
729
|
-
verticalalignment="top",
|
730
|
-
bbox=props,
|
731
|
-
)
|
732
|
-
pd.Series(
|
733
|
-
feature_metadata["internal"]["counts"],
|
734
|
-
name=col,
|
735
|
-
index=feature_metadata["internal"]["counts"].keys(),
|
736
|
-
).plot(kind="barh", ax=ax, width=0.95, figsize=figsize, color=["black"])
|
737
|
-
|
738
|
-
elif isinstance(self.feature_types[col], CreditCardTypedFeature):
|
739
|
-
text += PandasDataset._format_stats(
|
740
|
-
self.feature_types[col].type, feature_metadata["stats"]
|
741
|
-
)
|
742
|
-
ax.text(
|
743
|
-
-1.1,
|
744
|
-
1,
|
745
|
-
text,
|
746
|
-
transform=ax.transAxes,
|
747
|
-
verticalalignment="top",
|
748
|
-
bbox=props,
|
749
|
-
)
|
750
|
-
sorted_by_value = sorted(
|
751
|
-
feature_metadata["internal"]["counts"],
|
752
|
-
key=feature_metadata["internal"]["counts"].get,
|
753
|
-
reverse=True,
|
754
|
-
)
|
755
|
-
pd.Series(
|
756
|
-
feature_metadata["internal"]["counts"],
|
757
|
-
name=col,
|
758
|
-
index=sorted_by_value,
|
759
|
-
).plot(kind="bar", ax=ax, width=0.95, figsize=figsize, color=blues)
|
760
|
-
|
761
|
-
elif isinstance(self.feature_types[col], ZipcodeTypedFeature):
|
762
|
-
ax.text(
|
763
|
-
-1.1,
|
764
|
-
1,
|
765
|
-
text,
|
766
|
-
transform=ax.transAxes,
|
767
|
-
verticalalignment="top",
|
768
|
-
bbox=props,
|
769
|
-
)
|
770
|
-
pd.Series(
|
771
|
-
feature_metadata["internal"]["histogram"],
|
772
|
-
name=col,
|
773
|
-
index=feature_metadata["internal"]["histogram"].keys(),
|
774
|
-
).plot(kind="bar", ax=ax, figsize=figsize, color=blues)
|
775
|
-
|
776
|
-
elif isinstance(self.feature_types[col], PhoneNumberTypedFeature):
|
777
|
-
text += PandasDataset._format_stats(
|
778
|
-
self.feature_types[col].type, feature_metadata["stats"]
|
779
|
-
)
|
780
|
-
ax.text(
|
781
|
-
-1.1,
|
782
|
-
1,
|
783
|
-
text,
|
784
|
-
transform=ax.transAxes,
|
785
|
-
verticalalignment="top",
|
786
|
-
bbox=props,
|
787
|
-
)
|
788
|
-
pd.Series(
|
789
|
-
feature_metadata["internal"]["counts"],
|
790
|
-
name=col,
|
791
|
-
index=feature_metadata["internal"]["counts"].keys(),
|
792
|
-
).plot(kind="bar", ax=ax, figsize=figsize, color=blues)
|
793
|
-
|
794
|
-
elif isinstance(self.feature_types[col], OrdinalTypedFeature):
|
795
|
-
text += PandasDataset._format_stats(
|
796
|
-
self.feature_types[col].type, feature_metadata["stats"]
|
797
|
-
)
|
798
|
-
high_cardinality = feature_metadata["internal"]["high_cardinality"]
|
799
|
-
very_high_cardinality = feature_metadata["internal"][
|
800
|
-
"very_high_cardinality"
|
801
|
-
]
|
802
|
-
|
803
|
-
ax.text(
|
804
|
-
-1.1,
|
805
|
-
1,
|
806
|
-
text,
|
807
|
-
transform=ax.transAxes,
|
808
|
-
verticalalignment="top",
|
809
|
-
bbox=props,
|
810
|
-
)
|
811
|
-
|
812
|
-
if very_high_cardinality:
|
813
|
-
addrtext = "Samples:\n\n"
|
814
|
-
addrtext += ", ".join(
|
815
|
-
feature_metadata["internal"]["counts"]
|
816
|
-
.keys()
|
817
|
-
.astype(str)
|
818
|
-
.to_list()[:6]
|
819
|
-
)
|
820
|
-
|
821
|
-
ax.text(
|
822
|
-
0.05,
|
823
|
-
0.95,
|
824
|
-
addrtext,
|
825
|
-
transform=ax.transAxes,
|
826
|
-
fontsize=10,
|
827
|
-
verticalalignment="top",
|
828
|
-
bbox=props,
|
829
|
-
)
|
830
|
-
ax.axis("off")
|
831
|
-
|
832
|
-
else:
|
833
|
-
if high_cardinality:
|
834
|
-
text += (
|
835
|
-
"\n\n NOTE: plot has been capped\n from %d to show only most\n common top %d "
|
836
|
-
"categories"
|
837
|
-
% (
|
838
|
-
feature_metadata["internal"]["unique"],
|
839
|
-
len(feature_metadata["internal"]["counts"].keys()),
|
840
|
-
)
|
841
|
-
)
|
842
|
-
|
843
|
-
if feature_metadata["internal"]["unique"] < 24:
|
844
|
-
pd.Series(
|
845
|
-
feature_metadata["internal"]["counts"],
|
846
|
-
name=col,
|
847
|
-
index=feature_metadata["internal"]["counts"].keys(),
|
848
|
-
).plot(
|
849
|
-
kind="bar",
|
850
|
-
ax=ax,
|
851
|
-
width=0.90,
|
852
|
-
color=blues[-1],
|
853
|
-
figsize=figsize,
|
854
|
-
)
|
855
|
-
else:
|
856
|
-
self.sampled_df[col].plot(
|
857
|
-
kind="hist",
|
858
|
-
grid=False,
|
859
|
-
rwidth=0.95,
|
860
|
-
ax=ax,
|
861
|
-
color=blues[-1],
|
862
|
-
figsize=figsize,
|
863
|
-
)
|
864
|
-
|
865
|
-
elif isinstance(self.feature_types[col], CategoricalTypedFeature):
|
866
|
-
text += PandasDataset._format_stats(
|
867
|
-
self.feature_types[col].type, feature_metadata["stats"]
|
868
|
-
)
|
869
|
-
|
870
|
-
high_cardinality = feature_metadata["internal"]["high_cardinality"]
|
871
|
-
very_high_cardinality = feature_metadata["internal"][
|
872
|
-
"very_high_cardinality"
|
873
|
-
]
|
874
|
-
|
875
|
-
ax.text(
|
876
|
-
-1.1,
|
877
|
-
1,
|
878
|
-
text,
|
879
|
-
transform=ax.transAxes,
|
880
|
-
verticalalignment="top",
|
881
|
-
bbox=props,
|
882
|
-
)
|
883
|
-
|
884
|
-
if very_high_cardinality:
|
885
|
-
# grab the first few examples as strings
|
886
|
-
addrtext = "Samples:\n\n"
|
887
|
-
addrtext += "\n".join(
|
888
|
-
[
|
889
|
-
utils.ellipsis_strings(x, 65)
|
890
|
-
for x in feature_metadata["internal"]["counts"]
|
891
|
-
.keys()
|
892
|
-
.astype(str)
|
893
|
-
.to_list()[:3]
|
894
|
-
]
|
895
|
-
)
|
896
|
-
ax.text(
|
897
|
-
0.05,
|
898
|
-
0.95,
|
899
|
-
addrtext,
|
900
|
-
transform=ax.transAxes,
|
901
|
-
fontsize=10,
|
902
|
-
verticalalignment="top",
|
903
|
-
bbox=props,
|
904
|
-
)
|
905
|
-
ax.axis("off")
|
906
|
-
|
907
|
-
else:
|
908
|
-
text += PandasDataset._format_stats(
|
909
|
-
self.feature_types[col].type, feature_metadata["stats"]
|
910
|
-
)
|
911
|
-
if high_cardinality:
|
912
|
-
text += (
|
913
|
-
"\n\n NOTE: plot has been capped\n to show only most\n common top %d categories"
|
914
|
-
% (len(feature_metadata["internal"]["counts"].keys()))
|
915
|
-
)
|
916
|
-
|
917
|
-
if feature_metadata["internal"]["unique"] == 2:
|
918
|
-
#
|
919
|
-
# binary
|
920
|
-
#
|
921
|
-
count_series = pd.Series(
|
922
|
-
feature_metadata["internal"]["counts"],
|
923
|
-
name=col,
|
924
|
-
index=feature_metadata["internal"]["counts"].keys(),
|
925
|
-
).astype(float)
|
926
|
-
ax1 = count_series.plot(
|
927
|
-
kind="barh",
|
928
|
-
ax=ax,
|
929
|
-
width=0.95,
|
930
|
-
figsize=figsize,
|
931
|
-
color=[blues[0], blues[-1]],
|
932
|
-
)
|
933
|
-
# x_labels = utils.ellipsis_strings(feature_metadata['internal']['counts'].keys().astype(str))
|
934
|
-
# ax1.set_xticklabels(x_labels)
|
935
|
-
else:
|
936
|
-
#
|
937
|
-
# multiclass, potentially high cardinality
|
938
|
-
#
|
939
|
-
|
940
|
-
ax1 = pd.Series(
|
941
|
-
feature_metadata["internal"]["counts"],
|
942
|
-
name=col,
|
943
|
-
index=feature_metadata["internal"]["counts"].keys(),
|
944
|
-
).plot(
|
945
|
-
kind="bar", ax=ax, width=0.95, color=blues, figsize=figsize
|
946
|
-
)
|
947
|
-
|
948
|
-
x_labels = utils.ellipsis_strings(
|
949
|
-
feature_metadata["internal"]["counts"].keys()
|
950
|
-
)
|
951
|
-
ax1.set_xticklabels(x_labels)
|
952
|
-
|
953
|
-
elif isinstance(
|
954
|
-
self.feature_types[col], DocumentTypedFeature
|
955
|
-
) or isinstance(self.feature_types[col], AddressTypedFeature):
|
956
|
-
text += PandasDataset._format_stats(
|
957
|
-
self.feature_types[col].type,
|
958
|
-
{k: v for k, v in feature_metadata["stats"].items()},
|
959
|
-
)
|
960
|
-
|
961
|
-
ax.text(
|
962
|
-
-1.1,
|
963
|
-
1,
|
964
|
-
text,
|
965
|
-
transform=ax.transAxes,
|
966
|
-
verticalalignment="top",
|
967
|
-
bbox=props,
|
968
|
-
)
|
969
|
-
if "word_frequencies" in feature_metadata["internal"]:
|
970
|
-
word_freqs = feature_metadata["internal"]["word_frequencies"]
|
971
|
-
stats = "\n".join([" - word count: %d" % (len(word_freqs.keys()))])
|
972
|
-
text = text + "\n"
|
973
|
-
text += stats
|
974
|
-
try:
|
975
|
-
from wordcloud import WordCloud, STOPWORDS
|
976
|
-
|
977
|
-
wordcloud = WordCloud(
|
978
|
-
width=1000,
|
979
|
-
height=int(1000 * (figsize[1] / figsize[0])),
|
980
|
-
background_color="white",
|
981
|
-
stopwords=set(STOPWORDS),
|
982
|
-
max_words=50,
|
983
|
-
max_font_size=75,
|
984
|
-
).fit_words(word_freqs)
|
985
|
-
|
986
|
-
plt.imshow(wordcloud, interpolation="bilinear")
|
987
|
-
plt.axis("off")
|
988
|
-
except ModuleNotFoundError as e:
|
989
|
-
utils._log_missing_module("wordcloud", OptionalDependency.TEXT)
|
990
|
-
logger.info(
|
991
|
-
"The text word cloud is not plotted due to missing dependency wordcloud."
|
992
|
-
)
|
993
|
-
|
994
|
-
else:
|
995
|
-
ax.text(
|
996
|
-
-1.1,
|
997
|
-
1,
|
998
|
-
text,
|
999
|
-
transform=ax.transAxes,
|
1000
|
-
verticalalignment="top",
|
1001
|
-
bbox=props,
|
1002
|
-
weight="bold",
|
1003
|
-
)
|
1004
|
-
if feature_metadata["missing_percentage"] == 100:
|
1005
|
-
addrtext = "NOTE: plot has been disabled as all values in this column are missing."
|
1006
|
-
else:
|
1007
|
-
addrtext = "NOTE: plot has been disabled,\nfor features of unknown type\nno visualization is available"
|
1008
|
-
ax.text(
|
1009
|
-
0.05,
|
1010
|
-
0.95,
|
1011
|
-
addrtext,
|
1012
|
-
transform=ax.transAxes,
|
1013
|
-
fontsize=14,
|
1014
|
-
verticalalignment="top",
|
1015
|
-
bbox=props,
|
1016
|
-
weight="bold",
|
1017
|
-
)
|
1018
|
-
ax.axis("off")
|
1019
|
-
|
1020
|
-
self.feature_dist_html_dict[col] = convert_to_html(plt)
|
1021
|
-
plt.close()
|
1022
|
-
|
1023
|
-
html_widget.value += self.feature_dist_html_dict[col]
|
1024
|
-
|
1025
|
-
@staticmethod
|
1026
|
-
def _init_fig_ax(figsize, dpi=288):
    """
    Create a figure with a single axes and a white figure background.

    Parameters
    ----------
    figsize:
        Figure size, passed through to ``plt.subplots``.
    dpi: int
        Figure resolution, passed through to ``plt.subplots``. Defaults to 288.

    Returns
    -------
    tuple
        The ``(figure, axes)`` pair produced by ``plt.subplots``.
    """
    figure, axes = plt.subplots(dpi=dpi, figsize=figsize)
    figure.set(facecolor="white")
    return (figure, axes)
|
1030
|
-
|
1031
|
-
@staticmethod
|
1032
|
-
def _format_stats(feature_type_name, stats):
|
1033
|
-
text = "\n - %s statistics:" % (feature_type_name)
|
1034
|
-
for k in list(stats.keys()):
|
1035
|
-
if "percentage" in k:
|
1036
|
-
text += "\n - {}: {:.3f}%".format(k, stats[k])
|
1037
|
-
elif isinstance(stats[k], (int, np.int64)) or (
|
1038
|
-
isinstance(stats[k], float)
|
1039
|
-
and not np.isnan(stats[k])
|
1040
|
-
and stats[k] == int(stats[k])
|
1041
|
-
):
|
1042
|
-
text += "\n - {}: {:,}".format(k, int(stats[k]))
|
1043
|
-
elif isinstance(stats[k], bool):
|
1044
|
-
text += "\n - {}: {s}".format(k, "yes" if stats[k] else "no")
|
1045
|
-
elif isinstance(stats[k], (float, np.float64)):
|
1046
|
-
text += "\n - {}: {:.3f}".format(k, stats[k])
|
1047
|
-
else:
|
1048
|
-
text += "\n - {}: {}".format(k, stats[k])
|
1049
|
-
|
1050
|
-
return text
|