oracle-ads 2.13.9rc0__py3-none-any.whl → 2.13.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/__init__.py +40 -0
- ads/aqua/app.py +507 -0
- ads/aqua/cli.py +96 -0
- ads/aqua/client/__init__.py +3 -0
- ads/aqua/client/client.py +836 -0
- ads/aqua/client/openai_client.py +305 -0
- ads/aqua/common/__init__.py +5 -0
- ads/aqua/common/decorator.py +125 -0
- ads/aqua/common/entities.py +274 -0
- ads/aqua/common/enums.py +134 -0
- ads/aqua/common/errors.py +109 -0
- ads/aqua/common/utils.py +1295 -0
- ads/aqua/config/__init__.py +4 -0
- ads/aqua/config/container_config.py +246 -0
- ads/aqua/config/evaluation/__init__.py +4 -0
- ads/aqua/config/evaluation/evaluation_service_config.py +147 -0
- ads/aqua/config/utils/__init__.py +4 -0
- ads/aqua/config/utils/serializer.py +339 -0
- ads/aqua/constants.py +116 -0
- ads/aqua/data.py +14 -0
- ads/aqua/dummy_data/icon.txt +1 -0
- ads/aqua/dummy_data/oci_model_deployments.json +56 -0
- ads/aqua/dummy_data/oci_models.json +1 -0
- ads/aqua/dummy_data/readme.md +26 -0
- ads/aqua/evaluation/__init__.py +8 -0
- ads/aqua/evaluation/constants.py +53 -0
- ads/aqua/evaluation/entities.py +186 -0
- ads/aqua/evaluation/errors.py +70 -0
- ads/aqua/evaluation/evaluation.py +1814 -0
- ads/aqua/extension/__init__.py +42 -0
- ads/aqua/extension/aqua_ws_msg_handler.py +76 -0
- ads/aqua/extension/base_handler.py +90 -0
- ads/aqua/extension/common_handler.py +121 -0
- ads/aqua/extension/common_ws_msg_handler.py +36 -0
- ads/aqua/extension/deployment_handler.py +381 -0
- ads/aqua/extension/deployment_ws_msg_handler.py +54 -0
- ads/aqua/extension/errors.py +30 -0
- ads/aqua/extension/evaluation_handler.py +129 -0
- ads/aqua/extension/evaluation_ws_msg_handler.py +61 -0
- ads/aqua/extension/finetune_handler.py +96 -0
- ads/aqua/extension/model_handler.py +390 -0
- ads/aqua/extension/models/__init__.py +0 -0
- ads/aqua/extension/models/ws_models.py +145 -0
- ads/aqua/extension/models_ws_msg_handler.py +50 -0
- ads/aqua/extension/ui_handler.py +300 -0
- ads/aqua/extension/ui_websocket_handler.py +130 -0
- ads/aqua/extension/utils.py +133 -0
- ads/aqua/finetuning/__init__.py +7 -0
- ads/aqua/finetuning/constants.py +23 -0
- ads/aqua/finetuning/entities.py +181 -0
- ads/aqua/finetuning/finetuning.py +749 -0
- ads/aqua/model/__init__.py +8 -0
- ads/aqua/model/constants.py +60 -0
- ads/aqua/model/entities.py +385 -0
- ads/aqua/model/enums.py +32 -0
- ads/aqua/model/model.py +2134 -0
- ads/aqua/model/utils.py +52 -0
- ads/aqua/modeldeployment/__init__.py +6 -0
- ads/aqua/modeldeployment/constants.py +10 -0
- ads/aqua/modeldeployment/deployment.py +1315 -0
- ads/aqua/modeldeployment/entities.py +653 -0
- ads/aqua/modeldeployment/utils.py +543 -0
- ads/aqua/resources/gpu_shapes_index.json +94 -0
- ads/aqua/server/__init__.py +4 -0
- ads/aqua/server/__main__.py +24 -0
- ads/aqua/server/app.py +47 -0
- ads/aqua/server/aqua_spec.yml +1291 -0
- ads/aqua/training/__init__.py +4 -0
- ads/aqua/training/exceptions.py +476 -0
- ads/aqua/ui.py +519 -0
- ads/automl/__init__.py +9 -0
- ads/automl/driver.py +330 -0
- ads/automl/provider.py +975 -0
- ads/bds/__init__.py +5 -0
- ads/bds/auth.py +127 -0
- ads/bds/big_data_service.py +255 -0
- ads/catalog/__init__.py +19 -0
- ads/catalog/model.py +1576 -0
- ads/catalog/notebook.py +461 -0
- ads/catalog/project.py +468 -0
- ads/catalog/summary.py +178 -0
- ads/common/__init__.py +11 -0
- ads/common/analyzer.py +65 -0
- ads/common/artifact/.model-ignore +63 -0
- ads/common/artifact/__init__.py +10 -0
- ads/common/auth.py +1122 -0
- ads/common/card_identifier.py +83 -0
- ads/common/config.py +647 -0
- ads/common/data.py +165 -0
- ads/common/decorator/__init__.py +9 -0
- ads/common/decorator/argument_to_case.py +88 -0
- ads/common/decorator/deprecate.py +69 -0
- ads/common/decorator/require_nonempty_arg.py +65 -0
- ads/common/decorator/runtime_dependency.py +178 -0
- ads/common/decorator/threaded.py +97 -0
- ads/common/decorator/utils.py +35 -0
- ads/common/dsc_file_system.py +303 -0
- ads/common/error.py +14 -0
- ads/common/extended_enum.py +81 -0
- ads/common/function/__init__.py +5 -0
- ads/common/function/fn_util.py +142 -0
- ads/common/function/func_conf.yaml +25 -0
- ads/common/ipython.py +76 -0
- ads/common/model.py +679 -0
- ads/common/model_artifact.py +1759 -0
- ads/common/model_artifact_schema.json +107 -0
- ads/common/model_export_util.py +664 -0
- ads/common/model_metadata.py +24 -0
- ads/common/object_storage_details.py +296 -0
- ads/common/oci_client.py +179 -0
- ads/common/oci_datascience.py +46 -0
- ads/common/oci_logging.py +1144 -0
- ads/common/oci_mixin.py +957 -0
- ads/common/oci_resource.py +136 -0
- ads/common/serializer.py +559 -0
- ads/common/utils.py +1852 -0
- ads/common/word_lists.py +1491 -0
- ads/common/work_request.py +189 -0
- ads/config.py +1 -0
- ads/data_labeling/__init__.py +13 -0
- ads/data_labeling/boundingbox.py +253 -0
- ads/data_labeling/constants.py +47 -0
- ads/data_labeling/data_labeling_service.py +244 -0
- ads/data_labeling/interface/__init__.py +5 -0
- ads/data_labeling/interface/loader.py +16 -0
- ads/data_labeling/interface/parser.py +16 -0
- ads/data_labeling/interface/reader.py +23 -0
- ads/data_labeling/loader/__init__.py +5 -0
- ads/data_labeling/loader/file_loader.py +241 -0
- ads/data_labeling/metadata.py +110 -0
- ads/data_labeling/mixin/__init__.py +5 -0
- ads/data_labeling/mixin/data_labeling.py +232 -0
- ads/data_labeling/ner.py +129 -0
- ads/data_labeling/parser/__init__.py +5 -0
- ads/data_labeling/parser/dls_record_parser.py +388 -0
- ads/data_labeling/parser/export_metadata_parser.py +94 -0
- ads/data_labeling/parser/export_record_parser.py +473 -0
- ads/data_labeling/reader/__init__.py +5 -0
- ads/data_labeling/reader/dataset_reader.py +574 -0
- ads/data_labeling/reader/dls_record_reader.py +121 -0
- ads/data_labeling/reader/export_record_reader.py +62 -0
- ads/data_labeling/reader/jsonl_reader.py +75 -0
- ads/data_labeling/reader/metadata_reader.py +203 -0
- ads/data_labeling/reader/record_reader.py +263 -0
- ads/data_labeling/record.py +52 -0
- ads/data_labeling/visualizer/__init__.py +5 -0
- ads/data_labeling/visualizer/image_visualizer.py +525 -0
- ads/data_labeling/visualizer/text_visualizer.py +357 -0
- ads/database/__init__.py +5 -0
- ads/database/connection.py +338 -0
- ads/dataset/__init__.py +10 -0
- ads/dataset/capabilities.md +51 -0
- ads/dataset/classification_dataset.py +339 -0
- ads/dataset/correlation.py +226 -0
- ads/dataset/correlation_plot.py +563 -0
- ads/dataset/dask_series.py +173 -0
- ads/dataset/dataframe_transformer.py +110 -0
- ads/dataset/dataset.py +1979 -0
- ads/dataset/dataset_browser.py +360 -0
- ads/dataset/dataset_with_target.py +995 -0
- ads/dataset/exception.py +25 -0
- ads/dataset/factory.py +987 -0
- ads/dataset/feature_engineering_transformer.py +35 -0
- ads/dataset/feature_selection.py +107 -0
- ads/dataset/forecasting_dataset.py +26 -0
- ads/dataset/helper.py +1450 -0
- ads/dataset/label_encoder.py +99 -0
- ads/dataset/mixin/__init__.py +5 -0
- ads/dataset/mixin/dataset_accessor.py +134 -0
- ads/dataset/pipeline.py +58 -0
- ads/dataset/plot.py +710 -0
- ads/dataset/progress.py +86 -0
- ads/dataset/recommendation.py +297 -0
- ads/dataset/recommendation_transformer.py +502 -0
- ads/dataset/regression_dataset.py +14 -0
- ads/dataset/sampled_dataset.py +1050 -0
- ads/dataset/target.py +98 -0
- ads/dataset/timeseries.py +18 -0
- ads/dbmixin/__init__.py +5 -0
- ads/dbmixin/db_pandas_accessor.py +153 -0
- ads/environment/__init__.py +9 -0
- ads/environment/ml_runtime.py +66 -0
- ads/evaluations/README.md +14 -0
- ads/evaluations/__init__.py +109 -0
- ads/evaluations/evaluation_plot.py +983 -0
- ads/evaluations/evaluator.py +1334 -0
- ads/evaluations/statistical_metrics.py +543 -0
- ads/experiments/__init__.py +9 -0
- ads/experiments/capabilities.md +0 -0
- ads/explanations/__init__.py +21 -0
- ads/explanations/base_explainer.py +142 -0
- ads/explanations/capabilities.md +83 -0
- ads/explanations/explainer.py +190 -0
- ads/explanations/mlx_global_explainer.py +1050 -0
- ads/explanations/mlx_interface.py +386 -0
- ads/explanations/mlx_local_explainer.py +287 -0
- ads/explanations/mlx_whatif_explainer.py +201 -0
- ads/feature_engineering/__init__.py +20 -0
- ads/feature_engineering/accessor/__init__.py +5 -0
- ads/feature_engineering/accessor/dataframe_accessor.py +535 -0
- ads/feature_engineering/accessor/mixin/__init__.py +5 -0
- ads/feature_engineering/accessor/mixin/correlation.py +166 -0
- ads/feature_engineering/accessor/mixin/eda_mixin.py +266 -0
- ads/feature_engineering/accessor/mixin/eda_mixin_series.py +85 -0
- ads/feature_engineering/accessor/mixin/feature_types_mixin.py +211 -0
- ads/feature_engineering/accessor/mixin/utils.py +65 -0
- ads/feature_engineering/accessor/series_accessor.py +431 -0
- ads/feature_engineering/adsimage/__init__.py +5 -0
- ads/feature_engineering/adsimage/image.py +192 -0
- ads/feature_engineering/adsimage/image_reader.py +170 -0
- ads/feature_engineering/adsimage/interface/__init__.py +5 -0
- ads/feature_engineering/adsimage/interface/reader.py +19 -0
- ads/feature_engineering/adsstring/__init__.py +7 -0
- ads/feature_engineering/adsstring/oci_language/__init__.py +8 -0
- ads/feature_engineering/adsstring/string/__init__.py +8 -0
- ads/feature_engineering/data_schema.json +57 -0
- ads/feature_engineering/dataset/__init__.py +5 -0
- ads/feature_engineering/dataset/zip_code_data.py +42062 -0
- ads/feature_engineering/exceptions.py +40 -0
- ads/feature_engineering/feature_type/__init__.py +133 -0
- ads/feature_engineering/feature_type/address.py +184 -0
- ads/feature_engineering/feature_type/adsstring/__init__.py +5 -0
- ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +164 -0
- ads/feature_engineering/feature_type/adsstring/oci_language.py +93 -0
- ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +5 -0
- ads/feature_engineering/feature_type/adsstring/parsers/base.py +47 -0
- ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +96 -0
- ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +221 -0
- ads/feature_engineering/feature_type/adsstring/string.py +258 -0
- ads/feature_engineering/feature_type/base.py +58 -0
- ads/feature_engineering/feature_type/boolean.py +183 -0
- ads/feature_engineering/feature_type/category.py +146 -0
- ads/feature_engineering/feature_type/constant.py +137 -0
- ads/feature_engineering/feature_type/continuous.py +151 -0
- ads/feature_engineering/feature_type/creditcard.py +314 -0
- ads/feature_engineering/feature_type/datetime.py +190 -0
- ads/feature_engineering/feature_type/discrete.py +134 -0
- ads/feature_engineering/feature_type/document.py +43 -0
- ads/feature_engineering/feature_type/gis.py +251 -0
- ads/feature_engineering/feature_type/handler/__init__.py +5 -0
- ads/feature_engineering/feature_type/handler/feature_validator.py +524 -0
- ads/feature_engineering/feature_type/handler/feature_warning.py +319 -0
- ads/feature_engineering/feature_type/handler/warnings.py +128 -0
- ads/feature_engineering/feature_type/integer.py +142 -0
- ads/feature_engineering/feature_type/ip_address.py +144 -0
- ads/feature_engineering/feature_type/ip_address_v4.py +138 -0
- ads/feature_engineering/feature_type/ip_address_v6.py +138 -0
- ads/feature_engineering/feature_type/lat_long.py +256 -0
- ads/feature_engineering/feature_type/object.py +43 -0
- ads/feature_engineering/feature_type/ordinal.py +132 -0
- ads/feature_engineering/feature_type/phone_number.py +135 -0
- ads/feature_engineering/feature_type/string.py +171 -0
- ads/feature_engineering/feature_type/text.py +93 -0
- ads/feature_engineering/feature_type/unknown.py +43 -0
- ads/feature_engineering/feature_type/zip_code.py +164 -0
- ads/feature_engineering/feature_type_manager.py +406 -0
- ads/feature_engineering/schema.py +795 -0
- ads/feature_engineering/utils.py +245 -0
- ads/feature_store/.readthedocs.yaml +19 -0
- ads/feature_store/README.md +65 -0
- ads/feature_store/__init__.py +9 -0
- ads/feature_store/common/__init__.py +0 -0
- ads/feature_store/common/enums.py +339 -0
- ads/feature_store/common/exceptions.py +18 -0
- ads/feature_store/common/spark_session_singleton.py +125 -0
- ads/feature_store/common/utils/__init__.py +0 -0
- ads/feature_store/common/utils/base64_encoder_decoder.py +72 -0
- ads/feature_store/common/utils/feature_schema_mapper.py +283 -0
- ads/feature_store/common/utils/transformation_utils.py +82 -0
- ads/feature_store/common/utils/utility.py +403 -0
- ads/feature_store/data_validation/__init__.py +0 -0
- ads/feature_store/data_validation/great_expectation.py +129 -0
- ads/feature_store/dataset.py +1230 -0
- ads/feature_store/dataset_job.py +530 -0
- ads/feature_store/docs/Dockerfile +7 -0
- ads/feature_store/docs/Makefile +44 -0
- ads/feature_store/docs/conf.py +28 -0
- ads/feature_store/docs/requirements.txt +14 -0
- ads/feature_store/docs/source/ads.feature_store.query.rst +20 -0
- ads/feature_store/docs/source/cicd.rst +137 -0
- ads/feature_store/docs/source/conf.py +86 -0
- ads/feature_store/docs/source/data_versioning.rst +33 -0
- ads/feature_store/docs/source/dataset.rst +388 -0
- ads/feature_store/docs/source/dataset_job.rst +27 -0
- ads/feature_store/docs/source/demo.rst +70 -0
- ads/feature_store/docs/source/entity.rst +78 -0
- ads/feature_store/docs/source/feature_group.rst +624 -0
- ads/feature_store/docs/source/feature_group_job.rst +29 -0
- ads/feature_store/docs/source/feature_store.rst +122 -0
- ads/feature_store/docs/source/feature_store_class.rst +123 -0
- ads/feature_store/docs/source/feature_validation.rst +66 -0
- ads/feature_store/docs/source/figures/cicd.png +0 -0
- ads/feature_store/docs/source/figures/data_validation.png +0 -0
- ads/feature_store/docs/source/figures/data_versioning.png +0 -0
- ads/feature_store/docs/source/figures/dataset.gif +0 -0
- ads/feature_store/docs/source/figures/dataset.png +0 -0
- ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
- ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
- ads/feature_store/docs/source/figures/entity.png +0 -0
- ads/feature_store/docs/source/figures/feature_group.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
- ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
- ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
- ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
- ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
- ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
- ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
- ads/feature_store/docs/source/figures/overview.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
- ads/feature_store/docs/source/figures/stats_1.png +0 -0
- ads/feature_store/docs/source/figures/stats_2.png +0 -0
- ads/feature_store/docs/source/figures/stats_d.png +0 -0
- ads/feature_store/docs/source/figures/stats_fg.png +0 -0
- ads/feature_store/docs/source/figures/transformation.png +0 -0
- ads/feature_store/docs/source/figures/transformations.gif +0 -0
- ads/feature_store/docs/source/figures/validation.png +0 -0
- ads/feature_store/docs/source/figures/validation_fg.png +0 -0
- ads/feature_store/docs/source/figures/validation_results.png +0 -0
- ads/feature_store/docs/source/figures/validation_summary.png +0 -0
- ads/feature_store/docs/source/index.rst +81 -0
- ads/feature_store/docs/source/module.rst +8 -0
- ads/feature_store/docs/source/notebook.rst +94 -0
- ads/feature_store/docs/source/overview.rst +47 -0
- ads/feature_store/docs/source/quickstart.rst +176 -0
- ads/feature_store/docs/source/release_notes.rst +194 -0
- ads/feature_store/docs/source/setup_feature_store.rst +81 -0
- ads/feature_store/docs/source/statistics.rst +58 -0
- ads/feature_store/docs/source/transformation.rst +199 -0
- ads/feature_store/docs/source/ui.rst +65 -0
- ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +66 -0
- ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +192 -0
- ads/feature_store/docs/source/user_guides.setup.terraform.rst +338 -0
- ads/feature_store/entity.py +718 -0
- ads/feature_store/execution_strategy/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +375 -0
- ads/feature_store/execution_strategy/engine/__init__.py +0 -0
- ads/feature_store/execution_strategy/engine/spark_engine.py +316 -0
- ads/feature_store/execution_strategy/execution_strategy.py +113 -0
- ads/feature_store/execution_strategy/execution_strategy_provider.py +47 -0
- ads/feature_store/execution_strategy/spark/__init__.py +0 -0
- ads/feature_store/execution_strategy/spark/spark_execution.py +618 -0
- ads/feature_store/feature.py +192 -0
- ads/feature_store/feature_group.py +1494 -0
- ads/feature_store/feature_group_expectation.py +346 -0
- ads/feature_store/feature_group_job.py +602 -0
- ads/feature_store/feature_lineage/__init__.py +0 -0
- ads/feature_store/feature_lineage/graphviz_service.py +180 -0
- ads/feature_store/feature_option_details.py +50 -0
- ads/feature_store/feature_statistics/__init__.py +0 -0
- ads/feature_store/feature_statistics/statistics_service.py +99 -0
- ads/feature_store/feature_store.py +699 -0
- ads/feature_store/feature_store_registrar.py +518 -0
- ads/feature_store/input_feature_detail.py +149 -0
- ads/feature_store/mixin/__init__.py +4 -0
- ads/feature_store/mixin/oci_feature_store.py +145 -0
- ads/feature_store/model_details.py +73 -0
- ads/feature_store/query/__init__.py +0 -0
- ads/feature_store/query/filter.py +266 -0
- ads/feature_store/query/generator/__init__.py +0 -0
- ads/feature_store/query/generator/query_generator.py +298 -0
- ads/feature_store/query/join.py +161 -0
- ads/feature_store/query/query.py +403 -0
- ads/feature_store/query/validator/__init__.py +0 -0
- ads/feature_store/query/validator/query_validator.py +57 -0
- ads/feature_store/response/__init__.py +0 -0
- ads/feature_store/response/response_builder.py +68 -0
- ads/feature_store/service/__init__.py +0 -0
- ads/feature_store/service/oci_dataset.py +139 -0
- ads/feature_store/service/oci_dataset_job.py +199 -0
- ads/feature_store/service/oci_entity.py +125 -0
- ads/feature_store/service/oci_feature_group.py +164 -0
- ads/feature_store/service/oci_feature_group_job.py +214 -0
- ads/feature_store/service/oci_feature_store.py +182 -0
- ads/feature_store/service/oci_lineage.py +87 -0
- ads/feature_store/service/oci_transformation.py +104 -0
- ads/feature_store/statistics/__init__.py +0 -0
- ads/feature_store/statistics/abs_feature_value.py +49 -0
- ads/feature_store/statistics/charts/__init__.py +0 -0
- ads/feature_store/statistics/charts/abstract_feature_plot.py +37 -0
- ads/feature_store/statistics/charts/box_plot.py +148 -0
- ads/feature_store/statistics/charts/frequency_distribution.py +65 -0
- ads/feature_store/statistics/charts/probability_distribution.py +68 -0
- ads/feature_store/statistics/charts/top_k_frequent_elements.py +98 -0
- ads/feature_store/statistics/feature_stat.py +126 -0
- ads/feature_store/statistics/generic_feature_value.py +33 -0
- ads/feature_store/statistics/statistics.py +41 -0
- ads/feature_store/statistics_config.py +101 -0
- ads/feature_store/templates/feature_store_template.yaml +45 -0
- ads/feature_store/transformation.py +499 -0
- ads/feature_store/validation_output.py +57 -0
- ads/hpo/__init__.py +9 -0
- ads/hpo/_imports.py +91 -0
- ads/hpo/ads_search_space.py +439 -0
- ads/hpo/distributions.py +325 -0
- ads/hpo/objective.py +280 -0
- ads/hpo/search_cv.py +1657 -0
- ads/hpo/stopping_criterion.py +75 -0
- ads/hpo/tuner_artifact.py +413 -0
- ads/hpo/utils.py +91 -0
- ads/hpo/validation.py +140 -0
- ads/hpo/visualization/__init__.py +5 -0
- ads/hpo/visualization/_contour.py +23 -0
- ads/hpo/visualization/_edf.py +20 -0
- ads/hpo/visualization/_intermediate_values.py +21 -0
- ads/hpo/visualization/_optimization_history.py +25 -0
- ads/hpo/visualization/_parallel_coordinate.py +169 -0
- ads/hpo/visualization/_param_importances.py +26 -0
- ads/jobs/__init__.py +53 -0
- ads/jobs/ads_job.py +663 -0
- ads/jobs/builders/__init__.py +5 -0
- ads/jobs/builders/base.py +156 -0
- ads/jobs/builders/infrastructure/__init__.py +6 -0
- ads/jobs/builders/infrastructure/base.py +165 -0
- ads/jobs/builders/infrastructure/dataflow.py +1252 -0
- ads/jobs/builders/infrastructure/dsc_job.py +1894 -0
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +1233 -0
- ads/jobs/builders/infrastructure/utils.py +65 -0
- ads/jobs/builders/runtimes/__init__.py +5 -0
- ads/jobs/builders/runtimes/artifact.py +338 -0
- ads/jobs/builders/runtimes/base.py +325 -0
- ads/jobs/builders/runtimes/container_runtime.py +242 -0
- ads/jobs/builders/runtimes/python_runtime.py +1016 -0
- ads/jobs/builders/runtimes/pytorch_runtime.py +204 -0
- ads/jobs/cli.py +104 -0
- ads/jobs/env_var_parser.py +131 -0
- ads/jobs/extension.py +160 -0
- ads/jobs/schema/__init__.py +5 -0
- ads/jobs/schema/infrastructure_schema.json +116 -0
- ads/jobs/schema/job_schema.json +42 -0
- ads/jobs/schema/runtime_schema.json +183 -0
- ads/jobs/schema/validator.py +141 -0
- ads/jobs/serializer.py +296 -0
- ads/jobs/templates/__init__.py +5 -0
- ads/jobs/templates/container.py +6 -0
- ads/jobs/templates/driver_notebook.py +177 -0
- ads/jobs/templates/driver_oci.py +500 -0
- ads/jobs/templates/driver_python.py +48 -0
- ads/jobs/templates/driver_pytorch.py +852 -0
- ads/jobs/templates/driver_utils.py +615 -0
- ads/jobs/templates/hostname_from_env.c +55 -0
- ads/jobs/templates/oci_metrics.py +181 -0
- ads/jobs/utils.py +104 -0
- ads/llm/__init__.py +28 -0
- ads/llm/autogen/__init__.py +2 -0
- ads/llm/autogen/constants.py +15 -0
- ads/llm/autogen/reports/__init__.py +2 -0
- ads/llm/autogen/reports/base.py +67 -0
- ads/llm/autogen/reports/data.py +103 -0
- ads/llm/autogen/reports/session.py +526 -0
- ads/llm/autogen/reports/templates/chat_box.html +13 -0
- ads/llm/autogen/reports/templates/chat_box_lt.html +5 -0
- ads/llm/autogen/reports/templates/chat_box_rt.html +6 -0
- ads/llm/autogen/reports/utils.py +56 -0
- ads/llm/autogen/v02/__init__.py +4 -0
- ads/llm/autogen/v02/client.py +295 -0
- ads/llm/autogen/v02/log_handlers/__init__.py +2 -0
- ads/llm/autogen/v02/log_handlers/oci_file_handler.py +83 -0
- ads/llm/autogen/v02/loggers/__init__.py +6 -0
- ads/llm/autogen/v02/loggers/metric_logger.py +320 -0
- ads/llm/autogen/v02/loggers/session_logger.py +580 -0
- ads/llm/autogen/v02/loggers/utils.py +86 -0
- ads/llm/autogen/v02/runtime_logging.py +163 -0
- ads/llm/chain.py +268 -0
- ads/llm/chat_template.py +31 -0
- ads/llm/deploy.py +63 -0
- ads/llm/guardrails/__init__.py +5 -0
- ads/llm/guardrails/base.py +442 -0
- ads/llm/guardrails/huggingface.py +44 -0
- ads/llm/langchain/__init__.py +5 -0
- ads/llm/langchain/plugins/__init__.py +5 -0
- ads/llm/langchain/plugins/chat_models/__init__.py +5 -0
- ads/llm/langchain/plugins/chat_models/oci_data_science.py +1027 -0
- ads/llm/langchain/plugins/embeddings/__init__.py +4 -0
- ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +184 -0
- ads/llm/langchain/plugins/llms/__init__.py +5 -0
- ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +979 -0
- ads/llm/requirements.txt +3 -0
- ads/llm/serialize.py +219 -0
- ads/llm/serializers/__init__.py +0 -0
- ads/llm/serializers/retrieval_qa.py +153 -0
- ads/llm/serializers/runnable_parallel.py +27 -0
- ads/llm/templates/score_chain.jinja2 +155 -0
- ads/llm/templates/tool_chat_template_hermes.jinja +130 -0
- ads/llm/templates/tool_chat_template_mistral_parallel.jinja +94 -0
- ads/model/__init__.py +52 -0
- ads/model/artifact.py +573 -0
- ads/model/artifact_downloader.py +254 -0
- ads/model/artifact_uploader.py +267 -0
- ads/model/base_properties.py +238 -0
- ads/model/common/.model-ignore +66 -0
- ads/model/common/__init__.py +5 -0
- ads/model/common/utils.py +142 -0
- ads/model/datascience_model.py +2635 -0
- ads/model/deployment/__init__.py +20 -0
- ads/model/deployment/common/__init__.py +5 -0
- ads/model/deployment/common/utils.py +308 -0
- ads/model/deployment/model_deployer.py +466 -0
- ads/model/deployment/model_deployment.py +1846 -0
- ads/model/deployment/model_deployment_infrastructure.py +671 -0
- ads/model/deployment/model_deployment_properties.py +493 -0
- ads/model/deployment/model_deployment_runtime.py +838 -0
- ads/model/extractor/__init__.py +5 -0
- ads/model/extractor/automl_extractor.py +74 -0
- ads/model/extractor/embedding_onnx_extractor.py +80 -0
- ads/model/extractor/huggingface_extractor.py +88 -0
- ads/model/extractor/keras_extractor.py +84 -0
- ads/model/extractor/lightgbm_extractor.py +93 -0
- ads/model/extractor/model_info_extractor.py +114 -0
- ads/model/extractor/model_info_extractor_factory.py +105 -0
- ads/model/extractor/pytorch_extractor.py +87 -0
- ads/model/extractor/sklearn_extractor.py +112 -0
- ads/model/extractor/spark_extractor.py +89 -0
- ads/model/extractor/tensorflow_extractor.py +85 -0
- ads/model/extractor/xgboost_extractor.py +94 -0
- ads/model/framework/__init__.py +5 -0
- ads/model/framework/automl_model.py +178 -0
- ads/model/framework/embedding_onnx_model.py +438 -0
- ads/model/framework/huggingface_model.py +399 -0
- ads/model/framework/lightgbm_model.py +266 -0
- ads/model/framework/pytorch_model.py +266 -0
- ads/model/framework/sklearn_model.py +250 -0
- ads/model/framework/spark_model.py +326 -0
- ads/model/framework/tensorflow_model.py +254 -0
- ads/model/framework/xgboost_model.py +258 -0
- ads/model/generic_model.py +3518 -0
- ads/model/model_artifact_boilerplate/README.md +381 -0
- ads/model/model_artifact_boilerplate/__init__.py +5 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +5 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +427 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +2 -0
- ads/model/model_artifact_boilerplate/runtime.yaml +7 -0
- ads/model/model_artifact_boilerplate/score.py +61 -0
- ads/model/model_file_description_schema.json +68 -0
- ads/model/model_introspect.py +331 -0
- ads/model/model_metadata.py +1810 -0
- ads/model/model_metadata_mixin.py +460 -0
- ads/model/model_properties.py +63 -0
- ads/model/model_version_set.py +739 -0
- ads/model/runtime/__init__.py +5 -0
- ads/model/runtime/env_info.py +306 -0
- ads/model/runtime/model_deployment_details.py +37 -0
- ads/model/runtime/model_provenance_details.py +58 -0
- ads/model/runtime/runtime_info.py +81 -0
- ads/model/runtime/schemas/inference_env_info_schema.yaml +16 -0
- ads/model/runtime/schemas/model_provenance_schema.yaml +36 -0
- ads/model/runtime/schemas/training_env_info_schema.yaml +16 -0
- ads/model/runtime/utils.py +201 -0
- ads/model/serde/__init__.py +5 -0
- ads/model/serde/common.py +40 -0
- ads/model/serde/model_input.py +547 -0
- ads/model/serde/model_serializer.py +1184 -0
- ads/model/service/__init__.py +5 -0
- ads/model/service/oci_datascience_model.py +1076 -0
- ads/model/service/oci_datascience_model_deployment.py +500 -0
- ads/model/service/oci_datascience_model_version_set.py +176 -0
- ads/model/transformer/__init__.py +5 -0
- ads/model/transformer/onnx_transformer.py +324 -0
- ads/mysqldb/__init__.py +5 -0
- ads/mysqldb/mysql_db.py +227 -0
- ads/opctl/__init__.py +18 -0
- ads/opctl/anomaly_detection.py +11 -0
- ads/opctl/backend/__init__.py +5 -0
- ads/opctl/backend/ads_dataflow.py +353 -0
- ads/opctl/backend/ads_ml_job.py +710 -0
- ads/opctl/backend/ads_ml_pipeline.py +164 -0
- ads/opctl/backend/ads_model_deployment.py +209 -0
- ads/opctl/backend/base.py +146 -0
- ads/opctl/backend/local.py +1053 -0
- ads/opctl/backend/marketplace/__init__.py +9 -0
- ads/opctl/backend/marketplace/helm_helper.py +173 -0
- ads/opctl/backend/marketplace/local_marketplace.py +271 -0
- ads/opctl/backend/marketplace/marketplace_backend_runner.py +71 -0
- ads/opctl/backend/marketplace/marketplace_operator_interface.py +44 -0
- ads/opctl/backend/marketplace/marketplace_operator_runner.py +24 -0
- ads/opctl/backend/marketplace/marketplace_utils.py +212 -0
- ads/opctl/backend/marketplace/models/__init__.py +5 -0
- ads/opctl/backend/marketplace/models/bearer_token.py +94 -0
- ads/opctl/backend/marketplace/models/marketplace_type.py +70 -0
- ads/opctl/backend/marketplace/models/ocir_details.py +56 -0
- ads/opctl/backend/marketplace/prerequisite_checker.py +238 -0
- ads/opctl/cli.py +707 -0
- ads/opctl/cmds.py +869 -0
- ads/opctl/conda/__init__.py +5 -0
- ads/opctl/conda/cli.py +193 -0
- ads/opctl/conda/cmds.py +749 -0
- ads/opctl/conda/config.yaml +34 -0
- ads/opctl/conda/manifest_template.yaml +13 -0
- ads/opctl/conda/multipart_uploader.py +188 -0
- ads/opctl/conda/pack.py +89 -0
- ads/opctl/config/__init__.py +5 -0
- ads/opctl/config/base.py +57 -0
- ads/opctl/config/diagnostics/__init__.py +5 -0
- ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +62 -0
- ads/opctl/config/merger.py +255 -0
- ads/opctl/config/resolver.py +297 -0
- ads/opctl/config/utils.py +79 -0
- ads/opctl/config/validator.py +17 -0
- ads/opctl/config/versioner.py +68 -0
- ads/opctl/config/yaml_parsers/__init__.py +7 -0
- ads/opctl/config/yaml_parsers/base.py +58 -0
- ads/opctl/config/yaml_parsers/distributed/__init__.py +7 -0
- ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +201 -0
- ads/opctl/constants.py +66 -0
- ads/opctl/decorator/__init__.py +5 -0
- ads/opctl/decorator/common.py +129 -0
- ads/opctl/diagnostics/__init__.py +5 -0
- ads/opctl/diagnostics/__main__.py +25 -0
- ads/opctl/diagnostics/check_distributed_job_requirements.py +212 -0
- ads/opctl/diagnostics/check_requirements.py +144 -0
- ads/opctl/diagnostics/requirement_exception.py +9 -0
- ads/opctl/distributed/README.md +109 -0
- ads/opctl/distributed/__init__.py +5 -0
- ads/opctl/distributed/certificates.py +32 -0
- ads/opctl/distributed/cli.py +207 -0
- ads/opctl/distributed/cmds.py +731 -0
- ads/opctl/distributed/common/__init__.py +5 -0
- ads/opctl/distributed/common/abstract_cluster_provider.py +449 -0
- ads/opctl/distributed/common/abstract_framework_spec_builder.py +88 -0
- ads/opctl/distributed/common/cluster_config_helper.py +103 -0
- ads/opctl/distributed/common/cluster_provider_factory.py +21 -0
- ads/opctl/distributed/common/cluster_runner.py +54 -0
- ads/opctl/distributed/common/framework_factory.py +29 -0
- ads/opctl/docker/Dockerfile.job +103 -0
- ads/opctl/docker/Dockerfile.job.arm +107 -0
- ads/opctl/docker/Dockerfile.job.gpu +175 -0
- ads/opctl/docker/base-env.yaml +13 -0
- ads/opctl/docker/cuda.repo +6 -0
- ads/opctl/docker/operator/.dockerignore +0 -0
- ads/opctl/docker/operator/Dockerfile +41 -0
- ads/opctl/docker/operator/Dockerfile.gpu +85 -0
- ads/opctl/docker/operator/cuda.repo +6 -0
- ads/opctl/docker/operator/environment.yaml +8 -0
- ads/opctl/forecast.py +11 -0
- ads/opctl/index.yaml +3 -0
- ads/opctl/model/__init__.py +5 -0
- ads/opctl/model/cli.py +65 -0
- ads/opctl/model/cmds.py +73 -0
- ads/opctl/operator/README.md +4 -0
- ads/opctl/operator/__init__.py +31 -0
- ads/opctl/operator/cli.py +344 -0
- ads/opctl/operator/cmd.py +596 -0
- ads/opctl/operator/common/__init__.py +5 -0
- ads/opctl/operator/common/backend_factory.py +460 -0
- ads/opctl/operator/common/const.py +27 -0
- ads/opctl/operator/common/data/synthetic.csv +16001 -0
- ads/opctl/operator/common/dictionary_merger.py +148 -0
- ads/opctl/operator/common/errors.py +42 -0
- ads/opctl/operator/common/operator_config.py +99 -0
- ads/opctl/operator/common/operator_loader.py +811 -0
- ads/opctl/operator/common/operator_schema.yaml +130 -0
- ads/opctl/operator/common/operator_yaml_generator.py +152 -0
- ads/opctl/operator/common/utils.py +208 -0
- ads/opctl/operator/lowcode/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/MLoperator +16 -0
- ads/opctl/operator/lowcode/anomaly/README.md +207 -0
- ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/__main__.py +103 -0
- ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
- ads/opctl/operator/lowcode/anomaly/const.py +167 -0
- ads/opctl/operator/lowcode/anomaly/environment.yaml +10 -0
- ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +146 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +162 -0
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +99 -0
- ads/opctl/operator/lowcode/anomaly/model/autots.py +115 -0
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +404 -0
- ads/opctl/operator/lowcode/anomaly/model/factory.py +110 -0
- ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +78 -0
- ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +78 -0
- ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +120 -0
- ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
- ads/opctl/operator/lowcode/anomaly/operator_config.py +127 -0
- ads/opctl/operator/lowcode/anomaly/schema.yaml +401 -0
- ads/opctl/operator/lowcode/anomaly/utils.py +88 -0
- ads/opctl/operator/lowcode/common/__init__.py +5 -0
- ads/opctl/operator/lowcode/common/const.py +10 -0
- ads/opctl/operator/lowcode/common/data.py +116 -0
- ads/opctl/operator/lowcode/common/errors.py +47 -0
- ads/opctl/operator/lowcode/common/transformations.py +296 -0
- ads/opctl/operator/lowcode/common/utils.py +384 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +13 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/README.md +30 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +5 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +116 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +85 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/const.py +15 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +4 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +32 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +43 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +120 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +34 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +386 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +160 -0
- ads/opctl/operator/lowcode/forecast/MLoperator +25 -0
- ads/opctl/operator/lowcode/forecast/README.md +209 -0
- ads/opctl/operator/lowcode/forecast/__init__.py +5 -0
- ads/opctl/operator/lowcode/forecast/__main__.py +89 -0
- ads/opctl/operator/lowcode/forecast/cmd.py +40 -0
- ads/opctl/operator/lowcode/forecast/const.py +92 -0
- ads/opctl/operator/lowcode/forecast/environment.yaml +20 -0
- ads/opctl/operator/lowcode/forecast/errors.py +26 -0
- ads/opctl/operator/lowcode/forecast/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/forecast/model/arima.py +279 -0
- ads/opctl/operator/lowcode/forecast/model/automlx.py +553 -0
- ads/opctl/operator/lowcode/forecast/model/autots.py +312 -0
- ads/opctl/operator/lowcode/forecast/model/base_model.py +875 -0
- ads/opctl/operator/lowcode/forecast/model/factory.py +106 -0
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +492 -0
- ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +243 -0
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +482 -0
- ads/opctl/operator/lowcode/forecast/model/prophet.py +450 -0
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +244 -0
- ads/opctl/operator/lowcode/forecast/operator_config.py +234 -0
- ads/opctl/operator/lowcode/forecast/schema.yaml +506 -0
- ads/opctl/operator/lowcode/forecast/utils.py +397 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +7 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +285 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/score.py +246 -0
- ads/opctl/operator/lowcode/pii/MLoperator +17 -0
- ads/opctl/operator/lowcode/pii/README.md +208 -0
- ads/opctl/operator/lowcode/pii/__init__.py +5 -0
- ads/opctl/operator/lowcode/pii/__main__.py +78 -0
- ads/opctl/operator/lowcode/pii/cmd.py +39 -0
- ads/opctl/operator/lowcode/pii/constant.py +84 -0
- ads/opctl/operator/lowcode/pii/environment.yaml +17 -0
- ads/opctl/operator/lowcode/pii/errors.py +27 -0
- ads/opctl/operator/lowcode/pii/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/pii/model/factory.py +82 -0
- ads/opctl/operator/lowcode/pii/model/guardrails.py +167 -0
- ads/opctl/operator/lowcode/pii/model/pii.py +145 -0
- ads/opctl/operator/lowcode/pii/model/processor/__init__.py +34 -0
- ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +34 -0
- ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +35 -0
- ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +225 -0
- ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +73 -0
- ads/opctl/operator/lowcode/pii/model/processor/remover.py +26 -0
- ads/opctl/operator/lowcode/pii/model/report.py +487 -0
- ads/opctl/operator/lowcode/pii/operator_config.py +95 -0
- ads/opctl/operator/lowcode/pii/schema.yaml +108 -0
- ads/opctl/operator/lowcode/pii/utils.py +43 -0
- ads/opctl/operator/lowcode/recommender/MLoperator +16 -0
- ads/opctl/operator/lowcode/recommender/README.md +206 -0
- ads/opctl/operator/lowcode/recommender/__init__.py +5 -0
- ads/opctl/operator/lowcode/recommender/__main__.py +82 -0
- ads/opctl/operator/lowcode/recommender/cmd.py +33 -0
- ads/opctl/operator/lowcode/recommender/constant.py +30 -0
- ads/opctl/operator/lowcode/recommender/environment.yaml +11 -0
- ads/opctl/operator/lowcode/recommender/model/base_model.py +212 -0
- ads/opctl/operator/lowcode/recommender/model/factory.py +56 -0
- ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +25 -0
- ads/opctl/operator/lowcode/recommender/model/svd.py +106 -0
- ads/opctl/operator/lowcode/recommender/operator_config.py +81 -0
- ads/opctl/operator/lowcode/recommender/schema.yaml +265 -0
- ads/opctl/operator/lowcode/recommender/utils.py +13 -0
- ads/opctl/operator/runtime/__init__.py +5 -0
- ads/opctl/operator/runtime/const.py +17 -0
- ads/opctl/operator/runtime/container_runtime_schema.yaml +50 -0
- ads/opctl/operator/runtime/marketplace_runtime.py +50 -0
- ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +21 -0
- ads/opctl/operator/runtime/python_runtime_schema.yaml +21 -0
- ads/opctl/operator/runtime/runtime.py +115 -0
- ads/opctl/schema.yaml.yml +36 -0
- ads/opctl/script.py +40 -0
- ads/opctl/spark/__init__.py +5 -0
- ads/opctl/spark/cli.py +43 -0
- ads/opctl/spark/cmds.py +147 -0
- ads/opctl/templates/diagnostic_report_template.jinja2 +102 -0
- ads/opctl/utils.py +344 -0
- ads/oracledb/__init__.py +5 -0
- ads/oracledb/oracle_db.py +346 -0
- ads/pipeline/__init__.py +39 -0
- ads/pipeline/ads_pipeline.py +2279 -0
- ads/pipeline/ads_pipeline_run.py +772 -0
- ads/pipeline/ads_pipeline_step.py +605 -0
- ads/pipeline/builders/__init__.py +5 -0
- ads/pipeline/builders/infrastructure/__init__.py +5 -0
- ads/pipeline/builders/infrastructure/custom_script.py +32 -0
- ads/pipeline/cli.py +119 -0
- ads/pipeline/extension.py +291 -0
- ads/pipeline/schema/__init__.py +5 -0
- ads/pipeline/schema/cs_step_schema.json +35 -0
- ads/pipeline/schema/ml_step_schema.json +31 -0
- ads/pipeline/schema/pipeline_schema.json +71 -0
- ads/pipeline/visualizer/__init__.py +5 -0
- ads/pipeline/visualizer/base.py +570 -0
- ads/pipeline/visualizer/graph_renderer.py +272 -0
- ads/pipeline/visualizer/text_renderer.py +84 -0
- ads/secrets/__init__.py +11 -0
- ads/secrets/adb.py +386 -0
- ads/secrets/auth_token.py +86 -0
- ads/secrets/big_data_service.py +365 -0
- ads/secrets/mysqldb.py +149 -0
- ads/secrets/oracledb.py +160 -0
- ads/secrets/secrets.py +407 -0
- ads/telemetry/__init__.py +7 -0
- ads/telemetry/base.py +69 -0
- ads/telemetry/client.py +122 -0
- ads/telemetry/telemetry.py +257 -0
- ads/templates/dataflow_pyspark.jinja2 +13 -0
- ads/templates/dataflow_sparksql.jinja2 +22 -0
- ads/templates/func.jinja2 +20 -0
- ads/templates/schemas/openapi.json +1740 -0
- ads/templates/score-pkl.jinja2 +173 -0
- ads/templates/score.jinja2 +322 -0
- ads/templates/score_embedding_onnx.jinja2 +202 -0
- ads/templates/score_generic.jinja2 +165 -0
- ads/templates/score_huggingface_pipeline.jinja2 +217 -0
- ads/templates/score_lightgbm.jinja2 +185 -0
- ads/templates/score_onnx.jinja2 +407 -0
- ads/templates/score_onnx_new.jinja2 +473 -0
- ads/templates/score_oracle_automl.jinja2 +185 -0
- ads/templates/score_pyspark.jinja2 +154 -0
- ads/templates/score_pytorch.jinja2 +219 -0
- ads/templates/score_scikit-learn.jinja2 +184 -0
- ads/templates/score_tensorflow.jinja2 +184 -0
- ads/templates/score_xgboost.jinja2 +178 -0
- ads/text_dataset/__init__.py +5 -0
- ads/text_dataset/backends.py +211 -0
- ads/text_dataset/dataset.py +445 -0
- ads/text_dataset/extractor.py +207 -0
- ads/text_dataset/options.py +53 -0
- ads/text_dataset/udfs.py +22 -0
- ads/text_dataset/utils.py +49 -0
- ads/type_discovery/__init__.py +9 -0
- ads/type_discovery/abstract_detector.py +21 -0
- ads/type_discovery/constant_detector.py +41 -0
- ads/type_discovery/continuous_detector.py +54 -0
- ads/type_discovery/credit_card_detector.py +99 -0
- ads/type_discovery/datetime_detector.py +92 -0
- ads/type_discovery/discrete_detector.py +118 -0
- ads/type_discovery/document_detector.py +146 -0
- ads/type_discovery/ip_detector.py +68 -0
- ads/type_discovery/latlon_detector.py +90 -0
- ads/type_discovery/phone_number_detector.py +63 -0
- ads/type_discovery/type_discovery_driver.py +87 -0
- ads/type_discovery/typed_feature.py +594 -0
- ads/type_discovery/unknown_detector.py +41 -0
- ads/type_discovery/zipcode_detector.py +48 -0
- ads/vault/__init__.py +7 -0
- ads/vault/vault.py +237 -0
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/METADATA +150 -149
- oracle_ads-2.13.10.dist-info/RECORD +858 -0
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/WHEEL +1 -2
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/entry_points.txt +2 -1
- oracle_ads-2.13.9rc0.dist-info/RECORD +0 -9
- oracle_ads-2.13.9rc0.dist-info/top_level.txt +0 -1
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,535 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*--
|
3
|
+
|
4
|
+
# Copyright (c) 2021, 2023 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
"""
|
8
|
+
The ADS accessor for the Pandas DataFrame.
|
9
|
+
The accessor will be initialized with the pandas object the user is interacting with.
|
10
|
+
|
11
|
+
Examples
|
12
|
+
--------
|
13
|
+
>>> from ads.feature_engineering.accessor.dataframe_accessor import ADSDataFrameAccessor
|
14
|
+
>>> from ads.feature_engineering.feature_type.continuous import Continuous
|
15
|
+
>>> from ads.feature_engineering.feature_type.creditcard import CreditCard
|
16
|
+
>>> from ads.feature_engineering.feature_type.string import String
|
17
|
+
>>> from ads.feature_engineering.feature_type.base import Tag
|
18
|
+
>>> df = pd.DataFrame({'Name': ['Alex'], 'Credit Card': ["4532640527811543"]})
|
19
|
+
>>> df.ads.feature_type
|
20
|
+
{'Name': ['string'], 'Credit Card': ['string']}
|
21
|
+
>>> df.ads.feature_type_description
|
22
|
+
Column Feature Type Description
|
23
|
+
------------------------------------------------------------------
|
24
|
+
0 Name string Type representing string values.
|
25
|
+
1 Credit Card string Type representing string values.
|
26
|
+
>>> df.ads.default_type
|
27
|
+
{'Name': 'string', 'Credit Card': 'string'}
|
28
|
+
>>> df.ads.feature_type = {'Name':['string', Tag('abc')]}
|
29
|
+
>>> df.ads.tags
|
30
|
+
{'Name': ['abc']}
|
31
|
+
>>> df.ads.feature_type = {'Credit Card':['credit_card']}
|
32
|
+
>>> df.ads.feature_select(include=['credit_card'])
|
33
|
+
Credit Card
|
34
|
+
-------------------------------
|
35
|
+
0 4532640527811543
|
36
|
+
"""
|
37
|
+
|
38
|
+
from typing import Any, Dict, List, Union
|
39
|
+
|
40
|
+
import numpy as np
|
41
|
+
import pandas as pd
|
42
|
+
from ads.common.utils import DATA_SCHEMA_MAX_COL_NUM
|
43
|
+
from ads.data_labeling.mixin.data_labeling import DataLabelingAccessMixin
|
44
|
+
from ads.dataset.mixin.dataset_accessor import ADSDatasetAccessMixin
|
45
|
+
from ads.dbmixin.db_pandas_accessor import DBAccessMixin
|
46
|
+
from ads.feature_engineering import schema
|
47
|
+
from ads.feature_engineering.accessor.mixin.eda_mixin import EDAMixin
|
48
|
+
from ads.feature_engineering.accessor.mixin.feature_types_mixin import (
|
49
|
+
ADSFeatureTypesMixin,
|
50
|
+
)
|
51
|
+
from ads.feature_engineering.feature_type.base import FeatureType
|
52
|
+
from pandas.core.dtypes.common import is_list_like
|
53
|
+
|
54
|
+
|
55
|
+
@pd.api.extensions.register_dataframe_accessor("ads")
class ADSDataFrameAccessor(
    ADSFeatureTypesMixin,
    EDAMixin,
    DBAccessMixin,
    DataLabelingAccessMixin,
    ADSDatasetAccessMixin
):
    """ADS accessor for the Pandas DataFrame.

    The accessor is registered under the ``ads`` namespace, so it is reached
    as ``df.ads``. Most operations are delegated column by column to the
    ``ads`` accessor of each column Series.

    Attributes
    ----------
    columns: pd.Index
        The column labels of the DataFrame.

    tags(self) -> Dict[str, List[str]]
        Gets the dictionary of user defined tags for the dataframe.
    default_type(self) -> Dict[str, str]
        Gets the map of columns and associated default feature type names.
    feature_type(self) -> Dict[str, List[str]]
        Gets the list of registered feature types.
    feature_type_description(self) -> pd.DataFrame
        Gets the list of registered feature types in a DataFrame format.

    Methods
    -------
    sync(self, src: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame
        Syncs feature types of current DataFrame with that from src.
    feature_select(self, include: List[Union[FeatureType, str]] = None, exclude: List[Union[FeatureType, str]] = None) -> pd.DataFrame
        Returns a subset of the DataFrame's columns based on the column feature types.
    help(self, prop: str = None) -> None
        Provides docstring for available methods and properties.

    Examples
    --------
    >>> from ads.feature_engineering.accessor.dataframe_accessor import ADSDataFrameAccessor
    >>> from ads.feature_engineering.feature_type.continuous import Continuous
    >>> from ads.feature_engineering.feature_type.creditcard import CreditCard
    >>> from ads.feature_engineering.feature_type.string import String
    >>> from ads.feature_engineering.feature_type.base import Tag
    >>> df = pd.DataFrame({'Name': ['Alex'], 'Credit Card': ["4532640527811543"]})
    >>> df.ads.feature_type
    {'Name': ['string'], 'Credit Card': ['string']}
    >>> df.ads.feature_type_description
              Column   Feature Type                         Description
    -------------------------------------------------------------------
    0           Name         string    Type representing string values.
    1    Credit Card         string    Type representing string values.
    >>> df.ads.default_type
    {'Name': 'string', 'Credit Card': 'string'}
    >>> df.ads.feature_type = {'Name':['string', Tag('abc')]}
    >>> df.ads.tags
    {'Name': ['abc']}
    >>> df.ads.feature_type = {'Credit Card':['credit_card']}
    >>> df.ads.feature_select(include=['credit_card'])
                        Credit Card
    ------------------------------
    0	          4532640527811543
    """

    def __init__(self, pandas_obj) -> None:
        """Initializes ADS Pandas DataFrame Accessor.

        Parameters
        ----------
        pandas_obj : pandas.DataFrame
            Pandas dataframe

        Raises
        ------
        ValueError
            If provided DataFrame has duplicate columns.
        """
        if len(set(pandas_obj.columns)) != len(pandas_obj.columns):
            raise ValueError(
                "Failed to initialize a DataFrame accessor. Duplicate column found."
            )
        # `_obj` is assigned before anything else touches the instance because
        # `__getattr__` below reads it for every unknown attribute.
        self._obj = pandas_obj
        super().__init__()
        self.columns = self._obj.columns
        self._info = None

    def info(self) -> Any:
        """Gets information about the dataframe.

        Returns
        -------
        Any
            The information about the dataframe (``None`` until something
            assigns ``_info``).
        """
        return self._info

    @property
    def _feature_type(self) -> Dict[str, List[FeatureType]]:
        """Gets the map of columns and associated feature types.
        Key is column name and value is list of feature types.
        """
        return {
            self._obj[col].name: self._obj[col].ads._feature_type for col in self._obj
        }

    @property
    def _default_type(self) -> Dict[str, FeatureType]:
        """Gets the map of columns and associated default feature types.
        Key is column name and value is a default feature type.
        """
        return {
            self._obj[col].name: self._obj[col].ads._default_type for col in self._obj
        }

    @property
    def tags(self) -> Dict[str, List[str]]:
        """Gets the dictionary of user defined tags for the dataframe. Key is column name
        and value is list of tag names.

        Returns
        -------
        Dict[str, List[str]]
            The map of columns and associated tags.
        """
        return {self._obj[col].name: self._obj[col].ads.tags for col in self._obj}

    @property
    def default_type(self) -> Dict[str, str]:
        """Gets the map of columns and associated default feature type names.

        Returns
        -------
        Dict[str, str]
            The dictionary where key is column name and value is the name of default feature
            type.
        """
        return {k: v.name for k, v in self._default_type.items()}

    @property
    def feature_type(self) -> Dict[str, List[str]]:
        """Gets the list of registered feature types.

        Returns
        -------
        Dict[str, List[str]]
            The dictionary where key is column name and value is list of associated feature type
            names.
        """
        return {col.name: col.ads.feature_type for _, col in self._obj.items()}

    @property
    def feature_type_description(self) -> pd.DataFrame:
        """Gets the list of registered feature types in a DataFrame format.

        Returns
        -------
        :class:`pandas.DataFrame`
            One row per (column, feature type) pair, re-indexed from zero.

        Examples
        --------
        >>> df.ads.feature_type_description
                  Column   Feature Type                         Description
        -------------------------------------------------------------------
        0           City         string    Type representing string values.
        1   Phone Number         string    Type representing string values.
        """
        # The empty header frame goes first so the result always carries the
        # three expected columns, even for a DataFrame without columns.
        frames = [pd.DataFrame([], columns=["Column", "Feature Type", "Description"])]
        for col in self._obj:
            series_feature_type_df = self._obj[col].ads.feature_type_description
            series_feature_type_df.insert(0, "Column", col)
            frames.append(series_feature_type_df)
        # Concatenate once instead of once per column to avoid re-copying the
        # accumulated rows on every iteration.
        result_df = pd.concat(frames)
        result_df.reset_index(drop=True, inplace=True)
        return result_df

    @feature_type.setter
    def feature_type(
        self, feature_type_map: Dict[str, List[Union[FeatureType, str]]]
    ) -> None:
        """Sets feature types for the DataFrame.

        Parameters
        ----------
        feature_type_map : Dict[str, List[Union[FeatureType, str]]]
            The map of feature types where key is column name and value is list of feature
            types.

        Returns
        -------
        None
            Nothing.
        """
        for col, feature_types in feature_type_map.items():
            self._obj[col].ads.feature_type = feature_types

    def sync(self, src: Union[pd.DataFrame, pd.Series]) -> pd.DataFrame:
        """Syncs feature types of current DataFrame with that from src.

        Syncs feature types of current dataframe with that from src, where src
        can be a dataframe or a series. In either case, only columns with
        matched names are synced.

        Parameters
        ----------
        src: `pd.DataFrame` | `pd.Series`
            The source to sync from.

        Returns
        -------
        :class:`pandas.DataFrame`
            Synced dataframe.
        """
        for _, col in self._obj.items():
            col.ads.sync(src)
        # Return the frame so the result matches the documented and annotated
        # return type; previously the method fell through and returned None.
        return self._obj

    def _extract_columns_of_target_types(
        self, target_types: List[Union[FeatureType, str]]
    ) -> List:
        """Returns all the column names that are of the target types from the
        feature_type dictionary.

        Parameters
        ----------
        target_types: list
            A list of target feature types, can be either feature type names or
            feature type classes. ``None`` is treated as "no target types".

        Returns
        -------
        List[str]
            The list of column names, each listed once, in the order they
            are first matched.
        """
        if target_types is None:
            return []

        target_names = np.unique(
            [self._get_type(feature_type).name for feature_type in target_types]
        )
        # Read the property once; it is rebuilt from every column on each access.
        feature_type_map = self.feature_type

        columns = []
        seen = set()
        for target_type in target_names:
            for name, feature_types in feature_type_map.items():
                # A column matching several target types is reported only once.
                if target_type in feature_types and name not in seen:
                    seen.add(name)
                    columns.append(name)
        return columns

    def feature_select(
        self,
        include: List[Union[FeatureType, str]] = None,
        exclude: List[Union[FeatureType, str]] = None,
    ) -> pd.DataFrame:
        """Returns a subset of the DataFrame’s columns based on the column feature_types.

        Parameters
        ----------
        include: List[Union[FeatureType, str]], optional
            Defaults to None. A list of FeatureType subclass or str to be included.
        exclude: List[Union[FeatureType, str]], optional
            Defaults to None. A list of FeatureType subclass or str to be excluded.

        Raises
        ------
        ValueError
            If both of include and exclude are empty
        ValueError
            If include and exclude are used simultaneously

        Returns
        -------
        :class:`pandas.DataFrame`
            The subset of the frame including the feature types in include and excluding
            the feature types in exclude.
        """
        if not (include or exclude):
            raise ValueError("at least one of include or exclude must be nonempty")

        # A single feature type (not list-like) is wrapped; None becomes empty.
        if not is_list_like(include):
            include = (include,) if include is not None else ()
        if not is_list_like(exclude):
            exclude = (exclude,) if exclude is not None else ()

        # unify the feature types to their (unique) str representation
        include = frozenset(
            self._get_type(feature_type).name for feature_type in include
        )
        exclude = frozenset(
            self._get_type(feature_type).name for feature_type in exclude
        )

        # can't both include AND exclude!
        if not include.isdisjoint(exclude):
            raise ValueError(f"include and exclude overlap on {(include & exclude)}")

        # We raise when both include and exclude are empty
        # Hence, we can just shrink the columns we want to keep
        keep_these = np.full(self._obj.shape[1], True)

        columns = self._obj.columns

        if include:
            included_columns = self._extract_columns_of_target_types(include)
            keep_these &= columns.isin(included_columns)

        if exclude:
            excluded_columns = self._extract_columns_of_target_types(exclude)
            keep_these &= ~columns.isin(excluded_columns)

        return self._obj.loc[:, keep_these]

    def _add_feature_type(
        self, col: str, feature_type: Union[FeatureType, str]
    ) -> bool:
        """Adds a feature type

        Parameters
        ----------
        col : str
            The column name.
        feature_type : Union[FeatureType, str]
            The feature type to add.

        Returns
        -------
        bool
            Whether add succeeded.

        Raises
        ------
        ValueError
            If the column is not present in the DataFrame.
        """
        if col not in self._obj.columns:
            raise ValueError(f"Column {col} is not found.")
        return self._obj[col].ads._add_feature_type(feature_type)

    def _remove_feature_type(
        self, col: str, feature_type: Union[FeatureType, str]
    ) -> None:
        """Removes a feature type

        Parameters
        ----------
        col : str
            column name
        feature_type : Union[FeatureType, str]
            feature type

        Returns
        -------
        None
            Nothing

        Raises
        ------
        ValueError
            If the column is not present in the DataFrame.
        """
        if col not in self._obj.columns:
            raise ValueError(f"Column {col} is not found.")
        self._obj[col].ads._remove_feature_type(feature_type)

    def model_schema(self, max_col_num: int = DATA_SCHEMA_MAX_COL_NUM):
        """
        Generates schema from the dataframe.

        Parameters
        ----------
        max_col_num : int, optional. Defaults to ``DATA_SCHEMA_MAX_COL_NUM``
            The maximum column size of the data that allows to auto generate schema.
            A falsy value (``None`` or ``0``) disables the check.

        Returns
        -------
        ads.feature_engineering.schema.Schema
            data schema.

        Raises
        ------
        ads.feature_engineering.schema.DataSizeTooWide
            If the number of columns of input data exceeds `max_col_num`.

        Examples
        --------
        >>> df = pd.read_csv('./orcl_attrition.csv', usecols=['Age', 'Attrition'])
        >>> schema = df.ads.model_schema()
        >>> schema
        Schema:
        - description: Attrition
          domain:
            constraints: []
            stats:
              count: 1470
              unique: 2
            values: String
          dtype: object
          feature_type: String
          name: Attrition
          required: true
        - description: Age
          domain:
            constraints: []
            stats:
              25%: 31.0
              50%: 37.0
              75%: 44.0
              count: 1470.0
              max: 61.0
              mean: 37.923809523809524
              min: 19.0
              std: 9.135373489136732
            values: Integer
          dtype: int64
          feature_type: Integer
          name: Age
          required: true
        >>> schema.to_dict()
        {'Schema': [{'dtype': 'object',
            'feature_type': 'String',
            'name': 'Attrition',
            'domain': {'values': 'String',
                'stats': {'count': 1470, 'unique': 2},
                'constraints': []},
            'required': True,
            'description': 'Attrition'},
            {'dtype': 'int64',
            'feature_type': 'Integer',
            'name': 'Age',
            'domain': {'values': 'Integer',
                'stats': {'count': 1470.0,
                'mean': 37.923809523809524,
                'std': 9.135373489136732,
                'min': 19.0,
                '25%': 31.0,
                '50%': 37.0,
                '75%': 44.0,
                'max': 61.0},
                'constraints': []},
            'required': True,
            'description': 'Age'}]}
        """
        if max_col_num and len(self._obj.columns) > max_col_num:
            raise schema.DataSizeTooWide(
                data_col_num=len(self._obj.columns), max_col_num=max_col_num
            )

        sc = schema.Schema()
        for i, col in enumerate(self._obj.columns):
            # Best effort: fall back to an empty domain when the column's
            # domain cannot be derived. Only `Exception` is caught so that
            # KeyboardInterrupt / SystemExit still propagate.
            domain = schema.Domain()
            try:
                domain = self._obj[col].ads.feature_domain()
            except Exception:
                pass

            sc.add(
                schema.Attribute(
                    self._obj[col].dtype.name,
                    domain.values,
                    col,
                    domain=domain,
                    description=str(col),
                    # `not` (rather than `~`) is correct for both numpy and
                    # built-in booleans; `~True` on a Python bool is -2.
                    required=not self._obj[col].isnull().any(),
                    order=i,
                )
            )

        return sc

    def __getattr__(self, attr):
        """Resolves an unknown attribute by asking every column's ``ads`` accessor.

        Parameters
        ----------
        attr : str
            The attribute name that normal lookup failed to find.

        Returns
        -------
        Union[Dict, Callable]
            A dictionary mapping each column to the attribute value (``None``
            for columns where the lookup failed). If any value is callable, a
            function is returned instead; calling it forwards the arguments to
            every column and returns the per-column results as a dictionary.

        Raises
        ------
        AttributeError
            For ``_obj`` and for dunder names.
        """
        # `_obj` can only land here when it has not been set yet; reading
        # `self._obj` below would then recurse forever. Dunder names are
        # protocol probes (copy, pickle, ...) and must fail normally rather
        # than receive a per-column dictionary.
        if attr == "_obj" or (attr.startswith("__") and attr.endswith("__")):
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{attr}'"
            )

        attr_map = {}
        for col in self._obj:
            try:
                val = self._obj[col].ads.__getattr__(attr)
            except Exception:
                val = None  # if a column does not have the request attr, return None
            attr_map[col] = val

        # check if attr is a callable, and if yes apply args to all cols.
        if any(callable(x) for x in attr_map.values()):

            def func(*args, **kwargs):
                return {
                    k: v(*args, **kwargs) if v else None for k, v in attr_map.items()
                }

            return func

        return attr_map
|
@@ -0,0 +1,166 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*--
|
3
|
+
|
4
|
+
# Copyright (c) 2021, 2023 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
from __future__ import print_function, absolute_import
|
8
|
+
|
9
|
+
import numpy as np
|
10
|
+
import pandas as pd
|
11
|
+
|
12
|
+
from collections import defaultdict, Counter
|
13
|
+
from itertools import combinations, product
|
14
|
+
|
15
|
+
|
16
|
+
def _chi_squared(count_matrix: np.ndarray, n_obs: int) -> float:
|
17
|
+
"""
|
18
|
+
Computes Chi-squared when given a contingency table
|
19
|
+
"""
|
20
|
+
row_sums = np.tile(np.sum(count_matrix, axis=1), (count_matrix.shape[1], 1)).T
|
21
|
+
col_sums = np.tile(np.sum(count_matrix, axis=0), (count_matrix.shape[0], 1))
|
22
|
+
return np.sum(
|
23
|
+
np.square(count_matrix - row_sums * col_sums / n_obs)
|
24
|
+
/ (row_sums * col_sums / n_obs)
|
25
|
+
)
|
26
|
+
|
27
|
+
|
28
|
+
def _cramers_v(cat1: np.ndarray, cat2: np.ndarray) -> float:
    """
    Calculates Cramer's V for two equally long arrays of categories.

    Positions where either array is null are ignored. The statistic is built
    from the chi-squared value of the contingency table, with the
    ``(k - 1) * (r - 1) / (n - 1)`` correction subtracted from phi-squared and
    the level counts ``r`` and ``k`` corrected the same way.

    Returns
    -------
    float
        ``0`` when exactly one usable pair remains or when no levels remain,
        ``np.nan`` when the corrected denominator is zero, otherwise the
        square root of the corrected ratio.
    """
    # A position is usable only when both arrays hold a value there.
    both_present = ~(pd.isnull(cat1) | pd.isnull(cat2))
    left = cat1[both_present]
    right = cat2[both_present]

    n = len(left)
    if n == 1:
        # n - 1 is used as a divisor below.
        return 0

    table, r, k = _count_occurrence(left, right)
    if r == 0:
        return 0.0000

    phi2 = _chi_squared(table, n) / n
    dof = n - 1

    phi2corr = max(0, phi2 - ((k - 1) * (r - 1)) / dof)
    rcorr = r - (np.square(r - 1)) / dof
    kcorr = k - (np.square(k - 1)) / dof

    denominator = min((kcorr - 1), (rcorr - 1))
    if denominator == 0:
        return np.nan
    return np.sqrt(phi2corr / denominator)
|
56
|
+
|
57
|
+
|
58
|
+
def _list_to_dataframe(
    name_list: list, corr_list: list, normal_form: bool
) -> pd.DataFrame:
    """
    Turns pairwise correlation values into a symmetric correlation table.

    Parameters
    ----------
    name_list: list
        Pairs of column names, one pair per correlation value.
    corr_list: list
        Correlation values aligned with ``name_list``.
    normal_form: bool
        If True, return one row per stacked matrix cell with the columns
        ``Column 1``, ``Column 2`` and ``Value``; otherwise return the matrix.

    Returns
    -------
    pd.DataFrame
        The correlation matrix (index sorted, columns in index order) or its
        stacked long form. Values are rounded to four decimals.
    """
    nested = defaultdict(dict)
    for pair, value in zip(name_list, corr_list):
        first, second = pair[0], pair[1]
        rounded = round(value, 4)
        # Mirror the value so the matrix is symmetric.
        nested[first][second] = rounded
        nested[second][first] = rounded
        # Every name that appears in a pair gets a unit diagonal entry.
        nested[first][first] = 1.0000
        nested[second][second] = 1.0000

    matrix = pd.DataFrame.from_dict(nested).sort_index()
    # Order the columns exactly like the sorted index.
    matrix = matrix.loc[:, matrix.index]

    if not normal_form:
        return matrix

    records = [
        [left, right, round(value, 4)]
        for (left, right), value in matrix.stack().items()
    ]
    return pd.DataFrame(records, columns=["Column 1", "Column 2", "Value"])
|
76
|
+
|
77
|
+
|
78
|
+
def _count_occurrence(cat1: np.ndarray, cat2: np.ndarray) -> (np.ndarray, int, int):
    """
    Builds the contingency table of two arrays.

    Returns
    -------
    tuple
        ``(table, r, k)`` where ``r`` and ``k`` are the numbers of distinct
        non-null values of ``cat1`` and ``cat2``, and ``table[i, j]`` is how
        often the i-th distinct value of ``cat1`` occurs together with the
        j-th distinct value of ``cat2``.
    """
    pair_counts = Counter(zip(cat1, cat2))
    row_levels = np.unique(cat1[~pd.isnull(cat1)])
    col_levels = np.unique(cat2[~pd.isnull(cat2)])
    r, k = len(row_levels), len(col_levels)

    # Row-major list of cell counts; pairs that never occur count as 0.
    cells = [pair_counts[(a, b)] for a in row_levels for b in col_levels]
    contingency_table = np.array(cells, dtype=float).reshape(r, k)

    return contingency_table, r, k
|
93
|
+
|
94
|
+
|
95
|
+
def _correlation_ratio(cat: np.ndarray, cts: np.ndarray):
    """
    Calculates the correlation of a pair of a category feature and a continuous feature
    using correlation ratio when input are two numpy arrays.

    Positions where either array is null are ignored.

    Parameters
    ----------
    cat: np.ndarray
        Category label of each observation.
    cts: np.ndarray
        Continuous value of each observation, aligned with ``cat``.

    Returns
    -------
    float
        Square root of (dispersion of the group means around the overall mean)
        divided by (total dispersion of ``cts``). ``np.nan`` when no usable
        observation remains or when the total dispersion is zero, because the
        ratio is undefined in both cases.
    """
    keep_cts = ~pd.isnull(cts)
    cat_no_nan = cat[keep_cts]
    cts_no_nan = cts[keep_cts]

    keep_cat = ~pd.isnull(cat_no_nan)
    cat_no_none = cat_no_nan[keep_cat]
    cts_no_none = cts_no_nan[keep_cat]

    n = len(cts_no_none)
    if n == 0:
        # Nothing to measure; return NaN directly instead of reaching it
        # through empty-array arithmetic that emits runtime warnings.
        return np.nan

    # ``tags`` maps each observation to the index of its group.
    _, tags, group_count = np.unique(
        list(cat_no_none), return_inverse=True, return_counts=True
    )
    # Weighted bincount sums the continuous values per group.
    group_mean = np.bincount(tags, cts_no_none) / group_count
    overall_mean = np.nanmean(cts_no_none)

    dispersion_within = np.dot(group_count, np.square(group_mean - overall_mean))
    dispersion_population = cts_no_none.var() * n
    if dispersion_population == 0:
        # Constant continuous column: avoid the 0 / 0 division.
        return np.nan

    return np.sqrt(dispersion_within / dispersion_population)
|
120
|
+
|
121
|
+
|
122
|
+
def cat_vs_cont(
    df: pd.DataFrame, categorical_columns, continuous_columns, normal_form: bool = True
) -> pd.DataFrame:
    """
    Computes the correlation ratio for every (categorical, continuous) column pair.

    Parameters
    ----------
    df: pd.DataFrame
        Frame holding all referenced columns.
    categorical_columns
        Names of the categorical columns.
    continuous_columns
        Names of the continuous columns.
    normal_form: bool
        Passed through to the table builder to select its output layout.

    Returns
    -------
    pd.DataFrame
        The table built from the pairwise correlation ratios.
    """
    pairs = list(product(categorical_columns, continuous_columns))
    ratios = [
        _correlation_ratio(
            np.array(df[cat_name].values), np.array(df[cont_name].values)
        )
        for cat_name, cont_name in pairs
    ]
    return _list_to_dataframe(pairs, ratios, normal_form=normal_form)
|
138
|
+
|
139
|
+
|
140
|
+
def cat_vs_cat(df: pd.DataFrame, normal_form: bool = True) -> pd.DataFrame:
    """
    Computes Cramer's V for every unordered pair of columns of ``df``.

    Parameters
    ----------
    df: pd.DataFrame
        Frame whose columns are all treated as categorical.
    normal_form: bool
        Passed through to the table builder to select its output layout.

    Returns
    -------
    pd.DataFrame
        The table built from the pairwise Cramer's V values.
    """
    pairs = list(combinations(df.columns.to_list(), 2))
    scores = [
        _cramers_v(np.array(df[first].values), np.array(df[second].values))
        for first, second in pairs
    ]
    return _list_to_dataframe(pairs, scores, normal_form=normal_form)
|
155
|
+
|
156
|
+
|
157
|
+
def cont_vs_cont(df: pd.DataFrame, normal_form: bool = True) -> pd.DataFrame:
    """
    Computes the pairwise Pearson correlation of the columns of ``df``.

    Parameters
    ----------
    df: pd.DataFrame
        Frame to correlate.
    normal_form: bool
        If True, return one row per stacked matrix cell with the columns
        ``Column 1``, ``Column 2`` and ``Value`` (rounded to four decimals);
        otherwise return the unrounded correlation matrix.

    Returns
    -------
    pd.DataFrame
        The Pearson correlations in the requested layout.
    """
    pearson = df.corr(method="pearson")
    if normal_form:
        records = [
            [left, right, round(value, 4)]
            for (left, right), value in pearson.stack().items()
        ]
        return pd.DataFrame(records, columns=["Column 1", "Column 2", "Value"])
    return pearson
|