oracle-ads 2.13.9rc0__py3-none-any.whl → 2.13.9rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/__init__.py +40 -0
- ads/aqua/app.py +506 -0
- ads/aqua/cli.py +96 -0
- ads/aqua/client/__init__.py +3 -0
- ads/aqua/client/client.py +836 -0
- ads/aqua/client/openai_client.py +305 -0
- ads/aqua/common/__init__.py +5 -0
- ads/aqua/common/decorator.py +125 -0
- ads/aqua/common/entities.py +269 -0
- ads/aqua/common/enums.py +122 -0
- ads/aqua/common/errors.py +109 -0
- ads/aqua/common/utils.py +1285 -0
- ads/aqua/config/__init__.py +4 -0
- ads/aqua/config/container_config.py +248 -0
- ads/aqua/config/evaluation/__init__.py +4 -0
- ads/aqua/config/evaluation/evaluation_service_config.py +147 -0
- ads/aqua/config/utils/__init__.py +4 -0
- ads/aqua/config/utils/serializer.py +339 -0
- ads/aqua/constants.py +116 -0
- ads/aqua/data.py +14 -0
- ads/aqua/dummy_data/icon.txt +1 -0
- ads/aqua/dummy_data/oci_model_deployments.json +56 -0
- ads/aqua/dummy_data/oci_models.json +1 -0
- ads/aqua/dummy_data/readme.md +26 -0
- ads/aqua/evaluation/__init__.py +8 -0
- ads/aqua/evaluation/constants.py +53 -0
- ads/aqua/evaluation/entities.py +186 -0
- ads/aqua/evaluation/errors.py +70 -0
- ads/aqua/evaluation/evaluation.py +1814 -0
- ads/aqua/extension/__init__.py +42 -0
- ads/aqua/extension/aqua_ws_msg_handler.py +76 -0
- ads/aqua/extension/base_handler.py +90 -0
- ads/aqua/extension/common_handler.py +121 -0
- ads/aqua/extension/common_ws_msg_handler.py +36 -0
- ads/aqua/extension/deployment_handler.py +298 -0
- ads/aqua/extension/deployment_ws_msg_handler.py +54 -0
- ads/aqua/extension/errors.py +30 -0
- ads/aqua/extension/evaluation_handler.py +129 -0
- ads/aqua/extension/evaluation_ws_msg_handler.py +61 -0
- ads/aqua/extension/finetune_handler.py +96 -0
- ads/aqua/extension/model_handler.py +390 -0
- ads/aqua/extension/models/__init__.py +0 -0
- ads/aqua/extension/models/ws_models.py +145 -0
- ads/aqua/extension/models_ws_msg_handler.py +50 -0
- ads/aqua/extension/ui_handler.py +282 -0
- ads/aqua/extension/ui_websocket_handler.py +130 -0
- ads/aqua/extension/utils.py +133 -0
- ads/aqua/finetuning/__init__.py +7 -0
- ads/aqua/finetuning/constants.py +23 -0
- ads/aqua/finetuning/entities.py +181 -0
- ads/aqua/finetuning/finetuning.py +749 -0
- ads/aqua/model/__init__.py +8 -0
- ads/aqua/model/constants.py +60 -0
- ads/aqua/model/entities.py +385 -0
- ads/aqua/model/enums.py +32 -0
- ads/aqua/model/model.py +2114 -0
- ads/aqua/modeldeployment/__init__.py +8 -0
- ads/aqua/modeldeployment/constants.py +10 -0
- ads/aqua/modeldeployment/deployment.py +1326 -0
- ads/aqua/modeldeployment/entities.py +653 -0
- ads/aqua/modeldeployment/inference.py +74 -0
- ads/aqua/modeldeployment/utils.py +543 -0
- ads/aqua/resources/gpu_shapes_index.json +94 -0
- ads/aqua/server/__init__.py +4 -0
- ads/aqua/server/__main__.py +24 -0
- ads/aqua/server/app.py +47 -0
- ads/aqua/server/aqua_spec.yml +1291 -0
- ads/aqua/training/__init__.py +4 -0
- ads/aqua/training/exceptions.py +476 -0
- ads/aqua/ui.py +499 -0
- ads/automl/__init__.py +9 -0
- ads/automl/driver.py +330 -0
- ads/automl/provider.py +975 -0
- ads/bds/__init__.py +5 -0
- ads/bds/auth.py +127 -0
- ads/bds/big_data_service.py +255 -0
- ads/catalog/__init__.py +19 -0
- ads/catalog/model.py +1576 -0
- ads/catalog/notebook.py +461 -0
- ads/catalog/project.py +468 -0
- ads/catalog/summary.py +178 -0
- ads/common/__init__.py +11 -0
- ads/common/analyzer.py +65 -0
- ads/common/artifact/.model-ignore +63 -0
- ads/common/artifact/__init__.py +10 -0
- ads/common/auth.py +1122 -0
- ads/common/card_identifier.py +83 -0
- ads/common/config.py +647 -0
- ads/common/data.py +165 -0
- ads/common/decorator/__init__.py +9 -0
- ads/common/decorator/argument_to_case.py +88 -0
- ads/common/decorator/deprecate.py +69 -0
- ads/common/decorator/require_nonempty_arg.py +65 -0
- ads/common/decorator/runtime_dependency.py +178 -0
- ads/common/decorator/threaded.py +97 -0
- ads/common/decorator/utils.py +35 -0
- ads/common/dsc_file_system.py +303 -0
- ads/common/error.py +14 -0
- ads/common/extended_enum.py +81 -0
- ads/common/function/__init__.py +5 -0
- ads/common/function/fn_util.py +142 -0
- ads/common/function/func_conf.yaml +25 -0
- ads/common/ipython.py +76 -0
- ads/common/model.py +679 -0
- ads/common/model_artifact.py +1759 -0
- ads/common/model_artifact_schema.json +107 -0
- ads/common/model_export_util.py +664 -0
- ads/common/model_metadata.py +24 -0
- ads/common/object_storage_details.py +296 -0
- ads/common/oci_client.py +175 -0
- ads/common/oci_datascience.py +46 -0
- ads/common/oci_logging.py +1144 -0
- ads/common/oci_mixin.py +957 -0
- ads/common/oci_resource.py +136 -0
- ads/common/serializer.py +559 -0
- ads/common/utils.py +1852 -0
- ads/common/word_lists.py +1491 -0
- ads/common/work_request.py +189 -0
- ads/data_labeling/__init__.py +13 -0
- ads/data_labeling/boundingbox.py +253 -0
- ads/data_labeling/constants.py +47 -0
- ads/data_labeling/data_labeling_service.py +244 -0
- ads/data_labeling/interface/__init__.py +5 -0
- ads/data_labeling/interface/loader.py +16 -0
- ads/data_labeling/interface/parser.py +16 -0
- ads/data_labeling/interface/reader.py +23 -0
- ads/data_labeling/loader/__init__.py +5 -0
- ads/data_labeling/loader/file_loader.py +241 -0
- ads/data_labeling/metadata.py +110 -0
- ads/data_labeling/mixin/__init__.py +5 -0
- ads/data_labeling/mixin/data_labeling.py +232 -0
- ads/data_labeling/ner.py +129 -0
- ads/data_labeling/parser/__init__.py +5 -0
- ads/data_labeling/parser/dls_record_parser.py +388 -0
- ads/data_labeling/parser/export_metadata_parser.py +94 -0
- ads/data_labeling/parser/export_record_parser.py +473 -0
- ads/data_labeling/reader/__init__.py +5 -0
- ads/data_labeling/reader/dataset_reader.py +574 -0
- ads/data_labeling/reader/dls_record_reader.py +121 -0
- ads/data_labeling/reader/export_record_reader.py +62 -0
- ads/data_labeling/reader/jsonl_reader.py +75 -0
- ads/data_labeling/reader/metadata_reader.py +203 -0
- ads/data_labeling/reader/record_reader.py +263 -0
- ads/data_labeling/record.py +52 -0
- ads/data_labeling/visualizer/__init__.py +5 -0
- ads/data_labeling/visualizer/image_visualizer.py +525 -0
- ads/data_labeling/visualizer/text_visualizer.py +357 -0
- ads/database/__init__.py +5 -0
- ads/database/connection.py +338 -0
- ads/dataset/__init__.py +10 -0
- ads/dataset/capabilities.md +51 -0
- ads/dataset/classification_dataset.py +339 -0
- ads/dataset/correlation.py +226 -0
- ads/dataset/correlation_plot.py +563 -0
- ads/dataset/dask_series.py +173 -0
- ads/dataset/dataframe_transformer.py +110 -0
- ads/dataset/dataset.py +1979 -0
- ads/dataset/dataset_browser.py +360 -0
- ads/dataset/dataset_with_target.py +995 -0
- ads/dataset/exception.py +25 -0
- ads/dataset/factory.py +987 -0
- ads/dataset/feature_engineering_transformer.py +35 -0
- ads/dataset/feature_selection.py +107 -0
- ads/dataset/forecasting_dataset.py +26 -0
- ads/dataset/helper.py +1450 -0
- ads/dataset/label_encoder.py +99 -0
- ads/dataset/mixin/__init__.py +5 -0
- ads/dataset/mixin/dataset_accessor.py +134 -0
- ads/dataset/pipeline.py +58 -0
- ads/dataset/plot.py +710 -0
- ads/dataset/progress.py +86 -0
- ads/dataset/recommendation.py +297 -0
- ads/dataset/recommendation_transformer.py +502 -0
- ads/dataset/regression_dataset.py +14 -0
- ads/dataset/sampled_dataset.py +1050 -0
- ads/dataset/target.py +98 -0
- ads/dataset/timeseries.py +18 -0
- ads/dbmixin/__init__.py +5 -0
- ads/dbmixin/db_pandas_accessor.py +153 -0
- ads/environment/__init__.py +9 -0
- ads/environment/ml_runtime.py +66 -0
- ads/evaluations/README.md +14 -0
- ads/evaluations/__init__.py +109 -0
- ads/evaluations/evaluation_plot.py +983 -0
- ads/evaluations/evaluator.py +1334 -0
- ads/evaluations/statistical_metrics.py +543 -0
- ads/experiments/__init__.py +9 -0
- ads/experiments/capabilities.md +0 -0
- ads/explanations/__init__.py +21 -0
- ads/explanations/base_explainer.py +142 -0
- ads/explanations/capabilities.md +83 -0
- ads/explanations/explainer.py +190 -0
- ads/explanations/mlx_global_explainer.py +1050 -0
- ads/explanations/mlx_interface.py +386 -0
- ads/explanations/mlx_local_explainer.py +287 -0
- ads/explanations/mlx_whatif_explainer.py +201 -0
- ads/feature_engineering/__init__.py +20 -0
- ads/feature_engineering/accessor/__init__.py +5 -0
- ads/feature_engineering/accessor/dataframe_accessor.py +535 -0
- ads/feature_engineering/accessor/mixin/__init__.py +5 -0
- ads/feature_engineering/accessor/mixin/correlation.py +166 -0
- ads/feature_engineering/accessor/mixin/eda_mixin.py +266 -0
- ads/feature_engineering/accessor/mixin/eda_mixin_series.py +85 -0
- ads/feature_engineering/accessor/mixin/feature_types_mixin.py +211 -0
- ads/feature_engineering/accessor/mixin/utils.py +65 -0
- ads/feature_engineering/accessor/series_accessor.py +431 -0
- ads/feature_engineering/adsimage/__init__.py +5 -0
- ads/feature_engineering/adsimage/image.py +192 -0
- ads/feature_engineering/adsimage/image_reader.py +170 -0
- ads/feature_engineering/adsimage/interface/__init__.py +5 -0
- ads/feature_engineering/adsimage/interface/reader.py +19 -0
- ads/feature_engineering/adsstring/__init__.py +7 -0
- ads/feature_engineering/adsstring/oci_language/__init__.py +8 -0
- ads/feature_engineering/adsstring/string/__init__.py +8 -0
- ads/feature_engineering/data_schema.json +57 -0
- ads/feature_engineering/dataset/__init__.py +5 -0
- ads/feature_engineering/dataset/zip_code_data.py +42062 -0
- ads/feature_engineering/exceptions.py +40 -0
- ads/feature_engineering/feature_type/__init__.py +133 -0
- ads/feature_engineering/feature_type/address.py +184 -0
- ads/feature_engineering/feature_type/adsstring/__init__.py +5 -0
- ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +164 -0
- ads/feature_engineering/feature_type/adsstring/oci_language.py +93 -0
- ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +5 -0
- ads/feature_engineering/feature_type/adsstring/parsers/base.py +47 -0
- ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +96 -0
- ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +221 -0
- ads/feature_engineering/feature_type/adsstring/string.py +258 -0
- ads/feature_engineering/feature_type/base.py +58 -0
- ads/feature_engineering/feature_type/boolean.py +183 -0
- ads/feature_engineering/feature_type/category.py +146 -0
- ads/feature_engineering/feature_type/constant.py +137 -0
- ads/feature_engineering/feature_type/continuous.py +151 -0
- ads/feature_engineering/feature_type/creditcard.py +314 -0
- ads/feature_engineering/feature_type/datetime.py +190 -0
- ads/feature_engineering/feature_type/discrete.py +134 -0
- ads/feature_engineering/feature_type/document.py +43 -0
- ads/feature_engineering/feature_type/gis.py +251 -0
- ads/feature_engineering/feature_type/handler/__init__.py +5 -0
- ads/feature_engineering/feature_type/handler/feature_validator.py +524 -0
- ads/feature_engineering/feature_type/handler/feature_warning.py +319 -0
- ads/feature_engineering/feature_type/handler/warnings.py +128 -0
- ads/feature_engineering/feature_type/integer.py +142 -0
- ads/feature_engineering/feature_type/ip_address.py +144 -0
- ads/feature_engineering/feature_type/ip_address_v4.py +138 -0
- ads/feature_engineering/feature_type/ip_address_v6.py +138 -0
- ads/feature_engineering/feature_type/lat_long.py +256 -0
- ads/feature_engineering/feature_type/object.py +43 -0
- ads/feature_engineering/feature_type/ordinal.py +132 -0
- ads/feature_engineering/feature_type/phone_number.py +135 -0
- ads/feature_engineering/feature_type/string.py +171 -0
- ads/feature_engineering/feature_type/text.py +93 -0
- ads/feature_engineering/feature_type/unknown.py +43 -0
- ads/feature_engineering/feature_type/zip_code.py +164 -0
- ads/feature_engineering/feature_type_manager.py +406 -0
- ads/feature_engineering/schema.py +795 -0
- ads/feature_engineering/utils.py +245 -0
- ads/feature_store/.readthedocs.yaml +19 -0
- ads/feature_store/README.md +65 -0
- ads/feature_store/__init__.py +9 -0
- ads/feature_store/common/__init__.py +0 -0
- ads/feature_store/common/enums.py +339 -0
- ads/feature_store/common/exceptions.py +18 -0
- ads/feature_store/common/spark_session_singleton.py +125 -0
- ads/feature_store/common/utils/__init__.py +0 -0
- ads/feature_store/common/utils/base64_encoder_decoder.py +72 -0
- ads/feature_store/common/utils/feature_schema_mapper.py +283 -0
- ads/feature_store/common/utils/transformation_utils.py +82 -0
- ads/feature_store/common/utils/utility.py +403 -0
- ads/feature_store/data_validation/__init__.py +0 -0
- ads/feature_store/data_validation/great_expectation.py +129 -0
- ads/feature_store/dataset.py +1230 -0
- ads/feature_store/dataset_job.py +530 -0
- ads/feature_store/docs/Dockerfile +7 -0
- ads/feature_store/docs/Makefile +44 -0
- ads/feature_store/docs/conf.py +28 -0
- ads/feature_store/docs/requirements.txt +14 -0
- ads/feature_store/docs/source/ads.feature_store.query.rst +20 -0
- ads/feature_store/docs/source/cicd.rst +137 -0
- ads/feature_store/docs/source/conf.py +86 -0
- ads/feature_store/docs/source/data_versioning.rst +33 -0
- ads/feature_store/docs/source/dataset.rst +388 -0
- ads/feature_store/docs/source/dataset_job.rst +27 -0
- ads/feature_store/docs/source/demo.rst +70 -0
- ads/feature_store/docs/source/entity.rst +78 -0
- ads/feature_store/docs/source/feature_group.rst +624 -0
- ads/feature_store/docs/source/feature_group_job.rst +29 -0
- ads/feature_store/docs/source/feature_store.rst +122 -0
- ads/feature_store/docs/source/feature_store_class.rst +123 -0
- ads/feature_store/docs/source/feature_validation.rst +66 -0
- ads/feature_store/docs/source/figures/cicd.png +0 -0
- ads/feature_store/docs/source/figures/data_validation.png +0 -0
- ads/feature_store/docs/source/figures/data_versioning.png +0 -0
- ads/feature_store/docs/source/figures/dataset.gif +0 -0
- ads/feature_store/docs/source/figures/dataset.png +0 -0
- ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
- ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
- ads/feature_store/docs/source/figures/entity.png +0 -0
- ads/feature_store/docs/source/figures/feature_group.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
- ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
- ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
- ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
- ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
- ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
- ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
- ads/feature_store/docs/source/figures/overview.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
- ads/feature_store/docs/source/figures/stats_1.png +0 -0
- ads/feature_store/docs/source/figures/stats_2.png +0 -0
- ads/feature_store/docs/source/figures/stats_d.png +0 -0
- ads/feature_store/docs/source/figures/stats_fg.png +0 -0
- ads/feature_store/docs/source/figures/transformation.png +0 -0
- ads/feature_store/docs/source/figures/transformations.gif +0 -0
- ads/feature_store/docs/source/figures/validation.png +0 -0
- ads/feature_store/docs/source/figures/validation_fg.png +0 -0
- ads/feature_store/docs/source/figures/validation_results.png +0 -0
- ads/feature_store/docs/source/figures/validation_summary.png +0 -0
- ads/feature_store/docs/source/index.rst +81 -0
- ads/feature_store/docs/source/module.rst +8 -0
- ads/feature_store/docs/source/notebook.rst +94 -0
- ads/feature_store/docs/source/overview.rst +47 -0
- ads/feature_store/docs/source/quickstart.rst +176 -0
- ads/feature_store/docs/source/release_notes.rst +194 -0
- ads/feature_store/docs/source/setup_feature_store.rst +81 -0
- ads/feature_store/docs/source/statistics.rst +58 -0
- ads/feature_store/docs/source/transformation.rst +199 -0
- ads/feature_store/docs/source/ui.rst +65 -0
- ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +66 -0
- ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +192 -0
- ads/feature_store/docs/source/user_guides.setup.terraform.rst +338 -0
- ads/feature_store/entity.py +718 -0
- ads/feature_store/execution_strategy/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +375 -0
- ads/feature_store/execution_strategy/engine/__init__.py +0 -0
- ads/feature_store/execution_strategy/engine/spark_engine.py +316 -0
- ads/feature_store/execution_strategy/execution_strategy.py +113 -0
- ads/feature_store/execution_strategy/execution_strategy_provider.py +47 -0
- ads/feature_store/execution_strategy/spark/__init__.py +0 -0
- ads/feature_store/execution_strategy/spark/spark_execution.py +618 -0
- ads/feature_store/feature.py +192 -0
- ads/feature_store/feature_group.py +1494 -0
- ads/feature_store/feature_group_expectation.py +346 -0
- ads/feature_store/feature_group_job.py +602 -0
- ads/feature_store/feature_lineage/__init__.py +0 -0
- ads/feature_store/feature_lineage/graphviz_service.py +180 -0
- ads/feature_store/feature_option_details.py +50 -0
- ads/feature_store/feature_statistics/__init__.py +0 -0
- ads/feature_store/feature_statistics/statistics_service.py +99 -0
- ads/feature_store/feature_store.py +699 -0
- ads/feature_store/feature_store_registrar.py +518 -0
- ads/feature_store/input_feature_detail.py +149 -0
- ads/feature_store/mixin/__init__.py +4 -0
- ads/feature_store/mixin/oci_feature_store.py +145 -0
- ads/feature_store/model_details.py +73 -0
- ads/feature_store/query/__init__.py +0 -0
- ads/feature_store/query/filter.py +266 -0
- ads/feature_store/query/generator/__init__.py +0 -0
- ads/feature_store/query/generator/query_generator.py +298 -0
- ads/feature_store/query/join.py +161 -0
- ads/feature_store/query/query.py +403 -0
- ads/feature_store/query/validator/__init__.py +0 -0
- ads/feature_store/query/validator/query_validator.py +57 -0
- ads/feature_store/response/__init__.py +0 -0
- ads/feature_store/response/response_builder.py +68 -0
- ads/feature_store/service/__init__.py +0 -0
- ads/feature_store/service/oci_dataset.py +139 -0
- ads/feature_store/service/oci_dataset_job.py +199 -0
- ads/feature_store/service/oci_entity.py +125 -0
- ads/feature_store/service/oci_feature_group.py +164 -0
- ads/feature_store/service/oci_feature_group_job.py +214 -0
- ads/feature_store/service/oci_feature_store.py +182 -0
- ads/feature_store/service/oci_lineage.py +87 -0
- ads/feature_store/service/oci_transformation.py +104 -0
- ads/feature_store/statistics/__init__.py +0 -0
- ads/feature_store/statistics/abs_feature_value.py +49 -0
- ads/feature_store/statistics/charts/__init__.py +0 -0
- ads/feature_store/statistics/charts/abstract_feature_plot.py +37 -0
- ads/feature_store/statistics/charts/box_plot.py +148 -0
- ads/feature_store/statistics/charts/frequency_distribution.py +65 -0
- ads/feature_store/statistics/charts/probability_distribution.py +68 -0
- ads/feature_store/statistics/charts/top_k_frequent_elements.py +98 -0
- ads/feature_store/statistics/feature_stat.py +126 -0
- ads/feature_store/statistics/generic_feature_value.py +33 -0
- ads/feature_store/statistics/statistics.py +41 -0
- ads/feature_store/statistics_config.py +101 -0
- ads/feature_store/templates/feature_store_template.yaml +45 -0
- ads/feature_store/transformation.py +499 -0
- ads/feature_store/validation_output.py +57 -0
- ads/hpo/__init__.py +9 -0
- ads/hpo/_imports.py +91 -0
- ads/hpo/ads_search_space.py +439 -0
- ads/hpo/distributions.py +325 -0
- ads/hpo/objective.py +280 -0
- ads/hpo/search_cv.py +1657 -0
- ads/hpo/stopping_criterion.py +75 -0
- ads/hpo/tuner_artifact.py +413 -0
- ads/hpo/utils.py +91 -0
- ads/hpo/validation.py +140 -0
- ads/hpo/visualization/__init__.py +5 -0
- ads/hpo/visualization/_contour.py +23 -0
- ads/hpo/visualization/_edf.py +20 -0
- ads/hpo/visualization/_intermediate_values.py +21 -0
- ads/hpo/visualization/_optimization_history.py +25 -0
- ads/hpo/visualization/_parallel_coordinate.py +169 -0
- ads/hpo/visualization/_param_importances.py +26 -0
- ads/jobs/__init__.py +53 -0
- ads/jobs/ads_job.py +663 -0
- ads/jobs/builders/__init__.py +5 -0
- ads/jobs/builders/base.py +156 -0
- ads/jobs/builders/infrastructure/__init__.py +6 -0
- ads/jobs/builders/infrastructure/base.py +165 -0
- ads/jobs/builders/infrastructure/dataflow.py +1252 -0
- ads/jobs/builders/infrastructure/dsc_job.py +1894 -0
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +1233 -0
- ads/jobs/builders/infrastructure/utils.py +65 -0
- ads/jobs/builders/runtimes/__init__.py +5 -0
- ads/jobs/builders/runtimes/artifact.py +338 -0
- ads/jobs/builders/runtimes/base.py +325 -0
- ads/jobs/builders/runtimes/container_runtime.py +242 -0
- ads/jobs/builders/runtimes/python_runtime.py +1016 -0
- ads/jobs/builders/runtimes/pytorch_runtime.py +204 -0
- ads/jobs/cli.py +104 -0
- ads/jobs/env_var_parser.py +131 -0
- ads/jobs/extension.py +160 -0
- ads/jobs/schema/__init__.py +5 -0
- ads/jobs/schema/infrastructure_schema.json +116 -0
- ads/jobs/schema/job_schema.json +42 -0
- ads/jobs/schema/runtime_schema.json +183 -0
- ads/jobs/schema/validator.py +141 -0
- ads/jobs/serializer.py +296 -0
- ads/jobs/templates/__init__.py +5 -0
- ads/jobs/templates/container.py +6 -0
- ads/jobs/templates/driver_notebook.py +177 -0
- ads/jobs/templates/driver_oci.py +500 -0
- ads/jobs/templates/driver_python.py +48 -0
- ads/jobs/templates/driver_pytorch.py +852 -0
- ads/jobs/templates/driver_utils.py +615 -0
- ads/jobs/templates/hostname_from_env.c +55 -0
- ads/jobs/templates/oci_metrics.py +181 -0
- ads/jobs/utils.py +104 -0
- ads/llm/__init__.py +28 -0
- ads/llm/autogen/__init__.py +2 -0
- ads/llm/autogen/constants.py +15 -0
- ads/llm/autogen/reports/__init__.py +2 -0
- ads/llm/autogen/reports/base.py +67 -0
- ads/llm/autogen/reports/data.py +103 -0
- ads/llm/autogen/reports/session.py +526 -0
- ads/llm/autogen/reports/templates/chat_box.html +13 -0
- ads/llm/autogen/reports/templates/chat_box_lt.html +5 -0
- ads/llm/autogen/reports/templates/chat_box_rt.html +6 -0
- ads/llm/autogen/reports/utils.py +56 -0
- ads/llm/autogen/v02/__init__.py +4 -0
- ads/llm/autogen/v02/client.py +295 -0
- ads/llm/autogen/v02/log_handlers/__init__.py +2 -0
- ads/llm/autogen/v02/log_handlers/oci_file_handler.py +83 -0
- ads/llm/autogen/v02/loggers/__init__.py +6 -0
- ads/llm/autogen/v02/loggers/metric_logger.py +320 -0
- ads/llm/autogen/v02/loggers/session_logger.py +580 -0
- ads/llm/autogen/v02/loggers/utils.py +86 -0
- ads/llm/autogen/v02/runtime_logging.py +163 -0
- ads/llm/chain.py +268 -0
- ads/llm/chat_template.py +31 -0
- ads/llm/deploy.py +63 -0
- ads/llm/guardrails/__init__.py +5 -0
- ads/llm/guardrails/base.py +442 -0
- ads/llm/guardrails/huggingface.py +44 -0
- ads/llm/langchain/__init__.py +5 -0
- ads/llm/langchain/plugins/__init__.py +5 -0
- ads/llm/langchain/plugins/chat_models/__init__.py +5 -0
- ads/llm/langchain/plugins/chat_models/oci_data_science.py +1027 -0
- ads/llm/langchain/plugins/embeddings/__init__.py +4 -0
- ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +184 -0
- ads/llm/langchain/plugins/llms/__init__.py +5 -0
- ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +979 -0
- ads/llm/requirements.txt +3 -0
- ads/llm/serialize.py +219 -0
- ads/llm/serializers/__init__.py +0 -0
- ads/llm/serializers/retrieval_qa.py +153 -0
- ads/llm/serializers/runnable_parallel.py +27 -0
- ads/llm/templates/score_chain.jinja2 +155 -0
- ads/llm/templates/tool_chat_template_hermes.jinja +130 -0
- ads/llm/templates/tool_chat_template_mistral_parallel.jinja +94 -0
- ads/model/__init__.py +52 -0
- ads/model/artifact.py +573 -0
- ads/model/artifact_downloader.py +254 -0
- ads/model/artifact_uploader.py +267 -0
- ads/model/base_properties.py +238 -0
- ads/model/common/.model-ignore +66 -0
- ads/model/common/__init__.py +5 -0
- ads/model/common/utils.py +142 -0
- ads/model/datascience_model.py +2635 -0
- ads/model/deployment/__init__.py +20 -0
- ads/model/deployment/common/__init__.py +5 -0
- ads/model/deployment/common/utils.py +308 -0
- ads/model/deployment/model_deployer.py +466 -0
- ads/model/deployment/model_deployment.py +1846 -0
- ads/model/deployment/model_deployment_infrastructure.py +671 -0
- ads/model/deployment/model_deployment_properties.py +493 -0
- ads/model/deployment/model_deployment_runtime.py +838 -0
- ads/model/extractor/__init__.py +5 -0
- ads/model/extractor/automl_extractor.py +74 -0
- ads/model/extractor/embedding_onnx_extractor.py +80 -0
- ads/model/extractor/huggingface_extractor.py +88 -0
- ads/model/extractor/keras_extractor.py +84 -0
- ads/model/extractor/lightgbm_extractor.py +93 -0
- ads/model/extractor/model_info_extractor.py +114 -0
- ads/model/extractor/model_info_extractor_factory.py +105 -0
- ads/model/extractor/pytorch_extractor.py +87 -0
- ads/model/extractor/sklearn_extractor.py +112 -0
- ads/model/extractor/spark_extractor.py +89 -0
- ads/model/extractor/tensorflow_extractor.py +85 -0
- ads/model/extractor/xgboost_extractor.py +94 -0
- ads/model/framework/__init__.py +5 -0
- ads/model/framework/automl_model.py +178 -0
- ads/model/framework/embedding_onnx_model.py +438 -0
- ads/model/framework/huggingface_model.py +399 -0
- ads/model/framework/lightgbm_model.py +266 -0
- ads/model/framework/pytorch_model.py +266 -0
- ads/model/framework/sklearn_model.py +250 -0
- ads/model/framework/spark_model.py +326 -0
- ads/model/framework/tensorflow_model.py +254 -0
- ads/model/framework/xgboost_model.py +258 -0
- ads/model/generic_model.py +3518 -0
- ads/model/model_artifact_boilerplate/README.md +381 -0
- ads/model/model_artifact_boilerplate/__init__.py +5 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +5 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +427 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +2 -0
- ads/model/model_artifact_boilerplate/runtime.yaml +7 -0
- ads/model/model_artifact_boilerplate/score.py +61 -0
- ads/model/model_file_description_schema.json +68 -0
- ads/model/model_introspect.py +331 -0
- ads/model/model_metadata.py +1810 -0
- ads/model/model_metadata_mixin.py +460 -0
- ads/model/model_properties.py +63 -0
- ads/model/model_version_set.py +739 -0
- ads/model/runtime/__init__.py +5 -0
- ads/model/runtime/env_info.py +306 -0
- ads/model/runtime/model_deployment_details.py +37 -0
- ads/model/runtime/model_provenance_details.py +58 -0
- ads/model/runtime/runtime_info.py +81 -0
- ads/model/runtime/schemas/inference_env_info_schema.yaml +16 -0
- ads/model/runtime/schemas/model_provenance_schema.yaml +36 -0
- ads/model/runtime/schemas/training_env_info_schema.yaml +16 -0
- ads/model/runtime/utils.py +201 -0
- ads/model/serde/__init__.py +5 -0
- ads/model/serde/common.py +40 -0
- ads/model/serde/model_input.py +547 -0
- ads/model/serde/model_serializer.py +1184 -0
- ads/model/service/__init__.py +5 -0
- ads/model/service/oci_datascience_model.py +1076 -0
- ads/model/service/oci_datascience_model_deployment.py +500 -0
- ads/model/service/oci_datascience_model_version_set.py +176 -0
- ads/model/transformer/__init__.py +5 -0
- ads/model/transformer/onnx_transformer.py +324 -0
- ads/mysqldb/__init__.py +5 -0
- ads/mysqldb/mysql_db.py +227 -0
- ads/opctl/__init__.py +18 -0
- ads/opctl/anomaly_detection.py +11 -0
- ads/opctl/backend/__init__.py +5 -0
- ads/opctl/backend/ads_dataflow.py +353 -0
- ads/opctl/backend/ads_ml_job.py +710 -0
- ads/opctl/backend/ads_ml_pipeline.py +164 -0
- ads/opctl/backend/ads_model_deployment.py +209 -0
- ads/opctl/backend/base.py +146 -0
- ads/opctl/backend/local.py +1053 -0
- ads/opctl/backend/marketplace/__init__.py +9 -0
- ads/opctl/backend/marketplace/helm_helper.py +173 -0
- ads/opctl/backend/marketplace/local_marketplace.py +271 -0
- ads/opctl/backend/marketplace/marketplace_backend_runner.py +71 -0
- ads/opctl/backend/marketplace/marketplace_operator_interface.py +44 -0
- ads/opctl/backend/marketplace/marketplace_operator_runner.py +24 -0
- ads/opctl/backend/marketplace/marketplace_utils.py +212 -0
- ads/opctl/backend/marketplace/models/__init__.py +5 -0
- ads/opctl/backend/marketplace/models/bearer_token.py +94 -0
- ads/opctl/backend/marketplace/models/marketplace_type.py +70 -0
- ads/opctl/backend/marketplace/models/ocir_details.py +56 -0
- ads/opctl/backend/marketplace/prerequisite_checker.py +238 -0
- ads/opctl/cli.py +707 -0
- ads/opctl/cmds.py +869 -0
- ads/opctl/conda/__init__.py +5 -0
- ads/opctl/conda/cli.py +193 -0
- ads/opctl/conda/cmds.py +749 -0
- ads/opctl/conda/config.yaml +34 -0
- ads/opctl/conda/manifest_template.yaml +13 -0
- ads/opctl/conda/multipart_uploader.py +188 -0
- ads/opctl/conda/pack.py +89 -0
- ads/opctl/config/__init__.py +5 -0
- ads/opctl/config/base.py +57 -0
- ads/opctl/config/diagnostics/__init__.py +5 -0
- ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +62 -0
- ads/opctl/config/merger.py +255 -0
- ads/opctl/config/resolver.py +297 -0
- ads/opctl/config/utils.py +79 -0
- ads/opctl/config/validator.py +17 -0
- ads/opctl/config/versioner.py +68 -0
- ads/opctl/config/yaml_parsers/__init__.py +7 -0
- ads/opctl/config/yaml_parsers/base.py +58 -0
- ads/opctl/config/yaml_parsers/distributed/__init__.py +7 -0
- ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +201 -0
- ads/opctl/constants.py +66 -0
- ads/opctl/decorator/__init__.py +5 -0
- ads/opctl/decorator/common.py +129 -0
- ads/opctl/diagnostics/__init__.py +5 -0
- ads/opctl/diagnostics/__main__.py +25 -0
- ads/opctl/diagnostics/check_distributed_job_requirements.py +212 -0
- ads/opctl/diagnostics/check_requirements.py +144 -0
- ads/opctl/diagnostics/requirement_exception.py +9 -0
- ads/opctl/distributed/README.md +109 -0
- ads/opctl/distributed/__init__.py +5 -0
- ads/opctl/distributed/certificates.py +32 -0
- ads/opctl/distributed/cli.py +207 -0
- ads/opctl/distributed/cmds.py +731 -0
- ads/opctl/distributed/common/__init__.py +5 -0
- ads/opctl/distributed/common/abstract_cluster_provider.py +449 -0
- ads/opctl/distributed/common/abstract_framework_spec_builder.py +88 -0
- ads/opctl/distributed/common/cluster_config_helper.py +103 -0
- ads/opctl/distributed/common/cluster_provider_factory.py +21 -0
- ads/opctl/distributed/common/cluster_runner.py +54 -0
- ads/opctl/distributed/common/framework_factory.py +29 -0
- ads/opctl/docker/Dockerfile.job +103 -0
- ads/opctl/docker/Dockerfile.job.arm +107 -0
- ads/opctl/docker/Dockerfile.job.gpu +175 -0
- ads/opctl/docker/base-env.yaml +13 -0
- ads/opctl/docker/cuda.repo +6 -0
- ads/opctl/docker/operator/.dockerignore +0 -0
- ads/opctl/docker/operator/Dockerfile +41 -0
- ads/opctl/docker/operator/Dockerfile.gpu +85 -0
- ads/opctl/docker/operator/cuda.repo +6 -0
- ads/opctl/docker/operator/environment.yaml +8 -0
- ads/opctl/forecast.py +11 -0
- ads/opctl/index.yaml +3 -0
- ads/opctl/model/__init__.py +5 -0
- ads/opctl/model/cli.py +65 -0
- ads/opctl/model/cmds.py +73 -0
- ads/opctl/operator/README.md +4 -0
- ads/opctl/operator/__init__.py +31 -0
- ads/opctl/operator/cli.py +344 -0
- ads/opctl/operator/cmd.py +596 -0
- ads/opctl/operator/common/__init__.py +5 -0
- ads/opctl/operator/common/backend_factory.py +460 -0
- ads/opctl/operator/common/const.py +27 -0
- ads/opctl/operator/common/data/synthetic.csv +16001 -0
- ads/opctl/operator/common/dictionary_merger.py +148 -0
- ads/opctl/operator/common/errors.py +42 -0
- ads/opctl/operator/common/operator_config.py +99 -0
- ads/opctl/operator/common/operator_loader.py +811 -0
- ads/opctl/operator/common/operator_schema.yaml +130 -0
- ads/opctl/operator/common/operator_yaml_generator.py +152 -0
- ads/opctl/operator/common/utils.py +208 -0
- ads/opctl/operator/lowcode/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/MLoperator +16 -0
- ads/opctl/operator/lowcode/anomaly/README.md +207 -0
- ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/__main__.py +103 -0
- ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
- ads/opctl/operator/lowcode/anomaly/const.py +167 -0
- ads/opctl/operator/lowcode/anomaly/environment.yaml +10 -0
- ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +146 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +162 -0
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +99 -0
- ads/opctl/operator/lowcode/anomaly/model/autots.py +115 -0
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +404 -0
- ads/opctl/operator/lowcode/anomaly/model/factory.py +110 -0
- ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +78 -0
- ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +78 -0
- ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +120 -0
- ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
- ads/opctl/operator/lowcode/anomaly/operator_config.py +127 -0
- ads/opctl/operator/lowcode/anomaly/schema.yaml +401 -0
- ads/opctl/operator/lowcode/anomaly/utils.py +88 -0
- ads/opctl/operator/lowcode/common/__init__.py +5 -0
- ads/opctl/operator/lowcode/common/const.py +10 -0
- ads/opctl/operator/lowcode/common/data.py +116 -0
- ads/opctl/operator/lowcode/common/errors.py +47 -0
- ads/opctl/operator/lowcode/common/transformations.py +296 -0
- ads/opctl/operator/lowcode/common/utils.py +384 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +13 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/README.md +30 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +5 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +116 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +85 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/const.py +15 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +4 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +32 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +43 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +120 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +34 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +386 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +160 -0
- ads/opctl/operator/lowcode/forecast/MLoperator +25 -0
- ads/opctl/operator/lowcode/forecast/README.md +209 -0
- ads/opctl/operator/lowcode/forecast/__init__.py +5 -0
- ads/opctl/operator/lowcode/forecast/__main__.py +89 -0
- ads/opctl/operator/lowcode/forecast/cmd.py +40 -0
- ads/opctl/operator/lowcode/forecast/const.py +92 -0
- ads/opctl/operator/lowcode/forecast/environment.yaml +20 -0
- ads/opctl/operator/lowcode/forecast/errors.py +26 -0
- ads/opctl/operator/lowcode/forecast/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/forecast/model/arima.py +279 -0
- ads/opctl/operator/lowcode/forecast/model/automlx.py +553 -0
- ads/opctl/operator/lowcode/forecast/model/autots.py +312 -0
- ads/opctl/operator/lowcode/forecast/model/base_model.py +875 -0
- ads/opctl/operator/lowcode/forecast/model/factory.py +106 -0
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +492 -0
- ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +243 -0
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +482 -0
- ads/opctl/operator/lowcode/forecast/model/prophet.py +445 -0
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +244 -0
- ads/opctl/operator/lowcode/forecast/operator_config.py +234 -0
- ads/opctl/operator/lowcode/forecast/schema.yaml +506 -0
- ads/opctl/operator/lowcode/forecast/utils.py +397 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +7 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +285 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/score.py +246 -0
- ads/opctl/operator/lowcode/pii/MLoperator +17 -0
- ads/opctl/operator/lowcode/pii/README.md +208 -0
- ads/opctl/operator/lowcode/pii/__init__.py +5 -0
- ads/opctl/operator/lowcode/pii/__main__.py +78 -0
- ads/opctl/operator/lowcode/pii/cmd.py +39 -0
- ads/opctl/operator/lowcode/pii/constant.py +84 -0
- ads/opctl/operator/lowcode/pii/environment.yaml +17 -0
- ads/opctl/operator/lowcode/pii/errors.py +27 -0
- ads/opctl/operator/lowcode/pii/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/pii/model/factory.py +82 -0
- ads/opctl/operator/lowcode/pii/model/guardrails.py +167 -0
- ads/opctl/operator/lowcode/pii/model/pii.py +145 -0
- ads/opctl/operator/lowcode/pii/model/processor/__init__.py +34 -0
- ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +34 -0
- ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +35 -0
- ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +225 -0
- ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +73 -0
- ads/opctl/operator/lowcode/pii/model/processor/remover.py +26 -0
- ads/opctl/operator/lowcode/pii/model/report.py +487 -0
- ads/opctl/operator/lowcode/pii/operator_config.py +95 -0
- ads/opctl/operator/lowcode/pii/schema.yaml +108 -0
- ads/opctl/operator/lowcode/pii/utils.py +43 -0
- ads/opctl/operator/lowcode/recommender/MLoperator +16 -0
- ads/opctl/operator/lowcode/recommender/README.md +206 -0
- ads/opctl/operator/lowcode/recommender/__init__.py +5 -0
- ads/opctl/operator/lowcode/recommender/__main__.py +82 -0
- ads/opctl/operator/lowcode/recommender/cmd.py +33 -0
- ads/opctl/operator/lowcode/recommender/constant.py +30 -0
- ads/opctl/operator/lowcode/recommender/environment.yaml +11 -0
- ads/opctl/operator/lowcode/recommender/model/base_model.py +212 -0
- ads/opctl/operator/lowcode/recommender/model/factory.py +56 -0
- ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +25 -0
- ads/opctl/operator/lowcode/recommender/model/svd.py +106 -0
- ads/opctl/operator/lowcode/recommender/operator_config.py +81 -0
- ads/opctl/operator/lowcode/recommender/schema.yaml +265 -0
- ads/opctl/operator/lowcode/recommender/utils.py +13 -0
- ads/opctl/operator/runtime/__init__.py +5 -0
- ads/opctl/operator/runtime/const.py +17 -0
- ads/opctl/operator/runtime/container_runtime_schema.yaml +50 -0
- ads/opctl/operator/runtime/marketplace_runtime.py +50 -0
- ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +21 -0
- ads/opctl/operator/runtime/python_runtime_schema.yaml +21 -0
- ads/opctl/operator/runtime/runtime.py +115 -0
- ads/opctl/schema.yaml.yml +36 -0
- ads/opctl/script.py +40 -0
- ads/opctl/spark/__init__.py +5 -0
- ads/opctl/spark/cli.py +43 -0
- ads/opctl/spark/cmds.py +147 -0
- ads/opctl/templates/diagnostic_report_template.jinja2 +102 -0
- ads/opctl/utils.py +344 -0
- ads/oracledb/__init__.py +5 -0
- ads/oracledb/oracle_db.py +346 -0
- ads/pipeline/__init__.py +39 -0
- ads/pipeline/ads_pipeline.py +2279 -0
- ads/pipeline/ads_pipeline_run.py +772 -0
- ads/pipeline/ads_pipeline_step.py +605 -0
- ads/pipeline/builders/__init__.py +5 -0
- ads/pipeline/builders/infrastructure/__init__.py +5 -0
- ads/pipeline/builders/infrastructure/custom_script.py +32 -0
- ads/pipeline/cli.py +119 -0
- ads/pipeline/extension.py +291 -0
- ads/pipeline/schema/__init__.py +5 -0
- ads/pipeline/schema/cs_step_schema.json +35 -0
- ads/pipeline/schema/ml_step_schema.json +31 -0
- ads/pipeline/schema/pipeline_schema.json +71 -0
- ads/pipeline/visualizer/__init__.py +5 -0
- ads/pipeline/visualizer/base.py +570 -0
- ads/pipeline/visualizer/graph_renderer.py +272 -0
- ads/pipeline/visualizer/text_renderer.py +84 -0
- ads/secrets/__init__.py +11 -0
- ads/secrets/adb.py +386 -0
- ads/secrets/auth_token.py +86 -0
- ads/secrets/big_data_service.py +365 -0
- ads/secrets/mysqldb.py +149 -0
- ads/secrets/oracledb.py +160 -0
- ads/secrets/secrets.py +407 -0
- ads/telemetry/__init__.py +7 -0
- ads/telemetry/base.py +69 -0
- ads/telemetry/client.py +125 -0
- ads/telemetry/telemetry.py +257 -0
- ads/templates/dataflow_pyspark.jinja2 +13 -0
- ads/templates/dataflow_sparksql.jinja2 +22 -0
- ads/templates/func.jinja2 +20 -0
- ads/templates/schemas/openapi.json +1740 -0
- ads/templates/score-pkl.jinja2 +173 -0
- ads/templates/score.jinja2 +322 -0
- ads/templates/score_embedding_onnx.jinja2 +202 -0
- ads/templates/score_generic.jinja2 +165 -0
- ads/templates/score_huggingface_pipeline.jinja2 +217 -0
- ads/templates/score_lightgbm.jinja2 +185 -0
- ads/templates/score_onnx.jinja2 +407 -0
- ads/templates/score_onnx_new.jinja2 +473 -0
- ads/templates/score_oracle_automl.jinja2 +185 -0
- ads/templates/score_pyspark.jinja2 +154 -0
- ads/templates/score_pytorch.jinja2 +219 -0
- ads/templates/score_scikit-learn.jinja2 +184 -0
- ads/templates/score_tensorflow.jinja2 +184 -0
- ads/templates/score_xgboost.jinja2 +178 -0
- ads/text_dataset/__init__.py +5 -0
- ads/text_dataset/backends.py +211 -0
- ads/text_dataset/dataset.py +445 -0
- ads/text_dataset/extractor.py +207 -0
- ads/text_dataset/options.py +53 -0
- ads/text_dataset/udfs.py +22 -0
- ads/text_dataset/utils.py +49 -0
- ads/type_discovery/__init__.py +9 -0
- ads/type_discovery/abstract_detector.py +21 -0
- ads/type_discovery/constant_detector.py +41 -0
- ads/type_discovery/continuous_detector.py +54 -0
- ads/type_discovery/credit_card_detector.py +99 -0
- ads/type_discovery/datetime_detector.py +92 -0
- ads/type_discovery/discrete_detector.py +118 -0
- ads/type_discovery/document_detector.py +146 -0
- ads/type_discovery/ip_detector.py +68 -0
- ads/type_discovery/latlon_detector.py +90 -0
- ads/type_discovery/phone_number_detector.py +63 -0
- ads/type_discovery/type_discovery_driver.py +87 -0
- ads/type_discovery/typed_feature.py +594 -0
- ads/type_discovery/unknown_detector.py +41 -0
- ads/type_discovery/zipcode_detector.py +48 -0
- ads/vault/__init__.py +7 -0
- ads/vault/vault.py +237 -0
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.9rc1.dist-info}/METADATA +150 -150
- oracle_ads-2.13.9rc1.dist-info/RECORD +858 -0
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.9rc1.dist-info}/WHEEL +1 -2
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.9rc1.dist-info}/entry_points.txt +2 -1
- oracle_ads-2.13.9rc0.dist-info/RECORD +0 -9
- oracle_ads-2.13.9rc0.dist-info/top_level.txt +0 -1
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.9rc1.dist-info}/licenses/LICENSE.txt +0 -0
ads/common/utils.py
ADDED
@@ -0,0 +1,1852 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
|
3
|
+
# Copyright (c) 2020, 2025 Oracle and/or its affiliates.
|
4
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
5
|
+
|
6
|
+
|
7
|
+
import collections
|
8
|
+
import contextlib
|
9
|
+
import copy
|
10
|
+
import fnmatch
|
11
|
+
import glob
|
12
|
+
import json
|
13
|
+
import math
|
14
|
+
import os
|
15
|
+
import random
|
16
|
+
import re
|
17
|
+
import shutil
|
18
|
+
import string
|
19
|
+
import sys
|
20
|
+
import tempfile
|
21
|
+
from datetime import datetime
|
22
|
+
from enum import Enum
|
23
|
+
from io import DEFAULT_BUFFER_SIZE
|
24
|
+
from textwrap import fill
|
25
|
+
from typing import Any, Dict, Optional, Tuple, Union
|
26
|
+
from urllib import request
|
27
|
+
from urllib.parse import urlparse
|
28
|
+
|
29
|
+
import fsspec
|
30
|
+
import matplotlib as mpl
|
31
|
+
import numpy as np
|
32
|
+
import pandas as pd
|
33
|
+
from cycler import cycler
|
34
|
+
from oci import object_storage
|
35
|
+
from pandas.core.dtypes.common import is_datetime64_dtype, is_numeric_dtype
|
36
|
+
from sklearn.model_selection import train_test_split
|
37
|
+
from tqdm import tqdm
|
38
|
+
|
39
|
+
from ads import config
|
40
|
+
from ads.common import logger
|
41
|
+
from ads.common.decorator.deprecate import deprecated
|
42
|
+
from ads.common.decorator.runtime_dependency import (
|
43
|
+
OptionalDependency,
|
44
|
+
runtime_dependency,
|
45
|
+
)
|
46
|
+
from ads.common.object_storage_details import ObjectStorageDetails
|
47
|
+
from ads.common.oci_client import OCIClientFactory
|
48
|
+
from ads.common.word_lists import adjectives, animals
|
49
|
+
from ads.dataset.progress import TqdmProgressBar
|
50
|
+
|
51
|
+
from . import auth as authutil
|
52
|
+
|
53
|
+
# For Model / Model Artifact libraries
# Maps an import name to a differently spelled distribution name.
lib_translator = {"sklearn": "scikit-learn"}
module_ignore = ["builtins", "ads", "automl", "mlx"]

# up-sample if length of dataframe is less than or equal to MAX_LEN_FOR_UP_SAMPLING
MAX_LEN_FOR_UP_SAMPLING = 5000

# down-sample if ratio of minority to majority class is greater than or equal to MIN_RATIO_FOR_DOWN_SAMPLING
MIN_RATIO_FOR_DOWN_SAMPLING = 1 / 20

# Maximum distinct values by cardinality will be used for plotting
MAX_DISPLAY_VALUES = 10

# Placeholder returned when a value cannot be determined (an empty string).
UNKNOWN = ""

# par link of the index json file.
PAR_LINK = "https://objectstorage.us-ashburn-1.oraclecloud.com/p/WyjtfVIG0uda-P3-2FmAfwaLlXYQZbvPZmfX1qg0-sbkwEQO6jpwabGr2hMDBmBp/n/ociodscdev/b/service-conda-packs/o/service_pack/index.json"

random_state = 42
test_size = 0.3
date_format = "%Y-%m-%d %H:%M:%S"

# at this time, we only support regression and classification tasks.
# Functional Enum API: the space-separated names become members numbered from 1.
ml_task_types = Enum(
    "ml_task_types",
    "REGRESSION BINARY_CLASSIFICATION MULTI_CLASS_CLASSIFICATION BINARY_TEXT_CLASSIFICATION "
    "MULTI_CLASS_TEXT_CLASSIFICATION UNSUPPORTED",
)

# NOTE: import-time side effect - importing this module changes the global
# matplotlib defaults (colormap and color cycle) for the whole process.
mpl.rcParams["image.cmap"] = "BuGn"
mpl.rcParams["axes.prop_cycle"] = cycler(
    color=["teal", "blueviolet", "forestgreen", "peru", "y", "dodgerblue", "r"]
)


# sqlalchemy engines
_engines = {}

# NOTE: the Oracle port is an int while the MySQL port is a string.
ORACLE_DEFAULT_PORT = 1521
MYSQL_DEFAULT_PORT = "3306"

# Maximum number of columns of data to extract model schema.
DATA_SCHEMA_MAX_COL_NUM = 2000

# dimension of np array which can be converted to pd dataframe
DIMENSION = 2

# declare custom exception class

# The number of worker processes to use in parallel for uploading individual parts of a multipart upload.
DEFAULT_PARALLEL_PROCESS_COUNT = 9

# Standard `logging` level names, ordered from least to most severe.
LOG_LEVELS = ["NOTSET", "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
|
106
|
+
|
107
|
+
|
108
|
+
class FileOverwriteError(Exception):  # pragma: no cover
    """Custom exception type declared by this module.

    NOTE(review): presumably raised when a write would clobber an existing
    file; the raising call sites are not visible in this part of the file.
    """
|
110
|
+
|
111
|
+
|
112
|
+
def get_cpu_count():
    """Return the CPU count reported by ``os.cpu_count()``.

    Returns
    -------
    int or None
        Whatever ``os.cpu_count()`` yields (``None`` if it cannot be determined).
    """
    cpu_total = os.cpu_count()
    return cpu_total
|
117
|
+
|
118
|
+
|
119
|
+
@deprecated(
    "2.5.2", details="This method is being deprecated in favor of `get_cpu_count`"
)
def get_compute_accelerator_ncores():
    """Deprecated alias that simply forwards to `get_cpu_count`."""
    core_count = get_cpu_count()
    return core_count
|
124
|
+
|
125
|
+
|
126
|
+
@deprecated(
    "2.5.10",
    details="Deprecated, use: from ads.common.auth import AuthState;"
    "oci_config_location=AuthState().oci_config_path; profile=AuthState().oci_key_profile",
)
def get_oci_config():
    """
    Returns the OCI config location, and the OCI config profile.

    The location comes from the ``OCI_CONFIG_LOCATION`` environment variable and
    falls back to ``<home>/.oci/config``. The profile comes from
    ``OCI_CONFIG_PROFILE`` and falls back to ``"DEFAULT"``.

    Returns
    -------
    tuple
        ``(oci_config_location, oci_config_profile)``
    """
    oci_config_location = os.environ.get("OCI_CONFIG_LOCATION")
    if oci_config_location is None:
        # Resolve the home directory lazily and via expanduser() so a missing
        # HOME variable (e.g. on Windows) no longer raises KeyError.
        oci_config_location = f"{os.path.expanduser('~')}/.oci/config"
    oci_config_profile = os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT")
    return oci_config_location, oci_config_profile
|
140
|
+
|
141
|
+
|
142
|
+
@deprecated(
    "2.5.10",
    details="Deprecated, use: from ads.common.auth import AuthState; os.path.dirname(AuthState().oci_config_path)",
)
def oci_key_location():
    """
    Returns the OCI key location.

    The ``OCI_CONFIG_DIR`` environment variable takes precedence; otherwise the
    ``.oci`` directory under the current user's home directory is used.
    """
    default_dir = os.path.join(os.path.expanduser("~"), ".oci")
    return os.environ.get("OCI_CONFIG_DIR", default_dir)
|
153
|
+
|
154
|
+
|
155
|
+
@deprecated(
    "2.5.10",
    details="Deprecated, use: from ads.common.auth import AuthState; AuthState().oci_config_path",
)
def oci_config_file():
    """
    Returns the OCI config file location: the file named ``config`` inside the
    directory reported by `oci_key_location`.
    """
    config_dir = oci_key_location()
    return os.path.join(config_dir, "config")
|
164
|
+
|
165
|
+
|
166
|
+
@deprecated(
    "2.5.10",
    details="Deprecated, use: from ads.common.auth import AuthState; AuthState().oci_key_profile",
)
def oci_config_profile():
    """
    Returns the OCI config profile name, read from the ``OCI_CONFIG_PROFILE``
    environment variable (``"DEFAULT"`` when it is not set).
    """
    profile_name = os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT")
    return profile_name
|
175
|
+
|
176
|
+
|
177
|
+
def numeric_pandas_dtypes():
    """
    Returns a list of the "numeric" pandas data type names: the 16/32/64-bit
    integer names followed by the 16/32/64-bit float names.
    """
    bit_widths = (16, 32, 64)
    return [f"{family}{bits}" for family in ("int", "float") for bits in bit_widths]
|
182
|
+
|
183
|
+
|
184
|
+
@deprecated(
    "2.5.10",
    details="Deprecated, use: ads.set_auth(auth='api_key', oci_config_location='~/.oci/config', profile='DEFAULT')",
)
def set_oci_config(oci_config_location, oci_config_profile):
    """
    Publish the OCI config location and profile through environment variables.

    :param oci_config_location: location of the config file, for example, ~/.oci/config
    :param oci_config_profile: The profile to load from the config file.
    :raises ValueError: if ``oci_config_location`` does not exist on disk.
    """
    config_found = os.path.exists(f"{oci_config_location}")
    if not config_found:
        raise ValueError("The oci config file doesn't exist.")

    # Location is written first, then the profile.
    for env_name, env_value in (
        ("OCI_CONFIG_LOCATION", oci_config_location),
        ("OCI_CONFIG_PROFILE", oci_config_profile),
    ):
        os.environ[env_name] = env_value
|
197
|
+
|
198
|
+
|
199
|
+
def random_valid_ocid(prefix="ocid1.dataflowapplication.oc1.iad"):
    """
    Generates a random ocid-shaped string.

    Parameters
    ----------
    prefix: `str`
        A dotted prefix. Its last dot-separated segment is discarded and
        replaced by the random part; it must contain at least one ``.``.

    Returns
    -------
    ocid: `str`
        ``prefix`` without its last segment, followed by ``.`` and 60 random
        lowercase ASCII letters.
    """
    # Tuple unpacking keeps the ValueError for a prefix without any dot.
    stem, _ = prefix.rsplit(".", 1)
    letters = [random.choice(string.ascii_lowercase) for _ in range(60)]
    suffix = "".join(letters)
    return f"{stem}.{suffix}"
|
216
|
+
|
217
|
+
|
218
|
+
def parse_bool(value: Any) -> bool:
    """
    Interprets a value as a boolean.

    Parameters
    ----------
    value : Any
        The value to convert. Booleans are returned unchanged. Strings are
        True only when, after trimming whitespace and lower-casing, they equal
        'true', '1' or 'yes'. Anything else goes through ``bool()``.

    Returns
    -------
    bool
        The boolean interpretation of the value.
    """
    if isinstance(value, str):
        normalized = value.strip().lower()
        return normalized in ("true", "1", "yes")
    return value if isinstance(value, bool) else bool(value)
|
238
|
+
|
239
|
+
|
240
|
+
def read_file(file_path: str, **kwargs) -> str:
    """Read a text file through fsspec on a best-effort basis.

    Parameters
    ----------
    file_path : str
        Path or URI handed to ``fsspec.open``.
    kwargs
        Only the ``auth`` entry is used; it is expanded as keyword arguments
        to ``fsspec.open``.

    Returns
    -------
    str
        The file content, or ``UNKNOWN`` if anything goes wrong (the failure
        is logged at debug level and not re-raised).
    """
    try:
        # Kept inside the try so a bad `auth` value is also swallowed.
        storage_options = kwargs.get("auth", {})
        with fsspec.open(file_path, "r", **storage_options) as handle:
            content = handle.read()
    except Exception as e:
        logger.debug(f"Failed to read file {file_path}. {e}")
        return UNKNOWN
    return content
|
247
|
+
|
248
|
+
|
249
|
+
def get_dataframe_styles(max_width=75):
    """Build the table styles used when rendering a dataframe.

    Example usage::

        styler = df.style.set_table_styles(utils.get_dataframe_styles())
        styler = styler.set_table_attributes('class=table')

    Parameters
    ----------
    max_width: int
        Value, in pixels, used for the ``max-width`` of table cells.

    Returns
    -------
    styles: list
        A list of ``{"selector": ..., "props": [...]}`` dictionaries covering
        alternating rows, hovered rows, header cells and data cells.
    """
    header_cell = [
        ("font-size", "12px"),
        ("text-align", "center"),
        ("font-weight", "bold"),
        ("background-color", "#D3D3D3"),
        ("padding-left", "5px"),
        ("padding-right", "5px"),
        ("text-align", "right"),
    ]
    data_cell = [
        ("font-size", "12px"),
        ("text_wrap", False),
        ("white-space", "nowrap"),
        ("overflow", "hidden"),
        ("text-overflow", "ellipsis"),
        ("max-width", f"{max_width}px"),
    ]
    even_row = [("background-color", "#F8F8F8")]
    hovered_row = [("background-color", "#D9EDFD")]

    return [
        {"selector": "tbody tr:nth-child(even)", "props": even_row},
        {"selector": "tbody tr:hover", "props": hovered_row},
        {"selector": "th", "props": header_cell},
        {"selector": "td", "props": data_cell},
    ]
|
296
|
+
|
297
|
+
|
298
|
+
def get_bootstrap_styles():
    """
    Returns HTML bootstrap style information

    Returns
    -------
    str
        A complete ``<style>...</style>`` element with rules for ``code`` and
        for the ``.label`` family of classes.
    """
    # NOTE(review): indentation inside this literal is reconstructed; CSS
    # ignores it, but confirm against the original before relying on bytes.
    return """<style>

        code {
            padding: 2px 4px;
            font-size: 90%;
            color: #c7254e;
            background-color: #f9f2f4;
            border-radius: 4px;
            font-family: Menlo,Monaco,Consolas,"Courier New",monospace;
        }

        .label {
            display: inline;
            padding: .2em .6em .3em;
            font-weight: 700;
            line-height: 1;
            color: #fff;
            font-size: 85%;
            text-align: center;
            white-space: nowrap;
            vertical-align: baseline;
            border-radius: .25em;
        }

        .label-high-cardinality {
            background-color: #fe7c1b;
        }

        .label-missing {
            background-color: #214761;
        }

        .label-zeros {
            background-color: #333796;
        }

        .label-warning {
            background-color: #e2007e;
        }

        .label-skew {
            background-color: #ffdb58;
            color: black;
        }

        .label-duplicate-rows {
            background-color: #d90773;
        }
    </style>"""
|
351
|
+
|
352
|
+
|
353
|
+
def highlight_text(text):
    """Returns text with html highlights.

    Parameters
    ----------
    text: String
        The text to be highlighted. It is interpolated as-is (no HTML escaping).

    Returns
    -------
    ht: String
        The text wrapped in a yellow-background ``<code>`` element.
    """
    # NOTE(review): whitespace inside the literal is reconstructed. The final
    # strip() only trims the outer ends of the snippet, not around `text`.
    return f"""<code style="background:yellow; color:black; padding-top: 5px; padding-bottom: 5px">
        {text}
    </code>""".strip()
|
368
|
+
|
369
|
+
|
370
|
+
def horizontal_scrollable_div(html):
    """Wrap html with the necessary html to make horizontal scrolling possible.

    Examples
    --------

    display(HTML(utils.horizontal_scrollable_div(my_html)))

    Parameters
    ----------
    html: str
        Your HTML to wrap. It is inserted as-is (no escaping).

    Returns
    -------
    str
        Wrapped HTML: a ``<style>`` element with the scrollbar rules followed
        by two nested ``<div>`` elements containing ``html``.
    """
    # Doubled braces are literal braces inside the f-string. The outer div uses
    # `width: 100%`; the previous `width=100%` was not a valid CSS declaration
    # and was ignored by browsers.
    return f"""
    <style>
        .mostly-customized-scrollbar {{
          display: block;
          width: 100%;
          overflow: auto;
        }}

        .mostly-customized-scrollbar::-webkit-scrollbar {{
          width: 5px;
          height: 8px;
          background-color: #aaa;
        }}

        .mostly-customized-scrollbar::-webkit-scrollbar-thumb {{
          background: #000;
          border-radius: 10px;
        }}
    </style>

    <div style="width: 100%; display: flex; flex-wrap: nowrap; overflow-x: auto;">
        <div class="mostly-customized-scrollbar">
            {html}
        </div>
    </div>
    """
|
415
|
+
|
416
|
+
|
417
|
+
def is_notebook():
    """Returns true if the environment is a jupyter notebook.

    Returns
    -------
    bool
        True only when the active IPython shell class is named
        ``ZMQInteractiveShell`` (Jupyter notebook or qtconsole). A terminal
        IPython session, any other shell, a missing IPython installation, or
        plain Python all give False.
    """
    try:
        from IPython import get_ipython

        shell = get_ipython().__class__.__name__
    except (ModuleNotFoundError, NameError):
        # A tuple is required here: `except A or B` only ever catches A.
        return False  # Probably standard Python interpreter
    return shell == "ZMQInteractiveShell"  # pragma: no cover
|
431
|
+
|
432
|
+
|
433
|
+
def is_test():  # pragma: no cover
    """
    Returns true if ADS is in test mode.

    The ``test_mode`` flag is read from the ``ads`` package at call time.
    """
    from ads import test_mode as current_flag

    return current_flag
|
440
|
+
|
441
|
+
|
442
|
+
@deprecated(
    "2.6.8",
    details="Deprecated, use: ads.set_auth(auth='resource_principal')",
)
def is_resource_principal_mode():  # pragma: no cover
    """
    Returns true if ADS is in resource principal mode.

    The ``resource_principal_mode`` flag is read from the ``ads`` package at
    call time.
    """
    from ads import resource_principal_mode as current_flag

    return current_flag
|
453
|
+
|
454
|
+
|
455
|
+
@deprecated(
    "2.6.8",
    details="Deprecated, use: from ads.common.auth import AuthState; AuthState().oci_config_path",
)
def oci_config_location():  # pragma: no cover
    """
    Returns oci configuration file location, i.e. the ``oci_config_path``
    attribute of ``AuthState()``.
    """
    from ads.common.auth import AuthState

    auth_state = AuthState()
    return auth_state.oci_config_path
|
466
|
+
|
467
|
+
|
468
|
+
@deprecated(
    "2.6.8",
    details="Deprecated, use: from ads.common.auth import AuthState; AuthState().oci_key_profile",
)
def oci_key_profile():  # pragma: no cover
    """
    Returns the key profile value, i.e. the ``oci_key_profile`` attribute of
    ``AuthState()``.
    """
    from ads.common.auth import AuthState

    auth_state = AuthState()
    return auth_state.oci_key_profile
|
479
|
+
|
480
|
+
|
481
|
+
def is_documentation_mode():  # pragma: no cover
    """
    Returns true if ADS is in documentation mode.

    The ``documentation_mode`` flag is read from the ``ads`` package at call time.
    """
    from ads import documentation_mode as current_flag

    return current_flag
|
488
|
+
|
489
|
+
|
490
|
+
def is_debug_mode():  # pragma: no cover
    """
    Returns true if ADS is in debug mode.

    The ``debug_mode`` flag is read from the ``ads`` package at call time.
    """
    from ads import debug_mode as current_flag

    return current_flag
|
497
|
+
|
498
|
+
|
499
|
+
@deprecated("2.3.1")
@runtime_dependency(module="IPython", install_from=OptionalDependency.NOTEBOOK)
def print_user_message(
    msg, display_type="tip", see_also_links=None, title="Tip"
):  # pragma: no cover
    """This method is deprecated and will be removed in future releases.
    Prints in html formatted block one of tip|warning|error|info type.

    Plain-text output (``print``) is produced for ``error`` messages outside
    documentation mode and for ``module`` messages. HTML output is produced
    only when ADS is in documentation mode inside a notebook.

    Parameters
    ----------
    msg : str or list
        The actual message to display.
        When display_type is "module", msg can be a list of
        [module name, module package name], i.e. ["automl", "ads[ml]"]
    display_type : str (default 'tip')
        The type of user message: 'tip', 'warning', 'error', 'module', or a
        value starting with 'info'. Matching is case-insensitive.
    see_also_links : list of tuples in the form of [('display_name', 'url')]
        Rendered as a bullet list of links under 'info' messages only.
    title : str (default 'Tip')
        The title of user message; used by multi-line tips and by 'info'.

    Returns
    -------
    None
    """
    kind = display_type.lower()

    if kind == "error" and not is_documentation_mode():
        # Strip HTML tags so the console output stays readable.
        print("ERROR: {}".format(re.sub("<[^>]*>", "", msg)))

    if kind == "module":
        # isinstance() needs a type; the former `list()` argument was an empty
        # list instance and raised TypeError on every call.
        if isinstance(msg, (list, tuple)):
            module_name, module_pkg = msg[0], msg[1]
        else:
            module_name, module_pkg = msg, msg
        print(
            f"ERROR: {module_name} module not found. Make sure you have installed {module_pkg} in order to download all of necessary modules."
        )

    if not (is_documentation_mode() and is_notebook()):
        return

    # Imported once for all branches; importing only inside the "tip" branch
    # left `display`/`HTML` unbound for warning, error and info messages.
    from IPython.display import HTML, display

    if kind == "tip":
        if "\n" in msg:
            heading = f"<b>{title.upper().strip()}:</b>" if title else ""
            # Non-breaking spaces keep each bullet visibly indented.
            bullets = "".join(
                f"<br>&nbsp;&nbsp;+&nbsp;{line.strip()}"
                for line in msg.strip().split("\n")
            )
            user_message = f"{heading}{bullets}"
        else:
            user_message = msg.strip()

        display(
            HTML(
                f"""
                <div style="padding: 7px;
                            border-radius: 4px;
                            background-color: #d4ecd9;
                            margin-bottom: 5px;">
                    <p>{user_message}</p>
                </div>
                """
            )
        )

    elif kind == "warning":
        user_message = msg.strip().replace("\n", "<br>")

        display(
            HTML(
                f"""
                <div style="padding: 7px;
                            border-radius: 4px;
                            background-color: #fcc5c5;
                            margin-bottom: 5px;">
                    <h3>Warning:</h3>
                    <p>{user_message}</p>
                </div>
                """
            )
        )

    elif kind == "error":
        user_message = msg.strip().replace("\n", "<br>")

        display(
            HTML(
                f"""
                <div style="padding: 7px;
                            border-radius: 4px;
                            background-color: #4f053b;
                            color: white;
                            margin-bottom: 5px;">
                    <h2>Error:</h2>
                    <p>{user_message}</p>
                </div>
                """
            )
        )

    elif kind.startswith("info"):
        user_message = msg.strip().replace("\n", "<br>")

        if see_also_links:
            # One well-formed <li> per link, using href (src is not a valid
            # anchor attribute).
            link_items = "".join(
                f"<li><a href='{url}'>{display_name}</a></li>"
                for (display_name, url) in see_also_links
            )
            see_also_html = f"<ul>{link_items}</ul>"
        else:
            see_also_html = ""

        if title:
            title_html = f"""<div style="padding: 5px;
                                        color: #588864;
                                        border-bottom: 1px solid grey;
                                        margin-bottom: 5px;">
                                <h2>{title.upper()}</h2>
                             </div>"""
        else:
            title_html = ""

        display(
            HTML(
                f"""
                <br>

                <div style="width: calc(100% - 20px);
                            border-left: 8px solid #588864;
                            margin: 10px 0 10px 0;
                            padding: 10px">

                    {title_html}
                    <p>{user_message}</p>
                    {see_also_html}
                </div>
                """
            )
        )
|
643
|
+
|
644
|
+
|
645
|
+
# take a series which can be interpreted as a dict, index=key, this
|
646
|
+
# function sorts by the values and takes the top-n values, returning
|
647
|
+
# a new series
|
648
|
+
#
|
649
|
+
def truncate_series_top_n(series, n=24):
    """
    Return the ``n`` largest entries of a series as a new series.

    The series is sorted by value in descending order and the first ``n``
    rows are kept, so the index labels travel with their values.
    """
    ranked = series.sort_values(ascending=False)
    return ranked.head(n)
|
656
|
+
|
657
|
+
|
658
|
+
#
|
659
|
+
# take a sequence (<string>, list(<string>), tuple(<string>), pd.Series(<string>)) and ellipsize each string longer than n
|
660
|
+
#
|
661
|
+
def ellipsis_strings(raw, n=24):
    """
    Shorten every string longer than ``n`` characters by replacing its middle with "...".

    Parameters
    ----------
    raw: str, list, tuple, pd.Series, pd.Index or other iterable
        A single string or a sequence of values. Values are measured and
        sliced through their ``str()`` representation.
    n: (int, optional). Defaults to 24.
        The maximum length of a shortened string.

    Returns
    -------
    Union[str, list]
        A single value when ``raw`` is a string, otherwise a list. Elements that
        already fit within ``n`` characters are returned unchanged (elements of a
        ``pd.Index`` are converted to strings first).
    """
    if isinstance(raw, str):
        return ellipsis_strings([raw], n)[0]

    if isinstance(raw, pd.Index):
        # Index labels may be of any type; work on their string form.
        sequence = raw.astype(str).to_list()
    elif isinstance(raw, list):
        sequence = raw
    else:
        sequence = list(raw)

    limit = int(n)
    tail_len = limit // 2 - 3
    if tail_len > 0:
        head_len = limit - tail_len - 3
    else:
        # Too short to keep a tail: `text[-0:]` would return the whole string,
        # so only a head followed by the ellipsis is emitted.
        head_len = max(limit - 3, 0)

    result = []
    for item in sequence:
        text = str(item)
        if len(text) <= n:
            result.append(item)
        elif tail_len > 0:
            result.append(f"{text[:head_len]}...{text[-tail_len:]}")
        else:
            result.append(f"{text[:head_len]}...")

    return result
|
683
|
+
|
684
|
+
|
685
|
+
def first_not_none(itr):
    """
    Return the first truthy element of ``itr``, or ``None`` when there is none.

    Works like ``any()`` except that the matching element itself is returned
    instead of a boolean. Note that the check is a truthiness check, so falsy
    values such as ``0``, ``""`` or ``False`` are skipped just like ``None``.
    """
    return next((candidate for candidate in itr if candidate), None)
|
694
|
+
|
695
|
+
|
696
|
+
#
|
697
|
+
# checks to see if object is the same class as cls
|
698
|
+
#
|
699
|
+
def is_same_class(obj, cls):
    """
    Check whether the class name of ``obj`` matches ``cls``.

    ``cls`` is either a single class or a list/tuple of classes; in the latter
    case a match against any of them is enough. Only the ``__name__`` of the
    classes is compared, not the class objects themselves.
    """
    candidates = cls if isinstance(cls, (list, tuple)) else (cls,)
    wanted_names = [candidate.__name__ for candidate in candidates]
    return obj.__class__.__name__ in wanted_names
|
707
|
+
|
708
|
+
|
709
|
+
def replace_spaces(lst):
    """
    Return a new list in which every space of every string is an underscore.

    Each element of ``lst`` must be a string.

    lst: list of strings
    """
    underscored = []
    for text in lst:
        underscored.append(text.replace(" ", "_"))
    return underscored
|
718
|
+
|
719
|
+
|
720
|
+
def get_progress_bar(
    max_progress: int, description: str = "Initializing", verbose: bool = False
) -> TqdmProgressBar:
    """Builds a TqdmProgressBar for the requested number of steps.

    Parameters
    ----------
    max_progress: int
        The number of steps for the progressbar.
    description: (str, optional). Defaults to "Initializing".
        The description of the first step.
    verbose: (bool, optional). Defaults to `False`
        Whether the progress bar should show debug information.
        It is forced on when `is_debug_mode()` reports debug mode.

    Returns
    -------
    TqdmProgressBar
        A new TqdmProgressBar instance.
    """
    show_debug_info = verbose or is_debug_mode()
    return TqdmProgressBar(
        max_progress, description=description, verbose=show_debug_info
    )
|
742
|
+
|
743
|
+
|
744
|
+
class JsonConverter(json.JSONEncoder):
    """JSON encoder that knows how to serialize common pandas and numpy objects."""

    def default(self, obj):
        """
        Converts an object to a JSON-serializable value based on its type.

        Parameters
        ----------
        obj: Object
            The object being converted. Supported types are pandas Timestamp,
            Series, Categorical and DataFrame, numpy ndarrays and numpy
            integer/floating scalars.

        Returns
        -------
        Any
            A JSON-serializable representation of the object. Anything that is
            not recognized is delegated to ``json.JSONEncoder.default``, which
            raises ``TypeError``.
        """
        if isinstance(obj, pd.Timestamp):
            return str(obj)
        if isinstance(obj, pd.Series):
            # The underlying array is handed back to the encoder, which calls
            # `default` again for it.
            return obj.values
        if isinstance(obj, pd.Categorical):
            # `Categorical.get_values()` no longer exists in current pandas.
            return obj.tolist()
        if isinstance(obj, pd.DataFrame):
            return json.loads(obj.to_json())
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # The abstract scalar types cover every fixed-width alias and do not
        # depend on names such as `np.float_` that NumPy 2 removed.
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        return json.JSONEncoder.default(self, obj)
|
792
|
+
|
793
|
+
|
794
|
+
def split_data(X, y, random_state=random_state, test_size=test_size):
    """
    Splits the data into train and test parts with sklearn's ``train_test_split``.

    Parameters
    ----------
    X: a Pandas Dataframe
        The data points.
    y: a Pandas Dataframe
        The labels.
    random_state: int
        A random state for reproducibility.
    test_size: float
        The share of the data that goes into the test dataset. The train
        share is computed as ``1 - test_size``.

    Returns
    -------
    The value returned by ``train_test_split`` for ``X`` and ``y``.
    """
    train_share = 1 - test_size
    return train_test_split(
        X,
        y,
        train_size=train_share,
        test_size=test_size,
        random_state=random_state,
    )
|
812
|
+
|
813
|
+
|
814
|
+
@runtime_dependency(module="sqlalchemy", install_from=OptionalDependency.DATA)
def get_sqlalchemy_engine(connection_url, *args, **kwargs):
    """
    Returns the SQLAlchemy engine for a connection URL, creating it on first use.

    The SQLAlchemy docs say to use a single engine per connection URL; engines
    are therefore cached in the module-level ``_engines`` dictionary.

    Parameters
    ----------
    connection_url: string
        The URL to connect to.
    *args, **kwargs:
        Passed to ``sqlalchemy.create_engine`` when the engine is created.
        They are ignored when an engine for ``connection_url`` is already cached.
        ``pool_recycle`` defaults to 10 unless given explicitly.

    Returns
    -------
    engine: SQLAlchemy engine
        The engine on which SQLAlchemy commands can be run.
    """
    if connection_url not in _engines:
        #
        # Note: a short pool_recycle (10 by default) is used here because
        # sqlalchemy is free to drop inactive connections. This makes sure they
        # are recycled and available when we need them.
        #
        # DAR: note: use echo=True to log engine output
        #
        # `setdefault` lets a caller override the value instead of failing with
        # a duplicate keyword argument error.
        kwargs.setdefault("pool_recycle", 10)
        _engines[connection_url] = sqlalchemy.create_engine(
            connection_url, *args, **kwargs
        )

    return _engines[connection_url]
|
844
|
+
|
845
|
+
|
846
|
+
def inject_and_copy_kwargs(kwargs, **args):
    """Returns a copy of `kwargs` extended with the given keyword arguments.

    Keys that are already present in `kwargs` keep their original value; only
    missing keys are injected. The input dictionary is left untouched.

    Examples
    ________
    >>> foo(arg1, args, utils.inject_and_copy_kwargs(kwargs, arg3=12, arg4=42))

    Parameters
    ----------
    kwargs : dict
        The original `kwargs`.
    **args : type
        The values to inject, e.g. foo=42, bar=12.

    Returns
    -------
    d: dict
        A new dictionary that can be used in place of `kwargs`.

    """
    merged = kwargs.copy()
    for name, value in args.items():
        # setdefault only writes when the key is missing
        merged.setdefault(name, value)
    return merged
|
872
|
+
|
873
|
+
|
874
|
+
def flatten(d, parent_key=""):
    """
    Flattens nested dictionaries into a single-layer dictionary.

    Keys of nested mappings are lifted to the top level as they are; they are
    not prefixed with the key of their parent. When the same key occurs more
    than once, the value met last wins.

    Parameters
    ----------
    d : dict
        The dictionary that needs to be flattened.
    parent_key : str
        The key under which `d` was nested. It does not influence the
        resulting keys.

    Returns
    -------
    a_dict: dict
        A single-layer dictionary.
    """
    flat = {}
    for key, value in d.items():
        if isinstance(value, collections.abc.MutableMapping):
            flat.update(flatten(value, key))
        else:
            flat[key] = value
    return flat
|
899
|
+
|
900
|
+
|
901
|
+
def wrap_lines(li, heading=""):
    """
    Renders the elements of ``li`` as a multi-line string of fixed width.

    The list representation of ``li`` is wrapped at 30 characters and placed
    on the lines after ``heading``. An empty ``li`` gives an empty string.
    """
    if len(li) > 0:
        wrapped = fill(str(list(li)), width=30)
        return heading + "\n" + wrapped
    return ""
|
906
|
+
|
907
|
+
|
908
|
+
def get_base_modules(model):
    """
    Get the base classes of an ADS model.

    The result is the method resolution order of the model's type, preceded by
    the base classes of the wrapped estimator (``model.est``) and of every
    pipeline step (``model.steps``) when these attributes exist.

    Parameters
    ----------
    model
        The model object to inspect.

    Returns
    -------
    list
        The collected classes.
    """
    add_bases = []
    if hasattr(model, "est"):
        add_bases = get_base_modules(model.est)
    try:
        if hasattr(model, "steps") and isinstance(model.steps, list):
            for _, step in model.steps:
                add_bases.extend(get_base_modules(step))
    except Exception:
        # Best effort: steps that are not (name, estimator) pairs are skipped,
        # keeping whatever was collected so far.
        pass
    # Every type exposes `__mro__`, so no attribute check is needed.
    return add_bases + list(type(model).__mro__)
|
925
|
+
|
926
|
+
|
927
|
+
def extract_lib_dependencies_from_model(model) -> dict:
    """
    Extract a dictionary of library dependencies for a model.

    The top-level package of every class returned by ``get_base_modules`` is
    looked up among the installed distributions. Packages listed in
    ``module_ignore`` are skipped and names are translated through
    ``lib_translator`` before the lookup.

    Parameters
    ----------
    model
        The model object to inspect.

    Returns
    -------
    Dict: A dictionary mapping the library name to its installed version.
    """
    # NOTE(review): pkg_resources is deprecated by setuptools; consider moving
    # to importlib.metadata once name-normalization behavior is confirmed.
    from pkg_resources import get_distribution

    module_versions = {}
    modules_to_include = set(
        mod.__module__.split(".")[0]
        for mod in get_base_modules(model)
        if hasattr(mod, "__module__")
    )
    for mod in modules_to_include:
        if mod in module_ignore:
            continue
        try:
            mod_name = lib_translator.get(mod, mod)
            module_versions[mod_name] = get_distribution(mod_name).version
        except Exception:
            # Best effort: packages without an installed distribution are left
            # out. `Exception` (not a bare except) keeps KeyboardInterrupt and
            # SystemExit propagating.
            pass
    return module_versions
|
955
|
+
|
956
|
+
|
957
|
+
def generate_requirement_file(
    requirements: dict, file_path: str, file_name: str = "requirements.txt"
):
    """
    Writes a requirements file into the `file_path` directory.

    A library with a version is written as ``<lib>==<version>``; a library
    whose version is empty is written by name only. One library per line.

    Parameters
    ----------
    requirements : dict
        Key is the library name and value is the version.
    file_path : str
        Directory to save the requirements file to.
    file_name : str
        Optional parameter to specify the file name.
    """
    target = os.path.join(file_path, file_name)
    with open(target, "w") as req_file:
        for lib, version in requirements.items():
            entry = f"{lib}=={version}\n" if version else f"{lib}\n"
            req_file.write(entry)
|
979
|
+
|
980
|
+
|
981
|
+
def _get_feature_type_and_dtype(column):
|
982
|
+
feature_type = "unknown"
|
983
|
+
dtype = column.dtype
|
984
|
+
if dtype.name in ["category", "object", "boolean"]:
|
985
|
+
feature_type = "categorical"
|
986
|
+
elif is_numeric_dtype(dtype):
|
987
|
+
feature_type = "continuous"
|
988
|
+
elif is_datetime64_dtype(dtype):
|
989
|
+
feature_type = "datetime"
|
990
|
+
return feature_type, dtype.name
|
991
|
+
|
992
|
+
|
993
|
+
def to_dataframe(
    data: Union[
        list,
        tuple,
        pd.Series,
        np.ndarray,
        pd.DataFrame,
    ],
):
    """
    Convert to pandas DataFrame.

    Parameters
    ----------
    data: Union[list, tuple, pd.Series, np.ndarray, pd.DataFrame]
        The data to convert. Dictionaries and dask dataframes are accepted
        as well.

    Returns
    _______
    pd.DataFrame
        pandas DataFrame. A pandas DataFrame input is returned as is.

    Raises
    ------
    NotImplementedError
        If the numpy array has more than `DIMENSION` dimensions, the dictionary
        is not tabular, or the data type is not supported.
    """
    if isinstance(data, np.ndarray) and len(data.shape) > DIMENSION:
        raise NotImplementedError(
            f"Cannot convert a numpy array with size {data.shape} to a pandas DataFrame."
        )
    if isinstance(data, (np.ndarray, list, tuple)):
        return pd.DataFrame(data)
    if isinstance(data, pd.Series):
        return data.to_frame()
    if isinstance(data, dict):
        try:
            return pd.DataFrame.from_dict(data)
        except Exception as e:
            # Keep the pandas error as the cause so the reason is not lost.
            raise NotImplementedError(
                "Cannot convert this dictionary to a pandas DataFrame. "
                "Check the structure to ensure it is tabular."
            ) from e
    if isinstance(data, pd.DataFrame):
        return data
    if _is_dask_dataframe(data):
        return data.compute()
    raise NotImplementedError(
        f"The data type `{type(data)}` is not supported. Convert it to a pandas DataFrame."
    )
|
1044
|
+
|
1045
|
+
|
1046
|
+
def _is_dask_dataframe(ddf):
|
1047
|
+
"""
|
1048
|
+
Will determine if the given arg is a dask dataframe.
|
1049
|
+
Returns False if dask is not installed.
|
1050
|
+
"""
|
1051
|
+
try:
|
1052
|
+
import dask.dataframe as dd
|
1053
|
+
|
1054
|
+
return isinstance(ddf, dd.DataFrame)
|
1055
|
+
except:
|
1056
|
+
return False
|
1057
|
+
|
1058
|
+
|
1059
|
+
def _is_dask_series(ddf):
|
1060
|
+
"""
|
1061
|
+
Will determine if the given arg is a dask dataframe.
|
1062
|
+
Returns False if dask is not installed.
|
1063
|
+
"""
|
1064
|
+
try:
|
1065
|
+
import dask.dataframe as dd
|
1066
|
+
|
1067
|
+
return isinstance(ddf, dd.Series)
|
1068
|
+
except:
|
1069
|
+
return False
|
1070
|
+
|
1071
|
+
|
1072
|
+
def _log_missing_module(module, package):
    """
    Logs an error saying that `module` was not found and `package` should be installed.
    """
    message = f"The {module} module was not found. Install {package}."
    logger.error(message)
|
1077
|
+
|
1078
|
+
|
1079
|
+
def _log_multivalue_feature_column_error():
    """Logs an error saying that a feature column holds more than one value."""
    message = (
        "A feature column has more than one value. Only a single value is allowed."
    )
    logger.error(message)
|
1083
|
+
|
1084
|
+
|
1085
|
+
def _log_plot_high_cardinality_warning(s, length):
    """
    Logs a warning that `s` has too many distinct values (`length`) to plot
    and that only the top `MAX_DISPLAY_VALUES` will be used.
    """
    message = f"There are too many distinct values for {s} ({length:,}) to plot. Only the top {MAX_DISPLAY_VALUES}, by cardinality, will be used."
    logger.warning(message)
|
1089
|
+
|
1090
|
+
|
1091
|
+
def snake_to_camel(name: str, capitalized_first_token: Optional[bool] = False) -> str:
    """Converts a snake case string to its camel case representation.

    Tokens after the first one are capitalized, except tokens that are already
    fully upper case, which are kept as they are.

    Parameters
    ----------
    name: str
        The name to convert.
    capitalized_first_token: (bool, optional). Defaults to False.
        Whether the first token needs to be capitalized or not.

    Returns
    -------
    str: The name converted to the camel representation.
    """
    first, *rest = name.split("_")
    if capitalized_first_token:
        first = first.capitalize()
    pieces = [first]
    for token in rest:
        pieces.append(token if token.isupper() else token.capitalize())
    return "".join(pieces)
|
1109
|
+
|
1110
|
+
|
1111
|
+
def camel_to_snake(name: str) -> str:
    """Converts a camel case string to its snake case representation.

    Parameters
    ----------
    name: str
        The name to convert.

    Returns
    -------
    str: The name converted to the snake representation.
    """
    # First pass: break before a capital that starts a capitalized word.
    word_pattern = "(.)([A-Z][a-z]+)"
    # Second pass: break between a lower-case letter or digit and a capital.
    boundary_pattern = "([a-z0-9])([A-Z])"

    separated = name
    for pattern in (word_pattern, boundary_pattern):
        separated = re.sub(pattern, r"\1_\2", separated)
    return separated.lower()
|
1125
|
+
|
1126
|
+
|
1127
|
+
def is_data_too_wide(
    data: Union[
        list,
        tuple,
        pd.Series,
        np.ndarray,
        pd.DataFrame,
    ],
    max_col_num: int,
) -> bool:
    """
    Tells whether the data has more columns than allowed.

    Parameters
    ----------

    data: Union[list, tuple, pd.Series, np.ndarray, pd.DataFrame]
        A sample of data that will be used to generate schema.
    max_col_num : int.
        The maximum column size of the data that allows to auto generate schema.

    Returns
    -------
    bool
        True when a (pandas or dask) dataframe has more than `max_col_num`
        columns. Series, numpy arrays, lists and tuples always give False.

    Raises
    ------
    TypeError
        If the type of `data` is not supported.
    """
    assert (
        max_col_num and isinstance(max_col_num, int) and max_col_num > 0
    ), "The parameter `max_col_num` must be a positive integer."

    exact_type = type(data)

    if exact_type == pd.Series or _is_dask_series(data):
        return False

    if exact_type == pd.DataFrame or _is_dask_dataframe(data):
        return len(data.columns) > max_col_num

    if isinstance(data, (np.ndarray, list, tuple)):
        # check the column size in model_schema() after converting to pd.dataframe
        return False

    raise TypeError(f"The data type `{type(data)}` is not supported.")
|
1168
|
+
|
1169
|
+
|
1170
|
+
def get_files(directory: str, auth: Optional[Dict] = None):
    """List out all the file names under this directory.

    Entries matching a pattern of the `.model-ignore` file located in the
    directory (if there is one) are left out. Lines of that file that are empty
    or start with `#` are not treated as patterns.

    Parameters
    ----------
    directory: str
        The directory to list out all the files from.
    auth: (Dict, optional). Defaults to None.
        The default authentication is set using `ads.set_auth` API. If you need to override the
        default, use the `ads.common.auth.api_keys` or `ads.common.auth.resource_principal` to create appropriate
        authentication signer and kwargs required to instantiate IdentityClient object.

    Returns
    -------
    List
        List of the files (and sub-directories) in the directory, relative to it.
    """
    directory = directory.rstrip("/")
    path_scheme = urlparse(directory).scheme or "file"
    storage_options = auth or authutil.default_signer()
    model_ignore_path = os.path.join(directory, ".model-ignore")
    if is_path_exists(model_ignore_path, auth=auth):
        with fsspec.open(model_ignore_path, "r", **storage_options) as f:
            ignore_patterns = f.read().strip().split("\n")
    else:
        ignore_patterns = []

    file_names = []
    fs = fsspec.filesystem(path_scheme, **storage_options)
    for root, dirs, files in fs.walk(directory):
        file_names.extend(os.path.join(root, name) for name in files)
        file_names.extend(os.path.join(root, name) for name in dirs)

    # Drop the scheme so that the prefix length used for the relative names
    # below matches the walked paths (presumably returned without the scheme
    # for remote directories -- confirm against the filesystem in use).
    # A slice is used because `str.lstrip("oci://")` strips a *set of
    # characters* and would also eat a bucket name starting with o, c or i.
    oci_prefix = "oci://"
    if directory.startswith(oci_prefix):
        directory = directory[len(oci_prefix):]

    for ignore in ignore_patterns:
        if ignore.startswith("#") or ignore.strip() == "":
            continue
        if ignore.endswith("/"):
            # a trailing slash means "everything starting with this name"
            ignore = ignore[:-1] + "*"
        # The pattern does not depend on the file, so translate it once.
        pattern = re.compile(fnmatch.translate("/%s" % ignore.strip()))
        file_names = [
            file_name for file_name in file_names if not pattern.search(file_name)
        ]

    return [matched_file[len(directory) + 1 :] for matched_file in file_names]
|
1218
|
+
|
1219
|
+
|
1220
|
+
def download_from_web(url: str, to_path: str) -> None:
    """Downloads a single file from http/https/ftp.

    The response is streamed to the destination in blocks of
    `DEFAULT_BUFFER_SIZE * 8` bytes and is closed afterwards.

    Parameters
    ----------
    url : str
        The URL of the source file.
    to_path : path-like object
        Local destination path.

    Returns
    -------
    None
        Nothing
    """
    block_size = DEFAULT_BUFFER_SIZE * 8
    with contextlib.closing(request.urlopen(url)) as fp:
        with open(to_path, "wb") as out_file:
            # read until the response yields an empty block
            for block in iter(lambda: fp.read(block_size), b""):
                out_file.write(block)
|
1244
|
+
|
1245
|
+
|
1246
|
+
def copy_from_uri(
    uri: str,
    to_path: str,
    unpack: Optional[bool] = False,
    force_overwrite: Optional[bool] = False,
    auth: Optional[Dict] = None,
) -> None:
    """Copies file(s) to a local path. The source can be a folder, an archived folder or a single file,
    located in a local folder or in OCI Object Storage.

    Parameters
    ----------
    uri: str
        The URI of the source file or directory, which can be local path or
        OCI object storage URI.
    to_path: str
        The local destination path.
        If this is a directory, the source files will be placed under it.
    unpack : (bool, optional). Defaults to False.
        Indicates whether a zip or tar.gz file specified by the uri should be unpacked.
        This option has no effect on other files.
    force_overwrite: (bool, optional). Defaults to False.
        Whether to overwrite existing files or not.
    auth: (Dict, optional). Defaults to None.
        The default authentication is set using `ads.set_auth` API. If you need to override the
        default, use the `ads.common.auth.api_keys` or `ads.common.auth.resource_principal` to create appropriate
        authentication signer and kwargs required to instantiate IdentityClient object.

    Returns
    -------
    None
        Nothing

    Raises
    ------
    ValueError
        If the destination path already exists and `force_overwrite` is set to False.
    """
    if os.path.exists(to_path):
        if not force_overwrite:
            raise ValueError(
                "The destination path already exists. "
                "Set `force_overwrite` to True if you wish to overwrite."
            )
        shutil.rmtree(to_path, ignore_errors=True)

    scheme = urlparse(uri).scheme
    auth = auth or authutil.default_signer()

    with tempfile.TemporaryDirectory() as temp_dir:
        is_archive = str(uri).lower().endswith((".zip", ".tar.gz", ".gztar"))
        if unpack and is_archive:
            # the archive is first fetched into a temporary directory and
            # unpacked into the requested destination afterwards
            unpack_path, to_path = to_path, temp_dir
        else:
            unpack_path = None

        fs = fsspec.filesystem(scheme, **auth)

        source_is_file = not uri.endswith("/") and not fs.isdir(uri)
        if source_is_file and os.path.isdir(to_path):
            # a single file copied into a directory keeps its base name
            to_path = os.path.join(to_path, os.path.basename(str(uri).rstrip("/")))

        fs.get(uri, to_path, recursive=True)

        if unpack_path:
            shutil.unpack_archive(to_path, unpack_path)
|
1311
|
+
|
1312
|
+
|
1313
|
+
def copy_file(
    uri_src: str,
    uri_dst: str,
    force_overwrite: Optional[bool] = False,
    auth: Optional[Dict] = None,
    chunk_size: Optional[int] = DEFAULT_BUFFER_SIZE,
    progressbar_description: Optional[str] = "Copying `{uri_src}` to `{uri_dst}`",
    ignore_if_src_not_exists: Optional[bool] = False,
) -> str:
    """
    Copies file from `uri_src` to `uri_dst`.
    If `uri_dst` specifies a directory, the file will be copied into `uri_dst`
    using the base filename from `uri_src`.
    Returns the path to the newly created file.

    Parameters
    ----------
    uri_src: str
        The URI of the source file, which can be local path or OCI object storage URI.
    uri_dst: str
        The URI of the destination file, which can be local path or OCI object storage URI.
    force_overwrite: (bool, optional). Defaults to False.
        Whether to overwrite existing files or not.
    auth: (Dict, optional). Defaults to None.
        The default authentication is set using `ads.set_auth` API. If you need to override the
        default, use the `ads.common.auth.api_keys` or `ads.common.auth.resource_principal` to create appropriate
        authentication signer and kwargs required to instantiate IdentityClient object.
    chunk_size: (int, optional). Defaults to `DEFAULT_BUFFER_SIZE`
        How much data can be copied in one iteration.
    progressbar_description: (str, optional). Defaults to `"Copying `{uri_src}` to `{uri_dst}`"`.
        Prefix for the progressbar.
    ignore_if_src_not_exists: (bool, optional). Defaults to False.
        If True and the source file does not exist, nothing is copied and the
        destination path is returned instead of raising an error.

    Returns
    -------
    str
        The path to the newly created file.

    Raises
    ------
    FileNotFoundError
        If the source file does not exist and `ignore_if_src_not_exists` is `False`.
    FileExistsError
        If a destination file exists and `force_overwrite` set to `False`.
    """
    chunk_size = chunk_size or DEFAULT_BUFFER_SIZE

    if not os.path.basename(uri_dst):
        # the destination ends with a separator: keep the source file name
        uri_dst = os.path.join(uri_dst, os.path.basename(uri_src))
    src_path_scheme = urlparse(uri_src).scheme or "file"

    auth = auth or {}
    if src_path_scheme.lower() == "oci" and not auth:
        auth = authutil.default_signer()

    # One filesystem object serves both the existence check and the size lookup.
    src_file_system = fsspec.filesystem(src_path_scheme, **auth)

    if not src_file_system.exists(uri_src):
        if ignore_if_src_not_exists:
            return uri_dst
        raise FileNotFoundError(f"The `{uri_src}` not exists.")

    file_size = src_file_system.info(uri_src)["size"]
    if not force_overwrite:
        dst_path_scheme = urlparse(uri_dst).scheme or "file"
        if fsspec.filesystem(dst_path_scheme, **auth).exists(uri_dst):
            raise FileExistsError(
                f"The `{uri_dst}` exists. Please use a new file name or "
                "set force_overwrite to True if you wish to overwrite."
            )

    with fsspec.open(uri_dst, mode="wb", **auth) as fwrite:
        with fsspec.open(uri_src, mode="rb", encoding=None, **auth) as fread:
            # wrap `read` so that every chunk read advances the progress bar
            with tqdm.wrapattr(
                fread,
                "read",
                desc=progressbar_description.format(uri_src=uri_src, uri_dst=uri_dst),
                total=file_size,
                position=0,
                leave=False,
                colour="blue",
                file=sys.stdout,
            ) as ffrom:
                while True:
                    chunk = ffrom.read(chunk_size)
                    if not chunk:
                        break
                    fwrite.write(chunk)

    return uri_dst
|
1400
|
+
|
1401
|
+
|
1402
|
+
def remove_file(file_path: str, auth: Optional[Dict] = None) -> None:
    """
    Removes file.

    Parameters
    ----------
    file_path: str
        The path of the source file, which can be local path or OCI object storage URI.
    auth: (Dict, optional). Defaults to None.
        The default authentication is set using `ads.set_auth` API. If you need to override the
        default, use the `ads.common.auth.api_keys` or `ads.common.auth.resource_principal` to create appropriate
        authentication signer and kwargs required to instantiate IdentityClient object.

    Returns
    -------
    None
        Nothing.

    Raises
    ------
    FileNotFoundError
        If the file does not exist.
    """
    scheme = urlparse(file_path).scheme
    # The default signer is only requested when the path has a scheme;
    # paths without one get no authentication arguments.
    auth = auth or (scheme and authutil.default_signer()) or {}
    fs = fsspec.filesystem(scheme, **auth)
    try:
        fs.rm(file_path)
    except FileNotFoundError as e:
        # Re-raise with a uniform message while keeping the original as cause.
        # Any other exception propagates unchanged.
        raise FileNotFoundError(f"`{file_path}` not found.") from e
|
1429
|
+
|
1430
|
+
|
1431
|
+
def folder_size(path: str) -> int:
    """Recursively calculates the size of the `path` folder.

    Parameters
    ----------
    path: str
        Path to the folder. When it points to a file, the size of that file
        is returned. An empty path gives 0.

    Returns
    -------
    int
        The size of the folder in bytes.
    """
    if not path:
        return 0

    if os.path.isfile(path):
        return os.path.getsize(path)

    # NOTE(review): the files are collected with a recursive `**` glob, which by
    # default does not match names starting with a dot -- confirm whether hidden
    # files are meant to be excluded.
    pattern = os.path.join(path.rstrip("/"), "**")
    total = 0
    for entry in glob.glob(pattern, recursive=True):
        if os.path.isfile(entry):
            total += os.path.getsize(entry)
    return total
|
1454
|
+
|
1455
|
+
|
1456
|
+
def human_size(num_bytes: int, precision: Optional[int] = 2) -> str:
    """Converts bytes size to a string representing its value in B, KB, MB, GB, TB, PB, EB, ZB or YB.

    Parameters
    ----------
    num_bytes: int
        The size in bytes.
    precision: (int, optional). Defaults to 2.
        The precision of converting the bytes value.

    Returns
    -------
    str
        A string representing the size in the largest fitting unit (1024 based).
        Sizes beyond the largest unit are expressed in YB.

    Raises
    ------
    ValueError
        If `num_bytes` is negative.
    """
    if not num_bytes:
        return "0B"
    if num_bytes < 0:
        raise ValueError("`num_bytes` must be a non-negative number.")

    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    last_index = len(size_name) - 1

    # Repeated division by 1024 is exact for floats and, unlike a floating
    # point logarithm, cannot land one unit too low on exact powers of 1024,
    # go below "B" for values under one byte, or run past the largest unit.
    result_size = float(num_bytes)
    size_index = 0
    while result_size >= 1024 and size_index < last_index:
        result_size /= 1024
        size_index += 1

    return f"{round(result_size, precision)}{size_name[size_index]}"
|
1477
|
+
|
1478
|
+
|
1479
|
+
def get_value(obj, attr, default=None):
    """Gets a copy of the value from a nested dictionary of an object with nested attributes.

    Parameters
    ----------
    obj :
        An object or a dictionary
    attr :
        Attributes as a string separated by dot(.)
    default :
        Default value to be returned if attribute is not found.

    Returns
    -------
    Any:
        A copy of the attribute value. For dict or list, a deepcopy will be returned.

    """
    from collections.abc import Mapping

    val = default
    for key in attr.split("."):
        if isinstance(obj, Mapping) and key in obj:
            # Entries of a mapping take precedence over its attributes, so that
            # keys such as "items", "keys" or "get" resolve to the stored value
            # and not to the mapping's method of the same name.
            val = obj[key]
        elif hasattr(obj, key):
            val = getattr(obj, key)
        elif hasattr(obj, "get"):
            val = obj.get(key, default)
        else:
            return default
        obj = val
    return copy.deepcopy(val)
|
1508
|
+
|
1509
|
+
|
1510
|
+
def _filter_fn(adjective: str, word: str) -> bool:
|
1511
|
+
"""Used to filter list of adjectives phonetically
|
1512
|
+
|
1513
|
+
Parameters
|
1514
|
+
----------
|
1515
|
+
adjective: str
|
1516
|
+
adjective word
|
1517
|
+
word: str
|
1518
|
+
word to see if should be included in list of alliterations
|
1519
|
+
|
1520
|
+
Returns
|
1521
|
+
-------
|
1522
|
+
bool:
|
1523
|
+
filter or not
|
1524
|
+
"""
|
1525
|
+
if adjective.startswith("f"):
|
1526
|
+
return word.startswith("f") or word.startswith("ph")
|
1527
|
+
elif adjective.startswith("q"):
|
1528
|
+
return word.startswith("q") or word.startswith("k")
|
1529
|
+
else:
|
1530
|
+
return word.startswith(adjective[0])
|
1531
|
+
|
1532
|
+
|
1533
|
+
def get_random_name_for_resource() -> str:
    """Returns randomly generated easy to remember name. It consists of 1 adjective and 1 animal word,
    tailed by UTC timestamp (joined with '-'). This is an ADS default resource name generated for
    models, jobs, jobruns, model deployments, pipelines.

    Returns
    -------
    str
        Randomly generated easy to remember name for oci resources - models, jobs, jobruns, model deployments, pipelines.
        Example: polite-panther-2022-08-17-21:15.46; strange-spider-2022-08-17-23:55.02
    """
    from datetime import timezone

    adjective = random.choice(adjectives)
    # Prefer animals alliterating with the adjective; fall back to the whole
    # list when none does.
    animal = random.choice(
        list(filter(lambda x: _filter_fn(adjective, x), animals)) or animals
    )

    # `datetime.utcnow()` is deprecated; an aware UTC "now" formats identically.
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d-%H:%M.%S")

    return "-".join((adjective, animal, timestamp))
|
1557
|
+
|
1558
|
+
|
1559
|
+
def batch_convert_case(spec: dict, to_fmt: str) -> Dict:
    """
    Rename the keys of a spec dictionary from camel case to snake case or back.

    Only the value stored under the key "spec" is converted recursively; all
    other values are copied over untouched.

    Parameters
    ----------
    spec: dict
        dictionary of spec to convert
    to_fmt: str
        target format; "camel" converts to camel case, any other value
        converts to snake case

    Returns
    -------
    dict
        dictionary of converted spec; an empty or None ``spec`` is returned as is
    """
    if not spec:
        return spec

    rename = snake_to_camel if to_fmt == "camel" else camel_to_snake
    return {
        rename(key): batch_convert_case(value, to_fmt) if key == "spec" else value
        for key, value in spec.items()
    }
|
1589
|
+
|
1590
|
+
|
1591
|
+
def extract_region(auth: Optional[Dict] = None) -> Union[str, None]:
    """Extracts region information from the environment variables and signer.

    The lookup order is: the `region` entry of the auth config, the `region`
    attribute of the signer, and finally the `regionIdentifier` field of the
    JSON stored in `config.OCI_REGION_METADATA`.

    Parameters
    ----------
    auth: Dict
        The ADS authentication config used to initialize the client.
        Contains keys - config, signer and client_kwargs.
        Defaults to `authutil.default_signer()` when not provided.

    Returns
    -------
    Union[str, None]
        The region identifier. For example: `us-ashburn-1`.
        Returns `None` if region cannot be extracted.
    """
    auth = auth or authutil.default_signer()

    # The "config" key may be present but set to None, so `.get("config", {})`
    # is not sufficient to guarantee a mapping.
    region = (auth.get("config") or {}).get("region")
    if region:
        return region

    signer = auth.get("signer")
    signer_region = getattr(signer, "region", None) if signer else None
    if signer_region:
        return signer_region

    try:
        return json.loads(config.OCI_REGION_METADATA)["regionIdentifier"]
    except Exception:
        # Best effort: missing or malformed metadata means the region is unknown.
        # `Exception` (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return None
|
1624
|
+
|
1625
|
+
|
1626
|
+
def is_path_exists(uri: str, auth: Optional[Dict] = None) -> bool:
    """Tell whether a local path or an OCI object storage URI exists.

    Parameters
    ----------
    uri: str
        The URI of the target, which can be local path or OCI object storage URI.
    auth: (Dict, optional). Defaults to None.
        The default authentication is set using `ads.set_auth` API. If you need to override the
        default, use the `ads.common.auth.api_keys` or `ads.common.auth.resource_principal` to create appropriate
        authentication signer and kwargs required to instantiate IdentityClient object.

    Returns
    -------
    bool: return True if the path exists.
    """
    # A URI without a scheme is treated as a local file path.
    scheme = urlparse(uri).scheme or "file"
    # Credentials are only resolved for non-local filesystems.
    options = {} if scheme == "file" else (auth or authutil.default_signer())
    filesystem = fsspec.filesystem(scheme, **options)
    return True if filesystem.exists(uri) else False
|
1649
|
+
|
1650
|
+
|
1651
|
+
def upload_to_os(
    src_uri: str,
    dst_uri: str,
    auth: dict = None,
    parallel_process_count: int = DEFAULT_PARALLEL_PROCESS_COUNT,
    progressbar_description: str = "Uploading `{src_uri}` to `{dst_uri}`.",
    force_overwrite: bool = False,
):
    """Utilizes `oci.object_storage.UploadManager` to upload file to Object Storage.

    Parameters
    ----------
    src_uri: str
        The path to the file to upload. This should be local path.
    dst_uri: str
        Object Storage path, eg. `oci://my-bucket@my-tenancy/prefix`.
    auth: (Dict, optional) Defaults to None.
        The authentication config. When not provided, `default_signer()` is used.
    parallel_process_count: (int, optional) Defaults to `DEFAULT_PARALLEL_PROCESS_COUNT`.
        The number of worker processes to use in parallel for uploading individual
        parts of a multipart upload.
    progressbar_description: (str, optional) Defaults to `"Uploading `{src_uri}` to `{dst_uri}`."`.
        Prefix for the progressbar. The `{src_uri}` and `{dst_uri}` placeholders,
        when present, are replaced with the actual source and destination.
    force_overwrite: (bool, optional). Defaults to False.
        Whether to overwrite existing files or not.

    Returns
    -------
    Response: oci.response.Response
        The response from multipart commit operation or the put operation.

    Raise
    -----
    ValueError
        When the given `dst_uri` is not a valid Object Storage path.
    FileNotFoundError
        When the given `src_uri` does not exist.
    FileExistsError
        When `dst_uri` already exists and `force_overwrite` is False.
    RuntimeError
        When upload operation fails.
    """
    if not os.path.exists(src_uri):
        raise FileNotFoundError(f"The give src_uri: {src_uri} does not exist.")

    if not ObjectStorageDetails.is_oci_path(
        dst_uri
    ) or not ObjectStorageDetails.is_valid_uri(dst_uri):
        raise ValueError(
            f"The given dst_uri:{dst_uri} is not a valid Object Storage path."
        )

    auth = auth or authutil.default_signer()

    # Check for an existing object with the same credentials that will be used
    # for the upload, not with the process-wide default signer.
    if not force_overwrite and is_path_exists(dst_uri, auth=auth):
        raise FileExistsError(
            f"The `{dst_uri}` exists. Please use a new file name or "
            "set force_overwrite to True if you wish to overwrite."
        )

    upload_manager = object_storage.UploadManager(
        object_storage_client=OCIClientFactory(**auth).object_storage,
        parallel_process_count=parallel_process_count,
        allow_multipart_uploads=True,
        allow_parallel_uploads=True,
    )

    # Fill in the documented placeholders. `str.replace` is used instead of
    # `str.format` so that custom descriptions containing other braces are
    # passed through unchanged instead of raising.
    description = progressbar_description.replace("{src_uri}", str(src_uri)).replace(
        "{dst_uri}", str(dst_uri)
    )

    file_size = os.path.getsize(src_uri)
    with open(src_uri, "rb") as fs:
        with tqdm(
            total=file_size,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
            position=0,
            leave=False,
            file=sys.stdout,
            desc=description,
        ) as pbar:

            def progress_callback(progress):
                # Forward the value reported by the upload manager to the bar.
                pbar.update(progress)

            bucket_details = ObjectStorageDetails.from_path(dst_uri)
            response = upload_manager.upload_stream(
                namespace_name=bucket_details.namespace,
                bucket_name=bucket_details.bucket,
                object_name=bucket_details.filepath,
                stream_ref=fs,
                progress_callback=progress_callback,
            )

    if response.status == 200:
        print(f"{src_uri} has been successfully uploaded to {dst_uri}.")
    else:
        raise RuntimeError(
            f"Failed to upload {src_uri}. Response code is {response.status}"
        )

    return response
|
1749
|
+
|
1750
|
+
|
1751
|
+
def get_console_link(
    resource: str,
    ocid: str,
    region: str,
) -> str:
    """
    Build the web console link of a Data Science resource.

    Parameters
    ----------
    resource: str
        identify the type of OCI resource. {model, model-deployments, notebook-sessions, jobs} is supported.
    ocid: str
        OCID of the resource
    region: str
        The Region Identifier that the client should connect to.

    Returns
    -------
    console_link_url: str
        a valid link to the console for the given resource
    """
    base_url = "https://cloud.oracle.com/data-science"
    return f"{base_url}/{resource}/{ocid}?region={region}"
|
1776
|
+
|
1777
|
+
|
1778
|
+
def get_log_links(
    region: str,
    log_group_id: str,
    compartment_id: str = None,
    log_id: str = None,
    source_id: str = None,
) -> str:
    """
    This method returns the web console link for the given log ids.

    When both `log_group_id` and `log_id` are given, the link points to a log
    search filtered by source; when only `log_group_id` is given, the link
    points to the log group page.

    Parameters
    ----------
    region: str
        The Region Identifier that the client should connect to.
    log_group_id: str, required
        OCID of the log group.
    compartment_id: str, optional
        The compartment OCID of the resource.
    log_id: str, optional
        OCID of the log.
    source_id: str, optional
        The OCID of the resource used as the `source` filter of the search.

    Returns
    -------
    console_link_url: str
        a valid link to the console for the given resource, or an empty string
        when `log_group_id` is not provided.
    """
    if not log_group_id:
        return ""

    if not log_id:
        return f"https://cloud.oracle.com/logging/log-groups/{log_group_id}?region={region}"

    # format: https://cloud.oracle.com/logging/search?searchQuery=search "<compartment>/<log_group>/<log>" | source='<source>' | sort by datetime desc&regions=<region>
    query_range = f'''search "{compartment_id}/{log_group_id}/{log_id}"'''
    query_source = f"source='{source_id}'"
    # `&regions=` starts a separate URL query parameter; it must be a literal
    # ampersand followed by "regions".
    sort_condition = f"sort by datetime desc&regions={region}"
    search_query = (
        f"search?searchQuery={query_range} | {query_source} | {sort_condition}"
    )
    return f"https://cloud.oracle.com/logging/{search_query}"
|
1820
|
+
|
1821
|
+
|
1822
|
+
def parse_content_disposition(header: str) -> Tuple[str, Dict[str, str]]:
    """
    Split a Content-Disposition header into its disposition and its parameters.

    For example:
        'attachment; filename="example.txt"'
    will be parsed into:
        ('attachment', {'filename': 'example.txt'})

    Parameters
    ----------
    header (str): The Content-Disposition header string.

    Returns
    -------
    Tuple[str, Dict[str, str]]: A tuple containing the lower-cased disposition and a
        dictionary of parameters keyed by lower-cased parameter name. An empty
        header yields ``("", {})``.
    """
    if not header:
        return "", {}

    # The first ';'-separated segment is the disposition, the rest are parameters.
    disposition, *segments = header.split(";")

    params: Dict[str, str] = {}
    for segment in segments:
        name, separator, raw_value = segment.partition("=")
        # Segments without '=' carry no key-value pair and are ignored.
        if separator:
            params[name.strip().lower()] = raw_value.strip().strip('"')

    return disposition.strip().lower(), params
|