oracle-ads 2.13.9rc0__py3-none-any.whl → 2.13.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/__init__.py +40 -0
- ads/aqua/app.py +507 -0
- ads/aqua/cli.py +96 -0
- ads/aqua/client/__init__.py +3 -0
- ads/aqua/client/client.py +836 -0
- ads/aqua/client/openai_client.py +305 -0
- ads/aqua/common/__init__.py +5 -0
- ads/aqua/common/decorator.py +125 -0
- ads/aqua/common/entities.py +274 -0
- ads/aqua/common/enums.py +134 -0
- ads/aqua/common/errors.py +109 -0
- ads/aqua/common/utils.py +1295 -0
- ads/aqua/config/__init__.py +4 -0
- ads/aqua/config/container_config.py +246 -0
- ads/aqua/config/evaluation/__init__.py +4 -0
- ads/aqua/config/evaluation/evaluation_service_config.py +147 -0
- ads/aqua/config/utils/__init__.py +4 -0
- ads/aqua/config/utils/serializer.py +339 -0
- ads/aqua/constants.py +116 -0
- ads/aqua/data.py +14 -0
- ads/aqua/dummy_data/icon.txt +1 -0
- ads/aqua/dummy_data/oci_model_deployments.json +56 -0
- ads/aqua/dummy_data/oci_models.json +1 -0
- ads/aqua/dummy_data/readme.md +26 -0
- ads/aqua/evaluation/__init__.py +8 -0
- ads/aqua/evaluation/constants.py +53 -0
- ads/aqua/evaluation/entities.py +186 -0
- ads/aqua/evaluation/errors.py +70 -0
- ads/aqua/evaluation/evaluation.py +1814 -0
- ads/aqua/extension/__init__.py +42 -0
- ads/aqua/extension/aqua_ws_msg_handler.py +76 -0
- ads/aqua/extension/base_handler.py +90 -0
- ads/aqua/extension/common_handler.py +121 -0
- ads/aqua/extension/common_ws_msg_handler.py +36 -0
- ads/aqua/extension/deployment_handler.py +381 -0
- ads/aqua/extension/deployment_ws_msg_handler.py +54 -0
- ads/aqua/extension/errors.py +30 -0
- ads/aqua/extension/evaluation_handler.py +129 -0
- ads/aqua/extension/evaluation_ws_msg_handler.py +61 -0
- ads/aqua/extension/finetune_handler.py +96 -0
- ads/aqua/extension/model_handler.py +390 -0
- ads/aqua/extension/models/__init__.py +0 -0
- ads/aqua/extension/models/ws_models.py +145 -0
- ads/aqua/extension/models_ws_msg_handler.py +50 -0
- ads/aqua/extension/ui_handler.py +300 -0
- ads/aqua/extension/ui_websocket_handler.py +130 -0
- ads/aqua/extension/utils.py +133 -0
- ads/aqua/finetuning/__init__.py +7 -0
- ads/aqua/finetuning/constants.py +23 -0
- ads/aqua/finetuning/entities.py +181 -0
- ads/aqua/finetuning/finetuning.py +749 -0
- ads/aqua/model/__init__.py +8 -0
- ads/aqua/model/constants.py +60 -0
- ads/aqua/model/entities.py +385 -0
- ads/aqua/model/enums.py +32 -0
- ads/aqua/model/model.py +2134 -0
- ads/aqua/model/utils.py +52 -0
- ads/aqua/modeldeployment/__init__.py +6 -0
- ads/aqua/modeldeployment/constants.py +10 -0
- ads/aqua/modeldeployment/deployment.py +1315 -0
- ads/aqua/modeldeployment/entities.py +653 -0
- ads/aqua/modeldeployment/utils.py +543 -0
- ads/aqua/resources/gpu_shapes_index.json +94 -0
- ads/aqua/server/__init__.py +4 -0
- ads/aqua/server/__main__.py +24 -0
- ads/aqua/server/app.py +47 -0
- ads/aqua/server/aqua_spec.yml +1291 -0
- ads/aqua/training/__init__.py +4 -0
- ads/aqua/training/exceptions.py +476 -0
- ads/aqua/ui.py +519 -0
- ads/automl/__init__.py +9 -0
- ads/automl/driver.py +330 -0
- ads/automl/provider.py +975 -0
- ads/bds/__init__.py +5 -0
- ads/bds/auth.py +127 -0
- ads/bds/big_data_service.py +255 -0
- ads/catalog/__init__.py +19 -0
- ads/catalog/model.py +1576 -0
- ads/catalog/notebook.py +461 -0
- ads/catalog/project.py +468 -0
- ads/catalog/summary.py +178 -0
- ads/common/__init__.py +11 -0
- ads/common/analyzer.py +65 -0
- ads/common/artifact/.model-ignore +63 -0
- ads/common/artifact/__init__.py +10 -0
- ads/common/auth.py +1122 -0
- ads/common/card_identifier.py +83 -0
- ads/common/config.py +647 -0
- ads/common/data.py +165 -0
- ads/common/decorator/__init__.py +9 -0
- ads/common/decorator/argument_to_case.py +88 -0
- ads/common/decorator/deprecate.py +69 -0
- ads/common/decorator/require_nonempty_arg.py +65 -0
- ads/common/decorator/runtime_dependency.py +178 -0
- ads/common/decorator/threaded.py +97 -0
- ads/common/decorator/utils.py +35 -0
- ads/common/dsc_file_system.py +303 -0
- ads/common/error.py +14 -0
- ads/common/extended_enum.py +81 -0
- ads/common/function/__init__.py +5 -0
- ads/common/function/fn_util.py +142 -0
- ads/common/function/func_conf.yaml +25 -0
- ads/common/ipython.py +76 -0
- ads/common/model.py +679 -0
- ads/common/model_artifact.py +1759 -0
- ads/common/model_artifact_schema.json +107 -0
- ads/common/model_export_util.py +664 -0
- ads/common/model_metadata.py +24 -0
- ads/common/object_storage_details.py +296 -0
- ads/common/oci_client.py +179 -0
- ads/common/oci_datascience.py +46 -0
- ads/common/oci_logging.py +1144 -0
- ads/common/oci_mixin.py +957 -0
- ads/common/oci_resource.py +136 -0
- ads/common/serializer.py +559 -0
- ads/common/utils.py +1852 -0
- ads/common/word_lists.py +1491 -0
- ads/common/work_request.py +189 -0
- ads/config.py +1 -0
- ads/data_labeling/__init__.py +13 -0
- ads/data_labeling/boundingbox.py +253 -0
- ads/data_labeling/constants.py +47 -0
- ads/data_labeling/data_labeling_service.py +244 -0
- ads/data_labeling/interface/__init__.py +5 -0
- ads/data_labeling/interface/loader.py +16 -0
- ads/data_labeling/interface/parser.py +16 -0
- ads/data_labeling/interface/reader.py +23 -0
- ads/data_labeling/loader/__init__.py +5 -0
- ads/data_labeling/loader/file_loader.py +241 -0
- ads/data_labeling/metadata.py +110 -0
- ads/data_labeling/mixin/__init__.py +5 -0
- ads/data_labeling/mixin/data_labeling.py +232 -0
- ads/data_labeling/ner.py +129 -0
- ads/data_labeling/parser/__init__.py +5 -0
- ads/data_labeling/parser/dls_record_parser.py +388 -0
- ads/data_labeling/parser/export_metadata_parser.py +94 -0
- ads/data_labeling/parser/export_record_parser.py +473 -0
- ads/data_labeling/reader/__init__.py +5 -0
- ads/data_labeling/reader/dataset_reader.py +574 -0
- ads/data_labeling/reader/dls_record_reader.py +121 -0
- ads/data_labeling/reader/export_record_reader.py +62 -0
- ads/data_labeling/reader/jsonl_reader.py +75 -0
- ads/data_labeling/reader/metadata_reader.py +203 -0
- ads/data_labeling/reader/record_reader.py +263 -0
- ads/data_labeling/record.py +52 -0
- ads/data_labeling/visualizer/__init__.py +5 -0
- ads/data_labeling/visualizer/image_visualizer.py +525 -0
- ads/data_labeling/visualizer/text_visualizer.py +357 -0
- ads/database/__init__.py +5 -0
- ads/database/connection.py +338 -0
- ads/dataset/__init__.py +10 -0
- ads/dataset/capabilities.md +51 -0
- ads/dataset/classification_dataset.py +339 -0
- ads/dataset/correlation.py +226 -0
- ads/dataset/correlation_plot.py +563 -0
- ads/dataset/dask_series.py +173 -0
- ads/dataset/dataframe_transformer.py +110 -0
- ads/dataset/dataset.py +1979 -0
- ads/dataset/dataset_browser.py +360 -0
- ads/dataset/dataset_with_target.py +995 -0
- ads/dataset/exception.py +25 -0
- ads/dataset/factory.py +987 -0
- ads/dataset/feature_engineering_transformer.py +35 -0
- ads/dataset/feature_selection.py +107 -0
- ads/dataset/forecasting_dataset.py +26 -0
- ads/dataset/helper.py +1450 -0
- ads/dataset/label_encoder.py +99 -0
- ads/dataset/mixin/__init__.py +5 -0
- ads/dataset/mixin/dataset_accessor.py +134 -0
- ads/dataset/pipeline.py +58 -0
- ads/dataset/plot.py +710 -0
- ads/dataset/progress.py +86 -0
- ads/dataset/recommendation.py +297 -0
- ads/dataset/recommendation_transformer.py +502 -0
- ads/dataset/regression_dataset.py +14 -0
- ads/dataset/sampled_dataset.py +1050 -0
- ads/dataset/target.py +98 -0
- ads/dataset/timeseries.py +18 -0
- ads/dbmixin/__init__.py +5 -0
- ads/dbmixin/db_pandas_accessor.py +153 -0
- ads/environment/__init__.py +9 -0
- ads/environment/ml_runtime.py +66 -0
- ads/evaluations/README.md +14 -0
- ads/evaluations/__init__.py +109 -0
- ads/evaluations/evaluation_plot.py +983 -0
- ads/evaluations/evaluator.py +1334 -0
- ads/evaluations/statistical_metrics.py +543 -0
- ads/experiments/__init__.py +9 -0
- ads/experiments/capabilities.md +0 -0
- ads/explanations/__init__.py +21 -0
- ads/explanations/base_explainer.py +142 -0
- ads/explanations/capabilities.md +83 -0
- ads/explanations/explainer.py +190 -0
- ads/explanations/mlx_global_explainer.py +1050 -0
- ads/explanations/mlx_interface.py +386 -0
- ads/explanations/mlx_local_explainer.py +287 -0
- ads/explanations/mlx_whatif_explainer.py +201 -0
- ads/feature_engineering/__init__.py +20 -0
- ads/feature_engineering/accessor/__init__.py +5 -0
- ads/feature_engineering/accessor/dataframe_accessor.py +535 -0
- ads/feature_engineering/accessor/mixin/__init__.py +5 -0
- ads/feature_engineering/accessor/mixin/correlation.py +166 -0
- ads/feature_engineering/accessor/mixin/eda_mixin.py +266 -0
- ads/feature_engineering/accessor/mixin/eda_mixin_series.py +85 -0
- ads/feature_engineering/accessor/mixin/feature_types_mixin.py +211 -0
- ads/feature_engineering/accessor/mixin/utils.py +65 -0
- ads/feature_engineering/accessor/series_accessor.py +431 -0
- ads/feature_engineering/adsimage/__init__.py +5 -0
- ads/feature_engineering/adsimage/image.py +192 -0
- ads/feature_engineering/adsimage/image_reader.py +170 -0
- ads/feature_engineering/adsimage/interface/__init__.py +5 -0
- ads/feature_engineering/adsimage/interface/reader.py +19 -0
- ads/feature_engineering/adsstring/__init__.py +7 -0
- ads/feature_engineering/adsstring/oci_language/__init__.py +8 -0
- ads/feature_engineering/adsstring/string/__init__.py +8 -0
- ads/feature_engineering/data_schema.json +57 -0
- ads/feature_engineering/dataset/__init__.py +5 -0
- ads/feature_engineering/dataset/zip_code_data.py +42062 -0
- ads/feature_engineering/exceptions.py +40 -0
- ads/feature_engineering/feature_type/__init__.py +133 -0
- ads/feature_engineering/feature_type/address.py +184 -0
- ads/feature_engineering/feature_type/adsstring/__init__.py +5 -0
- ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +164 -0
- ads/feature_engineering/feature_type/adsstring/oci_language.py +93 -0
- ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +5 -0
- ads/feature_engineering/feature_type/adsstring/parsers/base.py +47 -0
- ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +96 -0
- ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +221 -0
- ads/feature_engineering/feature_type/adsstring/string.py +258 -0
- ads/feature_engineering/feature_type/base.py +58 -0
- ads/feature_engineering/feature_type/boolean.py +183 -0
- ads/feature_engineering/feature_type/category.py +146 -0
- ads/feature_engineering/feature_type/constant.py +137 -0
- ads/feature_engineering/feature_type/continuous.py +151 -0
- ads/feature_engineering/feature_type/creditcard.py +314 -0
- ads/feature_engineering/feature_type/datetime.py +190 -0
- ads/feature_engineering/feature_type/discrete.py +134 -0
- ads/feature_engineering/feature_type/document.py +43 -0
- ads/feature_engineering/feature_type/gis.py +251 -0
- ads/feature_engineering/feature_type/handler/__init__.py +5 -0
- ads/feature_engineering/feature_type/handler/feature_validator.py +524 -0
- ads/feature_engineering/feature_type/handler/feature_warning.py +319 -0
- ads/feature_engineering/feature_type/handler/warnings.py +128 -0
- ads/feature_engineering/feature_type/integer.py +142 -0
- ads/feature_engineering/feature_type/ip_address.py +144 -0
- ads/feature_engineering/feature_type/ip_address_v4.py +138 -0
- ads/feature_engineering/feature_type/ip_address_v6.py +138 -0
- ads/feature_engineering/feature_type/lat_long.py +256 -0
- ads/feature_engineering/feature_type/object.py +43 -0
- ads/feature_engineering/feature_type/ordinal.py +132 -0
- ads/feature_engineering/feature_type/phone_number.py +135 -0
- ads/feature_engineering/feature_type/string.py +171 -0
- ads/feature_engineering/feature_type/text.py +93 -0
- ads/feature_engineering/feature_type/unknown.py +43 -0
- ads/feature_engineering/feature_type/zip_code.py +164 -0
- ads/feature_engineering/feature_type_manager.py +406 -0
- ads/feature_engineering/schema.py +795 -0
- ads/feature_engineering/utils.py +245 -0
- ads/feature_store/.readthedocs.yaml +19 -0
- ads/feature_store/README.md +65 -0
- ads/feature_store/__init__.py +9 -0
- ads/feature_store/common/__init__.py +0 -0
- ads/feature_store/common/enums.py +339 -0
- ads/feature_store/common/exceptions.py +18 -0
- ads/feature_store/common/spark_session_singleton.py +125 -0
- ads/feature_store/common/utils/__init__.py +0 -0
- ads/feature_store/common/utils/base64_encoder_decoder.py +72 -0
- ads/feature_store/common/utils/feature_schema_mapper.py +283 -0
- ads/feature_store/common/utils/transformation_utils.py +82 -0
- ads/feature_store/common/utils/utility.py +403 -0
- ads/feature_store/data_validation/__init__.py +0 -0
- ads/feature_store/data_validation/great_expectation.py +129 -0
- ads/feature_store/dataset.py +1230 -0
- ads/feature_store/dataset_job.py +530 -0
- ads/feature_store/docs/Dockerfile +7 -0
- ads/feature_store/docs/Makefile +44 -0
- ads/feature_store/docs/conf.py +28 -0
- ads/feature_store/docs/requirements.txt +14 -0
- ads/feature_store/docs/source/ads.feature_store.query.rst +20 -0
- ads/feature_store/docs/source/cicd.rst +137 -0
- ads/feature_store/docs/source/conf.py +86 -0
- ads/feature_store/docs/source/data_versioning.rst +33 -0
- ads/feature_store/docs/source/dataset.rst +388 -0
- ads/feature_store/docs/source/dataset_job.rst +27 -0
- ads/feature_store/docs/source/demo.rst +70 -0
- ads/feature_store/docs/source/entity.rst +78 -0
- ads/feature_store/docs/source/feature_group.rst +624 -0
- ads/feature_store/docs/source/feature_group_job.rst +29 -0
- ads/feature_store/docs/source/feature_store.rst +122 -0
- ads/feature_store/docs/source/feature_store_class.rst +123 -0
- ads/feature_store/docs/source/feature_validation.rst +66 -0
- ads/feature_store/docs/source/figures/cicd.png +0 -0
- ads/feature_store/docs/source/figures/data_validation.png +0 -0
- ads/feature_store/docs/source/figures/data_versioning.png +0 -0
- ads/feature_store/docs/source/figures/dataset.gif +0 -0
- ads/feature_store/docs/source/figures/dataset.png +0 -0
- ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
- ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
- ads/feature_store/docs/source/figures/entity.png +0 -0
- ads/feature_store/docs/source/figures/feature_group.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
- ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
- ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
- ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
- ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
- ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
- ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
- ads/feature_store/docs/source/figures/overview.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
- ads/feature_store/docs/source/figures/stats_1.png +0 -0
- ads/feature_store/docs/source/figures/stats_2.png +0 -0
- ads/feature_store/docs/source/figures/stats_d.png +0 -0
- ads/feature_store/docs/source/figures/stats_fg.png +0 -0
- ads/feature_store/docs/source/figures/transformation.png +0 -0
- ads/feature_store/docs/source/figures/transformations.gif +0 -0
- ads/feature_store/docs/source/figures/validation.png +0 -0
- ads/feature_store/docs/source/figures/validation_fg.png +0 -0
- ads/feature_store/docs/source/figures/validation_results.png +0 -0
- ads/feature_store/docs/source/figures/validation_summary.png +0 -0
- ads/feature_store/docs/source/index.rst +81 -0
- ads/feature_store/docs/source/module.rst +8 -0
- ads/feature_store/docs/source/notebook.rst +94 -0
- ads/feature_store/docs/source/overview.rst +47 -0
- ads/feature_store/docs/source/quickstart.rst +176 -0
- ads/feature_store/docs/source/release_notes.rst +194 -0
- ads/feature_store/docs/source/setup_feature_store.rst +81 -0
- ads/feature_store/docs/source/statistics.rst +58 -0
- ads/feature_store/docs/source/transformation.rst +199 -0
- ads/feature_store/docs/source/ui.rst +65 -0
- ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +66 -0
- ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +192 -0
- ads/feature_store/docs/source/user_guides.setup.terraform.rst +338 -0
- ads/feature_store/entity.py +718 -0
- ads/feature_store/execution_strategy/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +375 -0
- ads/feature_store/execution_strategy/engine/__init__.py +0 -0
- ads/feature_store/execution_strategy/engine/spark_engine.py +316 -0
- ads/feature_store/execution_strategy/execution_strategy.py +113 -0
- ads/feature_store/execution_strategy/execution_strategy_provider.py +47 -0
- ads/feature_store/execution_strategy/spark/__init__.py +0 -0
- ads/feature_store/execution_strategy/spark/spark_execution.py +618 -0
- ads/feature_store/feature.py +192 -0
- ads/feature_store/feature_group.py +1494 -0
- ads/feature_store/feature_group_expectation.py +346 -0
- ads/feature_store/feature_group_job.py +602 -0
- ads/feature_store/feature_lineage/__init__.py +0 -0
- ads/feature_store/feature_lineage/graphviz_service.py +180 -0
- ads/feature_store/feature_option_details.py +50 -0
- ads/feature_store/feature_statistics/__init__.py +0 -0
- ads/feature_store/feature_statistics/statistics_service.py +99 -0
- ads/feature_store/feature_store.py +699 -0
- ads/feature_store/feature_store_registrar.py +518 -0
- ads/feature_store/input_feature_detail.py +149 -0
- ads/feature_store/mixin/__init__.py +4 -0
- ads/feature_store/mixin/oci_feature_store.py +145 -0
- ads/feature_store/model_details.py +73 -0
- ads/feature_store/query/__init__.py +0 -0
- ads/feature_store/query/filter.py +266 -0
- ads/feature_store/query/generator/__init__.py +0 -0
- ads/feature_store/query/generator/query_generator.py +298 -0
- ads/feature_store/query/join.py +161 -0
- ads/feature_store/query/query.py +403 -0
- ads/feature_store/query/validator/__init__.py +0 -0
- ads/feature_store/query/validator/query_validator.py +57 -0
- ads/feature_store/response/__init__.py +0 -0
- ads/feature_store/response/response_builder.py +68 -0
- ads/feature_store/service/__init__.py +0 -0
- ads/feature_store/service/oci_dataset.py +139 -0
- ads/feature_store/service/oci_dataset_job.py +199 -0
- ads/feature_store/service/oci_entity.py +125 -0
- ads/feature_store/service/oci_feature_group.py +164 -0
- ads/feature_store/service/oci_feature_group_job.py +214 -0
- ads/feature_store/service/oci_feature_store.py +182 -0
- ads/feature_store/service/oci_lineage.py +87 -0
- ads/feature_store/service/oci_transformation.py +104 -0
- ads/feature_store/statistics/__init__.py +0 -0
- ads/feature_store/statistics/abs_feature_value.py +49 -0
- ads/feature_store/statistics/charts/__init__.py +0 -0
- ads/feature_store/statistics/charts/abstract_feature_plot.py +37 -0
- ads/feature_store/statistics/charts/box_plot.py +148 -0
- ads/feature_store/statistics/charts/frequency_distribution.py +65 -0
- ads/feature_store/statistics/charts/probability_distribution.py +68 -0
- ads/feature_store/statistics/charts/top_k_frequent_elements.py +98 -0
- ads/feature_store/statistics/feature_stat.py +126 -0
- ads/feature_store/statistics/generic_feature_value.py +33 -0
- ads/feature_store/statistics/statistics.py +41 -0
- ads/feature_store/statistics_config.py +101 -0
- ads/feature_store/templates/feature_store_template.yaml +45 -0
- ads/feature_store/transformation.py +499 -0
- ads/feature_store/validation_output.py +57 -0
- ads/hpo/__init__.py +9 -0
- ads/hpo/_imports.py +91 -0
- ads/hpo/ads_search_space.py +439 -0
- ads/hpo/distributions.py +325 -0
- ads/hpo/objective.py +280 -0
- ads/hpo/search_cv.py +1657 -0
- ads/hpo/stopping_criterion.py +75 -0
- ads/hpo/tuner_artifact.py +413 -0
- ads/hpo/utils.py +91 -0
- ads/hpo/validation.py +140 -0
- ads/hpo/visualization/__init__.py +5 -0
- ads/hpo/visualization/_contour.py +23 -0
- ads/hpo/visualization/_edf.py +20 -0
- ads/hpo/visualization/_intermediate_values.py +21 -0
- ads/hpo/visualization/_optimization_history.py +25 -0
- ads/hpo/visualization/_parallel_coordinate.py +169 -0
- ads/hpo/visualization/_param_importances.py +26 -0
- ads/jobs/__init__.py +53 -0
- ads/jobs/ads_job.py +663 -0
- ads/jobs/builders/__init__.py +5 -0
- ads/jobs/builders/base.py +156 -0
- ads/jobs/builders/infrastructure/__init__.py +6 -0
- ads/jobs/builders/infrastructure/base.py +165 -0
- ads/jobs/builders/infrastructure/dataflow.py +1252 -0
- ads/jobs/builders/infrastructure/dsc_job.py +1894 -0
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +1233 -0
- ads/jobs/builders/infrastructure/utils.py +65 -0
- ads/jobs/builders/runtimes/__init__.py +5 -0
- ads/jobs/builders/runtimes/artifact.py +338 -0
- ads/jobs/builders/runtimes/base.py +325 -0
- ads/jobs/builders/runtimes/container_runtime.py +242 -0
- ads/jobs/builders/runtimes/python_runtime.py +1016 -0
- ads/jobs/builders/runtimes/pytorch_runtime.py +204 -0
- ads/jobs/cli.py +104 -0
- ads/jobs/env_var_parser.py +131 -0
- ads/jobs/extension.py +160 -0
- ads/jobs/schema/__init__.py +5 -0
- ads/jobs/schema/infrastructure_schema.json +116 -0
- ads/jobs/schema/job_schema.json +42 -0
- ads/jobs/schema/runtime_schema.json +183 -0
- ads/jobs/schema/validator.py +141 -0
- ads/jobs/serializer.py +296 -0
- ads/jobs/templates/__init__.py +5 -0
- ads/jobs/templates/container.py +6 -0
- ads/jobs/templates/driver_notebook.py +177 -0
- ads/jobs/templates/driver_oci.py +500 -0
- ads/jobs/templates/driver_python.py +48 -0
- ads/jobs/templates/driver_pytorch.py +852 -0
- ads/jobs/templates/driver_utils.py +615 -0
- ads/jobs/templates/hostname_from_env.c +55 -0
- ads/jobs/templates/oci_metrics.py +181 -0
- ads/jobs/utils.py +104 -0
- ads/llm/__init__.py +28 -0
- ads/llm/autogen/__init__.py +2 -0
- ads/llm/autogen/constants.py +15 -0
- ads/llm/autogen/reports/__init__.py +2 -0
- ads/llm/autogen/reports/base.py +67 -0
- ads/llm/autogen/reports/data.py +103 -0
- ads/llm/autogen/reports/session.py +526 -0
- ads/llm/autogen/reports/templates/chat_box.html +13 -0
- ads/llm/autogen/reports/templates/chat_box_lt.html +5 -0
- ads/llm/autogen/reports/templates/chat_box_rt.html +6 -0
- ads/llm/autogen/reports/utils.py +56 -0
- ads/llm/autogen/v02/__init__.py +4 -0
- ads/llm/autogen/v02/client.py +295 -0
- ads/llm/autogen/v02/log_handlers/__init__.py +2 -0
- ads/llm/autogen/v02/log_handlers/oci_file_handler.py +83 -0
- ads/llm/autogen/v02/loggers/__init__.py +6 -0
- ads/llm/autogen/v02/loggers/metric_logger.py +320 -0
- ads/llm/autogen/v02/loggers/session_logger.py +580 -0
- ads/llm/autogen/v02/loggers/utils.py +86 -0
- ads/llm/autogen/v02/runtime_logging.py +163 -0
- ads/llm/chain.py +268 -0
- ads/llm/chat_template.py +31 -0
- ads/llm/deploy.py +63 -0
- ads/llm/guardrails/__init__.py +5 -0
- ads/llm/guardrails/base.py +442 -0
- ads/llm/guardrails/huggingface.py +44 -0
- ads/llm/langchain/__init__.py +5 -0
- ads/llm/langchain/plugins/__init__.py +5 -0
- ads/llm/langchain/plugins/chat_models/__init__.py +5 -0
- ads/llm/langchain/plugins/chat_models/oci_data_science.py +1027 -0
- ads/llm/langchain/plugins/embeddings/__init__.py +4 -0
- ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +184 -0
- ads/llm/langchain/plugins/llms/__init__.py +5 -0
- ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +979 -0
- ads/llm/requirements.txt +3 -0
- ads/llm/serialize.py +219 -0
- ads/llm/serializers/__init__.py +0 -0
- ads/llm/serializers/retrieval_qa.py +153 -0
- ads/llm/serializers/runnable_parallel.py +27 -0
- ads/llm/templates/score_chain.jinja2 +155 -0
- ads/llm/templates/tool_chat_template_hermes.jinja +130 -0
- ads/llm/templates/tool_chat_template_mistral_parallel.jinja +94 -0
- ads/model/__init__.py +52 -0
- ads/model/artifact.py +573 -0
- ads/model/artifact_downloader.py +254 -0
- ads/model/artifact_uploader.py +267 -0
- ads/model/base_properties.py +238 -0
- ads/model/common/.model-ignore +66 -0
- ads/model/common/__init__.py +5 -0
- ads/model/common/utils.py +142 -0
- ads/model/datascience_model.py +2635 -0
- ads/model/deployment/__init__.py +20 -0
- ads/model/deployment/common/__init__.py +5 -0
- ads/model/deployment/common/utils.py +308 -0
- ads/model/deployment/model_deployer.py +466 -0
- ads/model/deployment/model_deployment.py +1846 -0
- ads/model/deployment/model_deployment_infrastructure.py +671 -0
- ads/model/deployment/model_deployment_properties.py +493 -0
- ads/model/deployment/model_deployment_runtime.py +838 -0
- ads/model/extractor/__init__.py +5 -0
- ads/model/extractor/automl_extractor.py +74 -0
- ads/model/extractor/embedding_onnx_extractor.py +80 -0
- ads/model/extractor/huggingface_extractor.py +88 -0
- ads/model/extractor/keras_extractor.py +84 -0
- ads/model/extractor/lightgbm_extractor.py +93 -0
- ads/model/extractor/model_info_extractor.py +114 -0
- ads/model/extractor/model_info_extractor_factory.py +105 -0
- ads/model/extractor/pytorch_extractor.py +87 -0
- ads/model/extractor/sklearn_extractor.py +112 -0
- ads/model/extractor/spark_extractor.py +89 -0
- ads/model/extractor/tensorflow_extractor.py +85 -0
- ads/model/extractor/xgboost_extractor.py +94 -0
- ads/model/framework/__init__.py +5 -0
- ads/model/framework/automl_model.py +178 -0
- ads/model/framework/embedding_onnx_model.py +438 -0
- ads/model/framework/huggingface_model.py +399 -0
- ads/model/framework/lightgbm_model.py +266 -0
- ads/model/framework/pytorch_model.py +266 -0
- ads/model/framework/sklearn_model.py +250 -0
- ads/model/framework/spark_model.py +326 -0
- ads/model/framework/tensorflow_model.py +254 -0
- ads/model/framework/xgboost_model.py +258 -0
- ads/model/generic_model.py +3518 -0
- ads/model/model_artifact_boilerplate/README.md +381 -0
- ads/model/model_artifact_boilerplate/__init__.py +5 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +5 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +427 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +2 -0
- ads/model/model_artifact_boilerplate/runtime.yaml +7 -0
- ads/model/model_artifact_boilerplate/score.py +61 -0
- ads/model/model_file_description_schema.json +68 -0
- ads/model/model_introspect.py +331 -0
- ads/model/model_metadata.py +1810 -0
- ads/model/model_metadata_mixin.py +460 -0
- ads/model/model_properties.py +63 -0
- ads/model/model_version_set.py +739 -0
- ads/model/runtime/__init__.py +5 -0
- ads/model/runtime/env_info.py +306 -0
- ads/model/runtime/model_deployment_details.py +37 -0
- ads/model/runtime/model_provenance_details.py +58 -0
- ads/model/runtime/runtime_info.py +81 -0
- ads/model/runtime/schemas/inference_env_info_schema.yaml +16 -0
- ads/model/runtime/schemas/model_provenance_schema.yaml +36 -0
- ads/model/runtime/schemas/training_env_info_schema.yaml +16 -0
- ads/model/runtime/utils.py +201 -0
- ads/model/serde/__init__.py +5 -0
- ads/model/serde/common.py +40 -0
- ads/model/serde/model_input.py +547 -0
- ads/model/serde/model_serializer.py +1184 -0
- ads/model/service/__init__.py +5 -0
- ads/model/service/oci_datascience_model.py +1076 -0
- ads/model/service/oci_datascience_model_deployment.py +500 -0
- ads/model/service/oci_datascience_model_version_set.py +176 -0
- ads/model/transformer/__init__.py +5 -0
- ads/model/transformer/onnx_transformer.py +324 -0
- ads/mysqldb/__init__.py +5 -0
- ads/mysqldb/mysql_db.py +227 -0
- ads/opctl/__init__.py +18 -0
- ads/opctl/anomaly_detection.py +11 -0
- ads/opctl/backend/__init__.py +5 -0
- ads/opctl/backend/ads_dataflow.py +353 -0
- ads/opctl/backend/ads_ml_job.py +710 -0
- ads/opctl/backend/ads_ml_pipeline.py +164 -0
- ads/opctl/backend/ads_model_deployment.py +209 -0
- ads/opctl/backend/base.py +146 -0
- ads/opctl/backend/local.py +1053 -0
- ads/opctl/backend/marketplace/__init__.py +9 -0
- ads/opctl/backend/marketplace/helm_helper.py +173 -0
- ads/opctl/backend/marketplace/local_marketplace.py +271 -0
- ads/opctl/backend/marketplace/marketplace_backend_runner.py +71 -0
- ads/opctl/backend/marketplace/marketplace_operator_interface.py +44 -0
- ads/opctl/backend/marketplace/marketplace_operator_runner.py +24 -0
- ads/opctl/backend/marketplace/marketplace_utils.py +212 -0
- ads/opctl/backend/marketplace/models/__init__.py +5 -0
- ads/opctl/backend/marketplace/models/bearer_token.py +94 -0
- ads/opctl/backend/marketplace/models/marketplace_type.py +70 -0
- ads/opctl/backend/marketplace/models/ocir_details.py +56 -0
- ads/opctl/backend/marketplace/prerequisite_checker.py +238 -0
- ads/opctl/cli.py +707 -0
- ads/opctl/cmds.py +869 -0
- ads/opctl/conda/__init__.py +5 -0
- ads/opctl/conda/cli.py +193 -0
- ads/opctl/conda/cmds.py +749 -0
- ads/opctl/conda/config.yaml +34 -0
- ads/opctl/conda/manifest_template.yaml +13 -0
- ads/opctl/conda/multipart_uploader.py +188 -0
- ads/opctl/conda/pack.py +89 -0
- ads/opctl/config/__init__.py +5 -0
- ads/opctl/config/base.py +57 -0
- ads/opctl/config/diagnostics/__init__.py +5 -0
- ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +62 -0
- ads/opctl/config/merger.py +255 -0
- ads/opctl/config/resolver.py +297 -0
- ads/opctl/config/utils.py +79 -0
- ads/opctl/config/validator.py +17 -0
- ads/opctl/config/versioner.py +68 -0
- ads/opctl/config/yaml_parsers/__init__.py +7 -0
- ads/opctl/config/yaml_parsers/base.py +58 -0
- ads/opctl/config/yaml_parsers/distributed/__init__.py +7 -0
- ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +201 -0
- ads/opctl/constants.py +66 -0
- ads/opctl/decorator/__init__.py +5 -0
- ads/opctl/decorator/common.py +129 -0
- ads/opctl/diagnostics/__init__.py +5 -0
- ads/opctl/diagnostics/__main__.py +25 -0
- ads/opctl/diagnostics/check_distributed_job_requirements.py +212 -0
- ads/opctl/diagnostics/check_requirements.py +144 -0
- ads/opctl/diagnostics/requirement_exception.py +9 -0
- ads/opctl/distributed/README.md +109 -0
- ads/opctl/distributed/__init__.py +5 -0
- ads/opctl/distributed/certificates.py +32 -0
- ads/opctl/distributed/cli.py +207 -0
- ads/opctl/distributed/cmds.py +731 -0
- ads/opctl/distributed/common/__init__.py +5 -0
- ads/opctl/distributed/common/abstract_cluster_provider.py +449 -0
- ads/opctl/distributed/common/abstract_framework_spec_builder.py +88 -0
- ads/opctl/distributed/common/cluster_config_helper.py +103 -0
- ads/opctl/distributed/common/cluster_provider_factory.py +21 -0
- ads/opctl/distributed/common/cluster_runner.py +54 -0
- ads/opctl/distributed/common/framework_factory.py +29 -0
- ads/opctl/docker/Dockerfile.job +103 -0
- ads/opctl/docker/Dockerfile.job.arm +107 -0
- ads/opctl/docker/Dockerfile.job.gpu +175 -0
- ads/opctl/docker/base-env.yaml +13 -0
- ads/opctl/docker/cuda.repo +6 -0
- ads/opctl/docker/operator/.dockerignore +0 -0
- ads/opctl/docker/operator/Dockerfile +41 -0
- ads/opctl/docker/operator/Dockerfile.gpu +85 -0
- ads/opctl/docker/operator/cuda.repo +6 -0
- ads/opctl/docker/operator/environment.yaml +8 -0
- ads/opctl/forecast.py +11 -0
- ads/opctl/index.yaml +3 -0
- ads/opctl/model/__init__.py +5 -0
- ads/opctl/model/cli.py +65 -0
- ads/opctl/model/cmds.py +73 -0
- ads/opctl/operator/README.md +4 -0
- ads/opctl/operator/__init__.py +31 -0
- ads/opctl/operator/cli.py +344 -0
- ads/opctl/operator/cmd.py +596 -0
- ads/opctl/operator/common/__init__.py +5 -0
- ads/opctl/operator/common/backend_factory.py +460 -0
- ads/opctl/operator/common/const.py +27 -0
- ads/opctl/operator/common/data/synthetic.csv +16001 -0
- ads/opctl/operator/common/dictionary_merger.py +148 -0
- ads/opctl/operator/common/errors.py +42 -0
- ads/opctl/operator/common/operator_config.py +99 -0
- ads/opctl/operator/common/operator_loader.py +811 -0
- ads/opctl/operator/common/operator_schema.yaml +130 -0
- ads/opctl/operator/common/operator_yaml_generator.py +152 -0
- ads/opctl/operator/common/utils.py +208 -0
- ads/opctl/operator/lowcode/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/MLoperator +16 -0
- ads/opctl/operator/lowcode/anomaly/README.md +207 -0
- ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/__main__.py +103 -0
- ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
- ads/opctl/operator/lowcode/anomaly/const.py +167 -0
- ads/opctl/operator/lowcode/anomaly/environment.yaml +10 -0
- ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +146 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +162 -0
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +99 -0
- ads/opctl/operator/lowcode/anomaly/model/autots.py +115 -0
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +404 -0
- ads/opctl/operator/lowcode/anomaly/model/factory.py +110 -0
- ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +78 -0
- ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +78 -0
- ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +120 -0
- ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
- ads/opctl/operator/lowcode/anomaly/operator_config.py +127 -0
- ads/opctl/operator/lowcode/anomaly/schema.yaml +401 -0
- ads/opctl/operator/lowcode/anomaly/utils.py +88 -0
- ads/opctl/operator/lowcode/common/__init__.py +5 -0
- ads/opctl/operator/lowcode/common/const.py +10 -0
- ads/opctl/operator/lowcode/common/data.py +116 -0
- ads/opctl/operator/lowcode/common/errors.py +47 -0
- ads/opctl/operator/lowcode/common/transformations.py +296 -0
- ads/opctl/operator/lowcode/common/utils.py +384 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +13 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/README.md +30 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +5 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +116 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +85 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/const.py +15 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +4 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +32 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +43 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +120 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +34 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +386 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +160 -0
- ads/opctl/operator/lowcode/forecast/MLoperator +25 -0
- ads/opctl/operator/lowcode/forecast/README.md +209 -0
- ads/opctl/operator/lowcode/forecast/__init__.py +5 -0
- ads/opctl/operator/lowcode/forecast/__main__.py +89 -0
- ads/opctl/operator/lowcode/forecast/cmd.py +40 -0
- ads/opctl/operator/lowcode/forecast/const.py +92 -0
- ads/opctl/operator/lowcode/forecast/environment.yaml +20 -0
- ads/opctl/operator/lowcode/forecast/errors.py +26 -0
- ads/opctl/operator/lowcode/forecast/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/forecast/model/arima.py +279 -0
- ads/opctl/operator/lowcode/forecast/model/automlx.py +553 -0
- ads/opctl/operator/lowcode/forecast/model/autots.py +312 -0
- ads/opctl/operator/lowcode/forecast/model/base_model.py +875 -0
- ads/opctl/operator/lowcode/forecast/model/factory.py +106 -0
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +492 -0
- ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +243 -0
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +482 -0
- ads/opctl/operator/lowcode/forecast/model/prophet.py +450 -0
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +244 -0
- ads/opctl/operator/lowcode/forecast/operator_config.py +234 -0
- ads/opctl/operator/lowcode/forecast/schema.yaml +506 -0
- ads/opctl/operator/lowcode/forecast/utils.py +397 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +7 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +285 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/score.py +246 -0
- ads/opctl/operator/lowcode/pii/MLoperator +17 -0
- ads/opctl/operator/lowcode/pii/README.md +208 -0
- ads/opctl/operator/lowcode/pii/__init__.py +5 -0
- ads/opctl/operator/lowcode/pii/__main__.py +78 -0
- ads/opctl/operator/lowcode/pii/cmd.py +39 -0
- ads/opctl/operator/lowcode/pii/constant.py +84 -0
- ads/opctl/operator/lowcode/pii/environment.yaml +17 -0
- ads/opctl/operator/lowcode/pii/errors.py +27 -0
- ads/opctl/operator/lowcode/pii/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/pii/model/factory.py +82 -0
- ads/opctl/operator/lowcode/pii/model/guardrails.py +167 -0
- ads/opctl/operator/lowcode/pii/model/pii.py +145 -0
- ads/opctl/operator/lowcode/pii/model/processor/__init__.py +34 -0
- ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +34 -0
- ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +35 -0
- ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +225 -0
- ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +73 -0
- ads/opctl/operator/lowcode/pii/model/processor/remover.py +26 -0
- ads/opctl/operator/lowcode/pii/model/report.py +487 -0
- ads/opctl/operator/lowcode/pii/operator_config.py +95 -0
- ads/opctl/operator/lowcode/pii/schema.yaml +108 -0
- ads/opctl/operator/lowcode/pii/utils.py +43 -0
- ads/opctl/operator/lowcode/recommender/MLoperator +16 -0
- ads/opctl/operator/lowcode/recommender/README.md +206 -0
- ads/opctl/operator/lowcode/recommender/__init__.py +5 -0
- ads/opctl/operator/lowcode/recommender/__main__.py +82 -0
- ads/opctl/operator/lowcode/recommender/cmd.py +33 -0
- ads/opctl/operator/lowcode/recommender/constant.py +30 -0
- ads/opctl/operator/lowcode/recommender/environment.yaml +11 -0
- ads/opctl/operator/lowcode/recommender/model/base_model.py +212 -0
- ads/opctl/operator/lowcode/recommender/model/factory.py +56 -0
- ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +25 -0
- ads/opctl/operator/lowcode/recommender/model/svd.py +106 -0
- ads/opctl/operator/lowcode/recommender/operator_config.py +81 -0
- ads/opctl/operator/lowcode/recommender/schema.yaml +265 -0
- ads/opctl/operator/lowcode/recommender/utils.py +13 -0
- ads/opctl/operator/runtime/__init__.py +5 -0
- ads/opctl/operator/runtime/const.py +17 -0
- ads/opctl/operator/runtime/container_runtime_schema.yaml +50 -0
- ads/opctl/operator/runtime/marketplace_runtime.py +50 -0
- ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +21 -0
- ads/opctl/operator/runtime/python_runtime_schema.yaml +21 -0
- ads/opctl/operator/runtime/runtime.py +115 -0
- ads/opctl/schema.yaml.yml +36 -0
- ads/opctl/script.py +40 -0
- ads/opctl/spark/__init__.py +5 -0
- ads/opctl/spark/cli.py +43 -0
- ads/opctl/spark/cmds.py +147 -0
- ads/opctl/templates/diagnostic_report_template.jinja2 +102 -0
- ads/opctl/utils.py +344 -0
- ads/oracledb/__init__.py +5 -0
- ads/oracledb/oracle_db.py +346 -0
- ads/pipeline/__init__.py +39 -0
- ads/pipeline/ads_pipeline.py +2279 -0
- ads/pipeline/ads_pipeline_run.py +772 -0
- ads/pipeline/ads_pipeline_step.py +605 -0
- ads/pipeline/builders/__init__.py +5 -0
- ads/pipeline/builders/infrastructure/__init__.py +5 -0
- ads/pipeline/builders/infrastructure/custom_script.py +32 -0
- ads/pipeline/cli.py +119 -0
- ads/pipeline/extension.py +291 -0
- ads/pipeline/schema/__init__.py +5 -0
- ads/pipeline/schema/cs_step_schema.json +35 -0
- ads/pipeline/schema/ml_step_schema.json +31 -0
- ads/pipeline/schema/pipeline_schema.json +71 -0
- ads/pipeline/visualizer/__init__.py +5 -0
- ads/pipeline/visualizer/base.py +570 -0
- ads/pipeline/visualizer/graph_renderer.py +272 -0
- ads/pipeline/visualizer/text_renderer.py +84 -0
- ads/secrets/__init__.py +11 -0
- ads/secrets/adb.py +386 -0
- ads/secrets/auth_token.py +86 -0
- ads/secrets/big_data_service.py +365 -0
- ads/secrets/mysqldb.py +149 -0
- ads/secrets/oracledb.py +160 -0
- ads/secrets/secrets.py +407 -0
- ads/telemetry/__init__.py +7 -0
- ads/telemetry/base.py +69 -0
- ads/telemetry/client.py +122 -0
- ads/telemetry/telemetry.py +257 -0
- ads/templates/dataflow_pyspark.jinja2 +13 -0
- ads/templates/dataflow_sparksql.jinja2 +22 -0
- ads/templates/func.jinja2 +20 -0
- ads/templates/schemas/openapi.json +1740 -0
- ads/templates/score-pkl.jinja2 +173 -0
- ads/templates/score.jinja2 +322 -0
- ads/templates/score_embedding_onnx.jinja2 +202 -0
- ads/templates/score_generic.jinja2 +165 -0
- ads/templates/score_huggingface_pipeline.jinja2 +217 -0
- ads/templates/score_lightgbm.jinja2 +185 -0
- ads/templates/score_onnx.jinja2 +407 -0
- ads/templates/score_onnx_new.jinja2 +473 -0
- ads/templates/score_oracle_automl.jinja2 +185 -0
- ads/templates/score_pyspark.jinja2 +154 -0
- ads/templates/score_pytorch.jinja2 +219 -0
- ads/templates/score_scikit-learn.jinja2 +184 -0
- ads/templates/score_tensorflow.jinja2 +184 -0
- ads/templates/score_xgboost.jinja2 +178 -0
- ads/text_dataset/__init__.py +5 -0
- ads/text_dataset/backends.py +211 -0
- ads/text_dataset/dataset.py +445 -0
- ads/text_dataset/extractor.py +207 -0
- ads/text_dataset/options.py +53 -0
- ads/text_dataset/udfs.py +22 -0
- ads/text_dataset/utils.py +49 -0
- ads/type_discovery/__init__.py +9 -0
- ads/type_discovery/abstract_detector.py +21 -0
- ads/type_discovery/constant_detector.py +41 -0
- ads/type_discovery/continuous_detector.py +54 -0
- ads/type_discovery/credit_card_detector.py +99 -0
- ads/type_discovery/datetime_detector.py +92 -0
- ads/type_discovery/discrete_detector.py +118 -0
- ads/type_discovery/document_detector.py +146 -0
- ads/type_discovery/ip_detector.py +68 -0
- ads/type_discovery/latlon_detector.py +90 -0
- ads/type_discovery/phone_number_detector.py +63 -0
- ads/type_discovery/type_discovery_driver.py +87 -0
- ads/type_discovery/typed_feature.py +594 -0
- ads/type_discovery/unknown_detector.py +41 -0
- ads/type_discovery/zipcode_detector.py +48 -0
- ads/vault/__init__.py +7 -0
- ads/vault/vault.py +237 -0
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/METADATA +150 -149
- oracle_ads-2.13.10.dist-info/RECORD +858 -0
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/WHEEL +1 -2
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/entry_points.txt +2 -1
- oracle_ads-2.13.9rc0.dist-info/RECORD +0 -9
- oracle_ads-2.13.9rc0.dist-info/top_level.txt +0 -1
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,296 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
|
3
|
+
# Copyright (c) 2023, 2025 Oracle and/or its affiliates.
|
4
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
5
|
+
|
6
|
+
from abc import ABC
|
7
|
+
|
8
|
+
import numpy as np
|
9
|
+
import pandas as pd
|
10
|
+
|
11
|
+
from ads.opctl import logger
|
12
|
+
from ads.opctl.operator.lowcode.common.const import DataColumns
|
13
|
+
from ads.opctl.operator.lowcode.common.errors import (
|
14
|
+
DataMismatchError,
|
15
|
+
InvalidParameterError,
|
16
|
+
)
|
17
|
+
from ads.opctl.operator.lowcode.common.utils import merge_category_columns
|
18
|
+
from ads.opctl.operator.lowcode.forecast.operator_config import ForecastOperatorSpec
|
19
|
+
|
20
|
+
|
21
|
+
class Transformations(ABC):
    """A class which implements transformation for forecast operator"""

    def __init__(self, dataset_info, name="historical_data"):
        """
        Initializes the transformation.

        Parameters
        ----------
        dataset_info : operator spec object
            Read for ``target_category_columns``, ``target_column``,
            ``datetime_column`` (its ``name`` and ``format``) and
            ``preprocessing``.
        name : str
            Logical name of the dataset being transformed. ``run`` branches on
            the values "historical_data", "additional_data" and "input_data".
        """
        self.name = name
        self.dataset_info = dataset_info
        self.target_category_columns = dataset_info.target_category_columns
        self.target_column_name = dataset_info.target_column
        self.raw_column_names = None
        self.dt_column_name = (
            dataset_info.datetime_column.name if dataset_info.datetime_column else None
        )
        self.dt_column_format = (
            dataset_info.datetime_column.format
            if dataset_info.datetime_column
            else None
        )
        self.preprocessing = dataset_info.preprocessing
        # Populated by `_set_series_id_column`; initialised here so that
        # `get_target_category_columns_map` is safe to call before `run`.
        self._target_category_columns_map = {}

    def run(self, data):
        """
        The function runs all the transformation in a particular order.

        Returns
        -------
        A new Pandas DataFrame with treated / transformed target values. Specifically:
        - Data will be in a multiIndex with Datetime always first (level 0)
        - whether 0, 1 or 2+, all target_category_columns will be merged into a single index column: Series
        - All datetime columns will be formatted as such
        - all data will be imputed (unless preprocessing disabled)
        - all leading/trailing whitespace will be removed from string values
        - the data will be sorted by Datetime then Series

        """
        clean_df = self._remove_trailing_whitespace(data)
        if isinstance(self.dataset_info, ForecastOperatorSpec):
            clean_df = self._clean_column_names(clean_df)
        if self.name == "historical_data":
            self._check_historical_dataset(clean_df)
        clean_df = self._set_series_id_column(clean_df)
        if self.dt_column_name:
            clean_df = self._format_datetime_col(clean_df)
        clean_df = self._set_multi_index(clean_df)
        # Without a datetime column no interpolation is attempted: NaNs are zero-filled.
        clean_df = self._fill_na(clean_df) if not self.dt_column_name else clean_df

        if self.preprocessing and self.preprocessing.enabled:
            if self.name == "historical_data":
                if self.preprocessing.steps.missing_value_imputation:
                    try:
                        clean_df = self._missing_value_imputation_hist(clean_df)
                    except Exception as e:
                        # Best effort: keep going with the un-imputed data, but make
                        # the failure visible instead of hiding it at debug level.
                        logger.warning(f"Missing value imputation failed with {e.args}")
                else:
                    logger.info(
                        "Skipping missing value imputation because it is disabled"
                    )
                if self.preprocessing.steps.outlier_treatment:
                    try:
                        clean_df = self._outlier_treatment(clean_df)
                    except Exception as e:
                        logger.warning(f"Outlier Treatment failed with {e.args}")
                else:
                    logger.info("Skipping outlier treatment because it is disabled")
            elif self.name == "additional_data":
                clean_df = self._missing_value_imputation_add(clean_df)
            elif (
                self.name == "input_data"
                and self.preprocessing.steps.missing_value_imputation
            ):
                clean_df = self._fill_na(clean_df)
        else:
            logger.info(
                "Skipping all preprocessing steps because preprocessing is disabled"
            )
        return clean_df

    def _remove_trailing_whitespace(self, df):
        """
        Strip leading and trailing whitespace from every string value.

        Only ``str`` values are touched. Non-string values that live in an
        object column (numbers, None, NaN, ...) are passed through unchanged
        rather than being turned into NaN by the ``.str`` accessor.

        Parameters
        ----------
        df : The Pandas DataFrame.

        Returns
        -------
        A new Pandas DataFrame with stripped string values.
        """

        def _strip_column(col):
            if col.dtype == "object":
                # Element-wise so that mixed-type columns keep their non-string values.
                return pd.Series(
                    [v.strip() if isinstance(v, str) else v for v in col],
                    index=col.index,
                    name=col.name,
                    dtype=object,
                )
            if pd.api.types.is_string_dtype(col.dtype):
                # Dedicated string dtypes hold only strings / missing values.
                return col.str.strip()
            return col

        return df.apply(_strip_column)

    def _clean_column_names(self, df):
        """
        Remove all whitespaces from column names in a DataFrame and store the original names.

        The configured target, datetime and category column names are remapped
        to their cleaned form as well. The DataFrame's columns are renamed in place.

        Parameters:
            df (pd.DataFrame): The DataFrame whose column names need to be cleaned.

        Returns:
            pd.DataFrame: The DataFrame with cleaned column names.
        """

        # Only string labels can contain spaces; other label types are left alone.
        self.raw_column_names = {
            col: col.replace(" ", "")
            for col in df.columns
            if isinstance(col, str) and " " in col
        }
        df.columns = [self.raw_column_names.get(col, col) for col in df.columns]

        if self.target_column_name:
            self.target_column_name = self.raw_column_names.get(
                self.target_column_name, self.target_column_name
            )
        # The datetime column must be remapped regardless of whether a target
        # column is configured.
        self.dt_column_name = self.raw_column_names.get(
            self.dt_column_name, self.dt_column_name
        )

        if self.target_category_columns:
            self.target_category_columns = [
                self.raw_column_names.get(col, col)
                for col in self.target_category_columns
            ]
        return df

    def _set_series_id_column(self, df):
        """
        Add the merged series-id column and record how each id maps back to
        the original category column values.

        Parameters
        ----------
        df : The Pandas DataFrame. A column named ``DataColumns.Series`` is added to it.

        Returns
        -------
        The DataFrame with the series column added and the original category
        columns dropped (unless the only category column is the series column itself).
        """
        self._target_category_columns_map = {}
        if not self.target_category_columns:
            df[DataColumns.Series] = "Series 1"
            return df

        df[DataColumns.Series] = merge_category_columns(
            df, self.target_category_columns
        )
        for value in df[DataColumns.Series].unique().tolist():
            # First row of each series gives the category values for that id.
            self._target_category_columns_map[value] = (
                df[df[DataColumns.Series] == value][self.target_category_columns]
                .drop_duplicates()
                .iloc[0]
                .to_dict()
            )

        if self.target_category_columns != [DataColumns.Series]:
            df = df.drop(self.target_category_columns, axis=1)
        return df

    def _format_datetime_col(self, df):
        """
        Parse the datetime column using the configured format.

        Raises
        ------
        InvalidParameterError
            If the column cannot be parsed as datetimes.
        """
        try:
            df[self.dt_column_name] = pd.to_datetime(
                df[self.dt_column_name], format=self.dt_column_format
            )
        except Exception as ee:
            raise InvalidParameterError(
                f"Unable to determine the datetime type for column: {self.dt_column_name} in dataset: {self.name}. Please specify the format explicitly. (For example adding 'format: %d/%m/%Y' underneath 'name: {self.dt_column_name}' in the datetime_column section of the yaml file if you haven't already. For reference, here is the first datetime given: {df[self.dt_column_name].values[0]}"
            ) from ee
        return df

    def _set_multi_index(self, df):
        """
        Function sets the (datetime, series) multi-index and sorts by it.

        When no datetime column is configured, the existing index is paired
        with the series column instead and no sorting is performed.

        Parameters
        ----------
        df : The Pandas DataFrame.

        Returns
        -------
        A new Pandas DataFrame with sorted dates for each series
        """
        if self.dt_column_name:
            df = df.set_index([self.dt_column_name, DataColumns.Series])
            return df.sort_values(
                [self.dt_column_name, DataColumns.Series], ascending=True
            )
        return df.set_index([df.index, DataColumns.Series])

    def _missing_value_imputation_hist(self, df):
        """
        Function fills missing values in the target column using linear
        interpolation, computed separately for each series.

        Parameters
        ----------
        df : The Pandas DataFrame. The target column is overwritten in place.

        Returns
        -------
        The Pandas DataFrame with the target column interpolated.
        """
        # limit_direction="both" also fills leading and trailing gaps.
        df[self.target_column_name] = (
            df[self.target_column_name]
            .groupby(DataColumns.Series)
            .transform(lambda x: x.interpolate(limit_direction="both"))
        )
        return df

    def _missing_value_imputation_add(self, df):
        """
        Function fills missing values with zero

        Parameters
        ----------
        df : The Pandas DataFrame.

        Returns
        -------
        A new Pandas DataFrame without missing values.
        """
        return df.fillna(0)

    def _outlier_treatment(self, df):
        """
        Outlier treatment hook; currently a no-op.

        NOTE(review): the original implementation returned ``df`` immediately,
        leaving the z-score logic unreachable. That behaviour is preserved
        here. The (repaired) logic lives in `_replace_outliers_with_median`
        so it can be re-enabled deliberately.

        Parameters
        ----------
        df : The Pandas DataFrame.

        Returns
        -------
        The same DataFrame, unchanged.
        """
        return df

    def _replace_outliers_with_median(self, df):
        """
        Replace target values whose per-series z-score exceeds 3 (in absolute
        value) with the median of their series.

        Not called anywhere at present; see `_outlier_treatment`.

        Parameters
        ----------
        df : The Pandas DataFrame. The target column is modified in place.

        Returns
        -------
        The Pandas DataFrame with treated outliers.
        """
        target = df[self.target_column_name]
        z_score = target.groupby(DataColumns.Series).transform(
            lambda x: (x - x.mean()) / x.std()
        )
        outliers_mask = z_score.abs() > 3

        # Medians may be fractional, so integer targets are widened first.
        if pd.api.types.is_integer_dtype(target):
            df[self.target_column_name] = target.astype(float)

        medians = (
            df[self.target_column_name]
            .groupby(DataColumns.Series)
            .transform(lambda x: np.median(x))
        )
        df.loc[outliers_mask, self.target_column_name] = medians[outliers_mask]
        return df

    def _check_historical_dataset(self, df):
        """
        Verify that the dataset contains exactly the expected columns.

        Expected columns are the target column, the datetime column, any
        target category columns, plus every column that was renamed by
        `_clean_column_names`.

        Raises
        ------
        DataMismatchError
            If the set of columns differs from the expected set.
        """
        expected_names = [self.target_column_name, self.dt_column_name] + (
            self.target_category_columns if self.target_category_columns else []
        )

        if self.raw_column_names:
            expected_names.extend(list(self.raw_column_names.values()))

        if set(df.columns) != set(expected_names):
            raise DataMismatchError(
                f"Expected {self.name} to have columns: {expected_names}, but instead found column names: {df.columns}. Is the {self.name} path correct?"
            )

    def get_target_category_columns_map(self):
        """
        Map between merged target category column values and target category column and its value
        If target category columns are PPG_Code, Class, Num
        Merged target category column values are Product Category 1__A__1, Product Category 2__A__2
        Then target_category_columns_map would be
        {
            "Product Category 1__A__1": {
                "PPG_Code": "Product Category 1",
                "Class": "A",
                "Num": 1
            },
            "Product Category 2__A__2": {
                "PPG_Code": "Product Category 2",
                "Class": "A",
                "Num": 2
            },
        }

        The map is empty until `_set_series_id_column` has run, and stays
        empty when no target category columns are configured.
        """
        return self._target_category_columns_map

    def _fill_na(self, df: pd.DataFrame, na_value=0) -> pd.DataFrame:
        """Fill nans in dataframe"""
        return df.fillna(value=na_value)
|
@@ -0,0 +1,384 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
|
3
|
+
# Copyright (c) 2024, 2025 Oracle and/or its affiliates.
|
4
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
5
|
+
|
6
|
+
import json
|
7
|
+
import logging
|
8
|
+
import os
|
9
|
+
import shutil
|
10
|
+
import sys
|
11
|
+
import tempfile
|
12
|
+
from typing import List, Union
|
13
|
+
|
14
|
+
import cloudpickle
|
15
|
+
import fsspec
|
16
|
+
import oracledb
|
17
|
+
import pandas as pd
|
18
|
+
|
19
|
+
from ads.common.object_storage_details import ObjectStorageDetails
|
20
|
+
from ads.opctl import logger
|
21
|
+
from ads.opctl.operator.common.operator_config import OutputDirectory
|
22
|
+
from ads.opctl.operator.lowcode.common.errors import (
|
23
|
+
InvalidParameterError,
|
24
|
+
)
|
25
|
+
from ads.secrets import ADBSecretKeeper
|
26
|
+
|
27
|
+
|
28
|
+
def call_pandas_fsspec(pd_fn, filename, storage_options, **kwargs):
    """
    Invoke a pandas reader/writer, passing ``storage_options`` only for
    protocols other than local files and http(s).

    Parameters
    ----------
    pd_fn : callable
        Called as ``pd_fn(filename, **kwargs)``, with an extra
        ``storage_options=`` keyword for non-local, non-http(s) paths.
    filename : str
        Path or URL; its protocol decides which call form is used.
    storage_options : dict or None
        When falsy, falls back to ``default_signer()`` for OCI paths and to an
        empty dict otherwise.

    Returns
    -------
    Whatever ``pd_fn`` returns.
    """
    protocol = fsspec.utils.get_protocol(filename)
    if protocol in ("file", "http", "https"):
        return pd_fn(filename, **kwargs)

    if not storage_options:
        if ObjectStorageDetails.is_oci_path(filename):
            storage_options = default_signer()
        else:
            storage_options = {}

    return pd_fn(filename, storage_options=storage_options, **kwargs)
|
39
|
+
|
40
|
+
|
41
|
+
def load_data(data_spec, storage_options=None, **kwargs):
    """
    Load a dataset described by ``data_spec`` into a pandas DataFrame.

    Sources are tried in this order: in-memory ``data``, then ``url``
    (file / object storage), then a database described by ``connect_args``
    (optionally completed from a vault secret).

    Parameters
    ----------
    data_spec : object
        Read for ``url``, ``data``, ``format``, ``columns``, ``connect_args``,
        ``sql``, ``table_name``, ``limit`` and ``vault_secret_id``.
    storage_options : dict, optional
        Passed to the pandas reader for remote files. When falsy,
        ``default_signer()`` is used for OCI paths and ``{}`` otherwise.
    **kwargs
        Accepted for interface compatibility; not used.

    Returns
    -------
    The loaded data, restricted to ``columns`` and truncated to ``limit`` rows
    when those are set.

    Raises
    ------
    InvalidParameterError
        If ``data_spec`` is None, the file format is unsupported, neither
        ``sql`` nor ``table_name`` accompanies ``connect_args``, or no source
        is specified at all.
    Exception
        If database credentials cannot be retrieved from the vault.
    """
    if data_spec is None:
        raise InvalidParameterError("No details provided for this data source.")
    filename = data_spec.url
    data = data_spec.data
    format = data_spec.format
    columns = data_spec.columns
    connect_args = data_spec.connect_args
    sql = data_spec.sql
    table_name = data_spec.table_name
    limit = data_spec.limit
    vault_secret_id = data_spec.vault_secret_id
    if vault_secret_id is not None and connect_args is None:
        connect_args = {}

    if data is not None:
        if format == "spark":
            data = data.toPandas()
    elif filename is not None:
        # Resolved only here: the signer is irrelevant for in-memory and DB sources.
        storage_options = storage_options or (
            default_signer() if ObjectStorageDetails.is_oci_path(filename) else {}
        )
        if not format:
            _, format = os.path.splitext(filename)
            format = format[1:]
        if format in ["json", "clipboard", "excel", "csv", "feather", "hdf", "parquet"]:
            read_fn = getattr(pd, f"read_{format}")
            data = call_pandas_fsspec(
                read_fn, filename, storage_options=storage_options
            )
        elif format in ["tsv"]:
            data = call_pandas_fsspec(
                pd.read_csv, filename, storage_options=storage_options, sep="\t"
            )
        else:
            raise InvalidParameterError(
                f"The format {format} is not currently supported for reading data. Please reformat the data source: {filename} ."
            )
    elif connect_args is not None:
        # Work on a copy: secrets and the temporary wallet path added below
        # must not leak back into the caller's spec (the wallet directory is
        # deleted when this block exits).
        connect_args = dict(connect_args)
        with tempfile.TemporaryDirectory() as temp_dir:
            if vault_secret_id is not None:
                try:
                    with ADBSecretKeeper.load_secret(
                        vault_secret_id, wallet_dir=temp_dir
                    ) as adwsecret:
                        # Explicit connect_args always win over vault values.
                        if (
                            "wallet_location" in adwsecret
                            and "wallet_location" not in connect_args
                        ):
                            shutil.unpack_archive(
                                adwsecret["wallet_location"], temp_dir
                            )
                            connect_args["wallet_location"] = temp_dir
                        if "user_name" in adwsecret and "user" not in connect_args:
                            connect_args["user"] = adwsecret["user_name"]
                        if "password" in adwsecret and "password" not in connect_args:
                            connect_args["password"] = adwsecret["password"]
                        if (
                            "service_name" in adwsecret
                            and "service_name" not in connect_args
                        ):
                            connect_args["service_name"] = adwsecret["service_name"]

                except Exception as e:
                    raise Exception(
                        f"Could not retrieve database credentials from vault {vault_secret_id}: {e}"
                    ) from e

            con = oracledb.connect(**connect_args)
            try:
                if table_name is not None:
                    # NOTE(security): table_name is interpolated into the SQL text
                    # (identifiers cannot be bound); it must come from trusted config.
                    data = pd.read_sql(f"SELECT * FROM {table_name}", con)
                elif sql is not None:
                    data = pd.read_sql(sql, con)
                else:
                    raise InvalidParameterError(
                        "Database `connect_args` provided without sql query or table name. Please specify either `sql` or `table_name`."
                    )
            finally:
                # Always release the connection, including on query failure.
                con.close()
    else:
        raise InvalidParameterError(
            "No filename/url provided, and no connect_args provided. Please specify one of these if you want to read data from a file or a database respectively."
        )
    if columns:
        # keep only these columns, done after load because only CSV supports stream filtering
        data = data[columns]
    if limit:
        data = data[:limit]
    return data
|
128
|
+
|
129
|
+
|
130
|
+
def _safe_write(fn, **kwargs):
|
131
|
+
try:
|
132
|
+
fn(**kwargs)
|
133
|
+
except Exception:
|
134
|
+
logger.warning(f'Failed to write file {kwargs.get("filename", "UNKNOWN")}')
|
135
|
+
|
136
|
+
|
137
|
+
def write_data(data, filename, format, storage_options=None, index=False, **kwargs):
    """
    Best-effort variant of ``_write_data``: the call is routed through
    ``_safe_write``, which logs a warning instead of raising on failure.

    All arguments are forwarded to ``_write_data`` unchanged.

    Returns
    -------
    Whatever ``_safe_write`` returns.
    """
    forwarded = {
        "data": data,
        "filename": filename,
        "format": format,
        "storage_options": storage_options,
        "index": index,
    }
    return _safe_write(fn=_write_data, **forwarded, **kwargs)
|
147
|
+
|
148
|
+
|
149
|
+
def _write_data(data, filename, format, storage_options=None, index=False, **kwargs):
    """
    Write ``data`` to ``filename`` using the ``to_<format>`` method of ``data``.

    Parameters
    ----------
    data : object exposing ``to_<format>`` writer methods.
    filename : str
        Destination path or URL.
    format : str or None
        One of json, clipboard, excel, csv, feather, hdf. When falsy, it is
        taken from the extension of ``filename``.
    storage_options : dict, optional
        Forwarded to ``call_pandas_fsspec``.
    index : bool
        Forwarded to the writer as ``index=``.
    **kwargs
        Forwarded to the writer.

    Returns
    -------
    The result of the writer call.

    Raises
    ------
    InvalidParameterError
        If the format is not supported for writing.
    """
    disable_print()
    try:
        if not format:
            _, format = os.path.splitext(filename)
            format = format[1:]
        if format in ["json", "clipboard", "excel", "csv", "feather", "hdf"]:
            write_fn = getattr(data, f"to_{format}")
            return call_pandas_fsspec(
                write_fn,
                filename,
                index=index,
                storage_options=storage_options,
                **kwargs,
            )
    finally:
        # Pair every disable_print() with enable_print(), including on the
        # success path and when the writer raises.
        enable_print()
    raise InvalidParameterError(
        f"The format {format} is not currently supported for writing data. Please change the format parameter for the data output: {filename} ."
    )
|
163
|
+
|
164
|
+
|
165
|
+
def write_json(json_dict, filename, storage_options=None):
    """
    Best-effort variant of ``_write_json``: the call is routed through
    ``_safe_write``, which logs a warning instead of raising on failure.

    Returns
    -------
    Whatever ``_safe_write`` returns.
    """
    call_args = dict(
        json_dict=json_dict,
        filename=filename,
        storage_options=storage_options,
    )
    return _safe_write(fn=_write_json, **call_args)
|
172
|
+
|
173
|
+
|
174
|
+
def _write_json(json_dict, filename, storage_options=None):
    """
    Serialize ``json_dict`` with ``json.dumps`` and write it to ``filename``.

    Parameters
    ----------
    json_dict : JSON-serializable object.
    filename : str
        Destination path or URL, opened through fsspec in text mode.
    storage_options : dict, optional
        Extra keyword arguments for ``fsspec.open``. None (the default) is
        treated as no options.
    """
    # `**None` raises TypeError, so normalise the documented default first.
    with fsspec.open(filename, mode="w", **(storage_options or {})) as f:
        f.write(json.dumps(json_dict))
|
177
|
+
|
178
|
+
|
179
|
+
def write_simple_json(data, path):
    """
    Best-effort variant of ``_write_simple_json``: the call is routed through
    ``_safe_write``, which logs a warning instead of raising on failure.

    Returns
    -------
    Whatever ``_safe_write`` returns.
    """
    outcome = _safe_write(fn=_write_simple_json, data=data, path=path)
    return outcome
|
181
|
+
|
182
|
+
|
183
|
+
def _write_simple_json(data, path):
    """
    Dump ``data`` as JSON (4-space indent) to ``path``.

    For OCI paths the storage options come from ``default_signer()``;
    for any other path no storage options are passed.
    """
    storage_options = (
        default_signer() if ObjectStorageDetails.is_oci_path(path) else {}
    )
    with fsspec.open(path, mode="w", **storage_options) as handle:
        json.dump(data, handle, indent=4)
|
190
|
+
|
191
|
+
|
192
|
+
def write_file(local_filename, remote_filename, storage_options, **kwargs):
    """
    Best-effort variant of ``_write_file``: the call is routed through
    ``_safe_write``, which logs a warning instead of raising on failure.

    All arguments are forwarded to ``_write_file`` unchanged.

    Returns
    -------
    Whatever ``_safe_write`` returns.
    """
    named_args = {
        "local_filename": local_filename,
        "remote_filename": remote_filename,
        "storage_options": storage_options,
    }
    return _safe_write(fn=_write_file, **named_args, **kwargs)
|
200
|
+
|
201
|
+
|
202
|
+
def _write_file(local_filename, remote_filename, storage_options, **kwargs):
    """
    Copy the text content of a local file to ``remote_filename`` via fsspec.

    Parameters
    ----------
    local_filename : str
        Source file, opened in text mode.
    remote_filename : str
        Destination path or URL, opened through fsspec in text mode.
    storage_options : dict or None
        Extra keyword arguments for ``fsspec.open``. None is treated as no options.
    **kwargs
        Accepted for interface compatibility; not used.
    """
    # `**None` raises TypeError, so tolerate callers that pass None.
    with open(local_filename) as f1:
        with fsspec.open(
            remote_filename,
            "w",
            **(storage_options or {}),
        ) as f2:
            f2.write(f1.read())
|
210
|
+
|
211
|
+
|
212
|
+
def load_pkl(filepath):
    """
    Load a pickled object from ``filepath`` on a best-effort basis.

    Returns
    -------
    The unpickled object, or None if loading failed (a warning is logged).
    """
    # Handled here rather than via `_safe_write`, which does not return the
    # wrapped function's result and reports failures as "write" errors.
    try:
        return _load_pkl(filepath)
    except Exception:
        logger.warning(f"Failed to load pickle file {filepath}")
        return None
|
214
|
+
|
215
|
+
|
216
|
+
def _load_pkl(filepath):
    """
    Open ``filepath`` through fsspec and unpickle its content with cloudpickle.

    For OCI paths the storage options come from ``default_signer()``;
    for any other path no storage options are passed.

    Returns
    -------
    The unpickled object.
    """
    storage_options = {}
    if ObjectStorageDetails.is_oci_path(filepath):
        storage_options = default_signer()

    # NOTE(security): unpickling executes arbitrary code embedded in the file;
    # only load pickles from trusted locations.
    with fsspec.open(filepath, "rb", **storage_options) as f:
        return cloudpickle.load(f)
|
224
|
+
|
225
|
+
|
226
|
+
def write_pkl(obj, filename, output_dir, storage_options):
    """
    Best-effort variant of ``_write_pkl``: the call is routed through
    ``_safe_write``, which logs a warning instead of raising on failure.

    Returns
    -------
    Whatever ``_safe_write`` returns.
    """
    pickle_args = dict(
        obj=obj,
        filename=filename,
        output_dir=output_dir,
        storage_options=storage_options,
    )
    return _safe_write(fn=_write_pkl, **pickle_args)
|
234
|
+
|
235
|
+
|
236
|
+
def _write_pkl(obj, filename, output_dir, storage_options):
    """
    Pickle ``obj`` with cloudpickle into ``output_dir/filename``.

    The destination is opened in binary mode through fsspec, with
    ``storage_options`` expanded as keyword arguments.
    """
    destination = os.path.join(output_dir, filename)
    with fsspec.open(destination, "wb", **storage_options) as stream:
        cloudpickle.dump(obj, stream)
|
244
|
+
|
245
|
+
|
246
|
+
def merge_category_columns(data, target_category_columns):
    """
    Build one series identifier per row by joining the string form of the
    given columns with a double underscore.

    Parameters
    ----------
    data : pd.DataFrame
    target_category_columns : list
        Column names whose values are joined, in the given order.

    Returns
    -------
    pd.Series of identifiers; an empty string-typed Series when the row-wise
    result is empty.
    """

    def _join_row(row):
        return "__".join([str(row[col]) for col in target_category_columns])

    merged = data.apply(_join_row, axis=1)
    if merged.empty:
        return pd.Series([], dtype=str)
    return merged
|
251
|
+
|
252
|
+
|
253
|
+
def merged_category_column_name(
    target_category_columns: Union[List, None],
) -> Union[str, None]:
    """
    Return the name of the combined category column.

    Parameters
    ------------
    target_category_columns: list or None  Category column names

    Returns
    --------
    str or None  The names joined with "__", or None when the argument is
    None or empty
    """
    # A single truthiness test covers both None and the empty list.
    if not target_category_columns:
        return None
    return "__".join(str(name) for name in target_category_columns)
|
257
|
+
|
258
|
+
|
259
|
+
def datetime_to_seconds(s: pd.Series):
|
260
|
+
"""
|
261
|
+
Method converts a datetime column into an integer number of seconds.
|
262
|
+
This method has many uses, most notably for enabling libraries like shap
|
263
|
+
to read datetime columns
|
264
|
+
------------
|
265
|
+
s: pd.Series
|
266
|
+
A Series of type datetime
|
267
|
+
Returns
|
268
|
+
pd.Series of type int
|
269
|
+
"""
|
270
|
+
return s.apply(lambda x: x.timestamp())
|
271
|
+
|
272
|
+
|
273
|
+
def seconds_to_datetime(s: pd.Series, dt_format=None):
    """
    Inverse of `datetime_to_seconds`: interpret numeric values as seconds
    and convert them to datetimes.

    Parameters
    ------------
    s: pd.Series  A Series of numeric seconds
    dt_format: str, optional  When given, the converted Series is passed
        through ``pd.to_datetime`` a second time with this format

    Returns
    --------
    pd.Series  of type datetime
    """
    converted = pd.to_datetime(s, unit="s")
    if dt_format is None:
        return converted
    return pd.to_datetime(converted, format=dt_format)
|
286
|
+
|
287
|
+
|
288
|
+
def default_signer(**kwargs):
    """
    Return the signer produced by ``ads.common.auth.default_signer``.

    Before delegating, the ``EXTRA_USER_AGENT_INFO`` environment variable is
    set to "Operator". The ads helper is imported inside the function under
    an alias so it does not clash with this function's own name.

    Parameters
    ------------
    **kwargs: forwarded unchanged to ``ads.common.auth.default_signer``

    Returns
    --------
    Whatever ``ads.common.auth.default_signer`` returns
    """
    # Set the variable first, then import, matching the original ordering.
    os.environ["EXTRA_USER_AGENT_INFO"] = "Operator"
    from ads.common.auth import default_signer as _ads_default_signer

    return _ads_default_signer(**kwargs)
|
293
|
+
|
294
|
+
|
295
|
+
def get_frequency_in_seconds(s: pd.Series, sample_size=100, ignore_duplicates=True):
    """
    Returns frequency of data in seconds

    Parameters
    ------------
    s: pd.Series  Datetime column
    sample_size: int  Currently not used; the last 20 values are always
        inspected
    ignore_duplicates: bool  if True, duplicates will be dropped before
        computing frequency

    Returns
    --------
    float  Minimum difference, in seconds, between consecutive values among
    the last 20 entries
    """
    if ignore_duplicates:
        values = pd.Series(s).drop_duplicates()
    else:
        values = s
    recent_gaps = values.tail(20).diff()
    smallest_gap = recent_gaps.min()
    return smallest_gap.total_seconds()
|
310
|
+
|
311
|
+
|
312
|
+
def get_frequency_of_datetime(dt_col: pd.Series, ignore_duplicates=True):
    """
    Returns string frequency of data

    Parameters
    ------------
    dt_col: pd.Series  Datetime column
    ignore_duplicates: bool  if True, duplicates will be dropped before
        computing frequency

    Returns
    --------
    str  Pandas Datetime Frequency, as reported by ``pd.infer_freq``
    """
    if ignore_duplicates:
        candidates = pd.Series(dt_col).drop_duplicates()
    else:
        candidates = dt_col
    return pd.infer_freq(candidates)
|
327
|
+
|
328
|
+
|
329
|
+
def human_time_friendly(seconds):
    """
    Format a duration in seconds as a readable string such as
    "1 hour, 2 mins, 3.5 secs".

    Parameters
    ------------
    seconds: number  Duration in seconds

    Returns
    --------
    str  "inf" when ``seconds`` equals 0; otherwise the non-zero week, day,
    hour and minute counts followed by the remaining seconds rounded to two
    decimals
    """
    if seconds == 0:
        return "inf"

    # Largest unit first so each divmod peels off the biggest whole unit.
    unit_sizes = (
        ("week", 60 * 60 * 24 * 7),
        ("day", 60 * 60 * 24),
        ("hour", 60 * 60),
        ("min", 60),
    )
    remainder = float(seconds)
    pieces = []
    for label, size in unit_sizes:
        count, remainder = divmod(remainder, size)
        if count > 0:
            plural = "" if count == 1 else "s"
            pieces.append(f"{int(count)} {label}{plural}")
    pieces.append(f"{round(remainder, 2)} secs")
    return ", ".join(pieces)
|
347
|
+
|
348
|
+
|
349
|
+
def find_output_dirname(output_dir: OutputDirectory):
    """
    Pick the directory where results are written.

    Parameters
    ------------
    output_dir: OutputDirectory  Optional output spec; its ``url`` is
        returned as-is when set

    Returns
    --------
    str  ``output_dir.url`` when provided; otherwise the first of "results",
    "results_1", "results_2", ... that does not exist yet (a warning naming
    the chosen directory is logged)
    """
    if output_dir and output_dir.url:
        return output_dir.url

    base_name = "results"
    candidate = base_name
    suffix = 1
    # Keep appending an increasing suffix until the name is unused.
    while os.path.exists(candidate):
        candidate = f"{base_name}_{suffix}"
        suffix += 1

    logger.warning(
        f"Since the output directory was not specified, the output will be saved to {candidate} directory."
    )
    return candidate
|
364
|
+
|
365
|
+
|
366
|
+
def set_log_level(pkg_name: str, level: int):
    """
    Configure the logger named ``pkg_name``: set its level, stop its records
    from propagating to ancestor loggers, and attach a ``NullHandler``.

    Parameters
    ------------
    pkg_name: str  Name of the logger to configure
    level: int  Logging level to apply

    Returns
    --------
    None
    """
    target = logging.getLogger(pkg_name)
    target.setLevel(level)
    target.propagate = False
    target.addHandler(logging.NullHandler())
|
371
|
+
|
372
|
+
|
373
|
+
# Disable
|
374
|
+
def disable_print():
    """
    Silence ``print`` output by replacing ``sys.stdout`` with a writable
    handle on the OS null device.
    """
    null_sink = open(os.devnull, "w")
    sys.stdout = null_sink
|
376
|
+
|
377
|
+
|
378
|
+
# Restore
|
379
|
+
def enable_print():
    """
    Restore ``sys.stdout`` to the interpreter's original ``sys.__stdout__``.

    If the current ``sys.stdout`` is a handle on the OS null device, it is
    closed first. Any other stream is left open, so calling this function
    when output was never redirected to the null device does not close the
    real standard output (or a stream owned by someone else).

    Returns
    --------
    None
    """
    current = sys.stdout
    is_null_sink = (
        current is not sys.__stdout__
        and getattr(current, "name", None) == os.devnull
    )
    if is_null_sink:
        try:
            current.close()
        except OSError:
            # Best effort: failing to close the null sink must not prevent
            # stdout from being restored.
            pass
    sys.stdout = sys.__stdout__
|
@@ -0,0 +1,13 @@
|
|
1
|
+
type: feature_store_marketplace
|
2
|
+
version: v1
|
3
|
+
name: Feature store deployment Operator
|
4
|
+
gpu: no
|
5
|
+
keywords:
|
6
|
+
- Helm
|
7
|
+
- marketplace
|
8
|
+
- Feature store
|
9
|
+
backends:
|
10
|
+
- marketplace
|
11
|
+
description: |
|
12
|
+
Feature store deployment operator helps ease the operational burden of deploying feature store helm charts from marketplace
|
13
|
+
to OKE cluster of your choice!!
|