oracle-ads 2.13.9rc0__py3-none-any.whl → 2.13.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/__init__.py +40 -0
- ads/aqua/app.py +507 -0
- ads/aqua/cli.py +96 -0
- ads/aqua/client/__init__.py +3 -0
- ads/aqua/client/client.py +836 -0
- ads/aqua/client/openai_client.py +305 -0
- ads/aqua/common/__init__.py +5 -0
- ads/aqua/common/decorator.py +125 -0
- ads/aqua/common/entities.py +274 -0
- ads/aqua/common/enums.py +134 -0
- ads/aqua/common/errors.py +109 -0
- ads/aqua/common/utils.py +1295 -0
- ads/aqua/config/__init__.py +4 -0
- ads/aqua/config/container_config.py +246 -0
- ads/aqua/config/evaluation/__init__.py +4 -0
- ads/aqua/config/evaluation/evaluation_service_config.py +147 -0
- ads/aqua/config/utils/__init__.py +4 -0
- ads/aqua/config/utils/serializer.py +339 -0
- ads/aqua/constants.py +116 -0
- ads/aqua/data.py +14 -0
- ads/aqua/dummy_data/icon.txt +1 -0
- ads/aqua/dummy_data/oci_model_deployments.json +56 -0
- ads/aqua/dummy_data/oci_models.json +1 -0
- ads/aqua/dummy_data/readme.md +26 -0
- ads/aqua/evaluation/__init__.py +8 -0
- ads/aqua/evaluation/constants.py +53 -0
- ads/aqua/evaluation/entities.py +186 -0
- ads/aqua/evaluation/errors.py +70 -0
- ads/aqua/evaluation/evaluation.py +1814 -0
- ads/aqua/extension/__init__.py +42 -0
- ads/aqua/extension/aqua_ws_msg_handler.py +76 -0
- ads/aqua/extension/base_handler.py +90 -0
- ads/aqua/extension/common_handler.py +121 -0
- ads/aqua/extension/common_ws_msg_handler.py +36 -0
- ads/aqua/extension/deployment_handler.py +381 -0
- ads/aqua/extension/deployment_ws_msg_handler.py +54 -0
- ads/aqua/extension/errors.py +30 -0
- ads/aqua/extension/evaluation_handler.py +129 -0
- ads/aqua/extension/evaluation_ws_msg_handler.py +61 -0
- ads/aqua/extension/finetune_handler.py +96 -0
- ads/aqua/extension/model_handler.py +390 -0
- ads/aqua/extension/models/__init__.py +0 -0
- ads/aqua/extension/models/ws_models.py +145 -0
- ads/aqua/extension/models_ws_msg_handler.py +50 -0
- ads/aqua/extension/ui_handler.py +300 -0
- ads/aqua/extension/ui_websocket_handler.py +130 -0
- ads/aqua/extension/utils.py +133 -0
- ads/aqua/finetuning/__init__.py +7 -0
- ads/aqua/finetuning/constants.py +23 -0
- ads/aqua/finetuning/entities.py +181 -0
- ads/aqua/finetuning/finetuning.py +749 -0
- ads/aqua/model/__init__.py +8 -0
- ads/aqua/model/constants.py +60 -0
- ads/aqua/model/entities.py +385 -0
- ads/aqua/model/enums.py +32 -0
- ads/aqua/model/model.py +2134 -0
- ads/aqua/model/utils.py +52 -0
- ads/aqua/modeldeployment/__init__.py +6 -0
- ads/aqua/modeldeployment/constants.py +10 -0
- ads/aqua/modeldeployment/deployment.py +1315 -0
- ads/aqua/modeldeployment/entities.py +653 -0
- ads/aqua/modeldeployment/utils.py +543 -0
- ads/aqua/resources/gpu_shapes_index.json +94 -0
- ads/aqua/server/__init__.py +4 -0
- ads/aqua/server/__main__.py +24 -0
- ads/aqua/server/app.py +47 -0
- ads/aqua/server/aqua_spec.yml +1291 -0
- ads/aqua/training/__init__.py +4 -0
- ads/aqua/training/exceptions.py +476 -0
- ads/aqua/ui.py +519 -0
- ads/automl/__init__.py +9 -0
- ads/automl/driver.py +330 -0
- ads/automl/provider.py +975 -0
- ads/bds/__init__.py +5 -0
- ads/bds/auth.py +127 -0
- ads/bds/big_data_service.py +255 -0
- ads/catalog/__init__.py +19 -0
- ads/catalog/model.py +1576 -0
- ads/catalog/notebook.py +461 -0
- ads/catalog/project.py +468 -0
- ads/catalog/summary.py +178 -0
- ads/common/__init__.py +11 -0
- ads/common/analyzer.py +65 -0
- ads/common/artifact/.model-ignore +63 -0
- ads/common/artifact/__init__.py +10 -0
- ads/common/auth.py +1122 -0
- ads/common/card_identifier.py +83 -0
- ads/common/config.py +647 -0
- ads/common/data.py +165 -0
- ads/common/decorator/__init__.py +9 -0
- ads/common/decorator/argument_to_case.py +88 -0
- ads/common/decorator/deprecate.py +69 -0
- ads/common/decorator/require_nonempty_arg.py +65 -0
- ads/common/decorator/runtime_dependency.py +178 -0
- ads/common/decorator/threaded.py +97 -0
- ads/common/decorator/utils.py +35 -0
- ads/common/dsc_file_system.py +303 -0
- ads/common/error.py +14 -0
- ads/common/extended_enum.py +81 -0
- ads/common/function/__init__.py +5 -0
- ads/common/function/fn_util.py +142 -0
- ads/common/function/func_conf.yaml +25 -0
- ads/common/ipython.py +76 -0
- ads/common/model.py +679 -0
- ads/common/model_artifact.py +1759 -0
- ads/common/model_artifact_schema.json +107 -0
- ads/common/model_export_util.py +664 -0
- ads/common/model_metadata.py +24 -0
- ads/common/object_storage_details.py +296 -0
- ads/common/oci_client.py +179 -0
- ads/common/oci_datascience.py +46 -0
- ads/common/oci_logging.py +1144 -0
- ads/common/oci_mixin.py +957 -0
- ads/common/oci_resource.py +136 -0
- ads/common/serializer.py +559 -0
- ads/common/utils.py +1852 -0
- ads/common/word_lists.py +1491 -0
- ads/common/work_request.py +189 -0
- ads/config.py +1 -0
- ads/data_labeling/__init__.py +13 -0
- ads/data_labeling/boundingbox.py +253 -0
- ads/data_labeling/constants.py +47 -0
- ads/data_labeling/data_labeling_service.py +244 -0
- ads/data_labeling/interface/__init__.py +5 -0
- ads/data_labeling/interface/loader.py +16 -0
- ads/data_labeling/interface/parser.py +16 -0
- ads/data_labeling/interface/reader.py +23 -0
- ads/data_labeling/loader/__init__.py +5 -0
- ads/data_labeling/loader/file_loader.py +241 -0
- ads/data_labeling/metadata.py +110 -0
- ads/data_labeling/mixin/__init__.py +5 -0
- ads/data_labeling/mixin/data_labeling.py +232 -0
- ads/data_labeling/ner.py +129 -0
- ads/data_labeling/parser/__init__.py +5 -0
- ads/data_labeling/parser/dls_record_parser.py +388 -0
- ads/data_labeling/parser/export_metadata_parser.py +94 -0
- ads/data_labeling/parser/export_record_parser.py +473 -0
- ads/data_labeling/reader/__init__.py +5 -0
- ads/data_labeling/reader/dataset_reader.py +574 -0
- ads/data_labeling/reader/dls_record_reader.py +121 -0
- ads/data_labeling/reader/export_record_reader.py +62 -0
- ads/data_labeling/reader/jsonl_reader.py +75 -0
- ads/data_labeling/reader/metadata_reader.py +203 -0
- ads/data_labeling/reader/record_reader.py +263 -0
- ads/data_labeling/record.py +52 -0
- ads/data_labeling/visualizer/__init__.py +5 -0
- ads/data_labeling/visualizer/image_visualizer.py +525 -0
- ads/data_labeling/visualizer/text_visualizer.py +357 -0
- ads/database/__init__.py +5 -0
- ads/database/connection.py +338 -0
- ads/dataset/__init__.py +10 -0
- ads/dataset/capabilities.md +51 -0
- ads/dataset/classification_dataset.py +339 -0
- ads/dataset/correlation.py +226 -0
- ads/dataset/correlation_plot.py +563 -0
- ads/dataset/dask_series.py +173 -0
- ads/dataset/dataframe_transformer.py +110 -0
- ads/dataset/dataset.py +1979 -0
- ads/dataset/dataset_browser.py +360 -0
- ads/dataset/dataset_with_target.py +995 -0
- ads/dataset/exception.py +25 -0
- ads/dataset/factory.py +987 -0
- ads/dataset/feature_engineering_transformer.py +35 -0
- ads/dataset/feature_selection.py +107 -0
- ads/dataset/forecasting_dataset.py +26 -0
- ads/dataset/helper.py +1450 -0
- ads/dataset/label_encoder.py +99 -0
- ads/dataset/mixin/__init__.py +5 -0
- ads/dataset/mixin/dataset_accessor.py +134 -0
- ads/dataset/pipeline.py +58 -0
- ads/dataset/plot.py +710 -0
- ads/dataset/progress.py +86 -0
- ads/dataset/recommendation.py +297 -0
- ads/dataset/recommendation_transformer.py +502 -0
- ads/dataset/regression_dataset.py +14 -0
- ads/dataset/sampled_dataset.py +1050 -0
- ads/dataset/target.py +98 -0
- ads/dataset/timeseries.py +18 -0
- ads/dbmixin/__init__.py +5 -0
- ads/dbmixin/db_pandas_accessor.py +153 -0
- ads/environment/__init__.py +9 -0
- ads/environment/ml_runtime.py +66 -0
- ads/evaluations/README.md +14 -0
- ads/evaluations/__init__.py +109 -0
- ads/evaluations/evaluation_plot.py +983 -0
- ads/evaluations/evaluator.py +1334 -0
- ads/evaluations/statistical_metrics.py +543 -0
- ads/experiments/__init__.py +9 -0
- ads/experiments/capabilities.md +0 -0
- ads/explanations/__init__.py +21 -0
- ads/explanations/base_explainer.py +142 -0
- ads/explanations/capabilities.md +83 -0
- ads/explanations/explainer.py +190 -0
- ads/explanations/mlx_global_explainer.py +1050 -0
- ads/explanations/mlx_interface.py +386 -0
- ads/explanations/mlx_local_explainer.py +287 -0
- ads/explanations/mlx_whatif_explainer.py +201 -0
- ads/feature_engineering/__init__.py +20 -0
- ads/feature_engineering/accessor/__init__.py +5 -0
- ads/feature_engineering/accessor/dataframe_accessor.py +535 -0
- ads/feature_engineering/accessor/mixin/__init__.py +5 -0
- ads/feature_engineering/accessor/mixin/correlation.py +166 -0
- ads/feature_engineering/accessor/mixin/eda_mixin.py +266 -0
- ads/feature_engineering/accessor/mixin/eda_mixin_series.py +85 -0
- ads/feature_engineering/accessor/mixin/feature_types_mixin.py +211 -0
- ads/feature_engineering/accessor/mixin/utils.py +65 -0
- ads/feature_engineering/accessor/series_accessor.py +431 -0
- ads/feature_engineering/adsimage/__init__.py +5 -0
- ads/feature_engineering/adsimage/image.py +192 -0
- ads/feature_engineering/adsimage/image_reader.py +170 -0
- ads/feature_engineering/adsimage/interface/__init__.py +5 -0
- ads/feature_engineering/adsimage/interface/reader.py +19 -0
- ads/feature_engineering/adsstring/__init__.py +7 -0
- ads/feature_engineering/adsstring/oci_language/__init__.py +8 -0
- ads/feature_engineering/adsstring/string/__init__.py +8 -0
- ads/feature_engineering/data_schema.json +57 -0
- ads/feature_engineering/dataset/__init__.py +5 -0
- ads/feature_engineering/dataset/zip_code_data.py +42062 -0
- ads/feature_engineering/exceptions.py +40 -0
- ads/feature_engineering/feature_type/__init__.py +133 -0
- ads/feature_engineering/feature_type/address.py +184 -0
- ads/feature_engineering/feature_type/adsstring/__init__.py +5 -0
- ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +164 -0
- ads/feature_engineering/feature_type/adsstring/oci_language.py +93 -0
- ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +5 -0
- ads/feature_engineering/feature_type/adsstring/parsers/base.py +47 -0
- ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +96 -0
- ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +221 -0
- ads/feature_engineering/feature_type/adsstring/string.py +258 -0
- ads/feature_engineering/feature_type/base.py +58 -0
- ads/feature_engineering/feature_type/boolean.py +183 -0
- ads/feature_engineering/feature_type/category.py +146 -0
- ads/feature_engineering/feature_type/constant.py +137 -0
- ads/feature_engineering/feature_type/continuous.py +151 -0
- ads/feature_engineering/feature_type/creditcard.py +314 -0
- ads/feature_engineering/feature_type/datetime.py +190 -0
- ads/feature_engineering/feature_type/discrete.py +134 -0
- ads/feature_engineering/feature_type/document.py +43 -0
- ads/feature_engineering/feature_type/gis.py +251 -0
- ads/feature_engineering/feature_type/handler/__init__.py +5 -0
- ads/feature_engineering/feature_type/handler/feature_validator.py +524 -0
- ads/feature_engineering/feature_type/handler/feature_warning.py +319 -0
- ads/feature_engineering/feature_type/handler/warnings.py +128 -0
- ads/feature_engineering/feature_type/integer.py +142 -0
- ads/feature_engineering/feature_type/ip_address.py +144 -0
- ads/feature_engineering/feature_type/ip_address_v4.py +138 -0
- ads/feature_engineering/feature_type/ip_address_v6.py +138 -0
- ads/feature_engineering/feature_type/lat_long.py +256 -0
- ads/feature_engineering/feature_type/object.py +43 -0
- ads/feature_engineering/feature_type/ordinal.py +132 -0
- ads/feature_engineering/feature_type/phone_number.py +135 -0
- ads/feature_engineering/feature_type/string.py +171 -0
- ads/feature_engineering/feature_type/text.py +93 -0
- ads/feature_engineering/feature_type/unknown.py +43 -0
- ads/feature_engineering/feature_type/zip_code.py +164 -0
- ads/feature_engineering/feature_type_manager.py +406 -0
- ads/feature_engineering/schema.py +795 -0
- ads/feature_engineering/utils.py +245 -0
- ads/feature_store/.readthedocs.yaml +19 -0
- ads/feature_store/README.md +65 -0
- ads/feature_store/__init__.py +9 -0
- ads/feature_store/common/__init__.py +0 -0
- ads/feature_store/common/enums.py +339 -0
- ads/feature_store/common/exceptions.py +18 -0
- ads/feature_store/common/spark_session_singleton.py +125 -0
- ads/feature_store/common/utils/__init__.py +0 -0
- ads/feature_store/common/utils/base64_encoder_decoder.py +72 -0
- ads/feature_store/common/utils/feature_schema_mapper.py +283 -0
- ads/feature_store/common/utils/transformation_utils.py +82 -0
- ads/feature_store/common/utils/utility.py +403 -0
- ads/feature_store/data_validation/__init__.py +0 -0
- ads/feature_store/data_validation/great_expectation.py +129 -0
- ads/feature_store/dataset.py +1230 -0
- ads/feature_store/dataset_job.py +530 -0
- ads/feature_store/docs/Dockerfile +7 -0
- ads/feature_store/docs/Makefile +44 -0
- ads/feature_store/docs/conf.py +28 -0
- ads/feature_store/docs/requirements.txt +14 -0
- ads/feature_store/docs/source/ads.feature_store.query.rst +20 -0
- ads/feature_store/docs/source/cicd.rst +137 -0
- ads/feature_store/docs/source/conf.py +86 -0
- ads/feature_store/docs/source/data_versioning.rst +33 -0
- ads/feature_store/docs/source/dataset.rst +388 -0
- ads/feature_store/docs/source/dataset_job.rst +27 -0
- ads/feature_store/docs/source/demo.rst +70 -0
- ads/feature_store/docs/source/entity.rst +78 -0
- ads/feature_store/docs/source/feature_group.rst +624 -0
- ads/feature_store/docs/source/feature_group_job.rst +29 -0
- ads/feature_store/docs/source/feature_store.rst +122 -0
- ads/feature_store/docs/source/feature_store_class.rst +123 -0
- ads/feature_store/docs/source/feature_validation.rst +66 -0
- ads/feature_store/docs/source/figures/cicd.png +0 -0
- ads/feature_store/docs/source/figures/data_validation.png +0 -0
- ads/feature_store/docs/source/figures/data_versioning.png +0 -0
- ads/feature_store/docs/source/figures/dataset.gif +0 -0
- ads/feature_store/docs/source/figures/dataset.png +0 -0
- ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
- ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
- ads/feature_store/docs/source/figures/entity.png +0 -0
- ads/feature_store/docs/source/figures/feature_group.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
- ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
- ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
- ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
- ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
- ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
- ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
- ads/feature_store/docs/source/figures/overview.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
- ads/feature_store/docs/source/figures/stats_1.png +0 -0
- ads/feature_store/docs/source/figures/stats_2.png +0 -0
- ads/feature_store/docs/source/figures/stats_d.png +0 -0
- ads/feature_store/docs/source/figures/stats_fg.png +0 -0
- ads/feature_store/docs/source/figures/transformation.png +0 -0
- ads/feature_store/docs/source/figures/transformations.gif +0 -0
- ads/feature_store/docs/source/figures/validation.png +0 -0
- ads/feature_store/docs/source/figures/validation_fg.png +0 -0
- ads/feature_store/docs/source/figures/validation_results.png +0 -0
- ads/feature_store/docs/source/figures/validation_summary.png +0 -0
- ads/feature_store/docs/source/index.rst +81 -0
- ads/feature_store/docs/source/module.rst +8 -0
- ads/feature_store/docs/source/notebook.rst +94 -0
- ads/feature_store/docs/source/overview.rst +47 -0
- ads/feature_store/docs/source/quickstart.rst +176 -0
- ads/feature_store/docs/source/release_notes.rst +194 -0
- ads/feature_store/docs/source/setup_feature_store.rst +81 -0
- ads/feature_store/docs/source/statistics.rst +58 -0
- ads/feature_store/docs/source/transformation.rst +199 -0
- ads/feature_store/docs/source/ui.rst +65 -0
- ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +66 -0
- ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +192 -0
- ads/feature_store/docs/source/user_guides.setup.terraform.rst +338 -0
- ads/feature_store/entity.py +718 -0
- ads/feature_store/execution_strategy/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +375 -0
- ads/feature_store/execution_strategy/engine/__init__.py +0 -0
- ads/feature_store/execution_strategy/engine/spark_engine.py +316 -0
- ads/feature_store/execution_strategy/execution_strategy.py +113 -0
- ads/feature_store/execution_strategy/execution_strategy_provider.py +47 -0
- ads/feature_store/execution_strategy/spark/__init__.py +0 -0
- ads/feature_store/execution_strategy/spark/spark_execution.py +618 -0
- ads/feature_store/feature.py +192 -0
- ads/feature_store/feature_group.py +1494 -0
- ads/feature_store/feature_group_expectation.py +346 -0
- ads/feature_store/feature_group_job.py +602 -0
- ads/feature_store/feature_lineage/__init__.py +0 -0
- ads/feature_store/feature_lineage/graphviz_service.py +180 -0
- ads/feature_store/feature_option_details.py +50 -0
- ads/feature_store/feature_statistics/__init__.py +0 -0
- ads/feature_store/feature_statistics/statistics_service.py +99 -0
- ads/feature_store/feature_store.py +699 -0
- ads/feature_store/feature_store_registrar.py +518 -0
- ads/feature_store/input_feature_detail.py +149 -0
- ads/feature_store/mixin/__init__.py +4 -0
- ads/feature_store/mixin/oci_feature_store.py +145 -0
- ads/feature_store/model_details.py +73 -0
- ads/feature_store/query/__init__.py +0 -0
- ads/feature_store/query/filter.py +266 -0
- ads/feature_store/query/generator/__init__.py +0 -0
- ads/feature_store/query/generator/query_generator.py +298 -0
- ads/feature_store/query/join.py +161 -0
- ads/feature_store/query/query.py +403 -0
- ads/feature_store/query/validator/__init__.py +0 -0
- ads/feature_store/query/validator/query_validator.py +57 -0
- ads/feature_store/response/__init__.py +0 -0
- ads/feature_store/response/response_builder.py +68 -0
- ads/feature_store/service/__init__.py +0 -0
- ads/feature_store/service/oci_dataset.py +139 -0
- ads/feature_store/service/oci_dataset_job.py +199 -0
- ads/feature_store/service/oci_entity.py +125 -0
- ads/feature_store/service/oci_feature_group.py +164 -0
- ads/feature_store/service/oci_feature_group_job.py +214 -0
- ads/feature_store/service/oci_feature_store.py +182 -0
- ads/feature_store/service/oci_lineage.py +87 -0
- ads/feature_store/service/oci_transformation.py +104 -0
- ads/feature_store/statistics/__init__.py +0 -0
- ads/feature_store/statistics/abs_feature_value.py +49 -0
- ads/feature_store/statistics/charts/__init__.py +0 -0
- ads/feature_store/statistics/charts/abstract_feature_plot.py +37 -0
- ads/feature_store/statistics/charts/box_plot.py +148 -0
- ads/feature_store/statistics/charts/frequency_distribution.py +65 -0
- ads/feature_store/statistics/charts/probability_distribution.py +68 -0
- ads/feature_store/statistics/charts/top_k_frequent_elements.py +98 -0
- ads/feature_store/statistics/feature_stat.py +126 -0
- ads/feature_store/statistics/generic_feature_value.py +33 -0
- ads/feature_store/statistics/statistics.py +41 -0
- ads/feature_store/statistics_config.py +101 -0
- ads/feature_store/templates/feature_store_template.yaml +45 -0
- ads/feature_store/transformation.py +499 -0
- ads/feature_store/validation_output.py +57 -0
- ads/hpo/__init__.py +9 -0
- ads/hpo/_imports.py +91 -0
- ads/hpo/ads_search_space.py +439 -0
- ads/hpo/distributions.py +325 -0
- ads/hpo/objective.py +280 -0
- ads/hpo/search_cv.py +1657 -0
- ads/hpo/stopping_criterion.py +75 -0
- ads/hpo/tuner_artifact.py +413 -0
- ads/hpo/utils.py +91 -0
- ads/hpo/validation.py +140 -0
- ads/hpo/visualization/__init__.py +5 -0
- ads/hpo/visualization/_contour.py +23 -0
- ads/hpo/visualization/_edf.py +20 -0
- ads/hpo/visualization/_intermediate_values.py +21 -0
- ads/hpo/visualization/_optimization_history.py +25 -0
- ads/hpo/visualization/_parallel_coordinate.py +169 -0
- ads/hpo/visualization/_param_importances.py +26 -0
- ads/jobs/__init__.py +53 -0
- ads/jobs/ads_job.py +663 -0
- ads/jobs/builders/__init__.py +5 -0
- ads/jobs/builders/base.py +156 -0
- ads/jobs/builders/infrastructure/__init__.py +6 -0
- ads/jobs/builders/infrastructure/base.py +165 -0
- ads/jobs/builders/infrastructure/dataflow.py +1252 -0
- ads/jobs/builders/infrastructure/dsc_job.py +1894 -0
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +1233 -0
- ads/jobs/builders/infrastructure/utils.py +65 -0
- ads/jobs/builders/runtimes/__init__.py +5 -0
- ads/jobs/builders/runtimes/artifact.py +338 -0
- ads/jobs/builders/runtimes/base.py +325 -0
- ads/jobs/builders/runtimes/container_runtime.py +242 -0
- ads/jobs/builders/runtimes/python_runtime.py +1016 -0
- ads/jobs/builders/runtimes/pytorch_runtime.py +204 -0
- ads/jobs/cli.py +104 -0
- ads/jobs/env_var_parser.py +131 -0
- ads/jobs/extension.py +160 -0
- ads/jobs/schema/__init__.py +5 -0
- ads/jobs/schema/infrastructure_schema.json +116 -0
- ads/jobs/schema/job_schema.json +42 -0
- ads/jobs/schema/runtime_schema.json +183 -0
- ads/jobs/schema/validator.py +141 -0
- ads/jobs/serializer.py +296 -0
- ads/jobs/templates/__init__.py +5 -0
- ads/jobs/templates/container.py +6 -0
- ads/jobs/templates/driver_notebook.py +177 -0
- ads/jobs/templates/driver_oci.py +500 -0
- ads/jobs/templates/driver_python.py +48 -0
- ads/jobs/templates/driver_pytorch.py +852 -0
- ads/jobs/templates/driver_utils.py +615 -0
- ads/jobs/templates/hostname_from_env.c +55 -0
- ads/jobs/templates/oci_metrics.py +181 -0
- ads/jobs/utils.py +104 -0
- ads/llm/__init__.py +28 -0
- ads/llm/autogen/__init__.py +2 -0
- ads/llm/autogen/constants.py +15 -0
- ads/llm/autogen/reports/__init__.py +2 -0
- ads/llm/autogen/reports/base.py +67 -0
- ads/llm/autogen/reports/data.py +103 -0
- ads/llm/autogen/reports/session.py +526 -0
- ads/llm/autogen/reports/templates/chat_box.html +13 -0
- ads/llm/autogen/reports/templates/chat_box_lt.html +5 -0
- ads/llm/autogen/reports/templates/chat_box_rt.html +6 -0
- ads/llm/autogen/reports/utils.py +56 -0
- ads/llm/autogen/v02/__init__.py +4 -0
- ads/llm/autogen/v02/client.py +295 -0
- ads/llm/autogen/v02/log_handlers/__init__.py +2 -0
- ads/llm/autogen/v02/log_handlers/oci_file_handler.py +83 -0
- ads/llm/autogen/v02/loggers/__init__.py +6 -0
- ads/llm/autogen/v02/loggers/metric_logger.py +320 -0
- ads/llm/autogen/v02/loggers/session_logger.py +580 -0
- ads/llm/autogen/v02/loggers/utils.py +86 -0
- ads/llm/autogen/v02/runtime_logging.py +163 -0
- ads/llm/chain.py +268 -0
- ads/llm/chat_template.py +31 -0
- ads/llm/deploy.py +63 -0
- ads/llm/guardrails/__init__.py +5 -0
- ads/llm/guardrails/base.py +442 -0
- ads/llm/guardrails/huggingface.py +44 -0
- ads/llm/langchain/__init__.py +5 -0
- ads/llm/langchain/plugins/__init__.py +5 -0
- ads/llm/langchain/plugins/chat_models/__init__.py +5 -0
- ads/llm/langchain/plugins/chat_models/oci_data_science.py +1027 -0
- ads/llm/langchain/plugins/embeddings/__init__.py +4 -0
- ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +184 -0
- ads/llm/langchain/plugins/llms/__init__.py +5 -0
- ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +979 -0
- ads/llm/requirements.txt +3 -0
- ads/llm/serialize.py +219 -0
- ads/llm/serializers/__init__.py +0 -0
- ads/llm/serializers/retrieval_qa.py +153 -0
- ads/llm/serializers/runnable_parallel.py +27 -0
- ads/llm/templates/score_chain.jinja2 +155 -0
- ads/llm/templates/tool_chat_template_hermes.jinja +130 -0
- ads/llm/templates/tool_chat_template_mistral_parallel.jinja +94 -0
- ads/model/__init__.py +52 -0
- ads/model/artifact.py +573 -0
- ads/model/artifact_downloader.py +254 -0
- ads/model/artifact_uploader.py +267 -0
- ads/model/base_properties.py +238 -0
- ads/model/common/.model-ignore +66 -0
- ads/model/common/__init__.py +5 -0
- ads/model/common/utils.py +142 -0
- ads/model/datascience_model.py +2635 -0
- ads/model/deployment/__init__.py +20 -0
- ads/model/deployment/common/__init__.py +5 -0
- ads/model/deployment/common/utils.py +308 -0
- ads/model/deployment/model_deployer.py +466 -0
- ads/model/deployment/model_deployment.py +1846 -0
- ads/model/deployment/model_deployment_infrastructure.py +671 -0
- ads/model/deployment/model_deployment_properties.py +493 -0
- ads/model/deployment/model_deployment_runtime.py +838 -0
- ads/model/extractor/__init__.py +5 -0
- ads/model/extractor/automl_extractor.py +74 -0
- ads/model/extractor/embedding_onnx_extractor.py +80 -0
- ads/model/extractor/huggingface_extractor.py +88 -0
- ads/model/extractor/keras_extractor.py +84 -0
- ads/model/extractor/lightgbm_extractor.py +93 -0
- ads/model/extractor/model_info_extractor.py +114 -0
- ads/model/extractor/model_info_extractor_factory.py +105 -0
- ads/model/extractor/pytorch_extractor.py +87 -0
- ads/model/extractor/sklearn_extractor.py +112 -0
- ads/model/extractor/spark_extractor.py +89 -0
- ads/model/extractor/tensorflow_extractor.py +85 -0
- ads/model/extractor/xgboost_extractor.py +94 -0
- ads/model/framework/__init__.py +5 -0
- ads/model/framework/automl_model.py +178 -0
- ads/model/framework/embedding_onnx_model.py +438 -0
- ads/model/framework/huggingface_model.py +399 -0
- ads/model/framework/lightgbm_model.py +266 -0
- ads/model/framework/pytorch_model.py +266 -0
- ads/model/framework/sklearn_model.py +250 -0
- ads/model/framework/spark_model.py +326 -0
- ads/model/framework/tensorflow_model.py +254 -0
- ads/model/framework/xgboost_model.py +258 -0
- ads/model/generic_model.py +3518 -0
- ads/model/model_artifact_boilerplate/README.md +381 -0
- ads/model/model_artifact_boilerplate/__init__.py +5 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +5 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +427 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +2 -0
- ads/model/model_artifact_boilerplate/runtime.yaml +7 -0
- ads/model/model_artifact_boilerplate/score.py +61 -0
- ads/model/model_file_description_schema.json +68 -0
- ads/model/model_introspect.py +331 -0
- ads/model/model_metadata.py +1810 -0
- ads/model/model_metadata_mixin.py +460 -0
- ads/model/model_properties.py +63 -0
- ads/model/model_version_set.py +739 -0
- ads/model/runtime/__init__.py +5 -0
- ads/model/runtime/env_info.py +306 -0
- ads/model/runtime/model_deployment_details.py +37 -0
- ads/model/runtime/model_provenance_details.py +58 -0
- ads/model/runtime/runtime_info.py +81 -0
- ads/model/runtime/schemas/inference_env_info_schema.yaml +16 -0
- ads/model/runtime/schemas/model_provenance_schema.yaml +36 -0
- ads/model/runtime/schemas/training_env_info_schema.yaml +16 -0
- ads/model/runtime/utils.py +201 -0
- ads/model/serde/__init__.py +5 -0
- ads/model/serde/common.py +40 -0
- ads/model/serde/model_input.py +547 -0
- ads/model/serde/model_serializer.py +1184 -0
- ads/model/service/__init__.py +5 -0
- ads/model/service/oci_datascience_model.py +1076 -0
- ads/model/service/oci_datascience_model_deployment.py +500 -0
- ads/model/service/oci_datascience_model_version_set.py +176 -0
- ads/model/transformer/__init__.py +5 -0
- ads/model/transformer/onnx_transformer.py +324 -0
- ads/mysqldb/__init__.py +5 -0
- ads/mysqldb/mysql_db.py +227 -0
- ads/opctl/__init__.py +18 -0
- ads/opctl/anomaly_detection.py +11 -0
- ads/opctl/backend/__init__.py +5 -0
- ads/opctl/backend/ads_dataflow.py +353 -0
- ads/opctl/backend/ads_ml_job.py +710 -0
- ads/opctl/backend/ads_ml_pipeline.py +164 -0
- ads/opctl/backend/ads_model_deployment.py +209 -0
- ads/opctl/backend/base.py +146 -0
- ads/opctl/backend/local.py +1053 -0
- ads/opctl/backend/marketplace/__init__.py +9 -0
- ads/opctl/backend/marketplace/helm_helper.py +173 -0
- ads/opctl/backend/marketplace/local_marketplace.py +271 -0
- ads/opctl/backend/marketplace/marketplace_backend_runner.py +71 -0
- ads/opctl/backend/marketplace/marketplace_operator_interface.py +44 -0
- ads/opctl/backend/marketplace/marketplace_operator_runner.py +24 -0
- ads/opctl/backend/marketplace/marketplace_utils.py +212 -0
- ads/opctl/backend/marketplace/models/__init__.py +5 -0
- ads/opctl/backend/marketplace/models/bearer_token.py +94 -0
- ads/opctl/backend/marketplace/models/marketplace_type.py +70 -0
- ads/opctl/backend/marketplace/models/ocir_details.py +56 -0
- ads/opctl/backend/marketplace/prerequisite_checker.py +238 -0
- ads/opctl/cli.py +707 -0
- ads/opctl/cmds.py +869 -0
- ads/opctl/conda/__init__.py +5 -0
- ads/opctl/conda/cli.py +193 -0
- ads/opctl/conda/cmds.py +749 -0
- ads/opctl/conda/config.yaml +34 -0
- ads/opctl/conda/manifest_template.yaml +13 -0
- ads/opctl/conda/multipart_uploader.py +188 -0
- ads/opctl/conda/pack.py +89 -0
- ads/opctl/config/__init__.py +5 -0
- ads/opctl/config/base.py +57 -0
- ads/opctl/config/diagnostics/__init__.py +5 -0
- ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +62 -0
- ads/opctl/config/merger.py +255 -0
- ads/opctl/config/resolver.py +297 -0
- ads/opctl/config/utils.py +79 -0
- ads/opctl/config/validator.py +17 -0
- ads/opctl/config/versioner.py +68 -0
- ads/opctl/config/yaml_parsers/__init__.py +7 -0
- ads/opctl/config/yaml_parsers/base.py +58 -0
- ads/opctl/config/yaml_parsers/distributed/__init__.py +7 -0
- ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +201 -0
- ads/opctl/constants.py +66 -0
- ads/opctl/decorator/__init__.py +5 -0
- ads/opctl/decorator/common.py +129 -0
- ads/opctl/diagnostics/__init__.py +5 -0
- ads/opctl/diagnostics/__main__.py +25 -0
- ads/opctl/diagnostics/check_distributed_job_requirements.py +212 -0
- ads/opctl/diagnostics/check_requirements.py +144 -0
- ads/opctl/diagnostics/requirement_exception.py +9 -0
- ads/opctl/distributed/README.md +109 -0
- ads/opctl/distributed/__init__.py +5 -0
- ads/opctl/distributed/certificates.py +32 -0
- ads/opctl/distributed/cli.py +207 -0
- ads/opctl/distributed/cmds.py +731 -0
- ads/opctl/distributed/common/__init__.py +5 -0
- ads/opctl/distributed/common/abstract_cluster_provider.py +449 -0
- ads/opctl/distributed/common/abstract_framework_spec_builder.py +88 -0
- ads/opctl/distributed/common/cluster_config_helper.py +103 -0
- ads/opctl/distributed/common/cluster_provider_factory.py +21 -0
- ads/opctl/distributed/common/cluster_runner.py +54 -0
- ads/opctl/distributed/common/framework_factory.py +29 -0
- ads/opctl/docker/Dockerfile.job +103 -0
- ads/opctl/docker/Dockerfile.job.arm +107 -0
- ads/opctl/docker/Dockerfile.job.gpu +175 -0
- ads/opctl/docker/base-env.yaml +13 -0
- ads/opctl/docker/cuda.repo +6 -0
- ads/opctl/docker/operator/.dockerignore +0 -0
- ads/opctl/docker/operator/Dockerfile +41 -0
- ads/opctl/docker/operator/Dockerfile.gpu +85 -0
- ads/opctl/docker/operator/cuda.repo +6 -0
- ads/opctl/docker/operator/environment.yaml +8 -0
- ads/opctl/forecast.py +11 -0
- ads/opctl/index.yaml +3 -0
- ads/opctl/model/__init__.py +5 -0
- ads/opctl/model/cli.py +65 -0
- ads/opctl/model/cmds.py +73 -0
- ads/opctl/operator/README.md +4 -0
- ads/opctl/operator/__init__.py +31 -0
- ads/opctl/operator/cli.py +344 -0
- ads/opctl/operator/cmd.py +596 -0
- ads/opctl/operator/common/__init__.py +5 -0
- ads/opctl/operator/common/backend_factory.py +460 -0
- ads/opctl/operator/common/const.py +27 -0
- ads/opctl/operator/common/data/synthetic.csv +16001 -0
- ads/opctl/operator/common/dictionary_merger.py +148 -0
- ads/opctl/operator/common/errors.py +42 -0
- ads/opctl/operator/common/operator_config.py +99 -0
- ads/opctl/operator/common/operator_loader.py +811 -0
- ads/opctl/operator/common/operator_schema.yaml +130 -0
- ads/opctl/operator/common/operator_yaml_generator.py +152 -0
- ads/opctl/operator/common/utils.py +208 -0
- ads/opctl/operator/lowcode/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/MLoperator +16 -0
- ads/opctl/operator/lowcode/anomaly/README.md +207 -0
- ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/__main__.py +103 -0
- ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
- ads/opctl/operator/lowcode/anomaly/const.py +167 -0
- ads/opctl/operator/lowcode/anomaly/environment.yaml +10 -0
- ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +146 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +162 -0
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +99 -0
- ads/opctl/operator/lowcode/anomaly/model/autots.py +115 -0
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +404 -0
- ads/opctl/operator/lowcode/anomaly/model/factory.py +110 -0
- ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +78 -0
- ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +78 -0
- ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +120 -0
- ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
- ads/opctl/operator/lowcode/anomaly/operator_config.py +127 -0
- ads/opctl/operator/lowcode/anomaly/schema.yaml +401 -0
- ads/opctl/operator/lowcode/anomaly/utils.py +88 -0
- ads/opctl/operator/lowcode/common/__init__.py +5 -0
- ads/opctl/operator/lowcode/common/const.py +10 -0
- ads/opctl/operator/lowcode/common/data.py +116 -0
- ads/opctl/operator/lowcode/common/errors.py +47 -0
- ads/opctl/operator/lowcode/common/transformations.py +296 -0
- ads/opctl/operator/lowcode/common/utils.py +384 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +13 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/README.md +30 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +5 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +116 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +85 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/const.py +15 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +4 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +32 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +43 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +120 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +34 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +386 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +160 -0
- ads/opctl/operator/lowcode/forecast/MLoperator +25 -0
- ads/opctl/operator/lowcode/forecast/README.md +209 -0
- ads/opctl/operator/lowcode/forecast/__init__.py +5 -0
- ads/opctl/operator/lowcode/forecast/__main__.py +89 -0
- ads/opctl/operator/lowcode/forecast/cmd.py +40 -0
- ads/opctl/operator/lowcode/forecast/const.py +92 -0
- ads/opctl/operator/lowcode/forecast/environment.yaml +20 -0
- ads/opctl/operator/lowcode/forecast/errors.py +26 -0
- ads/opctl/operator/lowcode/forecast/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/forecast/model/arima.py +279 -0
- ads/opctl/operator/lowcode/forecast/model/automlx.py +553 -0
- ads/opctl/operator/lowcode/forecast/model/autots.py +312 -0
- ads/opctl/operator/lowcode/forecast/model/base_model.py +875 -0
- ads/opctl/operator/lowcode/forecast/model/factory.py +106 -0
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +492 -0
- ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +243 -0
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +482 -0
- ads/opctl/operator/lowcode/forecast/model/prophet.py +450 -0
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +244 -0
- ads/opctl/operator/lowcode/forecast/operator_config.py +234 -0
- ads/opctl/operator/lowcode/forecast/schema.yaml +506 -0
- ads/opctl/operator/lowcode/forecast/utils.py +397 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +7 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +285 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/score.py +246 -0
- ads/opctl/operator/lowcode/pii/MLoperator +17 -0
- ads/opctl/operator/lowcode/pii/README.md +208 -0
- ads/opctl/operator/lowcode/pii/__init__.py +5 -0
- ads/opctl/operator/lowcode/pii/__main__.py +78 -0
- ads/opctl/operator/lowcode/pii/cmd.py +39 -0
- ads/opctl/operator/lowcode/pii/constant.py +84 -0
- ads/opctl/operator/lowcode/pii/environment.yaml +17 -0
- ads/opctl/operator/lowcode/pii/errors.py +27 -0
- ads/opctl/operator/lowcode/pii/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/pii/model/factory.py +82 -0
- ads/opctl/operator/lowcode/pii/model/guardrails.py +167 -0
- ads/opctl/operator/lowcode/pii/model/pii.py +145 -0
- ads/opctl/operator/lowcode/pii/model/processor/__init__.py +34 -0
- ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +34 -0
- ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +35 -0
- ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +225 -0
- ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +73 -0
- ads/opctl/operator/lowcode/pii/model/processor/remover.py +26 -0
- ads/opctl/operator/lowcode/pii/model/report.py +487 -0
- ads/opctl/operator/lowcode/pii/operator_config.py +95 -0
- ads/opctl/operator/lowcode/pii/schema.yaml +108 -0
- ads/opctl/operator/lowcode/pii/utils.py +43 -0
- ads/opctl/operator/lowcode/recommender/MLoperator +16 -0
- ads/opctl/operator/lowcode/recommender/README.md +206 -0
- ads/opctl/operator/lowcode/recommender/__init__.py +5 -0
- ads/opctl/operator/lowcode/recommender/__main__.py +82 -0
- ads/opctl/operator/lowcode/recommender/cmd.py +33 -0
- ads/opctl/operator/lowcode/recommender/constant.py +30 -0
- ads/opctl/operator/lowcode/recommender/environment.yaml +11 -0
- ads/opctl/operator/lowcode/recommender/model/base_model.py +212 -0
- ads/opctl/operator/lowcode/recommender/model/factory.py +56 -0
- ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +25 -0
- ads/opctl/operator/lowcode/recommender/model/svd.py +106 -0
- ads/opctl/operator/lowcode/recommender/operator_config.py +81 -0
- ads/opctl/operator/lowcode/recommender/schema.yaml +265 -0
- ads/opctl/operator/lowcode/recommender/utils.py +13 -0
- ads/opctl/operator/runtime/__init__.py +5 -0
- ads/opctl/operator/runtime/const.py +17 -0
- ads/opctl/operator/runtime/container_runtime_schema.yaml +50 -0
- ads/opctl/operator/runtime/marketplace_runtime.py +50 -0
- ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +21 -0
- ads/opctl/operator/runtime/python_runtime_schema.yaml +21 -0
- ads/opctl/operator/runtime/runtime.py +115 -0
- ads/opctl/schema.yaml.yml +36 -0
- ads/opctl/script.py +40 -0
- ads/opctl/spark/__init__.py +5 -0
- ads/opctl/spark/cli.py +43 -0
- ads/opctl/spark/cmds.py +147 -0
- ads/opctl/templates/diagnostic_report_template.jinja2 +102 -0
- ads/opctl/utils.py +344 -0
- ads/oracledb/__init__.py +5 -0
- ads/oracledb/oracle_db.py +346 -0
- ads/pipeline/__init__.py +39 -0
- ads/pipeline/ads_pipeline.py +2279 -0
- ads/pipeline/ads_pipeline_run.py +772 -0
- ads/pipeline/ads_pipeline_step.py +605 -0
- ads/pipeline/builders/__init__.py +5 -0
- ads/pipeline/builders/infrastructure/__init__.py +5 -0
- ads/pipeline/builders/infrastructure/custom_script.py +32 -0
- ads/pipeline/cli.py +119 -0
- ads/pipeline/extension.py +291 -0
- ads/pipeline/schema/__init__.py +5 -0
- ads/pipeline/schema/cs_step_schema.json +35 -0
- ads/pipeline/schema/ml_step_schema.json +31 -0
- ads/pipeline/schema/pipeline_schema.json +71 -0
- ads/pipeline/visualizer/__init__.py +5 -0
- ads/pipeline/visualizer/base.py +570 -0
- ads/pipeline/visualizer/graph_renderer.py +272 -0
- ads/pipeline/visualizer/text_renderer.py +84 -0
- ads/secrets/__init__.py +11 -0
- ads/secrets/adb.py +386 -0
- ads/secrets/auth_token.py +86 -0
- ads/secrets/big_data_service.py +365 -0
- ads/secrets/mysqldb.py +149 -0
- ads/secrets/oracledb.py +160 -0
- ads/secrets/secrets.py +407 -0
- ads/telemetry/__init__.py +7 -0
- ads/telemetry/base.py +69 -0
- ads/telemetry/client.py +122 -0
- ads/telemetry/telemetry.py +257 -0
- ads/templates/dataflow_pyspark.jinja2 +13 -0
- ads/templates/dataflow_sparksql.jinja2 +22 -0
- ads/templates/func.jinja2 +20 -0
- ads/templates/schemas/openapi.json +1740 -0
- ads/templates/score-pkl.jinja2 +173 -0
- ads/templates/score.jinja2 +322 -0
- ads/templates/score_embedding_onnx.jinja2 +202 -0
- ads/templates/score_generic.jinja2 +165 -0
- ads/templates/score_huggingface_pipeline.jinja2 +217 -0
- ads/templates/score_lightgbm.jinja2 +185 -0
- ads/templates/score_onnx.jinja2 +407 -0
- ads/templates/score_onnx_new.jinja2 +473 -0
- ads/templates/score_oracle_automl.jinja2 +185 -0
- ads/templates/score_pyspark.jinja2 +154 -0
- ads/templates/score_pytorch.jinja2 +219 -0
- ads/templates/score_scikit-learn.jinja2 +184 -0
- ads/templates/score_tensorflow.jinja2 +184 -0
- ads/templates/score_xgboost.jinja2 +178 -0
- ads/text_dataset/__init__.py +5 -0
- ads/text_dataset/backends.py +211 -0
- ads/text_dataset/dataset.py +445 -0
- ads/text_dataset/extractor.py +207 -0
- ads/text_dataset/options.py +53 -0
- ads/text_dataset/udfs.py +22 -0
- ads/text_dataset/utils.py +49 -0
- ads/type_discovery/__init__.py +9 -0
- ads/type_discovery/abstract_detector.py +21 -0
- ads/type_discovery/constant_detector.py +41 -0
- ads/type_discovery/continuous_detector.py +54 -0
- ads/type_discovery/credit_card_detector.py +99 -0
- ads/type_discovery/datetime_detector.py +92 -0
- ads/type_discovery/discrete_detector.py +118 -0
- ads/type_discovery/document_detector.py +146 -0
- ads/type_discovery/ip_detector.py +68 -0
- ads/type_discovery/latlon_detector.py +90 -0
- ads/type_discovery/phone_number_detector.py +63 -0
- ads/type_discovery/type_discovery_driver.py +87 -0
- ads/type_discovery/typed_feature.py +594 -0
- ads/type_discovery/unknown_detector.py +41 -0
- ads/type_discovery/zipcode_detector.py +48 -0
- ads/vault/__init__.py +7 -0
- ads/vault/vault.py +237 -0
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/METADATA +150 -149
- oracle_ads-2.13.10.dist-info/RECORD +858 -0
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/WHEEL +1 -2
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/entry_points.txt +2 -1
- oracle_ads-2.13.9rc0.dist-info/RECORD +0 -9
- oracle_ads-2.13.9rc0.dist-info/top_level.txt +0 -1
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,1230 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8; -*-
|
3
|
+
import logging
|
4
|
+
from copy import deepcopy
|
5
|
+
from datetime import datetime
|
6
|
+
from typing import Dict, List, Union
|
7
|
+
|
8
|
+
import pandas
|
9
|
+
import pandas as pd
|
10
|
+
from great_expectations.core import ExpectationSuite
|
11
|
+
|
12
|
+
from ads import deprecated
|
13
|
+
from feature_store_client.feature_store.models import (
|
14
|
+
DatasetFeatureGroupCollection,
|
15
|
+
DatasetFeatureGroupSummary,
|
16
|
+
)
|
17
|
+
|
18
|
+
from ads.common import utils
|
19
|
+
from ads.common.oci_mixin import OCIModelMixin
|
20
|
+
from ads.feature_store.common.enums import (
|
21
|
+
ExecutionEngine,
|
22
|
+
ExpectationType,
|
23
|
+
EntityType,
|
24
|
+
BatchIngestionMode,
|
25
|
+
)
|
26
|
+
from ads.feature_store.common.exceptions import NotMaterializedError
|
27
|
+
from ads.feature_store.common.utils.utility import (
|
28
|
+
get_metastore_id,
|
29
|
+
validate_delta_format_parameters,
|
30
|
+
convert_expectation_suite_to_expectation,
|
31
|
+
)
|
32
|
+
from ads.feature_store.dataset_job import DatasetJob
|
33
|
+
from ads.feature_store.execution_strategy.engine.spark_engine import SparkEngine
|
34
|
+
from ads.feature_store.execution_strategy.execution_strategy_provider import (
|
35
|
+
OciExecutionStrategyProvider,
|
36
|
+
)
|
37
|
+
from ads.feature_store.feature import DatasetFeature
|
38
|
+
from ads.feature_store.feature_group import FeatureGroup
|
39
|
+
from ads.feature_store.feature_group_expectation import Expectation
|
40
|
+
from ads.feature_store.feature_option_details import FeatureOptionDetails
|
41
|
+
from ads.feature_store.service.oci_dataset import OCIDataset
|
42
|
+
from ads.feature_store.statistics.statistics import Statistics
|
43
|
+
from ads.feature_store.statistics_config import StatisticsConfig
|
44
|
+
from ads.feature_store.service.oci_lineage import OCILineage
|
45
|
+
from ads.feature_store.model_details import ModelDetails
|
46
|
+
from ads.jobs.builders.base import Builder
|
47
|
+
from ads.feature_store.feature_lineage.graphviz_service import (
|
48
|
+
GraphService,
|
49
|
+
GraphOrientation,
|
50
|
+
)
|
51
|
+
from ads.feature_store.validation_output import ValidationOutput
|
52
|
+
|
53
|
+
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
|
54
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
55
|
+
|
56
|
+
logger = logging.getLogger(__name__)
|
57
|
+
|
58
|
+
|
59
|
+
class Dataset(Builder):
|
60
|
+
""" Represents a Dataset Resource.
|
61
|
+
|
62
|
+
Methods
|
63
|
+
-------
|
64
|
+
create(self, **kwargs) -> "Dataset"
|
65
|
+
Creates dataset resource.
|
66
|
+
delete(self) -> "Dataset":
|
67
|
+
Removes dataset resource.
|
68
|
+
to_dict(self) -> dict
|
69
|
+
Serializes dataset to a dictionary.
|
70
|
+
from_id(cls, id: str) -> "Dataset"
|
71
|
+
Gets an existing dataset resource by id.
|
72
|
+
list(cls, compartment_id: str = None, **kwargs) -> List["Dataset"]
|
73
|
+
Lists dataset resources in a given compartment.
|
74
|
+
list_df(cls, compartment_id: str = None, **kwargs) -> "pandas.DataFrame"
|
75
|
+
Lists datasets resources as a pandas dataframe.
|
76
|
+
with_description(self, description: str) -> "Dataset"
|
77
|
+
Sets the description.
|
78
|
+
with_compartment_id(self, compartment_id: str) -> "Dataset"
|
79
|
+
Sets the compartment ID.
|
80
|
+
with_feature_store_id(self, feature_store_id: str) -> "Dataset"
|
81
|
+
Sets the feature store ID.
|
82
|
+
with_entity_id(self, entity_id: str) -> "Dataset"
|
83
|
+
Sets the entity ID.
|
84
|
+
with_query(self, query: str) -> "Dataset"
|
85
|
+
Sets the SQL query.
|
86
|
+
with_dataset_ingestion_mode(self, dataset_ingestion_mode: str) -> "Dataset"
|
87
|
+
Sets the ingestion mode for dataset.
|
88
|
+
with_statistics_config(self, statistics_config: Union[StatisticsConfig, bool]) -> "Dataset"
|
89
|
+
Sets the statistics config details
|
90
|
+
Examples
|
91
|
+
--------
|
92
|
+
>>> from ads.feature_store import dataset
|
93
|
+
>>> import oci
|
94
|
+
>>> import os
|
95
|
+
>>> dataset = dataset.Dataset()
|
96
|
+
>>> .with_description("dataset description")
|
97
|
+
>>> .with_compartment_id(os.environ["PROJECT_COMPARTMENT_OCID"])
|
98
|
+
>>> .with_name("Dataset")
|
99
|
+
>>> .with_entity_id("<entity id>") \
|
100
|
+
>>> .with_feature_store_id("<feature_store_id>") \
|
101
|
+
>>> .with_query('SELECT feature_gr_1.name FROM feature_gr_1') \
|
102
|
+
>>> .with_statistics_config(StatisticsConfig(True,columns=["column1","column2"]))
|
103
|
+
>>> dataset.create()
|
104
|
+
"""
|
105
|
+
|
106
|
+
_PREFIX = "Dataset_resource"
|
107
|
+
CONST_LINEAGE_CONSTRUCT_TYPE = "construct_type"
|
108
|
+
|
109
|
+
CONST_ID = "id"
|
110
|
+
CONST_COMPARTMENT_ID = "compartmentId"
|
111
|
+
CONST_NAME = "name"
|
112
|
+
CONST_QUERY = "query"
|
113
|
+
CONST_ENTITY_ID = "entityId"
|
114
|
+
CONST_FEATURE_STORE_ID = "featureStoreId"
|
115
|
+
CONST_DESCRIPTION = "description"
|
116
|
+
CONST_FREEFORM_TAG = "freeformTags"
|
117
|
+
CONST_DEFINED_TAG = "definedTags"
|
118
|
+
CONST_PARTITION_KEYS = "partitionKeys"
|
119
|
+
CONST_OUTPUT_FEATURE_DETAILS = "outputFeatureDetails"
|
120
|
+
CONST_EXPECTATION_DETAILS = "expectationDetails"
|
121
|
+
CONST_STATISTICS_CONFIG = "statisticsConfig"
|
122
|
+
CONST_LIFECYCLE_STATE = "lifecycleState"
|
123
|
+
CONST_ITEMS = "items"
|
124
|
+
CONST_LAST_JOB_ID = "jobId"
|
125
|
+
CONST_MODEL_DETAILS = "modelDetails"
|
126
|
+
CONST_FEATURE_GROUP = "datasetFeatureGroups"
|
127
|
+
|
128
|
+
attribute_map = {
|
129
|
+
CONST_ID: "id",
|
130
|
+
CONST_COMPARTMENT_ID: "compartment_id",
|
131
|
+
CONST_NAME: "name",
|
132
|
+
CONST_FEATURE_STORE_ID: "feature_store_id",
|
133
|
+
CONST_ENTITY_ID: "entity_id",
|
134
|
+
CONST_QUERY: "query",
|
135
|
+
CONST_DESCRIPTION: "description",
|
136
|
+
CONST_FREEFORM_TAG: "freeform_tags",
|
137
|
+
CONST_DEFINED_TAG: "defined_tags",
|
138
|
+
CONST_EXPECTATION_DETAILS: "expectation_details",
|
139
|
+
CONST_STATISTICS_CONFIG: "statistics_config",
|
140
|
+
CONST_OUTPUT_FEATURE_DETAILS: "output_feature_details",
|
141
|
+
CONST_LIFECYCLE_STATE: "lifecycle_state",
|
142
|
+
CONST_MODEL_DETAILS: "model_details",
|
143
|
+
CONST_PARTITION_KEYS: "partition_keys",
|
144
|
+
CONST_FEATURE_GROUP: "dataset_feature_groups",
|
145
|
+
}
|
146
|
+
|
147
|
+
def __init__(self, spec: Dict = None, **kwargs) -> None:
    """Set up a Dataset from an optional spec and keyword overrides.

    Parameters
    ----------
    spec: (Dict, optional). Defaults to None.
        Object specification, handed to the parent builder.

    kwargs: Dict
        Specification as keyword arguments.
        If 'spec' contains the same key as the one in kwargs,
        the value from kwargs will be used.
        The same kwargs are also passed on to the backing `OCIDataset`
        and to the `OCILineage` helper.
    """
    # The parent builder receives its own copy of the keyword arguments.
    builder_kwargs = deepcopy(kwargs)
    super().__init__(spec=spec, **builder_kwargs)

    # Plain local state; nothing is fetched or created here.
    self.dataset_job = None
    self._is_manual_association: bool = False
    self._spark_engine = None

    # Service-side counterparts built from the spec plus the raw kwargs.
    self.oci_dataset = self._to_oci_dataset(**kwargs)
    self.lineage = OCILineage(**kwargs)
|
167
|
+
|
168
|
+
def _to_oci_dataset(self, **kwargs) -> OCIDataset:
    """Build an `OCIDataset` from the current spec.

    Every entry of `attribute_map` is read from the spec and stored under
    its mapped attribute name; `kwargs` are applied afterwards, so they
    override values taken from the spec.

    Parameters
    ----------
    kwargs
        Additional attributes passed to the `OCIDataset` constructor.

    Returns
    -------
    OCIDataset
        The instance of the OCIDataset.
    """
    oci_fields = {
        model_attr: self.get_spec(spec_key)
        for spec_key, model_attr in self.attribute_map.items()
    }
    oci_fields.update(**kwargs)

    return OCIDataset(**oci_fields)
|
189
|
+
|
190
|
+
@property
def spark_engine(self):
    """Return the Spark engine, creating and caching it on first access.

    The engine is built from the metastore id resolved for this dataset's
    feature store id and is kept in `_spark_engine` for later calls.
    """
    engine = self._spark_engine
    if not engine:
        engine = SparkEngine(get_metastore_id(self.feature_store_id))
        self._spark_engine = engine
    return engine
|
195
|
+
|
196
|
+
@property
def is_manual_association(self):
    """Tell whether feature groups were associated with the dataset manually.

    The flag stored on the feature-group collection in the spec wins when
    it is set; otherwise the locally tracked `_is_manual_association`
    value is returned.
    """
    fg_collection: DatasetFeatureGroupCollection = self.get_spec(
        self.CONST_FEATURE_GROUP
    )
    # No collection, or a collection without an explicit flag: use local state.
    if not fg_collection or fg_collection.is_manual_association is None:
        return self._is_manual_association
    return fg_collection.is_manual_association
|
205
|
+
|
206
|
+
@property
def kind(self) -> str:
    """Resource kind used to label this object when it is shown as YAML."""
    resource_kind = "Dataset"
    return resource_kind
|
210
|
+
|
211
|
+
@property
def compartment_id(self) -> str:
    """The compartment id stored in the spec."""
    spec_key = self.CONST_COMPARTMENT_ID
    return self.get_spec(spec_key)

@compartment_id.setter
def compartment_id(self, value: str):
    """Assign the compartment id through `with_compartment_id`."""
    self.with_compartment_id(value)
|
218
|
+
|
219
|
+
def with_compartment_id(self, compartment_id: str) -> "Dataset":
    """Store the compartment id in the spec.

    Parameters
    ----------
    compartment_id: str
        The compartment id to record.

    Returns
    -------
    Dataset
        The Dataset instance (self)
    """
    spec_key = self.CONST_COMPARTMENT_ID
    return self.set_spec(spec_key, compartment_id)
|
233
|
+
|
234
|
+
@property
def name(self) -> str:
    """The dataset name stored in the spec."""
    spec_key = self.CONST_NAME
    return self.get_spec(spec_key)

@name.setter
def name(self, name: str):
    """Assign the dataset name through `with_name`."""
    self.with_name(name)
|
241
|
+
|
242
|
+
def with_name(self, name: str) -> "Dataset":
    """Store the dataset name in the spec.

    Parameters
    ----------
    name: str
        The name of dataset.

    Returns
    -------
    Dataset
        The Dataset instance (self)
    """
    spec_key = self.CONST_NAME
    return self.set_spec(spec_key, name)
|
256
|
+
|
257
|
+
@property
def id(self) -> str:
    """Identifier of the dataset as stored in the spec.

    Returns
    -------
    str
        The id of the dataset.
    """
    spec_key = self.CONST_ID
    return self.get_spec(spec_key)
|
267
|
+
|
268
|
+
@property
def features(self) -> List["DatasetFeature"]:
    """Output features of the dataset.

    Each entry under the `items` key of the output feature details in the
    spec is turned into a `DatasetFeature`.

    Returns
    -------
    List[DatasetFeature]
        The output features. An empty list is returned when no output
        feature details have been set yet, or when their item list is empty.
    """
    feature_details = self.get_spec(self.CONST_OUTPUT_FEATURE_DETAILS)
    # Nothing recorded yet: subscripting None would raise a TypeError.
    if not feature_details:
        return []

    feature_dicts = feature_details[self.CONST_ITEMS] or []
    return [DatasetFeature(**feature_dict) for feature_dict in feature_dicts]
|
277
|
+
|
278
|
+
def with_id(self, id: str) -> "Dataset":
    """Store the dataset id in the spec.

    Parameters
    ----------
    id: str
        The dataset id.

    Returns
    -------
    Dataset
        The Dataset instance (self)
    """
    spec_key = self.CONST_ID
    return self.set_spec(spec_key, id)
|
280
|
+
|
281
|
+
def with_job_id(self, dataset_job_id: str) -> "Dataset":
    """Record the id of the last running job in the spec.

    Parameters
    ----------
    dataset_job_id: str
        Dataset job id.

    Returns
    -------
    Dataset
        The Dataset instance (self)
    """
    spec_key = self.CONST_LAST_JOB_ID
    return self.set_spec(spec_key, dataset_job_id)
|
294
|
+
|
295
|
+
@property
def job_id(self) -> str:
    """Id of the last job recorded for this dataset in the spec."""
    spec_key = self.CONST_LAST_JOB_ID
    return self.get_spec(spec_key)
|
298
|
+
|
299
|
+
@property
def query(self) -> str:
    """The dataset query stored in the spec."""
    spec_key = self.CONST_QUERY
    return self.get_spec(spec_key)

@query.setter
def query(self, query: str):
    """Assign the dataset query through `with_query`."""
    self.with_query(query)
|
306
|
+
|
307
|
+
def with_query(self, query: str) -> "Dataset":
    """Store the dataset query in the spec.

    Parameters
    ----------
    query: str
        SQL Query that will be used to join the FeatureGroups to form the dataset.

    Returns
    -------
    Dataset
        The Dataset instance (self)
    """
    spec_key = self.CONST_QUERY
    return self.set_spec(spec_key, query)
|
321
|
+
|
322
|
+
def _with_lifecycle_state(self, lifecycle_state: str) -> "Dataset":
|
323
|
+
"""Sets the lifecycle_state.
|
324
|
+
|
325
|
+
Parameters
|
326
|
+
----------
|
327
|
+
lifecycle_state: str
|
328
|
+
The lifecycle_state.
|
329
|
+
|
330
|
+
Returns
|
331
|
+
-------
|
332
|
+
Dataset
|
333
|
+
The Dataset instance (self)
|
334
|
+
"""
|
335
|
+
return self.set_spec(self.CONST_LIFECYCLE_STATE, lifecycle_state)
|
336
|
+
|
337
|
+
def _with_features(self, features: List[DatasetFeature]):
    """Store the output features in the spec.

    Each feature is serialized with `to_dict()` and the resulting list is
    saved under the `items` key of the output feature details.

    Parameters
    ----------
    features: List[DatasetFeature]
        The features for the Dataset.

    Returns
    -------
    Dataset
        The Dataset instance (self)
    """
    serialized_features = []
    for feature in features:
        serialized_features.append(feature.to_dict())

    feature_details = {self.CONST_ITEMS: serialized_features}
    return self.set_spec(self.CONST_OUTPUT_FEATURE_DETAILS, feature_details)
|
353
|
+
|
354
|
+
@property
def description(self) -> str:
    """The dataset description stored in the spec."""
    spec_key = self.CONST_DESCRIPTION
    return self.get_spec(spec_key)

@description.setter
def description(self, value: str):
    """Assign the description through `with_description`."""
    self.with_description(value)
|
361
|
+
|
362
|
+
def with_description(self, description: str) -> "Dataset":
    """Store the description in the spec.

    Parameters
    ----------
    description: str
        The description of the dataset.

    Returns
    -------
    Dataset
        The Dataset instance (self)
    """
    spec_key = self.CONST_DESCRIPTION
    return self.set_spec(spec_key, description)
|
376
|
+
|
377
|
+
@property
def feature_store_id(self) -> str:
    """The feature store id stored in the spec."""
    spec_key = self.CONST_FEATURE_STORE_ID
    return self.get_spec(spec_key)

@feature_store_id.setter
def feature_store_id(self, value: str):
    """Assign the feature store id through `with_feature_store_id`."""
    self.with_feature_store_id(value)
|
384
|
+
|
385
|
+
def with_feature_store_id(self, feature_store_id: str) -> "Dataset":
    """Store the feature store id in the spec.

    Parameters
    ----------
    feature_store_id: str
        The feature_store_id.

    Returns
    -------
    Dataset
        The Dataset instance (self)
    """
    spec_key = self.CONST_FEATURE_STORE_ID
    return self.set_spec(spec_key, feature_store_id)
|
399
|
+
|
400
|
+
@property
def expectation_details(self) -> "Expectation":
    """The expectation details of the dataset.

    Returns
    -------
    Expectation
        Whatever is stored under the expectation details key of the spec.
        NOTE(review): `with_expectation_suite` stores the `to_dict()` form,
        so this may be a dictionary rather than an `Expectation` - confirm.
    """
    spec_key = self.CONST_EXPECTATION_DETAILS
    return self.get_spec(spec_key)
|
410
|
+
|
411
|
+
def with_expectation_suite(
    self, expectation_suite: ExpectationSuite, expectation_type: ExpectationType
) -> "Dataset":
    """Store the expectation details for the dataset.

    The suite is converted with `convert_expectation_suite_to_expectation`
    and the dictionary form of the result is saved in the spec.

    Parameters
    ----------
    expectation_suite: ExpectationSuite
        The expectation suite holding the rules.
    expectation_type: ExpectationType
        Type of the expectation.

    Returns
    -------
    Dataset
        The Dataset instance (self).
    """
    expectation = convert_expectation_suite_to_expectation(
        expectation_suite, expectation_type
    )
    expectation_dict = expectation.to_dict()
    return self.set_spec(self.CONST_EXPECTATION_DETAILS, expectation_dict)
|
434
|
+
|
435
|
+
@property
def entity_id(self) -> str:
    """The entity id stored in the spec."""
    spec_key = self.CONST_ENTITY_ID
    return self.get_spec(spec_key)

@entity_id.setter
def entity_id(self, value: str):
    """Assign the entity id through `with_entity_id`."""
    self.with_entity_id(value)
|
442
|
+
|
443
|
+
@classmethod
def from_id(cls, id: str) -> "Dataset":
    """Load an existing dataset resource by its id.

    A fresh instance is created and then populated from the `OCIDataset`
    fetched for the given id.

    Parameters
    ----------
    id: str
        The dataset id.

    Returns
    -------
    Dataset
        An instance of Dataset resource.
    """
    dataset = cls()
    oci_model = OCIDataset.from_id(id)
    return dataset._update_from_oci_dataset_model(oci_model)
|
458
|
+
|
459
|
+
def with_entity_id(self, entity_id: str) -> "Dataset":
    """Store the entity id in the spec.

    Parameters
    ----------
    entity_id: str
        The entity_id.

    Returns
    -------
    Dataset
        The Dataset instance (self)
    """
    spec_key = self.CONST_ENTITY_ID
    return self.set_spec(spec_key, entity_id)
|
473
|
+
|
474
|
+
@property
def statistics_config(self) -> "StatisticsConfig":
    """The statistics configuration stored in the spec.

    NOTE(review): `with_statistics_config` stores the `to_dict()` form, so
    the value read back may be a dictionary - confirm against callers.
    """
    spec_key = self.CONST_STATISTICS_CONFIG
    return self.get_spec(spec_key)

@statistics_config.setter
def statistics_config(self, statistics_config: StatisticsConfig):
    """Assign the statistics configuration through `with_statistics_config`."""
    self.with_statistics_config(statistics_config)
|
481
|
+
|
482
|
+
def with_statistics_config(
    self, statistics_config: Union[StatisticsConfig, bool]
) -> "Dataset":
    """Store the statistics configuration for the dataset.

    Parameters
    ----------
    statistics_config: Union[StatisticsConfig, bool]
        Either a ready `StatisticsConfig`, or a boolean that is wrapped
        into a new `StatisticsConfig`.

    Returns
    -------
    Dataset
        The Dataset instance (self).

    Raises
    ------
    TypeError
        If `statistics_config` is neither a `StatisticsConfig` nor a `bool`.
    """
    # Reject unsupported argument types up front.
    if not isinstance(statistics_config, (StatisticsConfig, bool)):
        raise TypeError(
            "The argument `statistics_config` has to be of type `StatisticsConfig` or `bool`, "
            "but is of type: `{}`".format(type(statistics_config))
        )

    if isinstance(statistics_config, StatisticsConfig):
        resolved_config = statistics_config
    else:
        resolved_config = StatisticsConfig(statistics_config)

    return self.set_spec(self.CONST_STATISTICS_CONFIG, resolved_config.to_dict())
|
510
|
+
|
511
|
+
def target_delta_table(self):
    """Return the fully-qualified name of the target delta table.

    The name is the entity id and the dataset name joined by a dot,
    i.e. 'entity_id.table_name'.

    Returns
    -------
    str
        The fully-qualified name of the target delta table.
    """
    return "{}.{}".format(self.entity_id, self.name)
|
524
|
+
|
525
|
+
@property
def model_details(self) -> "ModelDetails":
    """The model details stored in the spec.

    NOTE(review): `with_model_details` stores the `to_dict()` form, so the
    value read back may be a dictionary - confirm against callers.
    """
    spec_key = self.CONST_MODEL_DETAILS
    return self.get_spec(spec_key)

@model_details.setter
def model_details(self, model_details: ModelDetails):
    """Assign the model details through `with_model_details`."""
    self.with_model_details(model_details)
|
532
|
+
|
533
|
+
def with_model_details(self, model_details: ModelDetails) -> "Dataset":
    """Store the model details for the dataset.

    Parameters
    ----------
    model_details: ModelDetails
        The model details; saved in the spec in their `to_dict()` form.

    Returns
    -------
    Dataset
        The Dataset instance (self).

    Raises
    ------
    TypeError
        If `model_details` is not a `ModelDetails` instance.
    """
    if not isinstance(model_details, ModelDetails):
        # The two literals are concatenated, so the separator must be part
        # of the first one (it was missing, gluing the sentences together).
        raise TypeError(
            "The argument `model_details` has to be of type `ModelDetails`, "
            "but is of type: `{}`".format(type(model_details))
        )

    return self.set_spec(self.CONST_MODEL_DETAILS, model_details.to_dict())
|
552
|
+
|
553
|
+
@property
def feature_groups(self) -> List["FeatureGroup"]:
    """Feature groups associated with the dataset.

    Every summary in the feature-group collection of the spec is resolved
    with `FeatureGroup.from_id`, so each access loads the groups again.

    Returns
    -------
    List[FeatureGroup]
        The resolved feature groups; empty when the spec has no collection
        or the collection has no items.
    """
    fg_collection: "DatasetFeatureGroupCollection" = self.get_spec(
        self.CONST_FEATURE_GROUP
    )
    if not (fg_collection and fg_collection.items):
        return []

    return [
        FeatureGroup.from_id(fg_summary.feature_group_id)
        for fg_summary in fg_collection.items
    ]

@feature_groups.setter
def feature_groups(self, feature_groups: List["FeatureGroup"]):
    """Assign the feature groups through `with_feature_groups`."""
    self.with_feature_groups(feature_groups)
|
570
|
+
|
571
|
+
def with_feature_groups(self, feature_groups: List["FeatureGroup"]) -> "Dataset":
    """Associate the given feature groups with the dataset.

    A summary holding the id of each feature group is collected into a
    `DatasetFeatureGroupCollection` flagged as a manual association, and
    the local manual-association flag is switched on as well.

    Parameters
    ----------
    feature_groups: List of feature groups

    Returns
    -------
    Dataset
        The Dataset instance (self).
    """
    summaries: List["DatasetFeatureGroupSummary"] = [
        DatasetFeatureGroupSummary(feature_group_id=feature_group.id)
        for feature_group in feature_groups
    ]

    self._is_manual_association = True
    fg_collection = DatasetFeatureGroupCollection(
        items=summaries, is_manual_association=True
    )
    return self.set_spec(self.CONST_FEATURE_GROUP, fg_collection)
|
592
|
+
|
593
|
+
def feature_groups_to_df(self):
    """Returns the associated feature groups as a pandas dataframe.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``self.feature_groups``, built from each
        feature group's ``oci_feature_group.to_df_record()``.
    """
    records = []
    for group in self.feature_groups:
        records.append(group.oci_feature_group.to_df_record())
    return pd.DataFrame.from_records(records)
|
600
|
+
|
601
|
+
@property
def partition_keys(self) -> List[str]:
    """Partition keys as stored in the spec under ``CONST_PARTITION_KEYS``.

    NOTE(review): ``with_partition_keys`` stores a ``{items: [{name: key}]}``
    mapping, so the value returned here may be that mapping rather than a
    flat list of strings; confirm against callers.
    """
    stored_keys = self.get_spec(self.CONST_PARTITION_KEYS)
    return stored_keys
|
604
|
+
|
605
|
+
@partition_keys.setter
def partition_keys(self, value: List[str]):
    """Property setter that delegates to ``with_partition_keys``.

    Parameters
    ----------
    value: List[str]
        The partition key names to store.
    """
    self.with_partition_keys(value)
|
608
|
+
|
609
|
+
def with_partition_keys(self, partition_keys: List[str]) -> "Dataset":
    """Sets the partition keys of the dataset.

    Parameters
    ----------
    partition_keys: List[str]
        The list of partition key names. ``None`` or an empty list stores
        an empty item list.

    Returns
    -------
    Dataset
        The Dataset instance (self).
    """
    key_entries = []
    # ``or []`` lets callers pass None to clear the keys.
    for key_name in partition_keys or []:
        key_entries.append({self.CONST_NAME: key_name})

    return self.set_spec(
        self.CONST_PARTITION_KEYS, {self.CONST_ITEMS: key_entries}
    )
|
631
|
+
|
632
|
+
def add_models(self, model_details: ModelDetails) -> "Dataset":
    """Appends model details to the dataset and pushes the change via ``update()``.

    Items already recorded on the dataset that are missing from
    ``model_details`` are appended to ``model_details.items`` (the argument
    is modified in place) before it is stored on the dataset.

    Parameters
    ----------
    model_details: ModelDetails
        Model details to merge with the existing ones.

    Returns
    -------
    Dataset
        The Dataset instance (self). If ``update()`` raises, the error is
        logged, the previously stored model details are restored, and self
        is still returned.
    """
    previous_details = self.model_details
    if previous_details and previous_details.items:
        for known_item in previous_details["items"]:
            if known_item not in model_details.items:
                model_details.items.append(known_item)
    self.with_model_details(model_details)

    try:
        return self.update()
    except Exception as ex:
        logger.error(
            f"Dataset update Failed with : {type(ex)} with error message: {ex}"
        )
        # Roll the in-memory spec back to what was stored before the attempt.
        rollback = ModelDetails()
        if previous_details:
            rollback = rollback.with_items(previous_details["items"])
        else:
            rollback = rollback.with_items([])
        self.with_model_details(rollback)
        return self
|
664
|
+
|
665
|
+
def remove_models(self, model_details: ModelDetails) -> "Dataset":
    """Removes model details from the dataset and pushes the change via ``update()``.

    Parameters
    ----------
    model_details: ModelDetails
        Model details whose items should be removed from the dataset.

    Returns
    -------
    Dataset
        The Dataset instance (self), as returned by ``update()``.

    Raises
    ------
    ValueError
        If the dataset has no model details, if ``model_details`` has no
        items, or if any requested item is not currently associated.
    """
    existing_model_details = self.model_details
    # A dataset without any model details must end in the ValueError below,
    # not in an AttributeError on None (same guard as ``add_models``).
    existing_items = (
        existing_model_details["items"]
        if existing_model_details and existing_model_details.items
        else None
    )
    requested_items = model_details.items

    if (
        existing_items
        and requested_items
        and all(item in existing_items for item in requested_items)
    ):
        to_remove = set(requested_items)
        # Keep the stored order (de-duplicated) instead of the arbitrary
        # order a set difference would produce.
        remaining = [
            item for item in dict.fromkeys(existing_items) if item not in to_remove
        ]
        self.with_model_details(ModelDetails().with_items(remaining))
        return self.update()

    raise ValueError(
        f"Can't get find the model details in associated dataset model ids {self.model_details}"
    )
|
695
|
+
|
696
|
+
def show(self, rankdir: str = GraphOrientation.LEFT_RIGHT) -> None:
    """
    Show the lineage tree for the dataset instance.

    Parameters
    ----------
    rankdir: str
        Orientation value forwarded to ``GraphService.view_lineage``.
        Defaults to ``GraphOrientation.LEFT_RIGHT``.

    Raises
    ------
    ValueError
        If no lineage information is returned for this dataset.
    """
    lineage_type = {self.CONST_LINEAGE_CONSTRUCT_TYPE: EntityType.DATASET.value}
    lineage = self.lineage.from_id(self.id, **lineage_type)
    if not lineage:
        # The message names the dataset: this method lives on the dataset,
        # not on a feature group.
        raise ValueError(f"Can't get lineage information for dataset id {self.id}")

    GraphService.view_lineage(lineage.data, EntityType.DATASET, rankdir)
|
711
|
+
|
712
|
+
def create(self, validate_sql=False, **kwargs) -> "Dataset":
    """Creates dataset resource.

    !!! note "Lazy"
        This method is lazy and does not persist any metadata or feature data in the
        feature store on its own. To persist the dataset and save dataset data
        along the metadata in the feature store, call the `materialise()`.

    Parameters
    ----------
    validate_sql:
        When exactly ``True``, the dataset query is run through the Spark
        engine before the resource is created.
    kwargs
        Additional kwargs arguments, forwarded to ``_to_oci_dataset``.
        Can be any attribute that `feature_store.models.Dataset` accepts.

    Returns
    -------
    Dataset
        The Dataset instance (self)

    Raises
    ------
    ValueError
        If compartment id not provided (presumably raised by
        ``OCIModelMixin.check_compartment_id``; confirm there).
    """
    self.compartment_id = OCIModelMixin.check_compartment_id(self.compartment_id)

    # Fill in defaults for anything the caller left unset.
    if not self.name:
        self.name = self._random_display_name()
    if self.statistics_config is None:
        self.statistics_config = StatisticsConfig()

    if validate_sql is True:
        engine = self.spark_engine
        engine.sql(self.get_spec(self.CONST_QUERY))

    # The copy is only used for the debug log line below.
    payload = deepcopy(self._spec)
    payload.pop("id", None)
    logger.debug(f"Creating a dataset resource with payload {payload}")

    logger.info("Saving dataset.")
    self.oci_dataset = self._to_oci_dataset(**kwargs).create()
    self._update_from_oci_dataset_model(self.oci_dataset)
    self.with_id(self.oci_dataset.id)
    return self
|
760
|
+
|
761
|
+
def _build_dataset_job(self, ingestion_mode, feature_option_details=None):
    """Builds a ``DatasetJob`` for this dataset; ``create()`` is not called here.

    Parameters
    ----------
    ingestion_mode
        Ingestion mode to set on the job.
    feature_option_details: optional
        When truthy, attached to the job via ``with_feature_option_details``.

    Returns
    -------
    DatasetJob
        The configured job builder.
    """
    job = DatasetJob()
    job = job.with_dataset_id(self.id)
    job = job.with_compartment_id(self.compartment_id)
    job = job.with_ingestion_mode(ingestion_mode)

    if not feature_option_details:
        return job
    return job.with_feature_option_details(feature_option_details)
|
774
|
+
|
775
|
+
def delete(self):
    """Removes Dataset Resource.

    Creates a dataset job with ``BatchIngestionMode.DEFAULT`` and hands it,
    together with this dataset, to the Spark execution strategy's
    ``delete_dataset``.

    Returns
    -------
    None
    """
    deletion_job = self._build_dataset_job(BatchIngestionMode.DEFAULT)
    # The return value of create() is not used; the builder itself is passed on.
    deletion_job.create()

    strategy = OciExecutionStrategyProvider.provide_execution_strategy(
        execution_engine=ExecutionEngine.SPARK,
        metastore_id=get_metastore_id(self.feature_store_id),
    )
    strategy.delete_dataset(self, deletion_job)
|
795
|
+
|
796
|
+
def get_features(self) -> List[DatasetFeature]:
    """
    Returns all the features in the dataset.

    Returns:
        List[DatasetFeature]: the value of ``self.features``.
    """
    dataset_features = self.features
    return dataset_features
|
805
|
+
|
806
|
+
def get_features_df(self) -> "pandas.DataFrame":
    """
    Returns all the features as pandas dataframe.

    Returns:
        pandas.DataFrame: one row per feature with the columns ``name``
        (from ``feature_name``) and ``type`` (from ``feature_type``).
    """
    rows = [
        {"name": feature.feature_name, "type": feature.feature_type}
        for feature in self.features
    ]
    return pandas.DataFrame.from_records(rows)
|
817
|
+
|
818
|
+
def update(self, **kwargs) -> "Dataset":
    """Updates Dataset in the feature store.

    Parameters
    ----------
    kwargs
        Additional kwargs arguments, forwarded to ``_to_oci_dataset``.
        Can be any attribute that `feature_store.models.Dataset` accepts.

    Returns
    -------
    Dataset
        The Dataset instance (self).

    Raises
    ------
    ValueError
        If the dataset has no id yet.
    """
    if self.id:
        self.oci_dataset = self._to_oci_dataset(**kwargs).update()
        return self

    raise ValueError(
        "Dataset needs to be saved to the feature store before it can be updated."
    )
|
840
|
+
|
841
|
+
def _update_from_oci_dataset_model(self, oci_dataset: OCIDataset) -> "Dataset":
    """Update the properties from an OCIDataset object.

    Parameters
    ----------
    oci_dataset: OCIDataset
        An instance of OCIDataset.

    Returns
    -------
    Dataset
        The Dataset instance (self).
    """
    self.oci_dataset = oci_dataset
    details = oci_dataset.to_dict()

    for spec_key, model_attr in self.attribute_map.items():
        # Only attributes present in the service payload are copied over.
        if spec_key not in details:
            continue

        if spec_key == self.CONST_OUTPUT_FEATURE_DETAILS:
            # May not need if we fix the backend and add dataset_id to the output_feature
            enriched_features = []
            for feature in details[spec_key]["items"]:
                feature["datasetId"] = details[self.CONST_ID]
                enriched_features.append(feature)
            spec_value = {self.CONST_ITEMS: enriched_features}
        elif spec_key == self.CONST_FEATURE_GROUP:
            # Taken from the model object itself rather than its dict form.
            spec_value = getattr(self.oci_dataset, model_attr)
        else:
            spec_value = details[spec_key]

        self.set_spec(spec_key, spec_value)

    return self
|
875
|
+
|
876
|
+
def materialise(
    self,
    ingestion_mode: BatchIngestionMode = BatchIngestionMode.OVERWRITE,
    feature_option_details: FeatureOptionDetails = None,
):
    """Creates a dataset job and ingests the dataset through the Spark strategy.

    Parameters
    ----------
    ingestion_mode: BatchIngestionMode, optional
        The IngestionMode is used to specify the expected behavior of saving a DataFrame.
        Defaults to OVERWRITE.
    feature_option_details: FeatureOptionDetails
        An instance of the FeatureOptionDetails class containing feature options.

    Returns
    -------
    None
    """
    # Build and persist the job, then remember its id on the dataset so the
    # user can look the job up later.
    job = self._build_dataset_job(ingestion_mode, feature_option_details).create()
    self.with_job_id(job.id)

    strategy = OciExecutionStrategyProvider.provide_execution_strategy(
        execution_engine=ExecutionEngine.SPARK,
        metastore_id=get_metastore_id(self.feature_store_id),
    )
    strategy.ingest_dataset(self, job)
|
911
|
+
|
912
|
+
def get_last_job(self) -> "DatasetJob":
    """Gets the Job details for the last running Dataset job.

    When ``job_id`` is already set on the dataset, that job is loaded.
    Otherwise a single job is requested from ``DatasetJob.list`` (sorted by
    ``timeCreated``) and its id is stored on the dataset.

    Returns:
        DatasetJob

    Raises:
        ValueError: If the dataset has no id, or no job is found for it.
    """
    if not self.id:
        raise ValueError(
            "Dataset needs to be saved to the feature store before getting associated jobs."
        )

    if self.job_id:
        return DatasetJob.from_id(self.job_id)

    jobs = DatasetJob.list(
        dataset_id=self.id,
        compartment_id=self.compartment_id,
        sort_by="timeCreated",
        limit="1",
    )
    if not jobs:
        raise ValueError(
            "Unable to retrieve the associated last job. Please make sure you materialized the data."
        )

    self.with_job_id(jobs[0].id)
    return jobs[0]
|
938
|
+
|
939
|
+
@deprecated(details="preview functionality is deprecated. Please use as_of.")
def preview(
    self,
    row_count: int = 10,
    version_number: int = None,
    timestamp: datetime = None,
):
    """preview the dataset and return the response in dataframe.

    Parameters
    ----------
    timestamp: datetime
        commit date time to preview in format yyyy-MM-dd or yyyy-MM-dd HH:mm:ss
        commit date time is maintained for every ingestion commit using delta lake
    version_number: int
        commit version number for the preview. Version numbers are automatically versioned for every ingestion
        commit using delta lake
    row_count: int
        preview row count

    Returns
    -------
    spark dataframe
        The preview result in spark dataframe. Time travel arguments are
        validated but otherwise ignored; a warning is logged when either
        is supplied.
    """
    self.check_resource_materialization()

    validate_delta_format_parameters(timestamp, version_number)
    target_table = f"{self.entity_id}.{self.name}"

    # Compare against None explicitly so that version 0 (a falsy value)
    # still counts as "supplied" and triggers the warning.
    if version_number is not None or timestamp is not None:
        logger.warning("Time travel queries are not supported in current version")

    sql_query = f"select * from {target_table} LIMIT {row_count}"

    return self.spark_engine.sql(sql_query)
|
974
|
+
|
975
|
+
def check_resource_materialization(self):
    """Checks that the target Delta table for this resource exists in Spark.

    Raises
    ------
    NotMaterializedError
        Raised with this resource's type and name when the Spark engine
        reports that the target Delta table does not exist.
    """
    engine = self.spark_engine
    if engine.is_delta_table_exists(self.target_delta_table()):
        return
    raise NotMaterializedError(self.type, self.name)
|
981
|
+
|
982
|
+
def as_of(
    self,
    version_number: int = None,
    commit_timestamp: datetime = None,
):
    """Reads the dataset as of a given commit version or commit timestamp.

    Parameters
    ----------
    commit_timestamp: datetime
        commit date time to preview in format yyyy-MM-dd or yyyy-MM-dd HH:mm:ss
        commit date time is maintained for every ingestion commit using delta lake
    version_number: int
        commit version number for the preview. Version numbers are automatically versioned for every ingestion
        commit using delta lake

    Returns
    -------
    spark dataframe
        The result of ``spark_engine.get_time_version_data`` for the target
        Delta table.
    """
    self.check_resource_materialization()

    validate_delta_format_parameters(commit_timestamp, version_number)

    delta_table = self.target_delta_table()
    engine = self.spark_engine
    return engine.get_time_version_data(delta_table, version_number, commit_timestamp)
|
1011
|
+
|
1012
|
+
def profile(self):
    """Get the dataset profile information and return the response in dataframe.

    Runs ``DESCRIBE DETAIL`` on ``<entity_id>.<name>`` after checking that
    the dataset has been materialized.

    Returns
    -------
    spark dataframe
        The profile result in spark dataframe
    """
    self.check_resource_materialization()

    target_table = f"{self.entity_id}.{self.name}"
    return self.spark_engine.sql(f"DESCRIBE DETAIL {target_table}")
|
1026
|
+
|
1027
|
+
def restore(self, version_number: int = None, timestamp: datetime = None):
    """restore the dataset and return the response in dataframe.

    Parameters
    ----------
    timestamp: datetime
        commit date time in format yyyy-MM-dd or yyyy-MM-dd HH:mm:ss to restore
        commit date time is maintained for every ingestion commit using delta lake
    version_number: int
        commit version number to restore. Version numbers are automatically versioned for every ingestion
        commit using delta lake. Takes precedence over ``timestamp`` when given.

    Returns
    -------
    spark dataframe
        The restore output as spark dataframe
    """
    self.check_resource_materialization()

    validate_delta_format_parameters(timestamp, version_number, True)
    target_table = f"{self.entity_id}.{self.name}"
    if version_number is not None:
        sql_query = (
            f"RESTORE TABLE {target_table} TO VERSION AS OF {version_number}"
        )
    else:
        iso_timestamp = timestamp.isoformat(" ", "seconds")
        # The timestamp must be a quoted SQL literal; unquoted, the date and
        # time text is not a valid timestamp expression.
        sql_query = (
            f"RESTORE TABLE {target_table} TO TIMESTAMP AS OF '{iso_timestamp}'"
        )

    restore_output = self.spark_engine.sql(sql_query)

    dataset_execution_strategy = (
        OciExecutionStrategyProvider.provide_execution_strategy(
            execution_engine=ExecutionEngine.SPARK,
            metastore_id=get_metastore_id(self.feature_store_id),
        )
    )

    # Refresh the dataset's features from the restored table.
    dataset_execution_strategy.update_dataset_features(self, target_table)
    return restore_output
|
1068
|
+
|
1069
|
+
def history(self):
    """get the dataset commit history.

    Runs ``DESCRIBE HISTORY`` on ``<entity_id>.<name>``. No materialization
    check is performed here.

    Returns
    -------
    spark dataframe
        The history output as spark dataframe
    """
    target_table = f"{self.entity_id}.{self.name}"
    return self.spark_engine.sql(f"DESCRIBE HISTORY {target_table}")
|
1080
|
+
|
1081
|
+
def get_statistics(self, job_id: str = None) -> "Statistics":
    """Retrieve Statistics object for the job with job_id
    if job_id is not specified the last run job will be considered.

    Args:
        job_id (str): [job id of the job for which the statistics need to be retrieved]

    Returns:
        [type]: [Statistics] built from the job output's ``featureStatistics``
        and ``version`` entries; the version falls back to 1 when absent.

    Raises:
        ValueError: If the dataset has no id, or the job belongs to another dataset.
    """
    if not self.id:
        raise ValueError(
            "Dataset needs to be saved to the feature store before retrieving the statistics"
        )

    if job_id is not None:
        stat_job_id = job_id
    else:
        stat_job_id = self.get_last_job().id

    # TODO: take the one in memory or will list down job ids and find the latest
    job = DatasetJob.from_id(stat_job_id)
    if self.id != job.dataset_id:
        raise ValueError("The specified job id does not belong to this dataset")

    output = job.job_output_details
    if output:
        feature_statistics = output.get("featureStatistics")
        version = output.get("version")
    else:
        feature_statistics = None
        version = None
    if version is None:
        version = 1

    return Statistics(feature_statistics, version)
|
1109
|
+
|
1110
|
+
def get_validation_output(self, job_id: str = None) -> "ValidationOutput":
    """Retrieve the validation output for the job with job_id
    if job_id is not specified the last run job will be considered.

    Args:
        job_id (str): [job id of the job for which the validation report need to be retrieved]

    Returns:
        ValidationOutput -- wraps the ``validationOutput`` entry of the job
        output details, or ``None`` when the job has no output details.

    Raises:
        ValueError: If the dataset has no id.
    """
    if not self.id:
        raise ValueError(
            "Dataset needs to be saved to the feature store before retrieving the validation report"
        )

    if job_id is not None:
        validation_job_id = job_id
    else:
        validation_job_id = self.get_last_job().id

    # retrieve the validation output JSON from data_flow_batch_execution_output
    output = DatasetJob.from_id(validation_job_id).job_output_details
    report = output.get("validationOutput") if output else None
    return ValidationOutput(report)
|
1134
|
+
|
1135
|
+
@classmethod
def list_df(cls, compartment_id: str = None, **kwargs) -> "pandas.DataFrame":
    """Lists dataset resources in a given compartment.

    Parameters
    ----------
    compartment_id: (str, optional). Defaults to `None`.
        The compartment OCID.
    kwargs
        Additional keyword arguments, forwarded to ``OCIDataset.list_resource``.

    Returns
    -------
    pandas.DataFrame
        The list of the dataset resources in a pandas dataframe format.
        ``created_by`` and ``compartment_id`` are shortened to their last
        six characters, prefixed with ``...``.
    """

    def _as_record(oci_dataset):
        # Flatten one OCI dataset model into a dataframe row.
        return {
            "id": oci_dataset.id,
            "name": oci_dataset.name,
            "description": oci_dataset.description,
            "time_created": oci_dataset.time_created.strftime(utils.date_format),
            "time_updated": oci_dataset.time_updated.strftime(utils.date_format),
            "lifecycle_state": oci_dataset.lifecycle_state,
            "created_by": f"...{oci_dataset.created_by[-6:]}",
            "compartment_id": f"...{oci_dataset.compartment_id[-6:]}",
            "feature_store_id": oci_dataset.feature_store_id,
            "entity_id": oci_dataset.entity_id,
            "query": oci_dataset.query,
            "dataset_ingestion_mode": oci_dataset.dataset_ingestion_mode,
            "expectation_details": oci_dataset.expectation_details,
        }

    rows = [
        _as_record(oci_dataset)
        for oci_dataset in OCIDataset.list_resource(compartment_id, **kwargs)
    ]
    return pandas.DataFrame.from_records(rows)
|
1175
|
+
|
1176
|
+
@classmethod
def list(cls, compartment_id: str = None, **kwargs) -> List["Dataset"]:
    """Lists Dataset Resources in a given compartment.

    Parameters
    ----------
    compartment_id: (str, optional). Defaults to `None`.
        The compartment OCID.
    kwargs
        Additional keyword arguments, forwarded to ``OCIDataset.list_resource``.

    Returns
    -------
    List[Dataset]
        The list of the Dataset Resources, one new instance per OCI dataset.
    """
    datasets = []
    for oci_dataset in OCIDataset.list_resource(compartment_id, **kwargs):
        datasets.append(cls()._update_from_oci_dataset_model(oci_dataset))
    return datasets
|
1196
|
+
|
1197
|
+
def _random_display_name(self):
    """Generates a display name of the form ``<_PREFIX>-<random resource name>``."""
    prefix = self._PREFIX
    suffix = utils.get_random_name_for_resource()
    return f"{prefix}-{suffix}"
|
1200
|
+
|
1201
|
+
def to_dict(self) -> Dict:
    """Serializes dataset to a dictionary.

    Returns
    -------
    dict
        The dataset resource serialized as a dictionary with the keys
        ``kind``, ``type`` and ``spec`` (spec keys converted to camel case).
    """
    serialized = deepcopy(self._spec)
    for spec_key, spec_value in serialized.items():
        # Nested builders/models expose to_dict(); plain values pass through.
        plain = spec_value.to_dict() if hasattr(spec_value, "to_dict") else spec_value
        if spec_key == self.CONST_FEATURE_GROUP:
            # The feature group collection goes through the OCI client's
            # serializer as well.
            plain = self.oci_dataset.client.base_client.sanitize_for_serialization(
                plain
            )
        serialized[spec_key] = plain

    return {
        "kind": self.kind,
        "type": self.type,
        "spec": utils.batch_convert_case(serialized, "camel"),
    }
|
1227
|
+
|
1228
|
+
def __repr__(self) -> str:
    """Displays the object as YAML (the output of ``to_yaml()``)."""
    yaml_text = self.to_yaml()
    return yaml_text
|