oracle-ads 2.13.9rc0__py3-none-any.whl → 2.13.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/__init__.py +40 -0
- ads/aqua/app.py +507 -0
- ads/aqua/cli.py +96 -0
- ads/aqua/client/__init__.py +3 -0
- ads/aqua/client/client.py +836 -0
- ads/aqua/client/openai_client.py +305 -0
- ads/aqua/common/__init__.py +5 -0
- ads/aqua/common/decorator.py +125 -0
- ads/aqua/common/entities.py +274 -0
- ads/aqua/common/enums.py +134 -0
- ads/aqua/common/errors.py +109 -0
- ads/aqua/common/utils.py +1295 -0
- ads/aqua/config/__init__.py +4 -0
- ads/aqua/config/container_config.py +246 -0
- ads/aqua/config/evaluation/__init__.py +4 -0
- ads/aqua/config/evaluation/evaluation_service_config.py +147 -0
- ads/aqua/config/utils/__init__.py +4 -0
- ads/aqua/config/utils/serializer.py +339 -0
- ads/aqua/constants.py +116 -0
- ads/aqua/data.py +14 -0
- ads/aqua/dummy_data/icon.txt +1 -0
- ads/aqua/dummy_data/oci_model_deployments.json +56 -0
- ads/aqua/dummy_data/oci_models.json +1 -0
- ads/aqua/dummy_data/readme.md +26 -0
- ads/aqua/evaluation/__init__.py +8 -0
- ads/aqua/evaluation/constants.py +53 -0
- ads/aqua/evaluation/entities.py +186 -0
- ads/aqua/evaluation/errors.py +70 -0
- ads/aqua/evaluation/evaluation.py +1814 -0
- ads/aqua/extension/__init__.py +42 -0
- ads/aqua/extension/aqua_ws_msg_handler.py +76 -0
- ads/aqua/extension/base_handler.py +90 -0
- ads/aqua/extension/common_handler.py +121 -0
- ads/aqua/extension/common_ws_msg_handler.py +36 -0
- ads/aqua/extension/deployment_handler.py +381 -0
- ads/aqua/extension/deployment_ws_msg_handler.py +54 -0
- ads/aqua/extension/errors.py +30 -0
- ads/aqua/extension/evaluation_handler.py +129 -0
- ads/aqua/extension/evaluation_ws_msg_handler.py +61 -0
- ads/aqua/extension/finetune_handler.py +96 -0
- ads/aqua/extension/model_handler.py +390 -0
- ads/aqua/extension/models/__init__.py +0 -0
- ads/aqua/extension/models/ws_models.py +145 -0
- ads/aqua/extension/models_ws_msg_handler.py +50 -0
- ads/aqua/extension/ui_handler.py +300 -0
- ads/aqua/extension/ui_websocket_handler.py +130 -0
- ads/aqua/extension/utils.py +133 -0
- ads/aqua/finetuning/__init__.py +7 -0
- ads/aqua/finetuning/constants.py +23 -0
- ads/aqua/finetuning/entities.py +181 -0
- ads/aqua/finetuning/finetuning.py +749 -0
- ads/aqua/model/__init__.py +8 -0
- ads/aqua/model/constants.py +60 -0
- ads/aqua/model/entities.py +385 -0
- ads/aqua/model/enums.py +32 -0
- ads/aqua/model/model.py +2134 -0
- ads/aqua/model/utils.py +52 -0
- ads/aqua/modeldeployment/__init__.py +6 -0
- ads/aqua/modeldeployment/constants.py +10 -0
- ads/aqua/modeldeployment/deployment.py +1315 -0
- ads/aqua/modeldeployment/entities.py +653 -0
- ads/aqua/modeldeployment/utils.py +543 -0
- ads/aqua/resources/gpu_shapes_index.json +94 -0
- ads/aqua/server/__init__.py +4 -0
- ads/aqua/server/__main__.py +24 -0
- ads/aqua/server/app.py +47 -0
- ads/aqua/server/aqua_spec.yml +1291 -0
- ads/aqua/training/__init__.py +4 -0
- ads/aqua/training/exceptions.py +476 -0
- ads/aqua/ui.py +519 -0
- ads/automl/__init__.py +9 -0
- ads/automl/driver.py +330 -0
- ads/automl/provider.py +975 -0
- ads/bds/__init__.py +5 -0
- ads/bds/auth.py +127 -0
- ads/bds/big_data_service.py +255 -0
- ads/catalog/__init__.py +19 -0
- ads/catalog/model.py +1576 -0
- ads/catalog/notebook.py +461 -0
- ads/catalog/project.py +468 -0
- ads/catalog/summary.py +178 -0
- ads/common/__init__.py +11 -0
- ads/common/analyzer.py +65 -0
- ads/common/artifact/.model-ignore +63 -0
- ads/common/artifact/__init__.py +10 -0
- ads/common/auth.py +1122 -0
- ads/common/card_identifier.py +83 -0
- ads/common/config.py +647 -0
- ads/common/data.py +165 -0
- ads/common/decorator/__init__.py +9 -0
- ads/common/decorator/argument_to_case.py +88 -0
- ads/common/decorator/deprecate.py +69 -0
- ads/common/decorator/require_nonempty_arg.py +65 -0
- ads/common/decorator/runtime_dependency.py +178 -0
- ads/common/decorator/threaded.py +97 -0
- ads/common/decorator/utils.py +35 -0
- ads/common/dsc_file_system.py +303 -0
- ads/common/error.py +14 -0
- ads/common/extended_enum.py +81 -0
- ads/common/function/__init__.py +5 -0
- ads/common/function/fn_util.py +142 -0
- ads/common/function/func_conf.yaml +25 -0
- ads/common/ipython.py +76 -0
- ads/common/model.py +679 -0
- ads/common/model_artifact.py +1759 -0
- ads/common/model_artifact_schema.json +107 -0
- ads/common/model_export_util.py +664 -0
- ads/common/model_metadata.py +24 -0
- ads/common/object_storage_details.py +296 -0
- ads/common/oci_client.py +179 -0
- ads/common/oci_datascience.py +46 -0
- ads/common/oci_logging.py +1144 -0
- ads/common/oci_mixin.py +957 -0
- ads/common/oci_resource.py +136 -0
- ads/common/serializer.py +559 -0
- ads/common/utils.py +1852 -0
- ads/common/word_lists.py +1491 -0
- ads/common/work_request.py +189 -0
- ads/config.py +1 -0
- ads/data_labeling/__init__.py +13 -0
- ads/data_labeling/boundingbox.py +253 -0
- ads/data_labeling/constants.py +47 -0
- ads/data_labeling/data_labeling_service.py +244 -0
- ads/data_labeling/interface/__init__.py +5 -0
- ads/data_labeling/interface/loader.py +16 -0
- ads/data_labeling/interface/parser.py +16 -0
- ads/data_labeling/interface/reader.py +23 -0
- ads/data_labeling/loader/__init__.py +5 -0
- ads/data_labeling/loader/file_loader.py +241 -0
- ads/data_labeling/metadata.py +110 -0
- ads/data_labeling/mixin/__init__.py +5 -0
- ads/data_labeling/mixin/data_labeling.py +232 -0
- ads/data_labeling/ner.py +129 -0
- ads/data_labeling/parser/__init__.py +5 -0
- ads/data_labeling/parser/dls_record_parser.py +388 -0
- ads/data_labeling/parser/export_metadata_parser.py +94 -0
- ads/data_labeling/parser/export_record_parser.py +473 -0
- ads/data_labeling/reader/__init__.py +5 -0
- ads/data_labeling/reader/dataset_reader.py +574 -0
- ads/data_labeling/reader/dls_record_reader.py +121 -0
- ads/data_labeling/reader/export_record_reader.py +62 -0
- ads/data_labeling/reader/jsonl_reader.py +75 -0
- ads/data_labeling/reader/metadata_reader.py +203 -0
- ads/data_labeling/reader/record_reader.py +263 -0
- ads/data_labeling/record.py +52 -0
- ads/data_labeling/visualizer/__init__.py +5 -0
- ads/data_labeling/visualizer/image_visualizer.py +525 -0
- ads/data_labeling/visualizer/text_visualizer.py +357 -0
- ads/database/__init__.py +5 -0
- ads/database/connection.py +338 -0
- ads/dataset/__init__.py +10 -0
- ads/dataset/capabilities.md +51 -0
- ads/dataset/classification_dataset.py +339 -0
- ads/dataset/correlation.py +226 -0
- ads/dataset/correlation_plot.py +563 -0
- ads/dataset/dask_series.py +173 -0
- ads/dataset/dataframe_transformer.py +110 -0
- ads/dataset/dataset.py +1979 -0
- ads/dataset/dataset_browser.py +360 -0
- ads/dataset/dataset_with_target.py +995 -0
- ads/dataset/exception.py +25 -0
- ads/dataset/factory.py +987 -0
- ads/dataset/feature_engineering_transformer.py +35 -0
- ads/dataset/feature_selection.py +107 -0
- ads/dataset/forecasting_dataset.py +26 -0
- ads/dataset/helper.py +1450 -0
- ads/dataset/label_encoder.py +99 -0
- ads/dataset/mixin/__init__.py +5 -0
- ads/dataset/mixin/dataset_accessor.py +134 -0
- ads/dataset/pipeline.py +58 -0
- ads/dataset/plot.py +710 -0
- ads/dataset/progress.py +86 -0
- ads/dataset/recommendation.py +297 -0
- ads/dataset/recommendation_transformer.py +502 -0
- ads/dataset/regression_dataset.py +14 -0
- ads/dataset/sampled_dataset.py +1050 -0
- ads/dataset/target.py +98 -0
- ads/dataset/timeseries.py +18 -0
- ads/dbmixin/__init__.py +5 -0
- ads/dbmixin/db_pandas_accessor.py +153 -0
- ads/environment/__init__.py +9 -0
- ads/environment/ml_runtime.py +66 -0
- ads/evaluations/README.md +14 -0
- ads/evaluations/__init__.py +109 -0
- ads/evaluations/evaluation_plot.py +983 -0
- ads/evaluations/evaluator.py +1334 -0
- ads/evaluations/statistical_metrics.py +543 -0
- ads/experiments/__init__.py +9 -0
- ads/experiments/capabilities.md +0 -0
- ads/explanations/__init__.py +21 -0
- ads/explanations/base_explainer.py +142 -0
- ads/explanations/capabilities.md +83 -0
- ads/explanations/explainer.py +190 -0
- ads/explanations/mlx_global_explainer.py +1050 -0
- ads/explanations/mlx_interface.py +386 -0
- ads/explanations/mlx_local_explainer.py +287 -0
- ads/explanations/mlx_whatif_explainer.py +201 -0
- ads/feature_engineering/__init__.py +20 -0
- ads/feature_engineering/accessor/__init__.py +5 -0
- ads/feature_engineering/accessor/dataframe_accessor.py +535 -0
- ads/feature_engineering/accessor/mixin/__init__.py +5 -0
- ads/feature_engineering/accessor/mixin/correlation.py +166 -0
- ads/feature_engineering/accessor/mixin/eda_mixin.py +266 -0
- ads/feature_engineering/accessor/mixin/eda_mixin_series.py +85 -0
- ads/feature_engineering/accessor/mixin/feature_types_mixin.py +211 -0
- ads/feature_engineering/accessor/mixin/utils.py +65 -0
- ads/feature_engineering/accessor/series_accessor.py +431 -0
- ads/feature_engineering/adsimage/__init__.py +5 -0
- ads/feature_engineering/adsimage/image.py +192 -0
- ads/feature_engineering/adsimage/image_reader.py +170 -0
- ads/feature_engineering/adsimage/interface/__init__.py +5 -0
- ads/feature_engineering/adsimage/interface/reader.py +19 -0
- ads/feature_engineering/adsstring/__init__.py +7 -0
- ads/feature_engineering/adsstring/oci_language/__init__.py +8 -0
- ads/feature_engineering/adsstring/string/__init__.py +8 -0
- ads/feature_engineering/data_schema.json +57 -0
- ads/feature_engineering/dataset/__init__.py +5 -0
- ads/feature_engineering/dataset/zip_code_data.py +42062 -0
- ads/feature_engineering/exceptions.py +40 -0
- ads/feature_engineering/feature_type/__init__.py +133 -0
- ads/feature_engineering/feature_type/address.py +184 -0
- ads/feature_engineering/feature_type/adsstring/__init__.py +5 -0
- ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +164 -0
- ads/feature_engineering/feature_type/adsstring/oci_language.py +93 -0
- ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +5 -0
- ads/feature_engineering/feature_type/adsstring/parsers/base.py +47 -0
- ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +96 -0
- ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +221 -0
- ads/feature_engineering/feature_type/adsstring/string.py +258 -0
- ads/feature_engineering/feature_type/base.py +58 -0
- ads/feature_engineering/feature_type/boolean.py +183 -0
- ads/feature_engineering/feature_type/category.py +146 -0
- ads/feature_engineering/feature_type/constant.py +137 -0
- ads/feature_engineering/feature_type/continuous.py +151 -0
- ads/feature_engineering/feature_type/creditcard.py +314 -0
- ads/feature_engineering/feature_type/datetime.py +190 -0
- ads/feature_engineering/feature_type/discrete.py +134 -0
- ads/feature_engineering/feature_type/document.py +43 -0
- ads/feature_engineering/feature_type/gis.py +251 -0
- ads/feature_engineering/feature_type/handler/__init__.py +5 -0
- ads/feature_engineering/feature_type/handler/feature_validator.py +524 -0
- ads/feature_engineering/feature_type/handler/feature_warning.py +319 -0
- ads/feature_engineering/feature_type/handler/warnings.py +128 -0
- ads/feature_engineering/feature_type/integer.py +142 -0
- ads/feature_engineering/feature_type/ip_address.py +144 -0
- ads/feature_engineering/feature_type/ip_address_v4.py +138 -0
- ads/feature_engineering/feature_type/ip_address_v6.py +138 -0
- ads/feature_engineering/feature_type/lat_long.py +256 -0
- ads/feature_engineering/feature_type/object.py +43 -0
- ads/feature_engineering/feature_type/ordinal.py +132 -0
- ads/feature_engineering/feature_type/phone_number.py +135 -0
- ads/feature_engineering/feature_type/string.py +171 -0
- ads/feature_engineering/feature_type/text.py +93 -0
- ads/feature_engineering/feature_type/unknown.py +43 -0
- ads/feature_engineering/feature_type/zip_code.py +164 -0
- ads/feature_engineering/feature_type_manager.py +406 -0
- ads/feature_engineering/schema.py +795 -0
- ads/feature_engineering/utils.py +245 -0
- ads/feature_store/.readthedocs.yaml +19 -0
- ads/feature_store/README.md +65 -0
- ads/feature_store/__init__.py +9 -0
- ads/feature_store/common/__init__.py +0 -0
- ads/feature_store/common/enums.py +339 -0
- ads/feature_store/common/exceptions.py +18 -0
- ads/feature_store/common/spark_session_singleton.py +125 -0
- ads/feature_store/common/utils/__init__.py +0 -0
- ads/feature_store/common/utils/base64_encoder_decoder.py +72 -0
- ads/feature_store/common/utils/feature_schema_mapper.py +283 -0
- ads/feature_store/common/utils/transformation_utils.py +82 -0
- ads/feature_store/common/utils/utility.py +403 -0
- ads/feature_store/data_validation/__init__.py +0 -0
- ads/feature_store/data_validation/great_expectation.py +129 -0
- ads/feature_store/dataset.py +1230 -0
- ads/feature_store/dataset_job.py +530 -0
- ads/feature_store/docs/Dockerfile +7 -0
- ads/feature_store/docs/Makefile +44 -0
- ads/feature_store/docs/conf.py +28 -0
- ads/feature_store/docs/requirements.txt +14 -0
- ads/feature_store/docs/source/ads.feature_store.query.rst +20 -0
- ads/feature_store/docs/source/cicd.rst +137 -0
- ads/feature_store/docs/source/conf.py +86 -0
- ads/feature_store/docs/source/data_versioning.rst +33 -0
- ads/feature_store/docs/source/dataset.rst +388 -0
- ads/feature_store/docs/source/dataset_job.rst +27 -0
- ads/feature_store/docs/source/demo.rst +70 -0
- ads/feature_store/docs/source/entity.rst +78 -0
- ads/feature_store/docs/source/feature_group.rst +624 -0
- ads/feature_store/docs/source/feature_group_job.rst +29 -0
- ads/feature_store/docs/source/feature_store.rst +122 -0
- ads/feature_store/docs/source/feature_store_class.rst +123 -0
- ads/feature_store/docs/source/feature_validation.rst +66 -0
- ads/feature_store/docs/source/figures/cicd.png +0 -0
- ads/feature_store/docs/source/figures/data_validation.png +0 -0
- ads/feature_store/docs/source/figures/data_versioning.png +0 -0
- ads/feature_store/docs/source/figures/dataset.gif +0 -0
- ads/feature_store/docs/source/figures/dataset.png +0 -0
- ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
- ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
- ads/feature_store/docs/source/figures/entity.png +0 -0
- ads/feature_store/docs/source/figures/feature_group.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
- ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
- ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
- ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
- ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
- ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
- ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
- ads/feature_store/docs/source/figures/overview.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
- ads/feature_store/docs/source/figures/stats_1.png +0 -0
- ads/feature_store/docs/source/figures/stats_2.png +0 -0
- ads/feature_store/docs/source/figures/stats_d.png +0 -0
- ads/feature_store/docs/source/figures/stats_fg.png +0 -0
- ads/feature_store/docs/source/figures/transformation.png +0 -0
- ads/feature_store/docs/source/figures/transformations.gif +0 -0
- ads/feature_store/docs/source/figures/validation.png +0 -0
- ads/feature_store/docs/source/figures/validation_fg.png +0 -0
- ads/feature_store/docs/source/figures/validation_results.png +0 -0
- ads/feature_store/docs/source/figures/validation_summary.png +0 -0
- ads/feature_store/docs/source/index.rst +81 -0
- ads/feature_store/docs/source/module.rst +8 -0
- ads/feature_store/docs/source/notebook.rst +94 -0
- ads/feature_store/docs/source/overview.rst +47 -0
- ads/feature_store/docs/source/quickstart.rst +176 -0
- ads/feature_store/docs/source/release_notes.rst +194 -0
- ads/feature_store/docs/source/setup_feature_store.rst +81 -0
- ads/feature_store/docs/source/statistics.rst +58 -0
- ads/feature_store/docs/source/transformation.rst +199 -0
- ads/feature_store/docs/source/ui.rst +65 -0
- ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +66 -0
- ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +192 -0
- ads/feature_store/docs/source/user_guides.setup.terraform.rst +338 -0
- ads/feature_store/entity.py +718 -0
- ads/feature_store/execution_strategy/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +375 -0
- ads/feature_store/execution_strategy/engine/__init__.py +0 -0
- ads/feature_store/execution_strategy/engine/spark_engine.py +316 -0
- ads/feature_store/execution_strategy/execution_strategy.py +113 -0
- ads/feature_store/execution_strategy/execution_strategy_provider.py +47 -0
- ads/feature_store/execution_strategy/spark/__init__.py +0 -0
- ads/feature_store/execution_strategy/spark/spark_execution.py +618 -0
- ads/feature_store/feature.py +192 -0
- ads/feature_store/feature_group.py +1494 -0
- ads/feature_store/feature_group_expectation.py +346 -0
- ads/feature_store/feature_group_job.py +602 -0
- ads/feature_store/feature_lineage/__init__.py +0 -0
- ads/feature_store/feature_lineage/graphviz_service.py +180 -0
- ads/feature_store/feature_option_details.py +50 -0
- ads/feature_store/feature_statistics/__init__.py +0 -0
- ads/feature_store/feature_statistics/statistics_service.py +99 -0
- ads/feature_store/feature_store.py +699 -0
- ads/feature_store/feature_store_registrar.py +518 -0
- ads/feature_store/input_feature_detail.py +149 -0
- ads/feature_store/mixin/__init__.py +4 -0
- ads/feature_store/mixin/oci_feature_store.py +145 -0
- ads/feature_store/model_details.py +73 -0
- ads/feature_store/query/__init__.py +0 -0
- ads/feature_store/query/filter.py +266 -0
- ads/feature_store/query/generator/__init__.py +0 -0
- ads/feature_store/query/generator/query_generator.py +298 -0
- ads/feature_store/query/join.py +161 -0
- ads/feature_store/query/query.py +403 -0
- ads/feature_store/query/validator/__init__.py +0 -0
- ads/feature_store/query/validator/query_validator.py +57 -0
- ads/feature_store/response/__init__.py +0 -0
- ads/feature_store/response/response_builder.py +68 -0
- ads/feature_store/service/__init__.py +0 -0
- ads/feature_store/service/oci_dataset.py +139 -0
- ads/feature_store/service/oci_dataset_job.py +199 -0
- ads/feature_store/service/oci_entity.py +125 -0
- ads/feature_store/service/oci_feature_group.py +164 -0
- ads/feature_store/service/oci_feature_group_job.py +214 -0
- ads/feature_store/service/oci_feature_store.py +182 -0
- ads/feature_store/service/oci_lineage.py +87 -0
- ads/feature_store/service/oci_transformation.py +104 -0
- ads/feature_store/statistics/__init__.py +0 -0
- ads/feature_store/statistics/abs_feature_value.py +49 -0
- ads/feature_store/statistics/charts/__init__.py +0 -0
- ads/feature_store/statistics/charts/abstract_feature_plot.py +37 -0
- ads/feature_store/statistics/charts/box_plot.py +148 -0
- ads/feature_store/statistics/charts/frequency_distribution.py +65 -0
- ads/feature_store/statistics/charts/probability_distribution.py +68 -0
- ads/feature_store/statistics/charts/top_k_frequent_elements.py +98 -0
- ads/feature_store/statistics/feature_stat.py +126 -0
- ads/feature_store/statistics/generic_feature_value.py +33 -0
- ads/feature_store/statistics/statistics.py +41 -0
- ads/feature_store/statistics_config.py +101 -0
- ads/feature_store/templates/feature_store_template.yaml +45 -0
- ads/feature_store/transformation.py +499 -0
- ads/feature_store/validation_output.py +57 -0
- ads/hpo/__init__.py +9 -0
- ads/hpo/_imports.py +91 -0
- ads/hpo/ads_search_space.py +439 -0
- ads/hpo/distributions.py +325 -0
- ads/hpo/objective.py +280 -0
- ads/hpo/search_cv.py +1657 -0
- ads/hpo/stopping_criterion.py +75 -0
- ads/hpo/tuner_artifact.py +413 -0
- ads/hpo/utils.py +91 -0
- ads/hpo/validation.py +140 -0
- ads/hpo/visualization/__init__.py +5 -0
- ads/hpo/visualization/_contour.py +23 -0
- ads/hpo/visualization/_edf.py +20 -0
- ads/hpo/visualization/_intermediate_values.py +21 -0
- ads/hpo/visualization/_optimization_history.py +25 -0
- ads/hpo/visualization/_parallel_coordinate.py +169 -0
- ads/hpo/visualization/_param_importances.py +26 -0
- ads/jobs/__init__.py +53 -0
- ads/jobs/ads_job.py +663 -0
- ads/jobs/builders/__init__.py +5 -0
- ads/jobs/builders/base.py +156 -0
- ads/jobs/builders/infrastructure/__init__.py +6 -0
- ads/jobs/builders/infrastructure/base.py +165 -0
- ads/jobs/builders/infrastructure/dataflow.py +1252 -0
- ads/jobs/builders/infrastructure/dsc_job.py +1894 -0
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +1233 -0
- ads/jobs/builders/infrastructure/utils.py +65 -0
- ads/jobs/builders/runtimes/__init__.py +5 -0
- ads/jobs/builders/runtimes/artifact.py +338 -0
- ads/jobs/builders/runtimes/base.py +325 -0
- ads/jobs/builders/runtimes/container_runtime.py +242 -0
- ads/jobs/builders/runtimes/python_runtime.py +1016 -0
- ads/jobs/builders/runtimes/pytorch_runtime.py +204 -0
- ads/jobs/cli.py +104 -0
- ads/jobs/env_var_parser.py +131 -0
- ads/jobs/extension.py +160 -0
- ads/jobs/schema/__init__.py +5 -0
- ads/jobs/schema/infrastructure_schema.json +116 -0
- ads/jobs/schema/job_schema.json +42 -0
- ads/jobs/schema/runtime_schema.json +183 -0
- ads/jobs/schema/validator.py +141 -0
- ads/jobs/serializer.py +296 -0
- ads/jobs/templates/__init__.py +5 -0
- ads/jobs/templates/container.py +6 -0
- ads/jobs/templates/driver_notebook.py +177 -0
- ads/jobs/templates/driver_oci.py +500 -0
- ads/jobs/templates/driver_python.py +48 -0
- ads/jobs/templates/driver_pytorch.py +852 -0
- ads/jobs/templates/driver_utils.py +615 -0
- ads/jobs/templates/hostname_from_env.c +55 -0
- ads/jobs/templates/oci_metrics.py +181 -0
- ads/jobs/utils.py +104 -0
- ads/llm/__init__.py +28 -0
- ads/llm/autogen/__init__.py +2 -0
- ads/llm/autogen/constants.py +15 -0
- ads/llm/autogen/reports/__init__.py +2 -0
- ads/llm/autogen/reports/base.py +67 -0
- ads/llm/autogen/reports/data.py +103 -0
- ads/llm/autogen/reports/session.py +526 -0
- ads/llm/autogen/reports/templates/chat_box.html +13 -0
- ads/llm/autogen/reports/templates/chat_box_lt.html +5 -0
- ads/llm/autogen/reports/templates/chat_box_rt.html +6 -0
- ads/llm/autogen/reports/utils.py +56 -0
- ads/llm/autogen/v02/__init__.py +4 -0
- ads/llm/autogen/v02/client.py +295 -0
- ads/llm/autogen/v02/log_handlers/__init__.py +2 -0
- ads/llm/autogen/v02/log_handlers/oci_file_handler.py +83 -0
- ads/llm/autogen/v02/loggers/__init__.py +6 -0
- ads/llm/autogen/v02/loggers/metric_logger.py +320 -0
- ads/llm/autogen/v02/loggers/session_logger.py +580 -0
- ads/llm/autogen/v02/loggers/utils.py +86 -0
- ads/llm/autogen/v02/runtime_logging.py +163 -0
- ads/llm/chain.py +268 -0
- ads/llm/chat_template.py +31 -0
- ads/llm/deploy.py +63 -0
- ads/llm/guardrails/__init__.py +5 -0
- ads/llm/guardrails/base.py +442 -0
- ads/llm/guardrails/huggingface.py +44 -0
- ads/llm/langchain/__init__.py +5 -0
- ads/llm/langchain/plugins/__init__.py +5 -0
- ads/llm/langchain/plugins/chat_models/__init__.py +5 -0
- ads/llm/langchain/plugins/chat_models/oci_data_science.py +1027 -0
- ads/llm/langchain/plugins/embeddings/__init__.py +4 -0
- ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +184 -0
- ads/llm/langchain/plugins/llms/__init__.py +5 -0
- ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +979 -0
- ads/llm/requirements.txt +3 -0
- ads/llm/serialize.py +219 -0
- ads/llm/serializers/__init__.py +0 -0
- ads/llm/serializers/retrieval_qa.py +153 -0
- ads/llm/serializers/runnable_parallel.py +27 -0
- ads/llm/templates/score_chain.jinja2 +155 -0
- ads/llm/templates/tool_chat_template_hermes.jinja +130 -0
- ads/llm/templates/tool_chat_template_mistral_parallel.jinja +94 -0
- ads/model/__init__.py +52 -0
- ads/model/artifact.py +573 -0
- ads/model/artifact_downloader.py +254 -0
- ads/model/artifact_uploader.py +267 -0
- ads/model/base_properties.py +238 -0
- ads/model/common/.model-ignore +66 -0
- ads/model/common/__init__.py +5 -0
- ads/model/common/utils.py +142 -0
- ads/model/datascience_model.py +2635 -0
- ads/model/deployment/__init__.py +20 -0
- ads/model/deployment/common/__init__.py +5 -0
- ads/model/deployment/common/utils.py +308 -0
- ads/model/deployment/model_deployer.py +466 -0
- ads/model/deployment/model_deployment.py +1846 -0
- ads/model/deployment/model_deployment_infrastructure.py +671 -0
- ads/model/deployment/model_deployment_properties.py +493 -0
- ads/model/deployment/model_deployment_runtime.py +838 -0
- ads/model/extractor/__init__.py +5 -0
- ads/model/extractor/automl_extractor.py +74 -0
- ads/model/extractor/embedding_onnx_extractor.py +80 -0
- ads/model/extractor/huggingface_extractor.py +88 -0
- ads/model/extractor/keras_extractor.py +84 -0
- ads/model/extractor/lightgbm_extractor.py +93 -0
- ads/model/extractor/model_info_extractor.py +114 -0
- ads/model/extractor/model_info_extractor_factory.py +105 -0
- ads/model/extractor/pytorch_extractor.py +87 -0
- ads/model/extractor/sklearn_extractor.py +112 -0
- ads/model/extractor/spark_extractor.py +89 -0
- ads/model/extractor/tensorflow_extractor.py +85 -0
- ads/model/extractor/xgboost_extractor.py +94 -0
- ads/model/framework/__init__.py +5 -0
- ads/model/framework/automl_model.py +178 -0
- ads/model/framework/embedding_onnx_model.py +438 -0
- ads/model/framework/huggingface_model.py +399 -0
- ads/model/framework/lightgbm_model.py +266 -0
- ads/model/framework/pytorch_model.py +266 -0
- ads/model/framework/sklearn_model.py +250 -0
- ads/model/framework/spark_model.py +326 -0
- ads/model/framework/tensorflow_model.py +254 -0
- ads/model/framework/xgboost_model.py +258 -0
- ads/model/generic_model.py +3518 -0
- ads/model/model_artifact_boilerplate/README.md +381 -0
- ads/model/model_artifact_boilerplate/__init__.py +5 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +5 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +427 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +2 -0
- ads/model/model_artifact_boilerplate/runtime.yaml +7 -0
- ads/model/model_artifact_boilerplate/score.py +61 -0
- ads/model/model_file_description_schema.json +68 -0
- ads/model/model_introspect.py +331 -0
- ads/model/model_metadata.py +1810 -0
- ads/model/model_metadata_mixin.py +460 -0
- ads/model/model_properties.py +63 -0
- ads/model/model_version_set.py +739 -0
- ads/model/runtime/__init__.py +5 -0
- ads/model/runtime/env_info.py +306 -0
- ads/model/runtime/model_deployment_details.py +37 -0
- ads/model/runtime/model_provenance_details.py +58 -0
- ads/model/runtime/runtime_info.py +81 -0
- ads/model/runtime/schemas/inference_env_info_schema.yaml +16 -0
- ads/model/runtime/schemas/model_provenance_schema.yaml +36 -0
- ads/model/runtime/schemas/training_env_info_schema.yaml +16 -0
- ads/model/runtime/utils.py +201 -0
- ads/model/serde/__init__.py +5 -0
- ads/model/serde/common.py +40 -0
- ads/model/serde/model_input.py +547 -0
- ads/model/serde/model_serializer.py +1184 -0
- ads/model/service/__init__.py +5 -0
- ads/model/service/oci_datascience_model.py +1076 -0
- ads/model/service/oci_datascience_model_deployment.py +500 -0
- ads/model/service/oci_datascience_model_version_set.py +176 -0
- ads/model/transformer/__init__.py +5 -0
- ads/model/transformer/onnx_transformer.py +324 -0
- ads/mysqldb/__init__.py +5 -0
- ads/mysqldb/mysql_db.py +227 -0
- ads/opctl/__init__.py +18 -0
- ads/opctl/anomaly_detection.py +11 -0
- ads/opctl/backend/__init__.py +5 -0
- ads/opctl/backend/ads_dataflow.py +353 -0
- ads/opctl/backend/ads_ml_job.py +710 -0
- ads/opctl/backend/ads_ml_pipeline.py +164 -0
- ads/opctl/backend/ads_model_deployment.py +209 -0
- ads/opctl/backend/base.py +146 -0
- ads/opctl/backend/local.py +1053 -0
- ads/opctl/backend/marketplace/__init__.py +9 -0
- ads/opctl/backend/marketplace/helm_helper.py +173 -0
- ads/opctl/backend/marketplace/local_marketplace.py +271 -0
- ads/opctl/backend/marketplace/marketplace_backend_runner.py +71 -0
- ads/opctl/backend/marketplace/marketplace_operator_interface.py +44 -0
- ads/opctl/backend/marketplace/marketplace_operator_runner.py +24 -0
- ads/opctl/backend/marketplace/marketplace_utils.py +212 -0
- ads/opctl/backend/marketplace/models/__init__.py +5 -0
- ads/opctl/backend/marketplace/models/bearer_token.py +94 -0
- ads/opctl/backend/marketplace/models/marketplace_type.py +70 -0
- ads/opctl/backend/marketplace/models/ocir_details.py +56 -0
- ads/opctl/backend/marketplace/prerequisite_checker.py +238 -0
- ads/opctl/cli.py +707 -0
- ads/opctl/cmds.py +869 -0
- ads/opctl/conda/__init__.py +5 -0
- ads/opctl/conda/cli.py +193 -0
- ads/opctl/conda/cmds.py +749 -0
- ads/opctl/conda/config.yaml +34 -0
- ads/opctl/conda/manifest_template.yaml +13 -0
- ads/opctl/conda/multipart_uploader.py +188 -0
- ads/opctl/conda/pack.py +89 -0
- ads/opctl/config/__init__.py +5 -0
- ads/opctl/config/base.py +57 -0
- ads/opctl/config/diagnostics/__init__.py +5 -0
- ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +62 -0
- ads/opctl/config/merger.py +255 -0
- ads/opctl/config/resolver.py +297 -0
- ads/opctl/config/utils.py +79 -0
- ads/opctl/config/validator.py +17 -0
- ads/opctl/config/versioner.py +68 -0
- ads/opctl/config/yaml_parsers/__init__.py +7 -0
- ads/opctl/config/yaml_parsers/base.py +58 -0
- ads/opctl/config/yaml_parsers/distributed/__init__.py +7 -0
- ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +201 -0
- ads/opctl/constants.py +66 -0
- ads/opctl/decorator/__init__.py +5 -0
- ads/opctl/decorator/common.py +129 -0
- ads/opctl/diagnostics/__init__.py +5 -0
- ads/opctl/diagnostics/__main__.py +25 -0
- ads/opctl/diagnostics/check_distributed_job_requirements.py +212 -0
- ads/opctl/diagnostics/check_requirements.py +144 -0
- ads/opctl/diagnostics/requirement_exception.py +9 -0
- ads/opctl/distributed/README.md +109 -0
- ads/opctl/distributed/__init__.py +5 -0
- ads/opctl/distributed/certificates.py +32 -0
- ads/opctl/distributed/cli.py +207 -0
- ads/opctl/distributed/cmds.py +731 -0
- ads/opctl/distributed/common/__init__.py +5 -0
- ads/opctl/distributed/common/abstract_cluster_provider.py +449 -0
- ads/opctl/distributed/common/abstract_framework_spec_builder.py +88 -0
- ads/opctl/distributed/common/cluster_config_helper.py +103 -0
- ads/opctl/distributed/common/cluster_provider_factory.py +21 -0
- ads/opctl/distributed/common/cluster_runner.py +54 -0
- ads/opctl/distributed/common/framework_factory.py +29 -0
- ads/opctl/docker/Dockerfile.job +103 -0
- ads/opctl/docker/Dockerfile.job.arm +107 -0
- ads/opctl/docker/Dockerfile.job.gpu +175 -0
- ads/opctl/docker/base-env.yaml +13 -0
- ads/opctl/docker/cuda.repo +6 -0
- ads/opctl/docker/operator/.dockerignore +0 -0
- ads/opctl/docker/operator/Dockerfile +41 -0
- ads/opctl/docker/operator/Dockerfile.gpu +85 -0
- ads/opctl/docker/operator/cuda.repo +6 -0
- ads/opctl/docker/operator/environment.yaml +8 -0
- ads/opctl/forecast.py +11 -0
- ads/opctl/index.yaml +3 -0
- ads/opctl/model/__init__.py +5 -0
- ads/opctl/model/cli.py +65 -0
- ads/opctl/model/cmds.py +73 -0
- ads/opctl/operator/README.md +4 -0
- ads/opctl/operator/__init__.py +31 -0
- ads/opctl/operator/cli.py +344 -0
- ads/opctl/operator/cmd.py +596 -0
- ads/opctl/operator/common/__init__.py +5 -0
- ads/opctl/operator/common/backend_factory.py +460 -0
- ads/opctl/operator/common/const.py +27 -0
- ads/opctl/operator/common/data/synthetic.csv +16001 -0
- ads/opctl/operator/common/dictionary_merger.py +148 -0
- ads/opctl/operator/common/errors.py +42 -0
- ads/opctl/operator/common/operator_config.py +99 -0
- ads/opctl/operator/common/operator_loader.py +811 -0
- ads/opctl/operator/common/operator_schema.yaml +130 -0
- ads/opctl/operator/common/operator_yaml_generator.py +152 -0
- ads/opctl/operator/common/utils.py +208 -0
- ads/opctl/operator/lowcode/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/MLoperator +16 -0
- ads/opctl/operator/lowcode/anomaly/README.md +207 -0
- ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/__main__.py +103 -0
- ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
- ads/opctl/operator/lowcode/anomaly/const.py +167 -0
- ads/opctl/operator/lowcode/anomaly/environment.yaml +10 -0
- ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +146 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +162 -0
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +99 -0
- ads/opctl/operator/lowcode/anomaly/model/autots.py +115 -0
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +404 -0
- ads/opctl/operator/lowcode/anomaly/model/factory.py +110 -0
- ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +78 -0
- ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +78 -0
- ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +120 -0
- ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
- ads/opctl/operator/lowcode/anomaly/operator_config.py +127 -0
- ads/opctl/operator/lowcode/anomaly/schema.yaml +401 -0
- ads/opctl/operator/lowcode/anomaly/utils.py +88 -0
- ads/opctl/operator/lowcode/common/__init__.py +5 -0
- ads/opctl/operator/lowcode/common/const.py +10 -0
- ads/opctl/operator/lowcode/common/data.py +116 -0
- ads/opctl/operator/lowcode/common/errors.py +47 -0
- ads/opctl/operator/lowcode/common/transformations.py +296 -0
- ads/opctl/operator/lowcode/common/utils.py +384 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +13 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/README.md +30 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +5 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +116 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +85 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/const.py +15 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +4 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +32 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +43 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +120 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +34 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +386 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +160 -0
- ads/opctl/operator/lowcode/forecast/MLoperator +25 -0
- ads/opctl/operator/lowcode/forecast/README.md +209 -0
- ads/opctl/operator/lowcode/forecast/__init__.py +5 -0
- ads/opctl/operator/lowcode/forecast/__main__.py +89 -0
- ads/opctl/operator/lowcode/forecast/cmd.py +40 -0
- ads/opctl/operator/lowcode/forecast/const.py +92 -0
- ads/opctl/operator/lowcode/forecast/environment.yaml +20 -0
- ads/opctl/operator/lowcode/forecast/errors.py +26 -0
- ads/opctl/operator/lowcode/forecast/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/forecast/model/arima.py +279 -0
- ads/opctl/operator/lowcode/forecast/model/automlx.py +553 -0
- ads/opctl/operator/lowcode/forecast/model/autots.py +312 -0
- ads/opctl/operator/lowcode/forecast/model/base_model.py +875 -0
- ads/opctl/operator/lowcode/forecast/model/factory.py +106 -0
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +492 -0
- ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +243 -0
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +482 -0
- ads/opctl/operator/lowcode/forecast/model/prophet.py +450 -0
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +244 -0
- ads/opctl/operator/lowcode/forecast/operator_config.py +234 -0
- ads/opctl/operator/lowcode/forecast/schema.yaml +506 -0
- ads/opctl/operator/lowcode/forecast/utils.py +397 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +7 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +285 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/score.py +246 -0
- ads/opctl/operator/lowcode/pii/MLoperator +17 -0
- ads/opctl/operator/lowcode/pii/README.md +208 -0
- ads/opctl/operator/lowcode/pii/__init__.py +5 -0
- ads/opctl/operator/lowcode/pii/__main__.py +78 -0
- ads/opctl/operator/lowcode/pii/cmd.py +39 -0
- ads/opctl/operator/lowcode/pii/constant.py +84 -0
- ads/opctl/operator/lowcode/pii/environment.yaml +17 -0
- ads/opctl/operator/lowcode/pii/errors.py +27 -0
- ads/opctl/operator/lowcode/pii/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/pii/model/factory.py +82 -0
- ads/opctl/operator/lowcode/pii/model/guardrails.py +167 -0
- ads/opctl/operator/lowcode/pii/model/pii.py +145 -0
- ads/opctl/operator/lowcode/pii/model/processor/__init__.py +34 -0
- ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +34 -0
- ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +35 -0
- ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +225 -0
- ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +73 -0
- ads/opctl/operator/lowcode/pii/model/processor/remover.py +26 -0
- ads/opctl/operator/lowcode/pii/model/report.py +487 -0
- ads/opctl/operator/lowcode/pii/operator_config.py +95 -0
- ads/opctl/operator/lowcode/pii/schema.yaml +108 -0
- ads/opctl/operator/lowcode/pii/utils.py +43 -0
- ads/opctl/operator/lowcode/recommender/MLoperator +16 -0
- ads/opctl/operator/lowcode/recommender/README.md +206 -0
- ads/opctl/operator/lowcode/recommender/__init__.py +5 -0
- ads/opctl/operator/lowcode/recommender/__main__.py +82 -0
- ads/opctl/operator/lowcode/recommender/cmd.py +33 -0
- ads/opctl/operator/lowcode/recommender/constant.py +30 -0
- ads/opctl/operator/lowcode/recommender/environment.yaml +11 -0
- ads/opctl/operator/lowcode/recommender/model/base_model.py +212 -0
- ads/opctl/operator/lowcode/recommender/model/factory.py +56 -0
- ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +25 -0
- ads/opctl/operator/lowcode/recommender/model/svd.py +106 -0
- ads/opctl/operator/lowcode/recommender/operator_config.py +81 -0
- ads/opctl/operator/lowcode/recommender/schema.yaml +265 -0
- ads/opctl/operator/lowcode/recommender/utils.py +13 -0
- ads/opctl/operator/runtime/__init__.py +5 -0
- ads/opctl/operator/runtime/const.py +17 -0
- ads/opctl/operator/runtime/container_runtime_schema.yaml +50 -0
- ads/opctl/operator/runtime/marketplace_runtime.py +50 -0
- ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +21 -0
- ads/opctl/operator/runtime/python_runtime_schema.yaml +21 -0
- ads/opctl/operator/runtime/runtime.py +115 -0
- ads/opctl/schema.yaml.yml +36 -0
- ads/opctl/script.py +40 -0
- ads/opctl/spark/__init__.py +5 -0
- ads/opctl/spark/cli.py +43 -0
- ads/opctl/spark/cmds.py +147 -0
- ads/opctl/templates/diagnostic_report_template.jinja2 +102 -0
- ads/opctl/utils.py +344 -0
- ads/oracledb/__init__.py +5 -0
- ads/oracledb/oracle_db.py +346 -0
- ads/pipeline/__init__.py +39 -0
- ads/pipeline/ads_pipeline.py +2279 -0
- ads/pipeline/ads_pipeline_run.py +772 -0
- ads/pipeline/ads_pipeline_step.py +605 -0
- ads/pipeline/builders/__init__.py +5 -0
- ads/pipeline/builders/infrastructure/__init__.py +5 -0
- ads/pipeline/builders/infrastructure/custom_script.py +32 -0
- ads/pipeline/cli.py +119 -0
- ads/pipeline/extension.py +291 -0
- ads/pipeline/schema/__init__.py +5 -0
- ads/pipeline/schema/cs_step_schema.json +35 -0
- ads/pipeline/schema/ml_step_schema.json +31 -0
- ads/pipeline/schema/pipeline_schema.json +71 -0
- ads/pipeline/visualizer/__init__.py +5 -0
- ads/pipeline/visualizer/base.py +570 -0
- ads/pipeline/visualizer/graph_renderer.py +272 -0
- ads/pipeline/visualizer/text_renderer.py +84 -0
- ads/secrets/__init__.py +11 -0
- ads/secrets/adb.py +386 -0
- ads/secrets/auth_token.py +86 -0
- ads/secrets/big_data_service.py +365 -0
- ads/secrets/mysqldb.py +149 -0
- ads/secrets/oracledb.py +160 -0
- ads/secrets/secrets.py +407 -0
- ads/telemetry/__init__.py +7 -0
- ads/telemetry/base.py +69 -0
- ads/telemetry/client.py +122 -0
- ads/telemetry/telemetry.py +257 -0
- ads/templates/dataflow_pyspark.jinja2 +13 -0
- ads/templates/dataflow_sparksql.jinja2 +22 -0
- ads/templates/func.jinja2 +20 -0
- ads/templates/schemas/openapi.json +1740 -0
- ads/templates/score-pkl.jinja2 +173 -0
- ads/templates/score.jinja2 +322 -0
- ads/templates/score_embedding_onnx.jinja2 +202 -0
- ads/templates/score_generic.jinja2 +165 -0
- ads/templates/score_huggingface_pipeline.jinja2 +217 -0
- ads/templates/score_lightgbm.jinja2 +185 -0
- ads/templates/score_onnx.jinja2 +407 -0
- ads/templates/score_onnx_new.jinja2 +473 -0
- ads/templates/score_oracle_automl.jinja2 +185 -0
- ads/templates/score_pyspark.jinja2 +154 -0
- ads/templates/score_pytorch.jinja2 +219 -0
- ads/templates/score_scikit-learn.jinja2 +184 -0
- ads/templates/score_tensorflow.jinja2 +184 -0
- ads/templates/score_xgboost.jinja2 +178 -0
- ads/text_dataset/__init__.py +5 -0
- ads/text_dataset/backends.py +211 -0
- ads/text_dataset/dataset.py +445 -0
- ads/text_dataset/extractor.py +207 -0
- ads/text_dataset/options.py +53 -0
- ads/text_dataset/udfs.py +22 -0
- ads/text_dataset/utils.py +49 -0
- ads/type_discovery/__init__.py +9 -0
- ads/type_discovery/abstract_detector.py +21 -0
- ads/type_discovery/constant_detector.py +41 -0
- ads/type_discovery/continuous_detector.py +54 -0
- ads/type_discovery/credit_card_detector.py +99 -0
- ads/type_discovery/datetime_detector.py +92 -0
- ads/type_discovery/discrete_detector.py +118 -0
- ads/type_discovery/document_detector.py +146 -0
- ads/type_discovery/ip_detector.py +68 -0
- ads/type_discovery/latlon_detector.py +90 -0
- ads/type_discovery/phone_number_detector.py +63 -0
- ads/type_discovery/type_discovery_driver.py +87 -0
- ads/type_discovery/typed_feature.py +594 -0
- ads/type_discovery/unknown_detector.py +41 -0
- ads/type_discovery/zipcode_detector.py +48 -0
- ads/vault/__init__.py +7 -0
- ads/vault/vault.py +237 -0
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/METADATA +150 -149
- oracle_ads-2.13.10.dist-info/RECORD +858 -0
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/WHEEL +1 -2
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/entry_points.txt +2 -1
- oracle_ads-2.13.9rc0.dist-info/RECORD +0 -9
- oracle_ads-2.13.9rc0.dist-info/top_level.txt +0 -1
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,574 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8; -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
"""
|
8
|
+
The module containing classes to read labeled datasets.
|
9
|
+
Supports reading labeled datasets from exported files or directly from the cloud.
|
10
|
+
|
11
|
+
Classes
|
12
|
+
-------
|
13
|
+
LabeledDatasetReader
|
14
|
+
The LabeledDatasetReader class to read labeled dataset.
|
15
|
+
ExportReader
|
16
|
+
The ExportReader class to read labeled dataset from the export.
|
17
|
+
DLSDatasetReader
|
18
|
+
The DLSDatasetReader class to read labeled dataset from the cloud.
|
19
|
+
|
20
|
+
Examples
|
21
|
+
--------
|
22
|
+
>>> from ads.common import auth as authutil
|
23
|
+
>>> from ads.data_labeling import LabeledDatasetReader
|
24
|
+
>>> ds_reader = LabeledDatasetReader.from_export(
|
25
|
+
... path="oci://bucket_name@namespace/dataset_metadata.jsonl",
|
26
|
+
... auth=authutil.api_keys(),
|
27
|
+
... materialize=True
|
28
|
+
... )
|
29
|
+
>>> ds_reader.info()
|
30
|
+
------------------------------------------------------------------------
|
31
|
+
annotation_type SINGLE_LABEL
|
32
|
+
compartment_id TEST_COMPARTMENT
|
33
|
+
dataset_id TEST_DATASET
|
34
|
+
dataset_name test_dataset_name
|
35
|
+
dataset_type TEXT
|
36
|
+
labels ['yes', 'no']
|
37
|
+
records_path path/to/records
|
38
|
+
source_path path/to/dataset
|
39
|
+
|
40
|
+
>>> ds_reader.read()
|
41
|
+
Path Content Annotations
|
42
|
+
-----------------------------------------------------------------------
|
43
|
+
0 path/to/the/content/file1 file content yes
|
44
|
+
1 path/to/the/content/file2 file content no
|
45
|
+
2 path/to/the/content/file3 file content no
|
46
|
+
|
47
|
+
>>> next(ds_reader.read(iterator=True))
|
48
|
+
("path/to/the/content/file1", "file content", "yes")
|
49
|
+
|
50
|
+
>>> next(ds_reader.read(iterator=True, chunksize=2))
|
51
|
+
[("path/to/the/content/file1", "file content", "yes"),
|
52
|
+
("path/to/the/content/file2", "file content", "no")]
|
53
|
+
|
54
|
+
>>> next(ds_reader.read(chunksize=2))
|
55
|
+
Path Content Annotations
|
56
|
+
----------------------------------------------------------------------
|
57
|
+
0 path/to/the/content/file1 file content yes
|
58
|
+
1 path/to/the/content/file2 file content no
|
59
|
+
|
60
|
+
>>> ds_reader = LabeledDatasetReader.from_DLS(
|
61
|
+
... dataset_id="dataset_OCID",
|
62
|
+
... compartment_id="compartment_OCID",
|
63
|
+
... auth=authutil.api_keys(),
|
64
|
+
... materialize=True
|
65
|
+
... )
|
66
|
+
"""
|
67
|
+
|
68
|
+
from functools import lru_cache
|
69
|
+
from typing import Any, Dict, Generator, Tuple, Union
|
70
|
+
|
71
|
+
import pandas as pd
|
72
|
+
from ads.common import auth as authutil
|
73
|
+
from ads.common.serializer import Serializable
|
74
|
+
from ads.data_labeling.interface.reader import Reader
|
75
|
+
from ads.data_labeling.reader.metadata_reader import Metadata, MetadataReader
|
76
|
+
from ads.data_labeling.reader.record_reader import RecordReader
|
77
|
+
from ads.data_labeling.constants import (
|
78
|
+
FORMATS_TO_ANNOTATION_TYPE,
|
79
|
+
ANNOTATION_TYPE_TO_FORMATS,
|
80
|
+
)
|
81
|
+
from ads.config import NB_SESSION_COMPARTMENT_OCID, JOB_RUN_COMPARTMENT_OCID
|
82
|
+
|
83
|
+
# Column names applied when labeled records are returned as a pandas DataFrame.
_LABELED_DF_COLUMNS = ["Path", "Content", "Annotations"]
|
84
|
+
|
85
|
+
|
86
|
+
class LabeledDatasetReader:
    """The labeled dataset reader class.

    A thin facade over a concrete reader (``ExportReader`` or
    ``DLSDatasetReader``) that exposes the records either as a pandas
    DataFrame or as a generator, optionally split into chunks.

    Methods
    -------
    info(self) -> Metadata
        Gets labeled dataset metadata.
    read(self, iterator: bool = False, format: str = None, chunksize: int = None) -> Union[Generator[Any, Any, Any], pd.DataFrame]
        Reads labeled dataset.
    from_export(cls, path: str, auth: Dict = None, encoding="utf-8", materialize: bool = False, include_unlabeled: bool = False) -> "LabeledDatasetReader"
        Constructs a Labeled Dataset Reader instance from an export file.
    from_DLS(cls, dataset_id: str, compartment_id: str = None, auth: Dict = None, encoding="utf-8", materialize: bool = False, include_unlabeled: bool = False) -> "LabeledDatasetReader"
        Constructs a Labeled Dataset Reader instance from a cloud dataset.

    Examples
    --------
    >>> from ads.common import auth as authutil
    >>> from ads.data_labeling import LabeledDatasetReader

    >>> ds_reader = LabeledDatasetReader.from_export(
    ...    path="oci://bucket_name@namespace/dataset_metadata.jsonl",
    ...    auth=authutil.api_keys(),
    ...    materialize=True
    ... )

    >>> ds_reader = LabeledDatasetReader.from_DLS(
    ...    dataset_id="dataset_OCID",
    ...    compartment_id="compartment_OCID",
    ...    auth=authutil.api_keys(),
    ...    materialize=True
    ... )

    >>> ds_reader.info()
        ------------------------------------------------------------------------
        annotation_type                                             SINGLE_LABEL
        compartment_id                                          TEST_COMPARTMENT
        dataset_id                                                  TEST_DATASET
        dataset_name                                           test_dataset_name
        dataset_type                                                        TEXT
        labels                                                     ['yes', 'no']
        records_path                                             path/to/records
        source_path                                              path/to/dataset

    >>> ds_reader.read()
                                 Path            Content            Annotations
        -----------------------------------------------------------------------
        0   path/to/the/content/file1       file content                    yes
        1   path/to/the/content/file2       file content                     no
        2   path/to/the/content/file3       file content                     no

    >>> next(ds_reader.read(iterator=True))
        ("path/to/the/content/file1", "file content", "yes")

    >>> next(ds_reader.read(iterator=True, chunksize=2))
        [("path/to/the/content/file1", "file content", "yes"),
        ("path/to/the/content/file2", "file content", "no")]

    >>> next(ds_reader.read(chunksize=2))
                                Path            Content            Annotations
        ----------------------------------------------------------------------
        0   path/to/the/content/file1       file content                    yes
        1   path/to/the/content/file2       file content                     no
    """

    def __init__(self, reader: "Reader"):
        """Initializes the labeled dataset reader instance.

        Parameters
        ----------
        reader: Reader
            The Reader instance which reads and extracts the labeled dataset.
        """
        self._reader = reader

    @classmethod
    def from_export(
        cls,
        path: str,
        auth: dict = None,
        encoding: str = "utf-8",
        materialize: bool = False,
        include_unlabeled: bool = False,
    ) -> "LabeledDatasetReader":
        """Constructs Labeled Dataset Reader instance from an export file.

        Parameters
        ----------
        path: str
            The metadata file path, can be either local or object storage path.
        auth: (dict, optional). Defaults to None.
            The default authentication is set using `ads.set_auth` API. If you need to override the
            default, use the `ads.common.auth.api_keys` or `ads.common.auth.resource_principal` to create appropriate
            authentication signer and kwargs required to instantiate IdentityClient object.
        encoding: (str, optional). Defaults to 'utf-8'.
            Encoding for files.
        materialize: (bool, optional). Defaults to False.
            Whether the content of the dataset file should be loaded or it should return the file path to the content.
            By default the content will not be loaded.
        include_unlabeled: (bool, optional). Defaults to False.
            Whether to load the unlabeled records or not.

        Returns
        -------
        LabeledDatasetReader
            The LabeledDatasetReader instance.
        """
        auth = auth or authutil.default_signer()

        return cls(
            reader=ExportReader(
                path=path,
                auth=auth,
                encoding=encoding,
                materialize=materialize,
                include_unlabeled=include_unlabeled,
            )
        )

    @classmethod
    def from_DLS(
        cls,
        dataset_id: str,
        compartment_id: str = None,
        auth: dict = None,
        encoding: str = "utf-8",
        materialize: bool = False,
        include_unlabeled: bool = False,
    ) -> "LabeledDatasetReader":
        """Constructs Labeled Dataset Reader instance from a cloud dataset.

        Parameters
        ----------
        dataset_id: str
            The dataset OCID.
        compartment_id: str. Defaults to the compartment_id from the env variable.
            The compartment OCID of the dataset.
        auth: (dict, optional). Defaults to None.
            The default authentication is set using `ads.set_auth` API. If you need to override the
            default, use the `ads.common.auth.api_keys` or `ads.common.auth.resource_principal` to create appropriate
            authentication signer and kwargs required to instantiate IdentityClient object.
        encoding: (str, optional). Defaults to 'utf-8'.
            Encoding for files.
        materialize: (bool, optional). Defaults to False.
            Whether the content of the dataset file should be loaded or it should return the file path to the content.
            By default the content will not be loaded.
        include_unlabeled: (bool, optional). Defaults to False.
            Whether to load the unlabeled records or not.

        Returns
        -------
        LabeledDatasetReader
            The LabeledDatasetReader instance.

        Raises
        ------
        ValueError: When no compartment OCID is passed and none is configured.
        """
        if compartment_id is None:
            # Fall back to the compartment configured for the notebook session or job run.
            compartment_id = NB_SESSION_COMPARTMENT_OCID or JOB_RUN_COMPARTMENT_OCID

        if not compartment_id:
            raise ValueError("The `compartment_id` must be provided.")

        return cls(
            reader=DLSDatasetReader(
                compartment_id=compartment_id,
                dataset_id=dataset_id,
                auth=auth or authutil.default_signer(),
                encoding=encoding,
                materialize=materialize,
                include_unlabeled=include_unlabeled,
            )
        )

    def info(self) -> "Serializable":
        """Gets the labeled dataset metadata.

        Returns
        -------
        Metadata
            The labeled dataset metadata, as reported by the wrapped reader.
        """
        return self._reader.info()

    def read(
        self, iterator: bool = False, format: str = None, chunksize: int = None
    ) -> Union[Generator[Any, Any, Any], pd.DataFrame]:
        """Reads the labeled dataset records.

        Parameters
        ----------
        iterator: (bool, optional). Defaults to False.
            True if the result should be represented as a Generator.
            False if the result should be represented as a Pandas DataFrame.
        format: (str, optional). Defaults to None.
            Output format of annotations. Can be None, "spacy" or "yolo".
        chunksize: (int, optional). Defaults to None.
            The number of records that should be read in one iteration.
            The result will be returned in a generator format.
            A falsy value (None or 0) disables chunking.

        Returns
        -------
        Union[
            Generator[Tuple[str, str, Any], Any, Any],
            Generator[List[Tuple[str, str, Any]], Any, Any],
            Generator[pd.DataFrame, Any, Any],
            pd.DataFrame
        ]
            `pd.DataFrame` if `iterator` and `chunksize` are not specified.
            `Generator[pd.DataFrame]` if `iterator` equal to False and `chunksize` is specified.
            `Generator[List[Tuple[str, str, Any]]]` if `iterator` equal to True and `chunksize` is specified.
            `Generator[Tuple[str, str, Any]]` if `iterator` equal to True and `chunksize` is not specified.

        Raises
        ------
        ValueError: If `chunksize` is specified but is not a positive integer.
        """
        if chunksize:
            return self._bulk_read(
                iterator=iterator, format=format, chunksize=chunksize
            )

        records = self._reader.read(format=format)
        if iterator:
            return records

        return pd.DataFrame(records, columns=_LABELED_DF_COLUMNS)

    def _bulk_read(
        self, iterator: bool = False, format: str = None, chunksize: int = None
    ) -> Generator[Union[pd.DataFrame, list], Any, Any]:
        """Reads the labeled dataset records by chunks.

        The `chunksize` argument is validated immediately, before any record
        is read, so an invalid value fails at call time rather than on the
        first iteration of the returned generator.

        Parameters
        ----------
        iterator: (bool, optional). Defaults to False.
            True if every chunk should be represented as a list of record tuples.
            False if every chunk should be represented as a Pandas DataFrame.
        format: (str, optional). Defaults to None.
            Output format of annotations. Can be None, "spacy" or "yolo".
        chunksize: (int, optional). Defaults to None.
            The number of records that should be read in one iteration.
            Result will be represented as a generator.

        Returns
        -------
        Generator[Union[pd.DataFrame, List[Tuple[str, str, Any]]], Any, Any]
            The generator that yields chunks of records either in DataFrame
            format or as lists of tuples.

        Raises
        ------
        ValueError: If chunksize is empty or not a positive integer.
        """
        if not chunksize or not isinstance(chunksize, int) or chunksize < 1:
            raise ValueError("`chunksize` must be a positive integer.")

        return self._iter_chunks(
            iterator=iterator, format=format, chunksize=chunksize
        )

    def _iter_chunks(
        self, iterator: bool, format: str, chunksize: int
    ) -> Generator[Union[pd.DataFrame, list], Any, Any]:
        """Yields the records grouped into chunks of at most `chunksize` items.

        Private helper of `_bulk_read`; expects an already validated `chunksize`.
        The underlying reader is only consulted once iteration starts.
        """
        chunk = []
        for record in self._reader.read(format=format):
            chunk.append(record)
            if len(chunk) >= chunksize:
                yield chunk if iterator else pd.DataFrame(
                    chunk, columns=_LABELED_DF_COLUMNS
                )
                # Start a fresh list so a chunk already handed out is never mutated.
                chunk = []

        # Emit the trailing partial chunk, if any.
        if chunk:
            yield chunk if iterator else pd.DataFrame(
                chunk, columns=_LABELED_DF_COLUMNS
            )
|
345
|
+
|
346
|
+
|
347
|
+
class DLSDatasetReader(Reader):
    """The DLSDatasetReader class to read labeled dataset from the cloud.

    Methods
    -------
    info(self) -> Metadata
        Gets the labeled dataset metadata.
    read(self, format: str = None) -> Generator[Tuple, Any, Any]
        Reads the labeled dataset.
    """

    def __init__(
        self,
        dataset_id: str,
        compartment_id: str,
        auth: Dict = None,
        encoding="utf-8",
        materialize: bool = False,
        include_unlabeled: bool = False,
    ):
        """Initializes the DLS dataset reader instance.

        Parameters
        ----------
        dataset_id: str
            The dataset OCID.
        compartment_id: str
            The compartment OCID of the dataset.
        auth: (dict, optional). Defaults to None.
            The default authentication is set using `ads.set_auth` API. If you need to override the
            default, use the `ads.common.auth.api_keys` or `ads.common.auth.resource_principal` to create appropriate
            authentication signer and kwargs required to instantiate IdentityClient object.
        encoding: (str, optional). Defaults to 'utf-8'.
            Encoding for files. The encoding is used to extract the metadata information
            of the labeled dataset and also to extract the content of the text dataset records.
        materialize: (bool, optional). Defaults to False.
            Whether the content of dataset files should be loaded/materialized or not.
            By default the content will not be materialized.
        include_unlabeled: (bool, optional). Defaults to False.
            Whether to load the unlabeled records or not.

        Raises
        ------
        ValueError: When dataset_id or compartment_id is empty.
        TypeError: When dataset_id is not a string.
        """
        if not dataset_id:
            raise ValueError("The dataset OCID must be specified.")

        if not isinstance(dataset_id, str):
            raise TypeError("The dataset_id must be a string.")

        if not compartment_id:
            raise ValueError("The compartment OCID must be specified.")

        self.dataset_id = dataset_id
        self.compartment_id = compartment_id
        self.auth = auth or authutil.default_signer()
        self.encoding = encoding
        self.materialize = materialize
        self.include_unlabeled = include_unlabeled
        # Per-instance metadata cache, filled on the first `info()` call.
        self._metadata = None

    def info(self) -> Metadata:
        """Gets the labeled dataset metadata.

        The metadata is fetched once and cached on the instance, so repeated
        calls (including the one made by `read`) do not trigger another lookup.
        The instance's `auth` is forwarded so that the metadata request uses
        the same signer as the record reads.

        Returns
        -------
        Metadata
            The labeled dataset metadata.
        """
        # getattr keeps this working for instances created without __init__.
        metadata = getattr(self, "_metadata", None)
        if metadata is None:
            metadata = MetadataReader.from_DLS(
                compartment_id=self.compartment_id,
                dataset_id=self.dataset_id,
                auth=self.auth,
            ).read()
            self._metadata = metadata
        return metadata

    def read(self, format: str = None) -> Generator[Tuple, Any, Any]:
        """Reads the labeled dataset records.

        Parameters
        ----------
        format: (str, optional). Defaults to None.
            Output format of annotations. Can be None, "spacy" for dataset
            Entity Extraction type or "yolo" for Object Detection type.
            When None, it outputs List[NERItem] or List[BoundingBoxItem].
            When "spacy", it outputs List[Tuple].
            When "yolo", it outputs List[List[Tuple]].

        Returns
        -------
        Generator[Tuple, Any, Any]
            The labeled dataset records.
        """
        metadata = self.info()

        records_reader = RecordReader.from_DLS(
            dataset_type=metadata.dataset_type,
            annotation_type=metadata.annotation_type,
            dataset_source_path=metadata.source_path,
            compartment_id=self.compartment_id,
            dataset_id=self.dataset_id,
            auth=self.auth,
            encoding=self.encoding,
            materialize=self.materialize,
            include_unlabeled=self.include_unlabeled,
            format=format,
            categories=metadata.labels,
        )
        return records_reader.read()
|
457
|
+
|
458
|
+
|
459
|
+
class ExportReader(Reader):
|
460
|
+
"""The ExportReader class to read labeled dataset from the export.
|
461
|
+
|
462
|
+
Methods
|
463
|
+
-------
|
464
|
+
info(self) -> Metadata
|
465
|
+
Gets the labeled dataset metadata.
|
466
|
+
read(self) -> Generator[Tuple, Any, Any]
|
467
|
+
Reads the labeled dataset.
|
468
|
+
"""
|
469
|
+
|
470
|
+
def __init__(
|
471
|
+
self,
|
472
|
+
path: str,
|
473
|
+
auth: Dict = None,
|
474
|
+
encoding="utf-8",
|
475
|
+
materialize: bool = False,
|
476
|
+
include_unlabeled: bool = False,
|
477
|
+
):
|
478
|
+
"""Initializes the labeled dataset export reader instance.
|
479
|
+
|
480
|
+
Parameters
|
481
|
+
----------
|
482
|
+
path: str
|
483
|
+
The metadata file path, can be either local or object storage path.
|
484
|
+
auth: (dict, optional). Defaults to None.
|
485
|
+
The default authetication is set using `ads.set_auth` API. If you need to override the
|
486
|
+
default, use the `ads.common.auth.api_keys` or `ads.common.auth.resource_principal` to create appropriate
|
487
|
+
authentication signer and kwargs required to instantiate IdentityClient object.
|
488
|
+
encoding: (str, optional). Defaults to 'utf-8'.
|
489
|
+
Encoding for files. The encoding is used to extract the metadata information
|
490
|
+
of the labeled dataset and also to extract the content of the text dataset records.
|
491
|
+
materialize: (bool, optional). Defaults to False.
|
492
|
+
Whether the content of dataset files should be loaded/materialized or not.
|
493
|
+
By default the content will not be materialized.
|
494
|
+
include_unlabeled: (bool, optional). Defaults to False.
|
495
|
+
Whether to load the unlabeled records or not.
|
496
|
+
|
497
|
+
Raises
|
498
|
+
------
|
499
|
+
ValueError: When path is empty or not a string.
|
500
|
+
TypeError: When path not a string.
|
501
|
+
"""
|
502
|
+
|
503
|
+
if not path:
|
504
|
+
raise ValueError("The parameter `path` is required.")
|
505
|
+
|
506
|
+
if not isinstance(path, str):
|
507
|
+
raise TypeError("The parameter `path` must be a string.")
|
508
|
+
|
509
|
+
self.path = path
|
510
|
+
self.auth = auth or authutil.default_signer()
|
511
|
+
self.encoding = encoding
|
512
|
+
self.materialize = materialize
|
513
|
+
self.include_unlabeled = include_unlabeled
|
514
|
+
|
515
|
+
@lru_cache(maxsize=1)
|
516
|
+
def info(self) -> Metadata:
|
517
|
+
"""Gets the labeled dataset metadata.
|
518
|
+
|
519
|
+
Returns
|
520
|
+
-------
|
521
|
+
Metadata
|
522
|
+
The labeled dataset metadata.
|
523
|
+
"""
|
524
|
+
return MetadataReader.from_export_file(
|
525
|
+
path=self.path,
|
526
|
+
auth=self.auth,
|
527
|
+
).read()
|
528
|
+
|
529
|
+
def read(self, format: str = None) -> Generator[Tuple, Any, Any]:
    """Reads the labeled dataset records.

    Parameters
    ----------
    format: (str, optional). Defaults to None.
        Output format of annotations. Can be None, "spacy" for dataset
        Entity Extraction type or "yolo" for Object Detection type.
        When None, it outputs List[NERItem] or List[BoundingBoxItem].
        When "spacy", it outputs List[Tuple].
        When "yolo", it outputs List[List[Tuple]].

    Returns
    -------
    Generator[Tuple, Any, Any]
        The labeled dataset records.

    Raises
    ------
    TypeError
        When `format` is a non-empty string that is unknown or that does
        not match the annotation type of the dataset.
    """
    metadata = self.info()

    # Only non-empty strings are validated here; falsy values and
    # non-string values are forwarded to the record reader unchanged.
    if format and isinstance(format, str):
        requested = format.lower()
        if (
            requested not in FORMATS_TO_ANNOTATION_TYPE
            or FORMATS_TO_ANNOTATION_TYPE[requested] != metadata.annotation_type
        ):
            # Guard the lookup used for the message: if the annotation type
            # has no associated format, a bare subscript would raise
            # KeyError and hide the intended TypeError from the caller.
            try:
                allowed = ANNOTATION_TYPE_TO_FORMATS[metadata.annotation_type]
            except KeyError:
                raise TypeError(
                    "Wrong format. `format` can only be None."
                ) from None
            raise TypeError(
                "Wrong format. `format` can only be None or "
                f"`{allowed}`."
            )

    records_reader = RecordReader.from_export_file(
        # Fall back to the metadata file itself when the metadata does not
        # point at a separate records file; in that case the reader is told
        # that the file also carries the metadata.
        path=metadata.records_path or self.path,
        dataset_type=metadata.dataset_type,
        annotation_type=metadata.annotation_type,
        dataset_source_path=metadata.source_path,
        auth=self.auth,
        encoding=self.encoding,
        materialize=self.materialize,
        include_unlabeled=self.include_unlabeled,
        format=format,
        categories=metadata.labels,
        includes_metadata=not metadata.records_path,
    )
    return records_reader.read()
|
@@ -0,0 +1,121 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8; -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
from collections import defaultdict
|
8
|
+
from dataclasses import dataclass
|
9
|
+
from typing import Any, Generator, List
|
10
|
+
|
11
|
+
from ads.common import auth as authutil
|
12
|
+
from ads.common import oci_client
|
13
|
+
from ads.data_labeling.interface.reader import Reader
|
14
|
+
from oci.data_labeling_service_dataplane.models import AnnotationSummary, RecordSummary
|
15
|
+
from oci.exceptions import ServiceError
|
16
|
+
from oci.pagination import list_call_get_all_results
|
17
|
+
|
18
|
+
|
19
|
+
class ReadRecordsError(Exception):  # pragma: no cover
    """Error reported when reading the records of a dataset fails.

    Parameters
    ----------
    dataset_id: str
        The dataset identifier embedded into the error message.
    """

    def __init__(self, dataset_id: str):
        message = (
            f"Error occurred in attempt to read records of dataset {dataset_id}."
        )
        super().__init__(message)
|
24
|
+
|
25
|
+
|
26
|
+
class ReadAnnotationsError(Exception):  # pragma: no cover
    """Error reported when reading the annotations of a dataset fails.

    Parameters
    ----------
    dataset_id: str
        The dataset identifier embedded into the error message.
    """

    def __init__(self, dataset_id: str):
        message = (
            f"Error occurred in attempt to read annotations of dataset {dataset_id}."
        )
        super().__init__(message)
|
31
|
+
|
32
|
+
|
33
|
+
@dataclass
class OCIRecordSummary:
    """A labeled record together with what was annotated on it, in ADS format.

    Attributes
    ----------
    record: RecordSummary
        OCI RecordSummary. Defaults to None.
    annotation: List[AnnotationSummary]
        List of OCI AnnotationSummary belonging to the record.
        Defaults to None.
    """

    record: RecordSummary = None
    annotation: List[AnnotationSummary] = None
|
47
|
+
|
48
|
+
|
49
|
+
class DLSRecordReader(Reader):
    """DLS Record Reader Class that reads records from the cloud into ADS format."""

    def __init__(self, dataset_id: str, compartment_id: str, auth: dict = None):
        """Initiates a DLSRecordReader instance.

        Parameters
        ----------
        dataset_id: str
            The dataset OCID.
        compartment_id: str
            The compartment OCID of the dataset.
        auth: (dict, optional). Defaults to None.
            The default authentication is set using `ads.set_auth` API. If you need to override the
            default, use the `ads.common.auth.api_keys` or `ads.common.auth.resource_principal` to create appropriate
            authentication signer and kwargs required to instantiate IdentityClient object.

        Raises
        ------
        ValueError
            When `dataset_id` or `compartment_id` is empty.
        TypeError
            When `dataset_id` or `compartment_id` is not a string.
        """
        # Emptiness is checked before the type, so a falsy non-string value
        # is reported as a ValueError rather than a TypeError.
        if not dataset_id:
            raise ValueError("The dataset OCID must be specified.")

        if not isinstance(dataset_id, str):
            raise TypeError("The dataset_id must be a string.")

        if not compartment_id:
            raise ValueError("The compartment OCID must be specified.")

        if not isinstance(compartment_id, str):
            raise TypeError("The compartment OCID must be a string.")

        auth = auth or authutil.default_signer()
        self.dataset_id = dataset_id
        self.compartment_id = compartment_id
        self.dls_dp_client = oci_client.OCIClientFactory(**auth).data_labeling_dp

    def _read_records(self):
        """Fetches the ACTIVE records of the dataset.

        The results are collected through
        `oci.pagination.list_call_get_all_results`.

        Raises
        ------
        ReadRecordsError
            When the service call fails with a `ServiceError`.
        """
        try:
            return list_call_get_all_results(
                self.dls_dp_client.list_records,
                self.compartment_id,
                self.dataset_id,
                lifecycle_state="ACTIVE",
            ).data
        except ServiceError as error:
            # Chain explicitly so the service failure is kept as the cause.
            raise ReadRecordsError(self.dataset_id) from error

    def _read_annotations(self):
        """Fetches the ACTIVE annotations of the dataset.

        The results are collected through
        `oci.pagination.list_call_get_all_results`.

        Raises
        ------
        ReadAnnotationsError
            When the service call fails with a `ServiceError`.
        """
        try:
            return list_call_get_all_results(
                self.dls_dp_client.list_annotations,
                self.compartment_id,
                self.dataset_id,
                lifecycle_state="ACTIVE",
            ).data
        except ServiceError as error:
            # Chain explicitly so the service failure is kept as the cause.
            raise ReadAnnotationsError(self.dataset_id) from error

    def read(self) -> Generator[OCIRecordSummary, Any, Any]:
        """Reads OCI records.

        All records and all annotations are fetched first; annotations are
        then grouped by their `record_id` and attached to the matching record.

        Yields
        ------
        OCIRecordSummary
            The OCIRecordSummary instance.
        """
        records = self._read_records()
        annotations = self._read_annotations()

        annotations_map = defaultdict(list)
        for annotation in annotations:
            annotations_map[annotation.record_id].append(annotation)

        for record in records:
            # `.get` does not trigger the default factory, so a record
            # without annotations is yielded with `annotation=None`.
            yield OCIRecordSummary(record, annotations_map.get(record.id))
|