oracle-ads 2.13.9rc0__py3-none-any.whl → 2.13.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/__init__.py +40 -0
- ads/aqua/app.py +507 -0
- ads/aqua/cli.py +96 -0
- ads/aqua/client/__init__.py +3 -0
- ads/aqua/client/client.py +836 -0
- ads/aqua/client/openai_client.py +305 -0
- ads/aqua/common/__init__.py +5 -0
- ads/aqua/common/decorator.py +125 -0
- ads/aqua/common/entities.py +274 -0
- ads/aqua/common/enums.py +134 -0
- ads/aqua/common/errors.py +109 -0
- ads/aqua/common/utils.py +1295 -0
- ads/aqua/config/__init__.py +4 -0
- ads/aqua/config/container_config.py +246 -0
- ads/aqua/config/evaluation/__init__.py +4 -0
- ads/aqua/config/evaluation/evaluation_service_config.py +147 -0
- ads/aqua/config/utils/__init__.py +4 -0
- ads/aqua/config/utils/serializer.py +339 -0
- ads/aqua/constants.py +116 -0
- ads/aqua/data.py +14 -0
- ads/aqua/dummy_data/icon.txt +1 -0
- ads/aqua/dummy_data/oci_model_deployments.json +56 -0
- ads/aqua/dummy_data/oci_models.json +1 -0
- ads/aqua/dummy_data/readme.md +26 -0
- ads/aqua/evaluation/__init__.py +8 -0
- ads/aqua/evaluation/constants.py +53 -0
- ads/aqua/evaluation/entities.py +186 -0
- ads/aqua/evaluation/errors.py +70 -0
- ads/aqua/evaluation/evaluation.py +1814 -0
- ads/aqua/extension/__init__.py +42 -0
- ads/aqua/extension/aqua_ws_msg_handler.py +76 -0
- ads/aqua/extension/base_handler.py +90 -0
- ads/aqua/extension/common_handler.py +121 -0
- ads/aqua/extension/common_ws_msg_handler.py +36 -0
- ads/aqua/extension/deployment_handler.py +381 -0
- ads/aqua/extension/deployment_ws_msg_handler.py +54 -0
- ads/aqua/extension/errors.py +30 -0
- ads/aqua/extension/evaluation_handler.py +129 -0
- ads/aqua/extension/evaluation_ws_msg_handler.py +61 -0
- ads/aqua/extension/finetune_handler.py +96 -0
- ads/aqua/extension/model_handler.py +390 -0
- ads/aqua/extension/models/__init__.py +0 -0
- ads/aqua/extension/models/ws_models.py +145 -0
- ads/aqua/extension/models_ws_msg_handler.py +50 -0
- ads/aqua/extension/ui_handler.py +300 -0
- ads/aqua/extension/ui_websocket_handler.py +130 -0
- ads/aqua/extension/utils.py +133 -0
- ads/aqua/finetuning/__init__.py +7 -0
- ads/aqua/finetuning/constants.py +23 -0
- ads/aqua/finetuning/entities.py +181 -0
- ads/aqua/finetuning/finetuning.py +749 -0
- ads/aqua/model/__init__.py +8 -0
- ads/aqua/model/constants.py +60 -0
- ads/aqua/model/entities.py +385 -0
- ads/aqua/model/enums.py +32 -0
- ads/aqua/model/model.py +2134 -0
- ads/aqua/model/utils.py +52 -0
- ads/aqua/modeldeployment/__init__.py +6 -0
- ads/aqua/modeldeployment/constants.py +10 -0
- ads/aqua/modeldeployment/deployment.py +1315 -0
- ads/aqua/modeldeployment/entities.py +653 -0
- ads/aqua/modeldeployment/utils.py +543 -0
- ads/aqua/resources/gpu_shapes_index.json +94 -0
- ads/aqua/server/__init__.py +4 -0
- ads/aqua/server/__main__.py +24 -0
- ads/aqua/server/app.py +47 -0
- ads/aqua/server/aqua_spec.yml +1291 -0
- ads/aqua/training/__init__.py +4 -0
- ads/aqua/training/exceptions.py +476 -0
- ads/aqua/ui.py +519 -0
- ads/automl/__init__.py +9 -0
- ads/automl/driver.py +330 -0
- ads/automl/provider.py +975 -0
- ads/bds/__init__.py +5 -0
- ads/bds/auth.py +127 -0
- ads/bds/big_data_service.py +255 -0
- ads/catalog/__init__.py +19 -0
- ads/catalog/model.py +1576 -0
- ads/catalog/notebook.py +461 -0
- ads/catalog/project.py +468 -0
- ads/catalog/summary.py +178 -0
- ads/common/__init__.py +11 -0
- ads/common/analyzer.py +65 -0
- ads/common/artifact/.model-ignore +63 -0
- ads/common/artifact/__init__.py +10 -0
- ads/common/auth.py +1122 -0
- ads/common/card_identifier.py +83 -0
- ads/common/config.py +647 -0
- ads/common/data.py +165 -0
- ads/common/decorator/__init__.py +9 -0
- ads/common/decorator/argument_to_case.py +88 -0
- ads/common/decorator/deprecate.py +69 -0
- ads/common/decorator/require_nonempty_arg.py +65 -0
- ads/common/decorator/runtime_dependency.py +178 -0
- ads/common/decorator/threaded.py +97 -0
- ads/common/decorator/utils.py +35 -0
- ads/common/dsc_file_system.py +303 -0
- ads/common/error.py +14 -0
- ads/common/extended_enum.py +81 -0
- ads/common/function/__init__.py +5 -0
- ads/common/function/fn_util.py +142 -0
- ads/common/function/func_conf.yaml +25 -0
- ads/common/ipython.py +76 -0
- ads/common/model.py +679 -0
- ads/common/model_artifact.py +1759 -0
- ads/common/model_artifact_schema.json +107 -0
- ads/common/model_export_util.py +664 -0
- ads/common/model_metadata.py +24 -0
- ads/common/object_storage_details.py +296 -0
- ads/common/oci_client.py +179 -0
- ads/common/oci_datascience.py +46 -0
- ads/common/oci_logging.py +1144 -0
- ads/common/oci_mixin.py +957 -0
- ads/common/oci_resource.py +136 -0
- ads/common/serializer.py +559 -0
- ads/common/utils.py +1852 -0
- ads/common/word_lists.py +1491 -0
- ads/common/work_request.py +189 -0
- ads/config.py +1 -0
- ads/data_labeling/__init__.py +13 -0
- ads/data_labeling/boundingbox.py +253 -0
- ads/data_labeling/constants.py +47 -0
- ads/data_labeling/data_labeling_service.py +244 -0
- ads/data_labeling/interface/__init__.py +5 -0
- ads/data_labeling/interface/loader.py +16 -0
- ads/data_labeling/interface/parser.py +16 -0
- ads/data_labeling/interface/reader.py +23 -0
- ads/data_labeling/loader/__init__.py +5 -0
- ads/data_labeling/loader/file_loader.py +241 -0
- ads/data_labeling/metadata.py +110 -0
- ads/data_labeling/mixin/__init__.py +5 -0
- ads/data_labeling/mixin/data_labeling.py +232 -0
- ads/data_labeling/ner.py +129 -0
- ads/data_labeling/parser/__init__.py +5 -0
- ads/data_labeling/parser/dls_record_parser.py +388 -0
- ads/data_labeling/parser/export_metadata_parser.py +94 -0
- ads/data_labeling/parser/export_record_parser.py +473 -0
- ads/data_labeling/reader/__init__.py +5 -0
- ads/data_labeling/reader/dataset_reader.py +574 -0
- ads/data_labeling/reader/dls_record_reader.py +121 -0
- ads/data_labeling/reader/export_record_reader.py +62 -0
- ads/data_labeling/reader/jsonl_reader.py +75 -0
- ads/data_labeling/reader/metadata_reader.py +203 -0
- ads/data_labeling/reader/record_reader.py +263 -0
- ads/data_labeling/record.py +52 -0
- ads/data_labeling/visualizer/__init__.py +5 -0
- ads/data_labeling/visualizer/image_visualizer.py +525 -0
- ads/data_labeling/visualizer/text_visualizer.py +357 -0
- ads/database/__init__.py +5 -0
- ads/database/connection.py +338 -0
- ads/dataset/__init__.py +10 -0
- ads/dataset/capabilities.md +51 -0
- ads/dataset/classification_dataset.py +339 -0
- ads/dataset/correlation.py +226 -0
- ads/dataset/correlation_plot.py +563 -0
- ads/dataset/dask_series.py +173 -0
- ads/dataset/dataframe_transformer.py +110 -0
- ads/dataset/dataset.py +1979 -0
- ads/dataset/dataset_browser.py +360 -0
- ads/dataset/dataset_with_target.py +995 -0
- ads/dataset/exception.py +25 -0
- ads/dataset/factory.py +987 -0
- ads/dataset/feature_engineering_transformer.py +35 -0
- ads/dataset/feature_selection.py +107 -0
- ads/dataset/forecasting_dataset.py +26 -0
- ads/dataset/helper.py +1450 -0
- ads/dataset/label_encoder.py +99 -0
- ads/dataset/mixin/__init__.py +5 -0
- ads/dataset/mixin/dataset_accessor.py +134 -0
- ads/dataset/pipeline.py +58 -0
- ads/dataset/plot.py +710 -0
- ads/dataset/progress.py +86 -0
- ads/dataset/recommendation.py +297 -0
- ads/dataset/recommendation_transformer.py +502 -0
- ads/dataset/regression_dataset.py +14 -0
- ads/dataset/sampled_dataset.py +1050 -0
- ads/dataset/target.py +98 -0
- ads/dataset/timeseries.py +18 -0
- ads/dbmixin/__init__.py +5 -0
- ads/dbmixin/db_pandas_accessor.py +153 -0
- ads/environment/__init__.py +9 -0
- ads/environment/ml_runtime.py +66 -0
- ads/evaluations/README.md +14 -0
- ads/evaluations/__init__.py +109 -0
- ads/evaluations/evaluation_plot.py +983 -0
- ads/evaluations/evaluator.py +1334 -0
- ads/evaluations/statistical_metrics.py +543 -0
- ads/experiments/__init__.py +9 -0
- ads/experiments/capabilities.md +0 -0
- ads/explanations/__init__.py +21 -0
- ads/explanations/base_explainer.py +142 -0
- ads/explanations/capabilities.md +83 -0
- ads/explanations/explainer.py +190 -0
- ads/explanations/mlx_global_explainer.py +1050 -0
- ads/explanations/mlx_interface.py +386 -0
- ads/explanations/mlx_local_explainer.py +287 -0
- ads/explanations/mlx_whatif_explainer.py +201 -0
- ads/feature_engineering/__init__.py +20 -0
- ads/feature_engineering/accessor/__init__.py +5 -0
- ads/feature_engineering/accessor/dataframe_accessor.py +535 -0
- ads/feature_engineering/accessor/mixin/__init__.py +5 -0
- ads/feature_engineering/accessor/mixin/correlation.py +166 -0
- ads/feature_engineering/accessor/mixin/eda_mixin.py +266 -0
- ads/feature_engineering/accessor/mixin/eda_mixin_series.py +85 -0
- ads/feature_engineering/accessor/mixin/feature_types_mixin.py +211 -0
- ads/feature_engineering/accessor/mixin/utils.py +65 -0
- ads/feature_engineering/accessor/series_accessor.py +431 -0
- ads/feature_engineering/adsimage/__init__.py +5 -0
- ads/feature_engineering/adsimage/image.py +192 -0
- ads/feature_engineering/adsimage/image_reader.py +170 -0
- ads/feature_engineering/adsimage/interface/__init__.py +5 -0
- ads/feature_engineering/adsimage/interface/reader.py +19 -0
- ads/feature_engineering/adsstring/__init__.py +7 -0
- ads/feature_engineering/adsstring/oci_language/__init__.py +8 -0
- ads/feature_engineering/adsstring/string/__init__.py +8 -0
- ads/feature_engineering/data_schema.json +57 -0
- ads/feature_engineering/dataset/__init__.py +5 -0
- ads/feature_engineering/dataset/zip_code_data.py +42062 -0
- ads/feature_engineering/exceptions.py +40 -0
- ads/feature_engineering/feature_type/__init__.py +133 -0
- ads/feature_engineering/feature_type/address.py +184 -0
- ads/feature_engineering/feature_type/adsstring/__init__.py +5 -0
- ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +164 -0
- ads/feature_engineering/feature_type/adsstring/oci_language.py +93 -0
- ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +5 -0
- ads/feature_engineering/feature_type/adsstring/parsers/base.py +47 -0
- ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +96 -0
- ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +221 -0
- ads/feature_engineering/feature_type/adsstring/string.py +258 -0
- ads/feature_engineering/feature_type/base.py +58 -0
- ads/feature_engineering/feature_type/boolean.py +183 -0
- ads/feature_engineering/feature_type/category.py +146 -0
- ads/feature_engineering/feature_type/constant.py +137 -0
- ads/feature_engineering/feature_type/continuous.py +151 -0
- ads/feature_engineering/feature_type/creditcard.py +314 -0
- ads/feature_engineering/feature_type/datetime.py +190 -0
- ads/feature_engineering/feature_type/discrete.py +134 -0
- ads/feature_engineering/feature_type/document.py +43 -0
- ads/feature_engineering/feature_type/gis.py +251 -0
- ads/feature_engineering/feature_type/handler/__init__.py +5 -0
- ads/feature_engineering/feature_type/handler/feature_validator.py +524 -0
- ads/feature_engineering/feature_type/handler/feature_warning.py +319 -0
- ads/feature_engineering/feature_type/handler/warnings.py +128 -0
- ads/feature_engineering/feature_type/integer.py +142 -0
- ads/feature_engineering/feature_type/ip_address.py +144 -0
- ads/feature_engineering/feature_type/ip_address_v4.py +138 -0
- ads/feature_engineering/feature_type/ip_address_v6.py +138 -0
- ads/feature_engineering/feature_type/lat_long.py +256 -0
- ads/feature_engineering/feature_type/object.py +43 -0
- ads/feature_engineering/feature_type/ordinal.py +132 -0
- ads/feature_engineering/feature_type/phone_number.py +135 -0
- ads/feature_engineering/feature_type/string.py +171 -0
- ads/feature_engineering/feature_type/text.py +93 -0
- ads/feature_engineering/feature_type/unknown.py +43 -0
- ads/feature_engineering/feature_type/zip_code.py +164 -0
- ads/feature_engineering/feature_type_manager.py +406 -0
- ads/feature_engineering/schema.py +795 -0
- ads/feature_engineering/utils.py +245 -0
- ads/feature_store/.readthedocs.yaml +19 -0
- ads/feature_store/README.md +65 -0
- ads/feature_store/__init__.py +9 -0
- ads/feature_store/common/__init__.py +0 -0
- ads/feature_store/common/enums.py +339 -0
- ads/feature_store/common/exceptions.py +18 -0
- ads/feature_store/common/spark_session_singleton.py +125 -0
- ads/feature_store/common/utils/__init__.py +0 -0
- ads/feature_store/common/utils/base64_encoder_decoder.py +72 -0
- ads/feature_store/common/utils/feature_schema_mapper.py +283 -0
- ads/feature_store/common/utils/transformation_utils.py +82 -0
- ads/feature_store/common/utils/utility.py +403 -0
- ads/feature_store/data_validation/__init__.py +0 -0
- ads/feature_store/data_validation/great_expectation.py +129 -0
- ads/feature_store/dataset.py +1230 -0
- ads/feature_store/dataset_job.py +530 -0
- ads/feature_store/docs/Dockerfile +7 -0
- ads/feature_store/docs/Makefile +44 -0
- ads/feature_store/docs/conf.py +28 -0
- ads/feature_store/docs/requirements.txt +14 -0
- ads/feature_store/docs/source/ads.feature_store.query.rst +20 -0
- ads/feature_store/docs/source/cicd.rst +137 -0
- ads/feature_store/docs/source/conf.py +86 -0
- ads/feature_store/docs/source/data_versioning.rst +33 -0
- ads/feature_store/docs/source/dataset.rst +388 -0
- ads/feature_store/docs/source/dataset_job.rst +27 -0
- ads/feature_store/docs/source/demo.rst +70 -0
- ads/feature_store/docs/source/entity.rst +78 -0
- ads/feature_store/docs/source/feature_group.rst +624 -0
- ads/feature_store/docs/source/feature_group_job.rst +29 -0
- ads/feature_store/docs/source/feature_store.rst +122 -0
- ads/feature_store/docs/source/feature_store_class.rst +123 -0
- ads/feature_store/docs/source/feature_validation.rst +66 -0
- ads/feature_store/docs/source/figures/cicd.png +0 -0
- ads/feature_store/docs/source/figures/data_validation.png +0 -0
- ads/feature_store/docs/source/figures/data_versioning.png +0 -0
- ads/feature_store/docs/source/figures/dataset.gif +0 -0
- ads/feature_store/docs/source/figures/dataset.png +0 -0
- ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
- ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
- ads/feature_store/docs/source/figures/entity.png +0 -0
- ads/feature_store/docs/source/figures/feature_group.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
- ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
- ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
- ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
- ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
- ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
- ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
- ads/feature_store/docs/source/figures/overview.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
- ads/feature_store/docs/source/figures/stats_1.png +0 -0
- ads/feature_store/docs/source/figures/stats_2.png +0 -0
- ads/feature_store/docs/source/figures/stats_d.png +0 -0
- ads/feature_store/docs/source/figures/stats_fg.png +0 -0
- ads/feature_store/docs/source/figures/transformation.png +0 -0
- ads/feature_store/docs/source/figures/transformations.gif +0 -0
- ads/feature_store/docs/source/figures/validation.png +0 -0
- ads/feature_store/docs/source/figures/validation_fg.png +0 -0
- ads/feature_store/docs/source/figures/validation_results.png +0 -0
- ads/feature_store/docs/source/figures/validation_summary.png +0 -0
- ads/feature_store/docs/source/index.rst +81 -0
- ads/feature_store/docs/source/module.rst +8 -0
- ads/feature_store/docs/source/notebook.rst +94 -0
- ads/feature_store/docs/source/overview.rst +47 -0
- ads/feature_store/docs/source/quickstart.rst +176 -0
- ads/feature_store/docs/source/release_notes.rst +194 -0
- ads/feature_store/docs/source/setup_feature_store.rst +81 -0
- ads/feature_store/docs/source/statistics.rst +58 -0
- ads/feature_store/docs/source/transformation.rst +199 -0
- ads/feature_store/docs/source/ui.rst +65 -0
- ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +66 -0
- ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +192 -0
- ads/feature_store/docs/source/user_guides.setup.terraform.rst +338 -0
- ads/feature_store/entity.py +718 -0
- ads/feature_store/execution_strategy/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +375 -0
- ads/feature_store/execution_strategy/engine/__init__.py +0 -0
- ads/feature_store/execution_strategy/engine/spark_engine.py +316 -0
- ads/feature_store/execution_strategy/execution_strategy.py +113 -0
- ads/feature_store/execution_strategy/execution_strategy_provider.py +47 -0
- ads/feature_store/execution_strategy/spark/__init__.py +0 -0
- ads/feature_store/execution_strategy/spark/spark_execution.py +618 -0
- ads/feature_store/feature.py +192 -0
- ads/feature_store/feature_group.py +1494 -0
- ads/feature_store/feature_group_expectation.py +346 -0
- ads/feature_store/feature_group_job.py +602 -0
- ads/feature_store/feature_lineage/__init__.py +0 -0
- ads/feature_store/feature_lineage/graphviz_service.py +180 -0
- ads/feature_store/feature_option_details.py +50 -0
- ads/feature_store/feature_statistics/__init__.py +0 -0
- ads/feature_store/feature_statistics/statistics_service.py +99 -0
- ads/feature_store/feature_store.py +699 -0
- ads/feature_store/feature_store_registrar.py +518 -0
- ads/feature_store/input_feature_detail.py +149 -0
- ads/feature_store/mixin/__init__.py +4 -0
- ads/feature_store/mixin/oci_feature_store.py +145 -0
- ads/feature_store/model_details.py +73 -0
- ads/feature_store/query/__init__.py +0 -0
- ads/feature_store/query/filter.py +266 -0
- ads/feature_store/query/generator/__init__.py +0 -0
- ads/feature_store/query/generator/query_generator.py +298 -0
- ads/feature_store/query/join.py +161 -0
- ads/feature_store/query/query.py +403 -0
- ads/feature_store/query/validator/__init__.py +0 -0
- ads/feature_store/query/validator/query_validator.py +57 -0
- ads/feature_store/response/__init__.py +0 -0
- ads/feature_store/response/response_builder.py +68 -0
- ads/feature_store/service/__init__.py +0 -0
- ads/feature_store/service/oci_dataset.py +139 -0
- ads/feature_store/service/oci_dataset_job.py +199 -0
- ads/feature_store/service/oci_entity.py +125 -0
- ads/feature_store/service/oci_feature_group.py +164 -0
- ads/feature_store/service/oci_feature_group_job.py +214 -0
- ads/feature_store/service/oci_feature_store.py +182 -0
- ads/feature_store/service/oci_lineage.py +87 -0
- ads/feature_store/service/oci_transformation.py +104 -0
- ads/feature_store/statistics/__init__.py +0 -0
- ads/feature_store/statistics/abs_feature_value.py +49 -0
- ads/feature_store/statistics/charts/__init__.py +0 -0
- ads/feature_store/statistics/charts/abstract_feature_plot.py +37 -0
- ads/feature_store/statistics/charts/box_plot.py +148 -0
- ads/feature_store/statistics/charts/frequency_distribution.py +65 -0
- ads/feature_store/statistics/charts/probability_distribution.py +68 -0
- ads/feature_store/statistics/charts/top_k_frequent_elements.py +98 -0
- ads/feature_store/statistics/feature_stat.py +126 -0
- ads/feature_store/statistics/generic_feature_value.py +33 -0
- ads/feature_store/statistics/statistics.py +41 -0
- ads/feature_store/statistics_config.py +101 -0
- ads/feature_store/templates/feature_store_template.yaml +45 -0
- ads/feature_store/transformation.py +499 -0
- ads/feature_store/validation_output.py +57 -0
- ads/hpo/__init__.py +9 -0
- ads/hpo/_imports.py +91 -0
- ads/hpo/ads_search_space.py +439 -0
- ads/hpo/distributions.py +325 -0
- ads/hpo/objective.py +280 -0
- ads/hpo/search_cv.py +1657 -0
- ads/hpo/stopping_criterion.py +75 -0
- ads/hpo/tuner_artifact.py +413 -0
- ads/hpo/utils.py +91 -0
- ads/hpo/validation.py +140 -0
- ads/hpo/visualization/__init__.py +5 -0
- ads/hpo/visualization/_contour.py +23 -0
- ads/hpo/visualization/_edf.py +20 -0
- ads/hpo/visualization/_intermediate_values.py +21 -0
- ads/hpo/visualization/_optimization_history.py +25 -0
- ads/hpo/visualization/_parallel_coordinate.py +169 -0
- ads/hpo/visualization/_param_importances.py +26 -0
- ads/jobs/__init__.py +53 -0
- ads/jobs/ads_job.py +663 -0
- ads/jobs/builders/__init__.py +5 -0
- ads/jobs/builders/base.py +156 -0
- ads/jobs/builders/infrastructure/__init__.py +6 -0
- ads/jobs/builders/infrastructure/base.py +165 -0
- ads/jobs/builders/infrastructure/dataflow.py +1252 -0
- ads/jobs/builders/infrastructure/dsc_job.py +1894 -0
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +1233 -0
- ads/jobs/builders/infrastructure/utils.py +65 -0
- ads/jobs/builders/runtimes/__init__.py +5 -0
- ads/jobs/builders/runtimes/artifact.py +338 -0
- ads/jobs/builders/runtimes/base.py +325 -0
- ads/jobs/builders/runtimes/container_runtime.py +242 -0
- ads/jobs/builders/runtimes/python_runtime.py +1016 -0
- ads/jobs/builders/runtimes/pytorch_runtime.py +204 -0
- ads/jobs/cli.py +104 -0
- ads/jobs/env_var_parser.py +131 -0
- ads/jobs/extension.py +160 -0
- ads/jobs/schema/__init__.py +5 -0
- ads/jobs/schema/infrastructure_schema.json +116 -0
- ads/jobs/schema/job_schema.json +42 -0
- ads/jobs/schema/runtime_schema.json +183 -0
- ads/jobs/schema/validator.py +141 -0
- ads/jobs/serializer.py +296 -0
- ads/jobs/templates/__init__.py +5 -0
- ads/jobs/templates/container.py +6 -0
- ads/jobs/templates/driver_notebook.py +177 -0
- ads/jobs/templates/driver_oci.py +500 -0
- ads/jobs/templates/driver_python.py +48 -0
- ads/jobs/templates/driver_pytorch.py +852 -0
- ads/jobs/templates/driver_utils.py +615 -0
- ads/jobs/templates/hostname_from_env.c +55 -0
- ads/jobs/templates/oci_metrics.py +181 -0
- ads/jobs/utils.py +104 -0
- ads/llm/__init__.py +28 -0
- ads/llm/autogen/__init__.py +2 -0
- ads/llm/autogen/constants.py +15 -0
- ads/llm/autogen/reports/__init__.py +2 -0
- ads/llm/autogen/reports/base.py +67 -0
- ads/llm/autogen/reports/data.py +103 -0
- ads/llm/autogen/reports/session.py +526 -0
- ads/llm/autogen/reports/templates/chat_box.html +13 -0
- ads/llm/autogen/reports/templates/chat_box_lt.html +5 -0
- ads/llm/autogen/reports/templates/chat_box_rt.html +6 -0
- ads/llm/autogen/reports/utils.py +56 -0
- ads/llm/autogen/v02/__init__.py +4 -0
- ads/llm/autogen/v02/client.py +295 -0
- ads/llm/autogen/v02/log_handlers/__init__.py +2 -0
- ads/llm/autogen/v02/log_handlers/oci_file_handler.py +83 -0
- ads/llm/autogen/v02/loggers/__init__.py +6 -0
- ads/llm/autogen/v02/loggers/metric_logger.py +320 -0
- ads/llm/autogen/v02/loggers/session_logger.py +580 -0
- ads/llm/autogen/v02/loggers/utils.py +86 -0
- ads/llm/autogen/v02/runtime_logging.py +163 -0
- ads/llm/chain.py +268 -0
- ads/llm/chat_template.py +31 -0
- ads/llm/deploy.py +63 -0
- ads/llm/guardrails/__init__.py +5 -0
- ads/llm/guardrails/base.py +442 -0
- ads/llm/guardrails/huggingface.py +44 -0
- ads/llm/langchain/__init__.py +5 -0
- ads/llm/langchain/plugins/__init__.py +5 -0
- ads/llm/langchain/plugins/chat_models/__init__.py +5 -0
- ads/llm/langchain/plugins/chat_models/oci_data_science.py +1027 -0
- ads/llm/langchain/plugins/embeddings/__init__.py +4 -0
- ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +184 -0
- ads/llm/langchain/plugins/llms/__init__.py +5 -0
- ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +979 -0
- ads/llm/requirements.txt +3 -0
- ads/llm/serialize.py +219 -0
- ads/llm/serializers/__init__.py +0 -0
- ads/llm/serializers/retrieval_qa.py +153 -0
- ads/llm/serializers/runnable_parallel.py +27 -0
- ads/llm/templates/score_chain.jinja2 +155 -0
- ads/llm/templates/tool_chat_template_hermes.jinja +130 -0
- ads/llm/templates/tool_chat_template_mistral_parallel.jinja +94 -0
- ads/model/__init__.py +52 -0
- ads/model/artifact.py +573 -0
- ads/model/artifact_downloader.py +254 -0
- ads/model/artifact_uploader.py +267 -0
- ads/model/base_properties.py +238 -0
- ads/model/common/.model-ignore +66 -0
- ads/model/common/__init__.py +5 -0
- ads/model/common/utils.py +142 -0
- ads/model/datascience_model.py +2635 -0
- ads/model/deployment/__init__.py +20 -0
- ads/model/deployment/common/__init__.py +5 -0
- ads/model/deployment/common/utils.py +308 -0
- ads/model/deployment/model_deployer.py +466 -0
- ads/model/deployment/model_deployment.py +1846 -0
- ads/model/deployment/model_deployment_infrastructure.py +671 -0
- ads/model/deployment/model_deployment_properties.py +493 -0
- ads/model/deployment/model_deployment_runtime.py +838 -0
- ads/model/extractor/__init__.py +5 -0
- ads/model/extractor/automl_extractor.py +74 -0
- ads/model/extractor/embedding_onnx_extractor.py +80 -0
- ads/model/extractor/huggingface_extractor.py +88 -0
- ads/model/extractor/keras_extractor.py +84 -0
- ads/model/extractor/lightgbm_extractor.py +93 -0
- ads/model/extractor/model_info_extractor.py +114 -0
- ads/model/extractor/model_info_extractor_factory.py +105 -0
- ads/model/extractor/pytorch_extractor.py +87 -0
- ads/model/extractor/sklearn_extractor.py +112 -0
- ads/model/extractor/spark_extractor.py +89 -0
- ads/model/extractor/tensorflow_extractor.py +85 -0
- ads/model/extractor/xgboost_extractor.py +94 -0
- ads/model/framework/__init__.py +5 -0
- ads/model/framework/automl_model.py +178 -0
- ads/model/framework/embedding_onnx_model.py +438 -0
- ads/model/framework/huggingface_model.py +399 -0
- ads/model/framework/lightgbm_model.py +266 -0
- ads/model/framework/pytorch_model.py +266 -0
- ads/model/framework/sklearn_model.py +250 -0
- ads/model/framework/spark_model.py +326 -0
- ads/model/framework/tensorflow_model.py +254 -0
- ads/model/framework/xgboost_model.py +258 -0
- ads/model/generic_model.py +3518 -0
- ads/model/model_artifact_boilerplate/README.md +381 -0
- ads/model/model_artifact_boilerplate/__init__.py +5 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +5 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +427 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +2 -0
- ads/model/model_artifact_boilerplate/runtime.yaml +7 -0
- ads/model/model_artifact_boilerplate/score.py +61 -0
- ads/model/model_file_description_schema.json +68 -0
- ads/model/model_introspect.py +331 -0
- ads/model/model_metadata.py +1810 -0
- ads/model/model_metadata_mixin.py +460 -0
- ads/model/model_properties.py +63 -0
- ads/model/model_version_set.py +739 -0
- ads/model/runtime/__init__.py +5 -0
- ads/model/runtime/env_info.py +306 -0
- ads/model/runtime/model_deployment_details.py +37 -0
- ads/model/runtime/model_provenance_details.py +58 -0
- ads/model/runtime/runtime_info.py +81 -0
- ads/model/runtime/schemas/inference_env_info_schema.yaml +16 -0
- ads/model/runtime/schemas/model_provenance_schema.yaml +36 -0
- ads/model/runtime/schemas/training_env_info_schema.yaml +16 -0
- ads/model/runtime/utils.py +201 -0
- ads/model/serde/__init__.py +5 -0
- ads/model/serde/common.py +40 -0
- ads/model/serde/model_input.py +547 -0
- ads/model/serde/model_serializer.py +1184 -0
- ads/model/service/__init__.py +5 -0
- ads/model/service/oci_datascience_model.py +1076 -0
- ads/model/service/oci_datascience_model_deployment.py +500 -0
- ads/model/service/oci_datascience_model_version_set.py +176 -0
- ads/model/transformer/__init__.py +5 -0
- ads/model/transformer/onnx_transformer.py +324 -0
- ads/mysqldb/__init__.py +5 -0
- ads/mysqldb/mysql_db.py +227 -0
- ads/opctl/__init__.py +18 -0
- ads/opctl/anomaly_detection.py +11 -0
- ads/opctl/backend/__init__.py +5 -0
- ads/opctl/backend/ads_dataflow.py +353 -0
- ads/opctl/backend/ads_ml_job.py +710 -0
- ads/opctl/backend/ads_ml_pipeline.py +164 -0
- ads/opctl/backend/ads_model_deployment.py +209 -0
- ads/opctl/backend/base.py +146 -0
- ads/opctl/backend/local.py +1053 -0
- ads/opctl/backend/marketplace/__init__.py +9 -0
- ads/opctl/backend/marketplace/helm_helper.py +173 -0
- ads/opctl/backend/marketplace/local_marketplace.py +271 -0
- ads/opctl/backend/marketplace/marketplace_backend_runner.py +71 -0
- ads/opctl/backend/marketplace/marketplace_operator_interface.py +44 -0
- ads/opctl/backend/marketplace/marketplace_operator_runner.py +24 -0
- ads/opctl/backend/marketplace/marketplace_utils.py +212 -0
- ads/opctl/backend/marketplace/models/__init__.py +5 -0
- ads/opctl/backend/marketplace/models/bearer_token.py +94 -0
- ads/opctl/backend/marketplace/models/marketplace_type.py +70 -0
- ads/opctl/backend/marketplace/models/ocir_details.py +56 -0
- ads/opctl/backend/marketplace/prerequisite_checker.py +238 -0
- ads/opctl/cli.py +707 -0
- ads/opctl/cmds.py +869 -0
- ads/opctl/conda/__init__.py +5 -0
- ads/opctl/conda/cli.py +193 -0
- ads/opctl/conda/cmds.py +749 -0
- ads/opctl/conda/config.yaml +34 -0
- ads/opctl/conda/manifest_template.yaml +13 -0
- ads/opctl/conda/multipart_uploader.py +188 -0
- ads/opctl/conda/pack.py +89 -0
- ads/opctl/config/__init__.py +5 -0
- ads/opctl/config/base.py +57 -0
- ads/opctl/config/diagnostics/__init__.py +5 -0
- ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +62 -0
- ads/opctl/config/merger.py +255 -0
- ads/opctl/config/resolver.py +297 -0
- ads/opctl/config/utils.py +79 -0
- ads/opctl/config/validator.py +17 -0
- ads/opctl/config/versioner.py +68 -0
- ads/opctl/config/yaml_parsers/__init__.py +7 -0
- ads/opctl/config/yaml_parsers/base.py +58 -0
- ads/opctl/config/yaml_parsers/distributed/__init__.py +7 -0
- ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +201 -0
- ads/opctl/constants.py +66 -0
- ads/opctl/decorator/__init__.py +5 -0
- ads/opctl/decorator/common.py +129 -0
- ads/opctl/diagnostics/__init__.py +5 -0
- ads/opctl/diagnostics/__main__.py +25 -0
- ads/opctl/diagnostics/check_distributed_job_requirements.py +212 -0
- ads/opctl/diagnostics/check_requirements.py +144 -0
- ads/opctl/diagnostics/requirement_exception.py +9 -0
- ads/opctl/distributed/README.md +109 -0
- ads/opctl/distributed/__init__.py +5 -0
- ads/opctl/distributed/certificates.py +32 -0
- ads/opctl/distributed/cli.py +207 -0
- ads/opctl/distributed/cmds.py +731 -0
- ads/opctl/distributed/common/__init__.py +5 -0
- ads/opctl/distributed/common/abstract_cluster_provider.py +449 -0
- ads/opctl/distributed/common/abstract_framework_spec_builder.py +88 -0
- ads/opctl/distributed/common/cluster_config_helper.py +103 -0
- ads/opctl/distributed/common/cluster_provider_factory.py +21 -0
- ads/opctl/distributed/common/cluster_runner.py +54 -0
- ads/opctl/distributed/common/framework_factory.py +29 -0
- ads/opctl/docker/Dockerfile.job +103 -0
- ads/opctl/docker/Dockerfile.job.arm +107 -0
- ads/opctl/docker/Dockerfile.job.gpu +175 -0
- ads/opctl/docker/base-env.yaml +13 -0
- ads/opctl/docker/cuda.repo +6 -0
- ads/opctl/docker/operator/.dockerignore +0 -0
- ads/opctl/docker/operator/Dockerfile +41 -0
- ads/opctl/docker/operator/Dockerfile.gpu +85 -0
- ads/opctl/docker/operator/cuda.repo +6 -0
- ads/opctl/docker/operator/environment.yaml +8 -0
- ads/opctl/forecast.py +11 -0
- ads/opctl/index.yaml +3 -0
- ads/opctl/model/__init__.py +5 -0
- ads/opctl/model/cli.py +65 -0
- ads/opctl/model/cmds.py +73 -0
- ads/opctl/operator/README.md +4 -0
- ads/opctl/operator/__init__.py +31 -0
- ads/opctl/operator/cli.py +344 -0
- ads/opctl/operator/cmd.py +596 -0
- ads/opctl/operator/common/__init__.py +5 -0
- ads/opctl/operator/common/backend_factory.py +460 -0
- ads/opctl/operator/common/const.py +27 -0
- ads/opctl/operator/common/data/synthetic.csv +16001 -0
- ads/opctl/operator/common/dictionary_merger.py +148 -0
- ads/opctl/operator/common/errors.py +42 -0
- ads/opctl/operator/common/operator_config.py +99 -0
- ads/opctl/operator/common/operator_loader.py +811 -0
- ads/opctl/operator/common/operator_schema.yaml +130 -0
- ads/opctl/operator/common/operator_yaml_generator.py +152 -0
- ads/opctl/operator/common/utils.py +208 -0
- ads/opctl/operator/lowcode/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/MLoperator +16 -0
- ads/opctl/operator/lowcode/anomaly/README.md +207 -0
- ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/__main__.py +103 -0
- ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
- ads/opctl/operator/lowcode/anomaly/const.py +167 -0
- ads/opctl/operator/lowcode/anomaly/environment.yaml +10 -0
- ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +146 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +162 -0
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +99 -0
- ads/opctl/operator/lowcode/anomaly/model/autots.py +115 -0
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +404 -0
- ads/opctl/operator/lowcode/anomaly/model/factory.py +110 -0
- ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +78 -0
- ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +78 -0
- ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +120 -0
- ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
- ads/opctl/operator/lowcode/anomaly/operator_config.py +127 -0
- ads/opctl/operator/lowcode/anomaly/schema.yaml +401 -0
- ads/opctl/operator/lowcode/anomaly/utils.py +88 -0
- ads/opctl/operator/lowcode/common/__init__.py +5 -0
- ads/opctl/operator/lowcode/common/const.py +10 -0
- ads/opctl/operator/lowcode/common/data.py +116 -0
- ads/opctl/operator/lowcode/common/errors.py +47 -0
- ads/opctl/operator/lowcode/common/transformations.py +296 -0
- ads/opctl/operator/lowcode/common/utils.py +384 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +13 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/README.md +30 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +5 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +116 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +85 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/const.py +15 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +4 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +32 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +43 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +120 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +34 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +386 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +160 -0
- ads/opctl/operator/lowcode/forecast/MLoperator +25 -0
- ads/opctl/operator/lowcode/forecast/README.md +209 -0
- ads/opctl/operator/lowcode/forecast/__init__.py +5 -0
- ads/opctl/operator/lowcode/forecast/__main__.py +89 -0
- ads/opctl/operator/lowcode/forecast/cmd.py +40 -0
- ads/opctl/operator/lowcode/forecast/const.py +92 -0
- ads/opctl/operator/lowcode/forecast/environment.yaml +20 -0
- ads/opctl/operator/lowcode/forecast/errors.py +26 -0
- ads/opctl/operator/lowcode/forecast/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/forecast/model/arima.py +279 -0
- ads/opctl/operator/lowcode/forecast/model/automlx.py +553 -0
- ads/opctl/operator/lowcode/forecast/model/autots.py +312 -0
- ads/opctl/operator/lowcode/forecast/model/base_model.py +875 -0
- ads/opctl/operator/lowcode/forecast/model/factory.py +106 -0
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +492 -0
- ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +243 -0
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +482 -0
- ads/opctl/operator/lowcode/forecast/model/prophet.py +450 -0
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +244 -0
- ads/opctl/operator/lowcode/forecast/operator_config.py +234 -0
- ads/opctl/operator/lowcode/forecast/schema.yaml +506 -0
- ads/opctl/operator/lowcode/forecast/utils.py +397 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +7 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +285 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/score.py +246 -0
- ads/opctl/operator/lowcode/pii/MLoperator +17 -0
- ads/opctl/operator/lowcode/pii/README.md +208 -0
- ads/opctl/operator/lowcode/pii/__init__.py +5 -0
- ads/opctl/operator/lowcode/pii/__main__.py +78 -0
- ads/opctl/operator/lowcode/pii/cmd.py +39 -0
- ads/opctl/operator/lowcode/pii/constant.py +84 -0
- ads/opctl/operator/lowcode/pii/environment.yaml +17 -0
- ads/opctl/operator/lowcode/pii/errors.py +27 -0
- ads/opctl/operator/lowcode/pii/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/pii/model/factory.py +82 -0
- ads/opctl/operator/lowcode/pii/model/guardrails.py +167 -0
- ads/opctl/operator/lowcode/pii/model/pii.py +145 -0
- ads/opctl/operator/lowcode/pii/model/processor/__init__.py +34 -0
- ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +34 -0
- ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +35 -0
- ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +225 -0
- ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +73 -0
- ads/opctl/operator/lowcode/pii/model/processor/remover.py +26 -0
- ads/opctl/operator/lowcode/pii/model/report.py +487 -0
- ads/opctl/operator/lowcode/pii/operator_config.py +95 -0
- ads/opctl/operator/lowcode/pii/schema.yaml +108 -0
- ads/opctl/operator/lowcode/pii/utils.py +43 -0
- ads/opctl/operator/lowcode/recommender/MLoperator +16 -0
- ads/opctl/operator/lowcode/recommender/README.md +206 -0
- ads/opctl/operator/lowcode/recommender/__init__.py +5 -0
- ads/opctl/operator/lowcode/recommender/__main__.py +82 -0
- ads/opctl/operator/lowcode/recommender/cmd.py +33 -0
- ads/opctl/operator/lowcode/recommender/constant.py +30 -0
- ads/opctl/operator/lowcode/recommender/environment.yaml +11 -0
- ads/opctl/operator/lowcode/recommender/model/base_model.py +212 -0
- ads/opctl/operator/lowcode/recommender/model/factory.py +56 -0
- ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +25 -0
- ads/opctl/operator/lowcode/recommender/model/svd.py +106 -0
- ads/opctl/operator/lowcode/recommender/operator_config.py +81 -0
- ads/opctl/operator/lowcode/recommender/schema.yaml +265 -0
- ads/opctl/operator/lowcode/recommender/utils.py +13 -0
- ads/opctl/operator/runtime/__init__.py +5 -0
- ads/opctl/operator/runtime/const.py +17 -0
- ads/opctl/operator/runtime/container_runtime_schema.yaml +50 -0
- ads/opctl/operator/runtime/marketplace_runtime.py +50 -0
- ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +21 -0
- ads/opctl/operator/runtime/python_runtime_schema.yaml +21 -0
- ads/opctl/operator/runtime/runtime.py +115 -0
- ads/opctl/schema.yaml.yml +36 -0
- ads/opctl/script.py +40 -0
- ads/opctl/spark/__init__.py +5 -0
- ads/opctl/spark/cli.py +43 -0
- ads/opctl/spark/cmds.py +147 -0
- ads/opctl/templates/diagnostic_report_template.jinja2 +102 -0
- ads/opctl/utils.py +344 -0
- ads/oracledb/__init__.py +5 -0
- ads/oracledb/oracle_db.py +346 -0
- ads/pipeline/__init__.py +39 -0
- ads/pipeline/ads_pipeline.py +2279 -0
- ads/pipeline/ads_pipeline_run.py +772 -0
- ads/pipeline/ads_pipeline_step.py +605 -0
- ads/pipeline/builders/__init__.py +5 -0
- ads/pipeline/builders/infrastructure/__init__.py +5 -0
- ads/pipeline/builders/infrastructure/custom_script.py +32 -0
- ads/pipeline/cli.py +119 -0
- ads/pipeline/extension.py +291 -0
- ads/pipeline/schema/__init__.py +5 -0
- ads/pipeline/schema/cs_step_schema.json +35 -0
- ads/pipeline/schema/ml_step_schema.json +31 -0
- ads/pipeline/schema/pipeline_schema.json +71 -0
- ads/pipeline/visualizer/__init__.py +5 -0
- ads/pipeline/visualizer/base.py +570 -0
- ads/pipeline/visualizer/graph_renderer.py +272 -0
- ads/pipeline/visualizer/text_renderer.py +84 -0
- ads/secrets/__init__.py +11 -0
- ads/secrets/adb.py +386 -0
- ads/secrets/auth_token.py +86 -0
- ads/secrets/big_data_service.py +365 -0
- ads/secrets/mysqldb.py +149 -0
- ads/secrets/oracledb.py +160 -0
- ads/secrets/secrets.py +407 -0
- ads/telemetry/__init__.py +7 -0
- ads/telemetry/base.py +69 -0
- ads/telemetry/client.py +122 -0
- ads/telemetry/telemetry.py +257 -0
- ads/templates/dataflow_pyspark.jinja2 +13 -0
- ads/templates/dataflow_sparksql.jinja2 +22 -0
- ads/templates/func.jinja2 +20 -0
- ads/templates/schemas/openapi.json +1740 -0
- ads/templates/score-pkl.jinja2 +173 -0
- ads/templates/score.jinja2 +322 -0
- ads/templates/score_embedding_onnx.jinja2 +202 -0
- ads/templates/score_generic.jinja2 +165 -0
- ads/templates/score_huggingface_pipeline.jinja2 +217 -0
- ads/templates/score_lightgbm.jinja2 +185 -0
- ads/templates/score_onnx.jinja2 +407 -0
- ads/templates/score_onnx_new.jinja2 +473 -0
- ads/templates/score_oracle_automl.jinja2 +185 -0
- ads/templates/score_pyspark.jinja2 +154 -0
- ads/templates/score_pytorch.jinja2 +219 -0
- ads/templates/score_scikit-learn.jinja2 +184 -0
- ads/templates/score_tensorflow.jinja2 +184 -0
- ads/templates/score_xgboost.jinja2 +178 -0
- ads/text_dataset/__init__.py +5 -0
- ads/text_dataset/backends.py +211 -0
- ads/text_dataset/dataset.py +445 -0
- ads/text_dataset/extractor.py +207 -0
- ads/text_dataset/options.py +53 -0
- ads/text_dataset/udfs.py +22 -0
- ads/text_dataset/utils.py +49 -0
- ads/type_discovery/__init__.py +9 -0
- ads/type_discovery/abstract_detector.py +21 -0
- ads/type_discovery/constant_detector.py +41 -0
- ads/type_discovery/continuous_detector.py +54 -0
- ads/type_discovery/credit_card_detector.py +99 -0
- ads/type_discovery/datetime_detector.py +92 -0
- ads/type_discovery/discrete_detector.py +118 -0
- ads/type_discovery/document_detector.py +146 -0
- ads/type_discovery/ip_detector.py +68 -0
- ads/type_discovery/latlon_detector.py +90 -0
- ads/type_discovery/phone_number_detector.py +63 -0
- ads/type_discovery/type_discovery_driver.py +87 -0
- ads/type_discovery/typed_feature.py +594 -0
- ads/type_discovery/unknown_detector.py +41 -0
- ads/type_discovery/zipcode_detector.py +48 -0
- ads/vault/__init__.py +7 -0
- ads/vault/vault.py +237 -0
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/METADATA +150 -149
- oracle_ads-2.13.10.dist-info/RECORD +858 -0
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/WHEEL +1 -2
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/entry_points.txt +2 -1
- oracle_ads-2.13.9rc0.dist-info/RECORD +0 -9
- oracle_ads-2.13.9rc0.dist-info/top_level.txt +0 -1
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,244 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8; -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
import logging
|
8
|
+
import re
|
9
|
+
import time
|
10
|
+
from urllib.parse import urlparse
|
11
|
+
|
12
|
+
import pandas as pd
|
13
|
+
from ads.common import auth, oci_client, utils
|
14
|
+
from ads.common.oci_mixin import OCIWorkRequestMixin
|
15
|
+
from ads.common.utils import snake_to_camel
|
16
|
+
from ads.config import JOB_RUN_COMPARTMENT_OCID, NB_SESSION_COMPARTMENT_OCID
|
17
|
+
from ads.common.object_storage_details import (
|
18
|
+
ObjectStorageDetails,
|
19
|
+
InvalidObjectStoragePath,
|
20
|
+
)
|
21
|
+
from oci import pagination
|
22
|
+
from oci.data_labeling_service.models import (
|
23
|
+
ObjectStorageSnapshotExportDetails,
|
24
|
+
SnapshotDatasetDetails,
|
25
|
+
WorkRequest,
|
26
|
+
)
|
27
|
+
from oci.util import to_dict
|
28
|
+
|
29
|
+
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
|
30
|
+
# Size of the progress bar shown while waiting for a snapshot work request
# (see DataLabeling._wait_for_work_request).
NUM_PROGRESS_BAR = 8
|
31
|
+
|
32
|
+
|
33
|
+
class DataLabeling(OCIWorkRequestMixin):
    """Class for data labeling service. Integrates the data labeling service APIs.

    Examples
    --------
    >>> import ads
    >>> import pandas as pd
    >>> from ads.data_labeling.data_labeling_service import DataLabeling
    >>> ads.set_auth("api_key")
    >>> dls = DataLabeling()
    >>> dls.list_dataset()
    >>> metadata_path = dls.export(dataset_id="your dataset id",
    ...     path="oci://<bucket_name>@<namespace>/folder")
    >>> df = pd.DataFrame.ads.read_labeled_data(metadata_path)
    """

    def __init__(
        self,
        compartment_id: str = None,
        dls_cp_client_auth: dict = None,
        dls_dp_client_auth: dict = None,
    ) -> None:
        """Initialize a DataLabeling class.

        Parameters
        ----------
        compartment_id : str, optional
            OCID of data labeling datasets' compartment. When None, falls back to
            `NB_SESSION_COMPARTMENT_OCID` and then `JOB_RUN_COMPARTMENT_OCID`.
        dls_cp_client_auth : dict, optional
            Data Labeling control plane client auth. Default is None. The default
            authentication is set using `ads.set_auth` API. If you need to override the
            default, use the `ads.common.auth.api_keys` or
            `ads.common.auth.resource_principal` to create the appropriate
            authentication signer and kwargs; they are passed to
            `oci_client.OCIClientFactory`.
        dls_dp_client_auth : dict, optional
            Data Labeling data plane client auth. Default is None. The default
            authentication is set using `ads.set_auth` API. If you need to override the
            default, use the `ads.common.auth.api_keys` or
            `ads.common.auth.resource_principal` to create the appropriate
            authentication signer and kwargs; they are passed to
            `oci_client.OCIClientFactory`.

        Returns
        -------
        None
            Nothing.

        Raises
        ------
        ValueError
            If no compartment id was provided and none could be taken from the
            configuration.
        """
        self.compartment_id = compartment_id
        if self.compartment_id is None:
            self.compartment_id = (
                NB_SESSION_COMPARTMENT_OCID or JOB_RUN_COMPARTMENT_OCID
            )

        if not self.compartment_id:
            raise ValueError("The parameter `compartment_id` is required.")

        self.dls_cp_client_auth = dls_cp_client_auth or auth.default_signer()
        self.dls_dp_client_auth = dls_dp_client_auth or auth.default_signer()

        self.dls_dp_client = oci_client.OCIClientFactory(
            **self.dls_dp_client_auth
        ).data_labeling_dp

        self.dls_cp_client = oci_client.OCIClientFactory(
            **self.dls_cp_client_auth
        ).data_labeling_cp

    def list_dataset(self, **kwargs) -> pd.DataFrame:
        """List all the datasets created from the data labeling service under a given compartment.

        Parameters
        ----------
        kwargs : dict, optional
            Additional keyword arguments will be passed to the control plane client's
            `list_datasets` method.

        Returns
        -------
        pandas.DataFrame
            pandas dataframe which contains the dataset information, indexed by
            dataset id. Empty when the compartment has no datasets.

        Raises
        ------
        Exception
            If pagination.list_call_get_all_results() fails
        """
        items = pagination.list_call_get_all_results(
            self.dls_cp_client.list_datasets, self.compartment_id, **kwargs
        ).data

        df = pd.DataFrame()
        if items:
            df = pd.concat(
                [
                    pd.DataFrame(to_dict(dataset), index=[i])
                    for i, dataset in enumerate(items)
                ],
                axis=0,
            )
            df = (
                df.reset_index(drop=True).set_index("id").drop(columns="compartment_id")
            )
            df.columns = [
                snake_to_camel(name, capitalized_first_token=True)
                for name in df.columns
            ]
        return df

    def export(self, dataset_id: str, path: str, include_unlabeled=False) -> str:
        """Export dataset based on the dataset_id and save the jsonl files under the path
        (metadata jsonl file and the records jsonl file) to the object storage path provided by the user
        and return the metadata jsonl path.

        Parameters
        ----------
        dataset_id : str
            The dataset id of which the snapshot will be generated.
        path : str
            The object storage path to store the generated snapshot.
            "oci://<bucket_name>@<namespace>/prefix"
        include_unlabeled: bool, Optional. Defaults to False.
            Whether to include unlabeled records or not.

        Returns
        -------
        str
            oci path of the metadata jsonl file.

        Raises
        ------
        InvalidObjectStoragePath
            If `path` does not follow the 'oci://<bucket_name>@<namespace>/key' pattern.
        ValueError
            If the snapshot request fails and `dataset_id` is not among the datasets
            returned by `list_dataset()`.
        """
        # The original pattern ("oci://*@*") was a glob, not a regex, and matched
        # any string starting with "oci:/". Require a bucket and a namespace.
        if not re.match(r"oci://[^/@]+@[^/]+", path):
            raise InvalidObjectStoragePath(
                "The parameter `path` is not valid. It must follow the pattern 'oci://<bucket_name>@<namespace>/key'."
            )
        url_parse = urlparse(path)
        bucketname = url_parse.username
        namespace = url_parse.hostname
        if not bucketname:
            raise InvalidObjectStoragePath(
                f"The parameter `path` is not valid. The bucket name ({bucketname}) was not found. It must follow the pattern 'oci://<bucket_name>@<namespace>/key'."
            )
        if not namespace:
            raise InvalidObjectStoragePath(
                f"The parameter `path` is not valid. The name space ({namespace}) was not found. It must follow the pattern 'oci://<bucket_name>@<namespace>/key'."
            )

        # Normalise to exactly one trailing slash and no leading slash.
        prefix = url_parse.path.strip("/") + "/"
        self.client = self.dls_cp_client

        os_snapshot_export_detail = ObjectStorageSnapshotExportDetails(
            export_type="OBJECT_STORAGE",
            namespace=namespace,
            bucket=bucketname,
            prefix=prefix,
        )

        snapshot_detail = SnapshotDatasetDetails(
            are_annotations_included=True,
            are_unannotated_records_included=include_unlabeled,
            export_details=os_snapshot_export_detail,
        )
        try:
            snapshot_detail_response = self.dls_cp_client.snapshot_dataset(
                dataset_id, snapshot_detail
            )
        except Exception as error:
            # Turn an unknown dataset id into a clearer error; anything else is
            # re-raised unchanged.
            if dataset_id not in self.list_dataset().index:
                raise ValueError(
                    "The parameter `dataset_id` is invalid. "
                    "Use the `.list_dataset()` method to obtain a list of all available datasets."
                ) from error
            raise

        res_work_request = self._wait_for_work_request(snapshot_detail_response)

        # NOTE(review): assumes the second resource of the work request carries the
        # exported object's BUCKET/NAMESPACE/OBJECT metadata - confirm against the service.
        metadata = res_work_request.data.resources[1].metadata
        return ObjectStorageDetails(
            metadata["BUCKET"], metadata["NAMESPACE"], metadata["OBJECT"]
        ).path

    def _wait_for_work_request(self, snapshot_detail_response):
        """Poll the snapshot work request, mirroring its log messages in a progress bar.

        Parameters
        ----------
        snapshot_detail_response
            Response of `snapshot_dataset`; its "opc-work-request-id" header
            identifies the work request to poll.

        Returns
        -------
        The last work request response fetched from the control plane client.

        Raises
        ------
        Exception
            If polling stops while the work request status is not SUCCEEDED. The
            message is the last work request log entry, or a generic message when
            there are no log entries.
        """
        successful_state = WorkRequest.STATUS_SUCCEEDED
        wait_for_states = (
            WorkRequest.STATUS_CANCELED,
            WorkRequest.STATUS_CANCELING,
            WorkRequest.STATUS_FAILED,
        )
        work_request_id = snapshot_detail_response.headers["opc-work-request-id"]
        work_request_log_entries = self.dls_cp_client.list_work_request_logs(
            work_request_id
        ).data.items
        # Number of log entries already pushed to the progress bar.
        i = 0
        res_work_request = self.dls_cp_client.get_work_request(work_request_id)
        self._num_progress_bar = NUM_PROGRESS_BAR
        with utils.get_progress_bar(self._num_progress_bar) as progress:
            # NOTE(review): completion is detected by the log reaching
            # NUM_PROGRESS_BAR - 1 entries; a work request that succeeds with fewer
            # log entries would keep polling - confirm against the service.
            while len(work_request_log_entries) <= self._num_progress_bar - 1:
                for work_request_log_entry in work_request_log_entries[i:]:
                    progress.update(work_request_log_entry.message)
                    i += 1
                if (
                    len(work_request_log_entries) == self._num_progress_bar - 1
                    or res_work_request.data.status in wait_for_states
                ):
                    # A failed/cancelled request may have no log entries yet;
                    # avoid an IndexError that would hide the real status.
                    if work_request_log_entries:
                        last_message = work_request_log_entries[-1].message
                    else:
                        last_message = (
                            f"Work request {work_request_id} ended with status "
                            f"{res_work_request.data.status}."
                        )
                    progress.update(last_message)
                    if res_work_request.data.status != successful_state:
                        raise Exception(last_message)
                    break
                # Back off in proportion to the number of log entries seen so far.
                time.sleep(i)
                work_request_log_entries = self.dls_cp_client.list_work_request_logs(
                    work_request_id
                ).data.items
                res_work_request = self.dls_cp_client.get_work_request(work_request_id)
        return res_work_request
|
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8; -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
from abc import ABC, abstractmethod
|
8
|
+
from typing import Any
|
9
|
+
|
10
|
+
|
11
|
+
class Loader(ABC):
    """Abstract interface that every data loader implements."""

    @abstractmethod
    def load(self, **kwargs) -> Any:
        """Load and return the data; concrete loaders must override this."""
|
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8; -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
from abc import ABC, abstractmethod
|
8
|
+
from typing import Any
|
9
|
+
|
10
|
+
|
11
|
+
class Parser(ABC):
    """Abstract interface that every data parser implements."""

    @abstractmethod
    def parse(self) -> Any:
        """Parse and return the data; concrete parsers must override this."""
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8; -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
from abc import ABC, abstractmethod
|
8
|
+
from typing import Any
|
9
|
+
from ads.common.serializer import Serializable
|
10
|
+
|
11
|
+
|
12
|
+
class Reader(ABC):
    """Data Reader Interface."""

    def info(self) -> "Serializable":
        """Return information about the data being read.

        Subclasses that support it override this method. The base
        implementation always raises.

        Raises
        ------
        NotImplementedError
            Always, unless overridden by the subclass.
        """
        # The original built the exception but never raised it, so callers
        # silently received None.
        raise NotImplementedError(
            f"The class {self.__class__.__name__} did not implement the required method "
            "`info()`. Contact the class maintainer."
        )

    @abstractmethod
    def read(self) -> Any:
        """Read and return the data; concrete readers must override this."""
|
@@ -0,0 +1,241 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8; -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
import os
|
8
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
9
|
+
from io import BytesIO
|
10
|
+
from typing import Any, Dict, List, Union
|
11
|
+
|
12
|
+
import fsspec
|
13
|
+
import PIL
|
14
|
+
from ads.common import auth as authutil
|
15
|
+
from ads.data_labeling.constants import DatasetType
|
16
|
+
from ads.data_labeling.interface.loader import Loader
|
17
|
+
from ads.text_dataset.dataset import TextDatasetFactory, backends
|
18
|
+
from PIL import Image
|
19
|
+
|
20
|
+
# Maximum number of worker threads used by FileLoader.bulk_load.
THREAD_POOL_MAX_WORKERS = 10
|
21
|
+
|
22
|
+
|
23
|
+
class FileLoader:
    """FileLoader Base Class.

    Attributes
    ----------
    auth: (dict, optional). Defaults to None.
        The default authentication is set using `ads.set_auth` API. If you need to override the
        default, use the `ads.common.auth.api_keys` or `ads.common.auth.resource_principal` to create
        the appropriate authentication signer and kwargs. They are passed to `fsspec.open`.

    Examples
    --------
    >>> from ads.data_labeling.loader.file_loader import FileLoader
    >>> import oci
    >>> import os
    >>> from ads.common import auth as authutil
    >>> path = "path/to/your_text_file.txt"
    >>> file_content = FileLoader(auth=authutil.api_keys()).load(path)
    """

    def __init__(self, auth: Dict = None) -> None:
        """Initiates a FileLoader instance.

        Parameters
        ----------
        auth: (dict, optional). Defaults to None.
            The default authentication is set using `ads.set_auth` API. If you need to override the
            default, use the `ads.common.auth.api_keys` or `ads.common.auth.resource_principal` to create
            the appropriate authentication signer and kwargs. They are passed to `fsspec.open`.
        """
        self.auth = auth or authutil.default_signer()

    def load(self, path: str, **kwargs) -> bytes:
        """Loads the file content from the path.

        Parameters
        ----------
        path: str
            The file path, can be local or object storage path.
        kwargs:
            Nothing. Accepted for interface compatibility and ignored.

        Returns
        -------
        bytes
            The raw content read from the file.

        Raises
        ------
        FileNotFoundError
            If the file does not exist.
        """
        try:
            with fsspec.open(path, **self.auth) as f:
                return f.read()
        except FileNotFoundError as err:
            raise FileNotFoundError(f"{path} not found.") from err

    def bulk_load(self, paths: List[str], **kwargs) -> Dict[str, Any]:
        """Loads the files content from the list of paths.
        The ThreadPoolExecutor is used to load the files in parallel threads.

        Parameters
        ----------
        paths: List[str]
            The list of file paths, can be local or object storage paths.
        kwargs:
            Forwarded to `load` for every path.

        Returns
        -------
        Dict[str, Any]
            The map between file path and file content. Empty when `paths` is
            empty or is not a list.
        """
        if not paths or not isinstance(paths, list):
            return {}

        with ThreadPoolExecutor(max_workers=THREAD_POOL_MAX_WORKERS) as pool:
            futures = {pool.submit(self.load, path, **kwargs): path for path in paths}
            # task.result() re-raises any exception raised by load().
            return {futures[task]: task.result() for task in as_completed(futures)}
|
103
|
+
|
104
|
+
|
105
|
+
class TextFileLoader(FileLoader):
    """Loader that extracts the text content of a file.

    Examples
    --------
    >>> from ads.data_labeling import TextFileLoader
    >>> import oci
    >>> import os
    >>> from ads.common import auth as authutil
    >>> path = "path/to/your_text_file.txt"
    >>> file_content = TextFileLoader(auth=authutil.api_keys()).load(path)
    """

    def load(
        self, path: str, backend: Union[str, backends.Base] = "default", **kwargs
    ) -> str:
        """Reads the text content of the file at `path`.

        The file extension (lower-cased, without the dot) selects the text
        dataset format used to read the file.

        Parameters
        ----------
        path: str
            Text file path, can be local or object storage path.
        backend: Union[str, backends.Base]
            Default to "default". Valid options are "default" and "tika" or
            ads.text_dataset.backends.Base, ads.text_dataset.backends.Tika
        kwargs:
            encoding: (str, optional). Defaults to 'utf-8'.
                Encoding for text files. Used only to extract the content of the text dataset contents.

        Returns
        -------
        str
            Content of the text file.

        Raises
        ------
        FileNotFoundError
            If the file does not exist.
        """
        extension = os.path.splitext(path)[1].replace(".", "")
        try:
            reader = TextDatasetFactory.format(extension.lower()).backend(backend)
            records = reader.read_text(path, storage_options=self.auth, **kwargs)
            # Only the first field of the first record is used.
            content = next(records)[0]
        except FileNotFoundError:
            raise FileNotFoundError(f"{path} not found.")
        return content
|
152
|
+
|
153
|
+
|
154
|
+
class ImageFileLoader(FileLoader):
    """Loader that opens image files with Pillow.

    Examples
    --------
    >>> from ads.data_labeling import ImageFileLoader
    >>> import oci
    >>> import os
    >>> from ads.common import auth as authutil
    >>> path = "path/to/image.png"
    >>> image = ImageFileLoader(auth=authutil.api_keys()).load(path)
    """

    def load(self, path: str, **kwargs) -> PIL.ImageFile.ImageFile:
        """Reads the file at `path` and opens it as an image.

        Parameters
        ----------
        path: str
            Image file path, can be local or object storage path.
        kwargs:
            Nothing. Accepted for interface compatibility and ignored.

        Returns
        -------
        PIL.ImageFile.ImageFile
            Image opened by Pillow.
        """
        # The base loader returns the raw file content; wrap it in a buffer
        # so Pillow can read it like a file.
        buffer = BytesIO(super().load(path=path))
        return Image.open(buffer)
|
187
|
+
|
188
|
+
|
189
|
+
class FileLoaderFactory:
    """FileLoaderFactory class to create/register FileLoaders."""

    # Maps a dataset type to the loader class used for it.
    _loaders = {
        DatasetType.TEXT: TextFileLoader,
        DatasetType.IMAGE: ImageFileLoader,
        DatasetType.DOCUMENT: FileLoader,
    }

    @staticmethod
    def loader(dataset_type: str, auth: Dict = None) -> FileLoader:
        """Gets the loader based on the dataset_type.

        Parameters
        ----------
        dataset_type: str
            Dataset type. Currently supports TEXT, IMAGE and DOCUMENT.
        auth: (dict, optional). Defaults to None.
            The default authentication is set using `ads.set_auth` API. If you need to override the
            default, use the `ads.common.auth.api_keys` or `ads.common.auth.resource_principal` to create
            the appropriate authentication signer and kwargs. Passed to the loader's constructor.

        Returns
        -------
        FileLoader
            A FileLoader instance corresponding to the dataset_type.

        Raises
        ------
        ValueError
            If no loader is registered for `dataset_type`.
        """
        if dataset_type not in FileLoaderFactory._loaders:
            raise ValueError(
                f"The wrong dataset type has been provided. "
                f"Supported dataset types are: `{DatasetType.TEXT}`, "
                f"`{DatasetType.IMAGE}` and `{DatasetType.DOCUMENT}`."
            )

        return FileLoaderFactory._loaders[dataset_type](auth=auth)

    @classmethod
    def register(cls, dataset_type: str, loader: Loader) -> None:
        """Registers a new loader for a given dataset_type.

        Parameters
        ----------
        dataset_type: str
            Dataset type. TEXT, IMAGE and DOCUMENT are registered by default;
            registering an existing type replaces its loader.
        loader: Loader
            A Loader class which supports loading content of the given dataset_type.
            It is instantiated with an `auth` keyword argument by `loader()`.

        Returns
        -------
        None
            Nothing.
        """
        # The original wrote to `cls._parsers`, which does not exist on this
        # class and raised AttributeError; the registry is `_loaders`.
        cls._loaders[dataset_type] = loader
|
@@ -0,0 +1,110 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8; -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2021, 2024 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
from dataclasses import asdict, dataclass, field
|
8
|
+
from typing import Any, Dict, List
|
9
|
+
|
10
|
+
import pandas as pd
|
11
|
+
from ads.common.object_storage_details import ObjectStorageDetails
|
12
|
+
from ads.common.serializer import DataClassSerializable
|
13
|
+
from oci.data_labeling_service_dataplane.models.dataset import Dataset as OCIDLSDataset
|
14
|
+
|
15
|
+
|
16
|
+
@dataclass
class Metadata(DataClassSerializable):
    """Represents the metadata of a labeled dataset.

    Attributes
    ----------
    source_path: str
        Location where all the source data (image/text/document) is stored.
    records_path: str
        Location where the records jsonl file is stored.
    labels: List
        List of classes/labels for the dataset.
    dataset_name: str
        Dataset display name on the Data Labeling Service console.
    compartment_id: str
        Compartment id of the labeled dataset.
    dataset_id: str
        Dataset id.
    annotation_type: str
        Type of the labeling/annotation task. Currently supports SINGLE_LABEL,
        MULTI_LABEL, ENTITY_EXTRACTION, BOUNDING_BOX.
    dataset_type: str
        Type of the dataset. Currently supports Text, Image, DOCUMENT.
    """

    source_path: str = ""
    records_path: str = ""
    labels: List[str] = field(default_factory=list)
    dataset_name: str = ""
    compartment_id: str = ""
    dataset_id: str = ""
    annotation_type: str = ""
    dataset_type: str = ""

    def to_dict(self) -> Dict:
        """Converts to dictionary representation.

        Returns
        -------
        Dict
            The metadata in dictionary type.
        """
        return asdict(self)

    def __repr__(self):
        """Shows the Metadata in yaml format."""
        # `to_yaml` is not defined in this class; presumably it is inherited
        # from DataClassSerializable.
        return self.to_yaml()

    def to_dataframe(self) -> pd.DataFrame:
        """Converts the metadata to dataframe format.

        Returns
        -------
        pandas.DataFrame
            The metadata in Pandas dataframe format: one row per attribute,
            in a single column with an empty name.
        """
        return pd.DataFrame({"": self.to_dict()})

    def _repr_html_(self):
        """Shows metadata in dataframe format."""
        return (
            self.to_dataframe().style.set_properties(**{"margin-left": "0px"}).to_html()
        )

    @classmethod
    def from_dls_dataset(cls, dataset: OCIDLSDataset) -> "Metadata":
        """Constructs a Metadata instance from OCI DLS dataset.

        `records_path` is not populated here and keeps its default value.

        Parameters
        ----------
        dataset: OCIDLSDataset
            OCIDLSDataset object.

        Returns
        -------
        Metadata
            The ads labeled dataset metadata instance.
        """
        oci_labels = [d.name for d in dataset.label_set.items]
        source_details = dataset.dataset_source_details
        oci_source_path = ObjectStorageDetails(
            source_details.bucket,
            source_details.namespace,
            source_details.prefix,
        ).path

        # Build through `cls` so that subclasses get an instance of their own type.
        return cls(
            annotation_type=dataset.annotation_format,
            dataset_name=dataset.display_name,
            source_path=oci_source_path,
            labels=oci_labels,
            compartment_id=dataset.compartment_id,
            dataset_id=dataset.id,
            dataset_type=dataset.dataset_format_details.format_type,
        )
|