oracle-ads 2.13.7__py3-none-any.whl → 2.13.9rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {oracle_ads-2.13.7.dist-info → oracle_ads-2.13.9rc0.dist-info}/METADATA +151 -151
- oracle_ads-2.13.9rc0.dist-info/RECORD +9 -0
- {oracle_ads-2.13.7.dist-info → oracle_ads-2.13.9rc0.dist-info}/WHEEL +2 -1
- {oracle_ads-2.13.7.dist-info → oracle_ads-2.13.9rc0.dist-info}/entry_points.txt +1 -2
- oracle_ads-2.13.9rc0.dist-info/top_level.txt +1 -0
- ads/aqua/__init__.py +0 -40
- ads/aqua/app.py +0 -506
- ads/aqua/cli.py +0 -96
- ads/aqua/client/__init__.py +0 -3
- ads/aqua/client/client.py +0 -836
- ads/aqua/client/openai_client.py +0 -305
- ads/aqua/common/__init__.py +0 -5
- ads/aqua/common/decorator.py +0 -125
- ads/aqua/common/entities.py +0 -266
- ads/aqua/common/enums.py +0 -122
- ads/aqua/common/errors.py +0 -109
- ads/aqua/common/utils.py +0 -1285
- ads/aqua/config/__init__.py +0 -4
- ads/aqua/config/container_config.py +0 -248
- ads/aqua/config/evaluation/__init__.py +0 -4
- ads/aqua/config/evaluation/evaluation_service_config.py +0 -147
- ads/aqua/config/utils/__init__.py +0 -4
- ads/aqua/config/utils/serializer.py +0 -339
- ads/aqua/constants.py +0 -114
- ads/aqua/data.py +0 -14
- ads/aqua/dummy_data/icon.txt +0 -1
- ads/aqua/dummy_data/oci_model_deployments.json +0 -56
- ads/aqua/dummy_data/oci_models.json +0 -1
- ads/aqua/dummy_data/readme.md +0 -26
- ads/aqua/evaluation/__init__.py +0 -8
- ads/aqua/evaluation/constants.py +0 -53
- ads/aqua/evaluation/entities.py +0 -186
- ads/aqua/evaluation/errors.py +0 -70
- ads/aqua/evaluation/evaluation.py +0 -1814
- ads/aqua/extension/__init__.py +0 -42
- ads/aqua/extension/aqua_ws_msg_handler.py +0 -76
- ads/aqua/extension/base_handler.py +0 -90
- ads/aqua/extension/common_handler.py +0 -121
- ads/aqua/extension/common_ws_msg_handler.py +0 -36
- ads/aqua/extension/deployment_handler.py +0 -298
- ads/aqua/extension/deployment_ws_msg_handler.py +0 -54
- ads/aqua/extension/errors.py +0 -30
- ads/aqua/extension/evaluation_handler.py +0 -129
- ads/aqua/extension/evaluation_ws_msg_handler.py +0 -61
- ads/aqua/extension/finetune_handler.py +0 -96
- ads/aqua/extension/model_handler.py +0 -390
- ads/aqua/extension/models/__init__.py +0 -0
- ads/aqua/extension/models/ws_models.py +0 -145
- ads/aqua/extension/models_ws_msg_handler.py +0 -50
- ads/aqua/extension/ui_handler.py +0 -282
- ads/aqua/extension/ui_websocket_handler.py +0 -130
- ads/aqua/extension/utils.py +0 -133
- ads/aqua/finetuning/__init__.py +0 -7
- ads/aqua/finetuning/constants.py +0 -23
- ads/aqua/finetuning/entities.py +0 -181
- ads/aqua/finetuning/finetuning.py +0 -731
- ads/aqua/model/__init__.py +0 -8
- ads/aqua/model/constants.py +0 -60
- ads/aqua/model/entities.py +0 -306
- ads/aqua/model/enums.py +0 -30
- ads/aqua/model/model.py +0 -2079
- ads/aqua/modeldeployment/__init__.py +0 -8
- ads/aqua/modeldeployment/constants.py +0 -10
- ads/aqua/modeldeployment/deployment.py +0 -1324
- ads/aqua/modeldeployment/entities.py +0 -653
- ads/aqua/modeldeployment/inference.py +0 -74
- ads/aqua/modeldeployment/utils.py +0 -543
- ads/aqua/resources/gpu_shapes_index.json +0 -94
- ads/aqua/server/__init__.py +0 -4
- ads/aqua/server/__main__.py +0 -24
- ads/aqua/server/app.py +0 -47
- ads/aqua/server/aqua_spec.yml +0 -1291
- ads/aqua/training/__init__.py +0 -4
- ads/aqua/training/exceptions.py +0 -476
- ads/aqua/ui.py +0 -499
- ads/automl/__init__.py +0 -9
- ads/automl/driver.py +0 -330
- ads/automl/provider.py +0 -975
- ads/bds/__init__.py +0 -5
- ads/bds/auth.py +0 -127
- ads/bds/big_data_service.py +0 -255
- ads/catalog/__init__.py +0 -19
- ads/catalog/model.py +0 -1576
- ads/catalog/notebook.py +0 -461
- ads/catalog/project.py +0 -468
- ads/catalog/summary.py +0 -178
- ads/common/__init__.py +0 -11
- ads/common/analyzer.py +0 -65
- ads/common/artifact/.model-ignore +0 -63
- ads/common/artifact/__init__.py +0 -10
- ads/common/auth.py +0 -1122
- ads/common/card_identifier.py +0 -83
- ads/common/config.py +0 -647
- ads/common/data.py +0 -165
- ads/common/decorator/__init__.py +0 -9
- ads/common/decorator/argument_to_case.py +0 -88
- ads/common/decorator/deprecate.py +0 -69
- ads/common/decorator/require_nonempty_arg.py +0 -65
- ads/common/decorator/runtime_dependency.py +0 -178
- ads/common/decorator/threaded.py +0 -97
- ads/common/decorator/utils.py +0 -35
- ads/common/dsc_file_system.py +0 -303
- ads/common/error.py +0 -14
- ads/common/extended_enum.py +0 -81
- ads/common/function/__init__.py +0 -5
- ads/common/function/fn_util.py +0 -142
- ads/common/function/func_conf.yaml +0 -25
- ads/common/ipython.py +0 -76
- ads/common/model.py +0 -679
- ads/common/model_artifact.py +0 -1759
- ads/common/model_artifact_schema.json +0 -107
- ads/common/model_export_util.py +0 -664
- ads/common/model_metadata.py +0 -24
- ads/common/object_storage_details.py +0 -296
- ads/common/oci_client.py +0 -175
- ads/common/oci_datascience.py +0 -46
- ads/common/oci_logging.py +0 -1144
- ads/common/oci_mixin.py +0 -957
- ads/common/oci_resource.py +0 -136
- ads/common/serializer.py +0 -559
- ads/common/utils.py +0 -1852
- ads/common/word_lists.py +0 -1491
- ads/common/work_request.py +0 -189
- ads/data_labeling/__init__.py +0 -13
- ads/data_labeling/boundingbox.py +0 -253
- ads/data_labeling/constants.py +0 -47
- ads/data_labeling/data_labeling_service.py +0 -244
- ads/data_labeling/interface/__init__.py +0 -5
- ads/data_labeling/interface/loader.py +0 -16
- ads/data_labeling/interface/parser.py +0 -16
- ads/data_labeling/interface/reader.py +0 -23
- ads/data_labeling/loader/__init__.py +0 -5
- ads/data_labeling/loader/file_loader.py +0 -241
- ads/data_labeling/metadata.py +0 -110
- ads/data_labeling/mixin/__init__.py +0 -5
- ads/data_labeling/mixin/data_labeling.py +0 -232
- ads/data_labeling/ner.py +0 -129
- ads/data_labeling/parser/__init__.py +0 -5
- ads/data_labeling/parser/dls_record_parser.py +0 -388
- ads/data_labeling/parser/export_metadata_parser.py +0 -94
- ads/data_labeling/parser/export_record_parser.py +0 -473
- ads/data_labeling/reader/__init__.py +0 -5
- ads/data_labeling/reader/dataset_reader.py +0 -574
- ads/data_labeling/reader/dls_record_reader.py +0 -121
- ads/data_labeling/reader/export_record_reader.py +0 -62
- ads/data_labeling/reader/jsonl_reader.py +0 -75
- ads/data_labeling/reader/metadata_reader.py +0 -203
- ads/data_labeling/reader/record_reader.py +0 -263
- ads/data_labeling/record.py +0 -52
- ads/data_labeling/visualizer/__init__.py +0 -5
- ads/data_labeling/visualizer/image_visualizer.py +0 -525
- ads/data_labeling/visualizer/text_visualizer.py +0 -357
- ads/database/__init__.py +0 -5
- ads/database/connection.py +0 -338
- ads/dataset/__init__.py +0 -10
- ads/dataset/capabilities.md +0 -51
- ads/dataset/classification_dataset.py +0 -339
- ads/dataset/correlation.py +0 -226
- ads/dataset/correlation_plot.py +0 -563
- ads/dataset/dask_series.py +0 -173
- ads/dataset/dataframe_transformer.py +0 -110
- ads/dataset/dataset.py +0 -1979
- ads/dataset/dataset_browser.py +0 -360
- ads/dataset/dataset_with_target.py +0 -995
- ads/dataset/exception.py +0 -25
- ads/dataset/factory.py +0 -987
- ads/dataset/feature_engineering_transformer.py +0 -35
- ads/dataset/feature_selection.py +0 -107
- ads/dataset/forecasting_dataset.py +0 -26
- ads/dataset/helper.py +0 -1450
- ads/dataset/label_encoder.py +0 -99
- ads/dataset/mixin/__init__.py +0 -5
- ads/dataset/mixin/dataset_accessor.py +0 -134
- ads/dataset/pipeline.py +0 -58
- ads/dataset/plot.py +0 -710
- ads/dataset/progress.py +0 -86
- ads/dataset/recommendation.py +0 -297
- ads/dataset/recommendation_transformer.py +0 -502
- ads/dataset/regression_dataset.py +0 -14
- ads/dataset/sampled_dataset.py +0 -1050
- ads/dataset/target.py +0 -98
- ads/dataset/timeseries.py +0 -18
- ads/dbmixin/__init__.py +0 -5
- ads/dbmixin/db_pandas_accessor.py +0 -153
- ads/environment/__init__.py +0 -9
- ads/environment/ml_runtime.py +0 -66
- ads/evaluations/README.md +0 -14
- ads/evaluations/__init__.py +0 -109
- ads/evaluations/evaluation_plot.py +0 -983
- ads/evaluations/evaluator.py +0 -1334
- ads/evaluations/statistical_metrics.py +0 -543
- ads/experiments/__init__.py +0 -9
- ads/experiments/capabilities.md +0 -0
- ads/explanations/__init__.py +0 -21
- ads/explanations/base_explainer.py +0 -142
- ads/explanations/capabilities.md +0 -83
- ads/explanations/explainer.py +0 -190
- ads/explanations/mlx_global_explainer.py +0 -1050
- ads/explanations/mlx_interface.py +0 -386
- ads/explanations/mlx_local_explainer.py +0 -287
- ads/explanations/mlx_whatif_explainer.py +0 -201
- ads/feature_engineering/__init__.py +0 -20
- ads/feature_engineering/accessor/__init__.py +0 -5
- ads/feature_engineering/accessor/dataframe_accessor.py +0 -535
- ads/feature_engineering/accessor/mixin/__init__.py +0 -5
- ads/feature_engineering/accessor/mixin/correlation.py +0 -166
- ads/feature_engineering/accessor/mixin/eda_mixin.py +0 -266
- ads/feature_engineering/accessor/mixin/eda_mixin_series.py +0 -85
- ads/feature_engineering/accessor/mixin/feature_types_mixin.py +0 -211
- ads/feature_engineering/accessor/mixin/utils.py +0 -65
- ads/feature_engineering/accessor/series_accessor.py +0 -431
- ads/feature_engineering/adsimage/__init__.py +0 -5
- ads/feature_engineering/adsimage/image.py +0 -192
- ads/feature_engineering/adsimage/image_reader.py +0 -170
- ads/feature_engineering/adsimage/interface/__init__.py +0 -5
- ads/feature_engineering/adsimage/interface/reader.py +0 -19
- ads/feature_engineering/adsstring/__init__.py +0 -7
- ads/feature_engineering/adsstring/oci_language/__init__.py +0 -8
- ads/feature_engineering/adsstring/string/__init__.py +0 -8
- ads/feature_engineering/data_schema.json +0 -57
- ads/feature_engineering/dataset/__init__.py +0 -5
- ads/feature_engineering/dataset/zip_code_data.py +0 -42062
- ads/feature_engineering/exceptions.py +0 -40
- ads/feature_engineering/feature_type/__init__.py +0 -133
- ads/feature_engineering/feature_type/address.py +0 -184
- ads/feature_engineering/feature_type/adsstring/__init__.py +0 -5
- ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +0 -164
- ads/feature_engineering/feature_type/adsstring/oci_language.py +0 -93
- ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +0 -5
- ads/feature_engineering/feature_type/adsstring/parsers/base.py +0 -47
- ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +0 -96
- ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +0 -221
- ads/feature_engineering/feature_type/adsstring/string.py +0 -258
- ads/feature_engineering/feature_type/base.py +0 -58
- ads/feature_engineering/feature_type/boolean.py +0 -183
- ads/feature_engineering/feature_type/category.py +0 -146
- ads/feature_engineering/feature_type/constant.py +0 -137
- ads/feature_engineering/feature_type/continuous.py +0 -151
- ads/feature_engineering/feature_type/creditcard.py +0 -314
- ads/feature_engineering/feature_type/datetime.py +0 -190
- ads/feature_engineering/feature_type/discrete.py +0 -134
- ads/feature_engineering/feature_type/document.py +0 -43
- ads/feature_engineering/feature_type/gis.py +0 -251
- ads/feature_engineering/feature_type/handler/__init__.py +0 -5
- ads/feature_engineering/feature_type/handler/feature_validator.py +0 -524
- ads/feature_engineering/feature_type/handler/feature_warning.py +0 -319
- ads/feature_engineering/feature_type/handler/warnings.py +0 -128
- ads/feature_engineering/feature_type/integer.py +0 -142
- ads/feature_engineering/feature_type/ip_address.py +0 -144
- ads/feature_engineering/feature_type/ip_address_v4.py +0 -138
- ads/feature_engineering/feature_type/ip_address_v6.py +0 -138
- ads/feature_engineering/feature_type/lat_long.py +0 -256
- ads/feature_engineering/feature_type/object.py +0 -43
- ads/feature_engineering/feature_type/ordinal.py +0 -132
- ads/feature_engineering/feature_type/phone_number.py +0 -135
- ads/feature_engineering/feature_type/string.py +0 -171
- ads/feature_engineering/feature_type/text.py +0 -93
- ads/feature_engineering/feature_type/unknown.py +0 -43
- ads/feature_engineering/feature_type/zip_code.py +0 -164
- ads/feature_engineering/feature_type_manager.py +0 -406
- ads/feature_engineering/schema.py +0 -795
- ads/feature_engineering/utils.py +0 -245
- ads/feature_store/.readthedocs.yaml +0 -19
- ads/feature_store/README.md +0 -65
- ads/feature_store/__init__.py +0 -9
- ads/feature_store/common/__init__.py +0 -0
- ads/feature_store/common/enums.py +0 -339
- ads/feature_store/common/exceptions.py +0 -18
- ads/feature_store/common/spark_session_singleton.py +0 -125
- ads/feature_store/common/utils/__init__.py +0 -0
- ads/feature_store/common/utils/base64_encoder_decoder.py +0 -72
- ads/feature_store/common/utils/feature_schema_mapper.py +0 -283
- ads/feature_store/common/utils/transformation_utils.py +0 -82
- ads/feature_store/common/utils/utility.py +0 -403
- ads/feature_store/data_validation/__init__.py +0 -0
- ads/feature_store/data_validation/great_expectation.py +0 -129
- ads/feature_store/dataset.py +0 -1230
- ads/feature_store/dataset_job.py +0 -530
- ads/feature_store/docs/Dockerfile +0 -7
- ads/feature_store/docs/Makefile +0 -44
- ads/feature_store/docs/conf.py +0 -28
- ads/feature_store/docs/requirements.txt +0 -14
- ads/feature_store/docs/source/ads.feature_store.query.rst +0 -20
- ads/feature_store/docs/source/cicd.rst +0 -137
- ads/feature_store/docs/source/conf.py +0 -86
- ads/feature_store/docs/source/data_versioning.rst +0 -33
- ads/feature_store/docs/source/dataset.rst +0 -388
- ads/feature_store/docs/source/dataset_job.rst +0 -27
- ads/feature_store/docs/source/demo.rst +0 -70
- ads/feature_store/docs/source/entity.rst +0 -78
- ads/feature_store/docs/source/feature_group.rst +0 -624
- ads/feature_store/docs/source/feature_group_job.rst +0 -29
- ads/feature_store/docs/source/feature_store.rst +0 -122
- ads/feature_store/docs/source/feature_store_class.rst +0 -123
- ads/feature_store/docs/source/feature_validation.rst +0 -66
- ads/feature_store/docs/source/figures/cicd.png +0 -0
- ads/feature_store/docs/source/figures/data_validation.png +0 -0
- ads/feature_store/docs/source/figures/data_versioning.png +0 -0
- ads/feature_store/docs/source/figures/dataset.gif +0 -0
- ads/feature_store/docs/source/figures/dataset.png +0 -0
- ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
- ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
- ads/feature_store/docs/source/figures/entity.png +0 -0
- ads/feature_store/docs/source/figures/feature_group.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
- ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
- ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
- ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
- ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
- ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
- ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
- ads/feature_store/docs/source/figures/overview.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
- ads/feature_store/docs/source/figures/stats_1.png +0 -0
- ads/feature_store/docs/source/figures/stats_2.png +0 -0
- ads/feature_store/docs/source/figures/stats_d.png +0 -0
- ads/feature_store/docs/source/figures/stats_fg.png +0 -0
- ads/feature_store/docs/source/figures/transformation.png +0 -0
- ads/feature_store/docs/source/figures/transformations.gif +0 -0
- ads/feature_store/docs/source/figures/validation.png +0 -0
- ads/feature_store/docs/source/figures/validation_fg.png +0 -0
- ads/feature_store/docs/source/figures/validation_results.png +0 -0
- ads/feature_store/docs/source/figures/validation_summary.png +0 -0
- ads/feature_store/docs/source/index.rst +0 -81
- ads/feature_store/docs/source/module.rst +0 -8
- ads/feature_store/docs/source/notebook.rst +0 -94
- ads/feature_store/docs/source/overview.rst +0 -47
- ads/feature_store/docs/source/quickstart.rst +0 -176
- ads/feature_store/docs/source/release_notes.rst +0 -194
- ads/feature_store/docs/source/setup_feature_store.rst +0 -81
- ads/feature_store/docs/source/statistics.rst +0 -58
- ads/feature_store/docs/source/transformation.rst +0 -199
- ads/feature_store/docs/source/ui.rst +0 -65
- ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +0 -66
- ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +0 -192
- ads/feature_store/docs/source/user_guides.setup.terraform.rst +0 -338
- ads/feature_store/entity.py +0 -718
- ads/feature_store/execution_strategy/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +0 -375
- ads/feature_store/execution_strategy/engine/__init__.py +0 -0
- ads/feature_store/execution_strategy/engine/spark_engine.py +0 -316
- ads/feature_store/execution_strategy/execution_strategy.py +0 -113
- ads/feature_store/execution_strategy/execution_strategy_provider.py +0 -47
- ads/feature_store/execution_strategy/spark/__init__.py +0 -0
- ads/feature_store/execution_strategy/spark/spark_execution.py +0 -618
- ads/feature_store/feature.py +0 -192
- ads/feature_store/feature_group.py +0 -1494
- ads/feature_store/feature_group_expectation.py +0 -346
- ads/feature_store/feature_group_job.py +0 -602
- ads/feature_store/feature_lineage/__init__.py +0 -0
- ads/feature_store/feature_lineage/graphviz_service.py +0 -180
- ads/feature_store/feature_option_details.py +0 -50
- ads/feature_store/feature_statistics/__init__.py +0 -0
- ads/feature_store/feature_statistics/statistics_service.py +0 -99
- ads/feature_store/feature_store.py +0 -699
- ads/feature_store/feature_store_registrar.py +0 -518
- ads/feature_store/input_feature_detail.py +0 -149
- ads/feature_store/mixin/__init__.py +0 -4
- ads/feature_store/mixin/oci_feature_store.py +0 -145
- ads/feature_store/model_details.py +0 -73
- ads/feature_store/query/__init__.py +0 -0
- ads/feature_store/query/filter.py +0 -266
- ads/feature_store/query/generator/__init__.py +0 -0
- ads/feature_store/query/generator/query_generator.py +0 -298
- ads/feature_store/query/join.py +0 -161
- ads/feature_store/query/query.py +0 -403
- ads/feature_store/query/validator/__init__.py +0 -0
- ads/feature_store/query/validator/query_validator.py +0 -57
- ads/feature_store/response/__init__.py +0 -0
- ads/feature_store/response/response_builder.py +0 -68
- ads/feature_store/service/__init__.py +0 -0
- ads/feature_store/service/oci_dataset.py +0 -139
- ads/feature_store/service/oci_dataset_job.py +0 -199
- ads/feature_store/service/oci_entity.py +0 -125
- ads/feature_store/service/oci_feature_group.py +0 -164
- ads/feature_store/service/oci_feature_group_job.py +0 -214
- ads/feature_store/service/oci_feature_store.py +0 -182
- ads/feature_store/service/oci_lineage.py +0 -87
- ads/feature_store/service/oci_transformation.py +0 -104
- ads/feature_store/statistics/__init__.py +0 -0
- ads/feature_store/statistics/abs_feature_value.py +0 -49
- ads/feature_store/statistics/charts/__init__.py +0 -0
- ads/feature_store/statistics/charts/abstract_feature_plot.py +0 -37
- ads/feature_store/statistics/charts/box_plot.py +0 -148
- ads/feature_store/statistics/charts/frequency_distribution.py +0 -65
- ads/feature_store/statistics/charts/probability_distribution.py +0 -68
- ads/feature_store/statistics/charts/top_k_frequent_elements.py +0 -98
- ads/feature_store/statistics/feature_stat.py +0 -126
- ads/feature_store/statistics/generic_feature_value.py +0 -33
- ads/feature_store/statistics/statistics.py +0 -41
- ads/feature_store/statistics_config.py +0 -101
- ads/feature_store/templates/feature_store_template.yaml +0 -45
- ads/feature_store/transformation.py +0 -499
- ads/feature_store/validation_output.py +0 -57
- ads/hpo/__init__.py +0 -9
- ads/hpo/_imports.py +0 -91
- ads/hpo/ads_search_space.py +0 -439
- ads/hpo/distributions.py +0 -325
- ads/hpo/objective.py +0 -280
- ads/hpo/search_cv.py +0 -1657
- ads/hpo/stopping_criterion.py +0 -75
- ads/hpo/tuner_artifact.py +0 -413
- ads/hpo/utils.py +0 -91
- ads/hpo/validation.py +0 -140
- ads/hpo/visualization/__init__.py +0 -5
- ads/hpo/visualization/_contour.py +0 -23
- ads/hpo/visualization/_edf.py +0 -20
- ads/hpo/visualization/_intermediate_values.py +0 -21
- ads/hpo/visualization/_optimization_history.py +0 -25
- ads/hpo/visualization/_parallel_coordinate.py +0 -169
- ads/hpo/visualization/_param_importances.py +0 -26
- ads/jobs/__init__.py +0 -53
- ads/jobs/ads_job.py +0 -663
- ads/jobs/builders/__init__.py +0 -5
- ads/jobs/builders/base.py +0 -156
- ads/jobs/builders/infrastructure/__init__.py +0 -6
- ads/jobs/builders/infrastructure/base.py +0 -165
- ads/jobs/builders/infrastructure/dataflow.py +0 -1252
- ads/jobs/builders/infrastructure/dsc_job.py +0 -1894
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +0 -1233
- ads/jobs/builders/infrastructure/utils.py +0 -65
- ads/jobs/builders/runtimes/__init__.py +0 -5
- ads/jobs/builders/runtimes/artifact.py +0 -338
- ads/jobs/builders/runtimes/base.py +0 -325
- ads/jobs/builders/runtimes/container_runtime.py +0 -242
- ads/jobs/builders/runtimes/python_runtime.py +0 -1016
- ads/jobs/builders/runtimes/pytorch_runtime.py +0 -204
- ads/jobs/cli.py +0 -104
- ads/jobs/env_var_parser.py +0 -131
- ads/jobs/extension.py +0 -160
- ads/jobs/schema/__init__.py +0 -5
- ads/jobs/schema/infrastructure_schema.json +0 -116
- ads/jobs/schema/job_schema.json +0 -42
- ads/jobs/schema/runtime_schema.json +0 -183
- ads/jobs/schema/validator.py +0 -141
- ads/jobs/serializer.py +0 -296
- ads/jobs/templates/__init__.py +0 -5
- ads/jobs/templates/container.py +0 -6
- ads/jobs/templates/driver_notebook.py +0 -177
- ads/jobs/templates/driver_oci.py +0 -500
- ads/jobs/templates/driver_python.py +0 -48
- ads/jobs/templates/driver_pytorch.py +0 -852
- ads/jobs/templates/driver_utils.py +0 -615
- ads/jobs/templates/hostname_from_env.c +0 -55
- ads/jobs/templates/oci_metrics.py +0 -181
- ads/jobs/utils.py +0 -104
- ads/llm/__init__.py +0 -28
- ads/llm/autogen/__init__.py +0 -2
- ads/llm/autogen/constants.py +0 -15
- ads/llm/autogen/reports/__init__.py +0 -2
- ads/llm/autogen/reports/base.py +0 -67
- ads/llm/autogen/reports/data.py +0 -103
- ads/llm/autogen/reports/session.py +0 -526
- ads/llm/autogen/reports/templates/chat_box.html +0 -13
- ads/llm/autogen/reports/templates/chat_box_lt.html +0 -5
- ads/llm/autogen/reports/templates/chat_box_rt.html +0 -6
- ads/llm/autogen/reports/utils.py +0 -56
- ads/llm/autogen/v02/__init__.py +0 -4
- ads/llm/autogen/v02/client.py +0 -295
- ads/llm/autogen/v02/log_handlers/__init__.py +0 -2
- ads/llm/autogen/v02/log_handlers/oci_file_handler.py +0 -83
- ads/llm/autogen/v02/loggers/__init__.py +0 -6
- ads/llm/autogen/v02/loggers/metric_logger.py +0 -320
- ads/llm/autogen/v02/loggers/session_logger.py +0 -580
- ads/llm/autogen/v02/loggers/utils.py +0 -86
- ads/llm/autogen/v02/runtime_logging.py +0 -163
- ads/llm/chain.py +0 -268
- ads/llm/chat_template.py +0 -31
- ads/llm/deploy.py +0 -63
- ads/llm/guardrails/__init__.py +0 -5
- ads/llm/guardrails/base.py +0 -442
- ads/llm/guardrails/huggingface.py +0 -44
- ads/llm/langchain/__init__.py +0 -5
- ads/llm/langchain/plugins/__init__.py +0 -5
- ads/llm/langchain/plugins/chat_models/__init__.py +0 -5
- ads/llm/langchain/plugins/chat_models/oci_data_science.py +0 -1027
- ads/llm/langchain/plugins/embeddings/__init__.py +0 -4
- ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +0 -184
- ads/llm/langchain/plugins/llms/__init__.py +0 -5
- ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +0 -979
- ads/llm/requirements.txt +0 -3
- ads/llm/serialize.py +0 -219
- ads/llm/serializers/__init__.py +0 -0
- ads/llm/serializers/retrieval_qa.py +0 -153
- ads/llm/serializers/runnable_parallel.py +0 -27
- ads/llm/templates/score_chain.jinja2 +0 -155
- ads/llm/templates/tool_chat_template_hermes.jinja +0 -130
- ads/llm/templates/tool_chat_template_mistral_parallel.jinja +0 -94
- ads/model/__init__.py +0 -52
- ads/model/artifact.py +0 -573
- ads/model/artifact_downloader.py +0 -254
- ads/model/artifact_uploader.py +0 -267
- ads/model/base_properties.py +0 -238
- ads/model/common/.model-ignore +0 -66
- ads/model/common/__init__.py +0 -5
- ads/model/common/utils.py +0 -142
- ads/model/datascience_model.py +0 -2635
- ads/model/deployment/__init__.py +0 -20
- ads/model/deployment/common/__init__.py +0 -5
- ads/model/deployment/common/utils.py +0 -308
- ads/model/deployment/model_deployer.py +0 -466
- ads/model/deployment/model_deployment.py +0 -1846
- ads/model/deployment/model_deployment_infrastructure.py +0 -671
- ads/model/deployment/model_deployment_properties.py +0 -493
- ads/model/deployment/model_deployment_runtime.py +0 -838
- ads/model/extractor/__init__.py +0 -5
- ads/model/extractor/automl_extractor.py +0 -74
- ads/model/extractor/embedding_onnx_extractor.py +0 -80
- ads/model/extractor/huggingface_extractor.py +0 -88
- ads/model/extractor/keras_extractor.py +0 -84
- ads/model/extractor/lightgbm_extractor.py +0 -93
- ads/model/extractor/model_info_extractor.py +0 -114
- ads/model/extractor/model_info_extractor_factory.py +0 -105
- ads/model/extractor/pytorch_extractor.py +0 -87
- ads/model/extractor/sklearn_extractor.py +0 -112
- ads/model/extractor/spark_extractor.py +0 -89
- ads/model/extractor/tensorflow_extractor.py +0 -85
- ads/model/extractor/xgboost_extractor.py +0 -94
- ads/model/framework/__init__.py +0 -5
- ads/model/framework/automl_model.py +0 -178
- ads/model/framework/embedding_onnx_model.py +0 -438
- ads/model/framework/huggingface_model.py +0 -399
- ads/model/framework/lightgbm_model.py +0 -266
- ads/model/framework/pytorch_model.py +0 -266
- ads/model/framework/sklearn_model.py +0 -250
- ads/model/framework/spark_model.py +0 -326
- ads/model/framework/tensorflow_model.py +0 -254
- ads/model/framework/xgboost_model.py +0 -258
- ads/model/generic_model.py +0 -3518
- ads/model/model_artifact_boilerplate/README.md +0 -381
- ads/model/model_artifact_boilerplate/__init__.py +0 -5
- ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +0 -5
- ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +0 -427
- ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +0 -2
- ads/model/model_artifact_boilerplate/runtime.yaml +0 -7
- ads/model/model_artifact_boilerplate/score.py +0 -61
- ads/model/model_file_description_schema.json +0 -68
- ads/model/model_introspect.py +0 -331
- ads/model/model_metadata.py +0 -1810
- ads/model/model_metadata_mixin.py +0 -460
- ads/model/model_properties.py +0 -63
- ads/model/model_version_set.py +0 -739
- ads/model/runtime/__init__.py +0 -5
- ads/model/runtime/env_info.py +0 -306
- ads/model/runtime/model_deployment_details.py +0 -37
- ads/model/runtime/model_provenance_details.py +0 -58
- ads/model/runtime/runtime_info.py +0 -81
- ads/model/runtime/schemas/inference_env_info_schema.yaml +0 -16
- ads/model/runtime/schemas/model_provenance_schema.yaml +0 -36
- ads/model/runtime/schemas/training_env_info_schema.yaml +0 -16
- ads/model/runtime/utils.py +0 -201
- ads/model/serde/__init__.py +0 -5
- ads/model/serde/common.py +0 -40
- ads/model/serde/model_input.py +0 -547
- ads/model/serde/model_serializer.py +0 -1184
- ads/model/service/__init__.py +0 -5
- ads/model/service/oci_datascience_model.py +0 -1076
- ads/model/service/oci_datascience_model_deployment.py +0 -500
- ads/model/service/oci_datascience_model_version_set.py +0 -176
- ads/model/transformer/__init__.py +0 -5
- ads/model/transformer/onnx_transformer.py +0 -324
- ads/mysqldb/__init__.py +0 -5
- ads/mysqldb/mysql_db.py +0 -227
- ads/opctl/__init__.py +0 -18
- ads/opctl/anomaly_detection.py +0 -11
- ads/opctl/backend/__init__.py +0 -5
- ads/opctl/backend/ads_dataflow.py +0 -353
- ads/opctl/backend/ads_ml_job.py +0 -710
- ads/opctl/backend/ads_ml_pipeline.py +0 -164
- ads/opctl/backend/ads_model_deployment.py +0 -209
- ads/opctl/backend/base.py +0 -146
- ads/opctl/backend/local.py +0 -1053
- ads/opctl/backend/marketplace/__init__.py +0 -9
- ads/opctl/backend/marketplace/helm_helper.py +0 -173
- ads/opctl/backend/marketplace/local_marketplace.py +0 -271
- ads/opctl/backend/marketplace/marketplace_backend_runner.py +0 -71
- ads/opctl/backend/marketplace/marketplace_operator_interface.py +0 -44
- ads/opctl/backend/marketplace/marketplace_operator_runner.py +0 -24
- ads/opctl/backend/marketplace/marketplace_utils.py +0 -212
- ads/opctl/backend/marketplace/models/__init__.py +0 -5
- ads/opctl/backend/marketplace/models/bearer_token.py +0 -94
- ads/opctl/backend/marketplace/models/marketplace_type.py +0 -70
- ads/opctl/backend/marketplace/models/ocir_details.py +0 -56
- ads/opctl/backend/marketplace/prerequisite_checker.py +0 -238
- ads/opctl/cli.py +0 -707
- ads/opctl/cmds.py +0 -869
- ads/opctl/conda/__init__.py +0 -5
- ads/opctl/conda/cli.py +0 -193
- ads/opctl/conda/cmds.py +0 -749
- ads/opctl/conda/config.yaml +0 -34
- ads/opctl/conda/manifest_template.yaml +0 -13
- ads/opctl/conda/multipart_uploader.py +0 -188
- ads/opctl/conda/pack.py +0 -89
- ads/opctl/config/__init__.py +0 -5
- ads/opctl/config/base.py +0 -57
- ads/opctl/config/diagnostics/__init__.py +0 -5
- ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +0 -62
- ads/opctl/config/merger.py +0 -255
- ads/opctl/config/resolver.py +0 -297
- ads/opctl/config/utils.py +0 -79
- ads/opctl/config/validator.py +0 -17
- ads/opctl/config/versioner.py +0 -68
- ads/opctl/config/yaml_parsers/__init__.py +0 -7
- ads/opctl/config/yaml_parsers/base.py +0 -58
- ads/opctl/config/yaml_parsers/distributed/__init__.py +0 -7
- ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +0 -201
- ads/opctl/constants.py +0 -66
- ads/opctl/decorator/__init__.py +0 -5
- ads/opctl/decorator/common.py +0 -129
- ads/opctl/diagnostics/__init__.py +0 -5
- ads/opctl/diagnostics/__main__.py +0 -25
- ads/opctl/diagnostics/check_distributed_job_requirements.py +0 -212
- ads/opctl/diagnostics/check_requirements.py +0 -144
- ads/opctl/diagnostics/requirement_exception.py +0 -9
- ads/opctl/distributed/README.md +0 -109
- ads/opctl/distributed/__init__.py +0 -5
- ads/opctl/distributed/certificates.py +0 -32
- ads/opctl/distributed/cli.py +0 -207
- ads/opctl/distributed/cmds.py +0 -731
- ads/opctl/distributed/common/__init__.py +0 -5
- ads/opctl/distributed/common/abstract_cluster_provider.py +0 -449
- ads/opctl/distributed/common/abstract_framework_spec_builder.py +0 -88
- ads/opctl/distributed/common/cluster_config_helper.py +0 -103
- ads/opctl/distributed/common/cluster_provider_factory.py +0 -21
- ads/opctl/distributed/common/cluster_runner.py +0 -54
- ads/opctl/distributed/common/framework_factory.py +0 -29
- ads/opctl/docker/Dockerfile.job +0 -103
- ads/opctl/docker/Dockerfile.job.arm +0 -107
- ads/opctl/docker/Dockerfile.job.gpu +0 -175
- ads/opctl/docker/base-env.yaml +0 -13
- ads/opctl/docker/cuda.repo +0 -6
- ads/opctl/docker/operator/.dockerignore +0 -0
- ads/opctl/docker/operator/Dockerfile +0 -41
- ads/opctl/docker/operator/Dockerfile.gpu +0 -85
- ads/opctl/docker/operator/cuda.repo +0 -6
- ads/opctl/docker/operator/environment.yaml +0 -8
- ads/opctl/forecast.py +0 -11
- ads/opctl/index.yaml +0 -3
- ads/opctl/model/__init__.py +0 -5
- ads/opctl/model/cli.py +0 -65
- ads/opctl/model/cmds.py +0 -73
- ads/opctl/operator/README.md +0 -4
- ads/opctl/operator/__init__.py +0 -31
- ads/opctl/operator/cli.py +0 -344
- ads/opctl/operator/cmd.py +0 -596
- ads/opctl/operator/common/__init__.py +0 -5
- ads/opctl/operator/common/backend_factory.py +0 -460
- ads/opctl/operator/common/const.py +0 -27
- ads/opctl/operator/common/data/synthetic.csv +0 -16001
- ads/opctl/operator/common/dictionary_merger.py +0 -148
- ads/opctl/operator/common/errors.py +0 -42
- ads/opctl/operator/common/operator_config.py +0 -99
- ads/opctl/operator/common/operator_loader.py +0 -811
- ads/opctl/operator/common/operator_schema.yaml +0 -130
- ads/opctl/operator/common/operator_yaml_generator.py +0 -152
- ads/opctl/operator/common/utils.py +0 -208
- ads/opctl/operator/lowcode/__init__.py +0 -5
- ads/opctl/operator/lowcode/anomaly/MLoperator +0 -16
- ads/opctl/operator/lowcode/anomaly/README.md +0 -207
- ads/opctl/operator/lowcode/anomaly/__init__.py +0 -5
- ads/opctl/operator/lowcode/anomaly/__main__.py +0 -103
- ads/opctl/operator/lowcode/anomaly/cmd.py +0 -35
- ads/opctl/operator/lowcode/anomaly/const.py +0 -167
- ads/opctl/operator/lowcode/anomaly/environment.yaml +0 -10
- ads/opctl/operator/lowcode/anomaly/model/__init__.py +0 -5
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +0 -146
- ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +0 -162
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +0 -99
- ads/opctl/operator/lowcode/anomaly/model/autots.py +0 -115
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +0 -404
- ads/opctl/operator/lowcode/anomaly/model/factory.py +0 -110
- ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +0 -78
- ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +0 -78
- ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +0 -120
- ads/opctl/operator/lowcode/anomaly/model/tods.py +0 -119
- ads/opctl/operator/lowcode/anomaly/operator_config.py +0 -127
- ads/opctl/operator/lowcode/anomaly/schema.yaml +0 -401
- ads/opctl/operator/lowcode/anomaly/utils.py +0 -88
- ads/opctl/operator/lowcode/common/__init__.py +0 -5
- ads/opctl/operator/lowcode/common/const.py +0 -10
- ads/opctl/operator/lowcode/common/data.py +0 -116
- ads/opctl/operator/lowcode/common/errors.py +0 -47
- ads/opctl/operator/lowcode/common/transformations.py +0 -296
- ads/opctl/operator/lowcode/common/utils.py +0 -293
- ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +0 -13
- ads/opctl/operator/lowcode/feature_store_marketplace/README.md +0 -30
- ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +0 -5
- ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +0 -116
- ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +0 -85
- ads/opctl/operator/lowcode/feature_store_marketplace/const.py +0 -15
- ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +0 -4
- ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +0 -32
- ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +0 -43
- ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +0 -120
- ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +0 -34
- ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +0 -386
- ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +0 -160
- ads/opctl/operator/lowcode/forecast/MLoperator +0 -25
- ads/opctl/operator/lowcode/forecast/README.md +0 -209
- ads/opctl/operator/lowcode/forecast/__init__.py +0 -5
- ads/opctl/operator/lowcode/forecast/__main__.py +0 -89
- ads/opctl/operator/lowcode/forecast/cmd.py +0 -40
- ads/opctl/operator/lowcode/forecast/const.py +0 -92
- ads/opctl/operator/lowcode/forecast/environment.yaml +0 -20
- ads/opctl/operator/lowcode/forecast/errors.py +0 -26
- ads/opctl/operator/lowcode/forecast/model/__init__.py +0 -5
- ads/opctl/operator/lowcode/forecast/model/arima.py +0 -279
- ads/opctl/operator/lowcode/forecast/model/automlx.py +0 -542
- ads/opctl/operator/lowcode/forecast/model/autots.py +0 -312
- ads/opctl/operator/lowcode/forecast/model/base_model.py +0 -863
- ads/opctl/operator/lowcode/forecast/model/factory.py +0 -106
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +0 -492
- ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +0 -243
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +0 -486
- ads/opctl/operator/lowcode/forecast/model/prophet.py +0 -445
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +0 -244
- ads/opctl/operator/lowcode/forecast/operator_config.py +0 -234
- ads/opctl/operator/lowcode/forecast/schema.yaml +0 -506
- ads/opctl/operator/lowcode/forecast/utils.py +0 -413
- ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +0 -7
- ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +0 -285
- ads/opctl/operator/lowcode/forecast/whatifserve/score.py +0 -246
- ads/opctl/operator/lowcode/pii/MLoperator +0 -17
- ads/opctl/operator/lowcode/pii/README.md +0 -208
- ads/opctl/operator/lowcode/pii/__init__.py +0 -5
- ads/opctl/operator/lowcode/pii/__main__.py +0 -78
- ads/opctl/operator/lowcode/pii/cmd.py +0 -39
- ads/opctl/operator/lowcode/pii/constant.py +0 -84
- ads/opctl/operator/lowcode/pii/environment.yaml +0 -17
- ads/opctl/operator/lowcode/pii/errors.py +0 -27
- ads/opctl/operator/lowcode/pii/model/__init__.py +0 -5
- ads/opctl/operator/lowcode/pii/model/factory.py +0 -82
- ads/opctl/operator/lowcode/pii/model/guardrails.py +0 -167
- ads/opctl/operator/lowcode/pii/model/pii.py +0 -145
- ads/opctl/operator/lowcode/pii/model/processor/__init__.py +0 -34
- ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +0 -34
- ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +0 -35
- ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +0 -225
- ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +0 -73
- ads/opctl/operator/lowcode/pii/model/processor/remover.py +0 -26
- ads/opctl/operator/lowcode/pii/model/report.py +0 -487
- ads/opctl/operator/lowcode/pii/operator_config.py +0 -95
- ads/opctl/operator/lowcode/pii/schema.yaml +0 -108
- ads/opctl/operator/lowcode/pii/utils.py +0 -43
- ads/opctl/operator/lowcode/recommender/MLoperator +0 -16
- ads/opctl/operator/lowcode/recommender/README.md +0 -206
- ads/opctl/operator/lowcode/recommender/__init__.py +0 -5
- ads/opctl/operator/lowcode/recommender/__main__.py +0 -82
- ads/opctl/operator/lowcode/recommender/cmd.py +0 -33
- ads/opctl/operator/lowcode/recommender/constant.py +0 -30
- ads/opctl/operator/lowcode/recommender/environment.yaml +0 -11
- ads/opctl/operator/lowcode/recommender/model/base_model.py +0 -212
- ads/opctl/operator/lowcode/recommender/model/factory.py +0 -56
- ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +0 -25
- ads/opctl/operator/lowcode/recommender/model/svd.py +0 -106
- ads/opctl/operator/lowcode/recommender/operator_config.py +0 -81
- ads/opctl/operator/lowcode/recommender/schema.yaml +0 -265
- ads/opctl/operator/lowcode/recommender/utils.py +0 -13
- ads/opctl/operator/runtime/__init__.py +0 -5
- ads/opctl/operator/runtime/const.py +0 -17
- ads/opctl/operator/runtime/container_runtime_schema.yaml +0 -50
- ads/opctl/operator/runtime/marketplace_runtime.py +0 -50
- ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +0 -21
- ads/opctl/operator/runtime/python_runtime_schema.yaml +0 -21
- ads/opctl/operator/runtime/runtime.py +0 -115
- ads/opctl/schema.yaml.yml +0 -36
- ads/opctl/script.py +0 -40
- ads/opctl/spark/__init__.py +0 -5
- ads/opctl/spark/cli.py +0 -43
- ads/opctl/spark/cmds.py +0 -147
- ads/opctl/templates/diagnostic_report_template.jinja2 +0 -102
- ads/opctl/utils.py +0 -344
- ads/oracledb/__init__.py +0 -5
- ads/oracledb/oracle_db.py +0 -346
- ads/pipeline/__init__.py +0 -39
- ads/pipeline/ads_pipeline.py +0 -2279
- ads/pipeline/ads_pipeline_run.py +0 -772
- ads/pipeline/ads_pipeline_step.py +0 -605
- ads/pipeline/builders/__init__.py +0 -5
- ads/pipeline/builders/infrastructure/__init__.py +0 -5
- ads/pipeline/builders/infrastructure/custom_script.py +0 -32
- ads/pipeline/cli.py +0 -119
- ads/pipeline/extension.py +0 -291
- ads/pipeline/schema/__init__.py +0 -5
- ads/pipeline/schema/cs_step_schema.json +0 -35
- ads/pipeline/schema/ml_step_schema.json +0 -31
- ads/pipeline/schema/pipeline_schema.json +0 -71
- ads/pipeline/visualizer/__init__.py +0 -5
- ads/pipeline/visualizer/base.py +0 -570
- ads/pipeline/visualizer/graph_renderer.py +0 -272
- ads/pipeline/visualizer/text_renderer.py +0 -84
- ads/secrets/__init__.py +0 -11
- ads/secrets/adb.py +0 -386
- ads/secrets/auth_token.py +0 -86
- ads/secrets/big_data_service.py +0 -365
- ads/secrets/mysqldb.py +0 -149
- ads/secrets/oracledb.py +0 -160
- ads/secrets/secrets.py +0 -407
- ads/telemetry/__init__.py +0 -7
- ads/telemetry/base.py +0 -69
- ads/telemetry/client.py +0 -125
- ads/telemetry/telemetry.py +0 -257
- ads/templates/dataflow_pyspark.jinja2 +0 -13
- ads/templates/dataflow_sparksql.jinja2 +0 -22
- ads/templates/func.jinja2 +0 -20
- ads/templates/schemas/openapi.json +0 -1740
- ads/templates/score-pkl.jinja2 +0 -173
- ads/templates/score.jinja2 +0 -322
- ads/templates/score_embedding_onnx.jinja2 +0 -202
- ads/templates/score_generic.jinja2 +0 -165
- ads/templates/score_huggingface_pipeline.jinja2 +0 -217
- ads/templates/score_lightgbm.jinja2 +0 -185
- ads/templates/score_onnx.jinja2 +0 -407
- ads/templates/score_onnx_new.jinja2 +0 -473
- ads/templates/score_oracle_automl.jinja2 +0 -185
- ads/templates/score_pyspark.jinja2 +0 -154
- ads/templates/score_pytorch.jinja2 +0 -219
- ads/templates/score_scikit-learn.jinja2 +0 -184
- ads/templates/score_tensorflow.jinja2 +0 -184
- ads/templates/score_xgboost.jinja2 +0 -178
- ads/text_dataset/__init__.py +0 -5
- ads/text_dataset/backends.py +0 -211
- ads/text_dataset/dataset.py +0 -445
- ads/text_dataset/extractor.py +0 -207
- ads/text_dataset/options.py +0 -53
- ads/text_dataset/udfs.py +0 -22
- ads/text_dataset/utils.py +0 -49
- ads/type_discovery/__init__.py +0 -9
- ads/type_discovery/abstract_detector.py +0 -21
- ads/type_discovery/constant_detector.py +0 -41
- ads/type_discovery/continuous_detector.py +0 -54
- ads/type_discovery/credit_card_detector.py +0 -99
- ads/type_discovery/datetime_detector.py +0 -92
- ads/type_discovery/discrete_detector.py +0 -118
- ads/type_discovery/document_detector.py +0 -146
- ads/type_discovery/ip_detector.py +0 -68
- ads/type_discovery/latlon_detector.py +0 -90
- ads/type_discovery/phone_number_detector.py +0 -63
- ads/type_discovery/type_discovery_driver.py +0 -87
- ads/type_discovery/typed_feature.py +0 -594
- ads/type_discovery/unknown_detector.py +0 -41
- ads/type_discovery/zipcode_detector.py +0 -48
- ads/vault/__init__.py +0 -7
- ads/vault/vault.py +0 -237
- oracle_ads-2.13.7.dist-info/RECORD +0 -858
- {oracle_ads-2.13.7.dist-info → oracle_ads-2.13.9rc0.dist-info}/licenses/LICENSE.txt +0 -0
ads/dataset/factory.py
DELETED
@@ -1,987 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8; -*-
|
3
|
-
|
4
|
-
# Copyright (c) 2020, 2024 Oracle and/or its affiliates.
|
5
|
-
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
-
|
7
|
-
from __future__ import print_function, absolute_import
|
8
|
-
|
9
|
-
import os
|
10
|
-
import re
|
11
|
-
import warnings
|
12
|
-
import oci
|
13
|
-
import datetime
|
14
|
-
import pandas as pd
|
15
|
-
from fsspec.utils import infer_storage_options
|
16
|
-
import inspect
|
17
|
-
import fsspec
|
18
|
-
|
19
|
-
from ads.common import utils
|
20
|
-
from ads.common.utils import is_same_class
|
21
|
-
from ads.dataset import logger
|
22
|
-
from ads.dataset.classification_dataset import (
|
23
|
-
BinaryClassificationDataset,
|
24
|
-
MultiClassClassificationDataset,
|
25
|
-
BinaryTextClassificationDataset,
|
26
|
-
MultiClassTextClassificationDataset,
|
27
|
-
)
|
28
|
-
from ads.dataset.dataset import ADSDataset
|
29
|
-
from ads.dataset.forecasting_dataset import ForecastingDataset
|
30
|
-
from ads.dataset.helper import (
|
31
|
-
get_feature_type,
|
32
|
-
is_text_data,
|
33
|
-
generate_sample,
|
34
|
-
DatasetDefaults,
|
35
|
-
ElaboratedPath,
|
36
|
-
DatasetLoadException,
|
37
|
-
)
|
38
|
-
from ads.dataset.regression_dataset import RegressionDataset
|
39
|
-
from ads.type_discovery.type_discovery_driver import TypeDiscoveryDriver
|
40
|
-
from ads.type_discovery.typed_feature import (
|
41
|
-
ContinuousTypedFeature,
|
42
|
-
DateTimeTypedFeature,
|
43
|
-
CategoricalTypedFeature,
|
44
|
-
OrdinalTypedFeature,
|
45
|
-
GISTypedFeature,
|
46
|
-
DocumentTypedFeature,
|
47
|
-
)
|
48
|
-
from ads.type_discovery.typed_feature import TypedFeature
|
49
|
-
from typing import Callable, Tuple
|
50
|
-
from ocifs import OCIFileSystem
|
51
|
-
from ads.common.decorator.runtime_dependency import (
|
52
|
-
runtime_dependency,
|
53
|
-
OptionalDependency,
|
54
|
-
)
|
55
|
-
from ads.common.decorator.deprecate import deprecated
|
56
|
-
|
57
|
-
default_snapshots_dir = None
|
58
|
-
default_storage_options = None
|
59
|
-
mindate = datetime.date(datetime.MINYEAR, 1, 1)
|
60
|
-
|
61
|
-
|
62
|
-
warnings.warn(
|
63
|
-
(
|
64
|
-
"The `ads.dataset.factory` is deprecated in `oracle-ads 2.8.8` and will be removed in `oracle-ads 3.0`."
|
65
|
-
"Use Pandas to read from local files or object storage directly. "
|
66
|
-
"Check https://accelerated-data-science.readthedocs.io/en/latest/user_guide/loading_data/connect.html."
|
67
|
-
),
|
68
|
-
DeprecationWarning,
|
69
|
-
stacklevel=2,
|
70
|
-
)
|
71
|
-
|
72
|
-
|
73
|
-
class DatasetFactory:
|
74
|
-
@staticmethod
|
75
|
-
@deprecated(
|
76
|
-
"2.6.6",
|
77
|
-
details="Deprecated in favor of using Pandas. Pandas supports reading from object storage directly. Check https://accelerated-data-science.readthedocs.io/en/latest/user_guide/loading_data/connect.html",
|
78
|
-
)
|
79
|
-
def open(
|
80
|
-
source,
|
81
|
-
target=None,
|
82
|
-
format="infer",
|
83
|
-
reader_fn: Callable = None,
|
84
|
-
name: str = None,
|
85
|
-
description="",
|
86
|
-
npartitions: int = None,
|
87
|
-
type_discovery=True,
|
88
|
-
html_table_index=None,
|
89
|
-
column_names="infer",
|
90
|
-
sample_max_rows=10000,
|
91
|
-
positive_class=None,
|
92
|
-
transformer_pipeline=None,
|
93
|
-
types={},
|
94
|
-
**kwargs,
|
95
|
-
):
|
96
|
-
"""
|
97
|
-
Returns an object of ADSDataset or ADSDatasetWithTarget read from the given path
|
98
|
-
|
99
|
-
.. deprecated:: 2.6.6
|
100
|
-
"Deprecated in favor of using Pandas. Pandas supports reading from object storage directly.
|
101
|
-
Check https://accelerated-data-science.readthedocs.io/en/latest/user_guide/loading_data/connect.html",
|
102
|
-
|
103
|
-
Parameters
|
104
|
-
----------
|
105
|
-
source: Union[str, pandas.DataFrame, h2o.DataFrame, pyspark.sql.dataframe.DataFrame]
|
106
|
-
If str, URI for the dataset. The dataset could be read from local or network file system, hdfs, s3, gcs and optionally pyspark in pyspark
|
107
|
-
conda env
|
108
|
-
target: str, optional
|
109
|
-
Name of the target in dataset.
|
110
|
-
If set an ADSDatasetWithTarget object is returned, otherwise an ADSDataset object is returned which can be
|
111
|
-
used to understand the dataset through visualizations
|
112
|
-
format: str, default: infer
|
113
|
-
Format of the dataset.
|
114
|
-
Supported formats: CSV, TSV, Parquet, libsvm, JSON, XLS/XLSX (Excel), HDF5, SQL, XML,
|
115
|
-
Apache server log files (clf, log), ARFF.
|
116
|
-
By default, the format would be inferred from the ending of the dataset file path.
|
117
|
-
reader_fn: Callable, default: None
|
118
|
-
The user may pass in their own custom reader function.
|
119
|
-
It must accept `(path, **kwarg)` and return a pandas DataFrame
|
120
|
-
name: str, optional default: ""
|
121
|
-
description: str, optional default: ""
|
122
|
-
Text describing the dataset
|
123
|
-
npartitions: int, deprecated
|
124
|
-
Number of partitions to split the data
|
125
|
-
By default this is set to the max number of cores supported by the backend compute accelerator
|
126
|
-
type_discovery: bool, default: True
|
127
|
-
If false, the data types of the dataframe are used as such.
|
128
|
-
By default, the dataframe columns are associated with the best suited data types. Associating the features
|
129
|
-
with the disovered datatypes would impact visualizations and model prediction.
|
130
|
-
html_table_index: int, optional
|
131
|
-
The index of the dataframe table in html content. This is used when the format of dataset is html
|
132
|
-
column_names: 'infer', list of str or None, default: 'infer'
|
133
|
-
Supported only for CSV and TSV.
|
134
|
-
List of column names to use.
|
135
|
-
By default, column names are inferred from the first line of the file.
|
136
|
-
If set to None, column names would be auto-generated instead of inferring from file.
|
137
|
-
If the file already contains a column header, specify header=0 to ignore the existing column names.
|
138
|
-
sample_max_rows: int, default: 10000, use -1 auto calculate sample size, use 0 (zero) for no sampling
|
139
|
-
Sample size of the dataframe to use for visualization and optimization.
|
140
|
-
positive_class: Any, optional
|
141
|
-
Label in target for binary classification problems which should be identified as positive for modeling.
|
142
|
-
By default, the first unique value is considered as the positive label.
|
143
|
-
types: dict, optional
|
144
|
-
Dictionary of <feature_name> : <data_type> to override the data type of features.
|
145
|
-
transformer_pipeline: datasets.pipeline.TransformerPipeline, optional
|
146
|
-
A pipeline of transformations done outside the sdk and need to be applied at the time of scoring
|
147
|
-
storage_options: dict, default: varies by source type
|
148
|
-
Parameters passed on to the backend filesystem class.
|
149
|
-
sep: str
|
150
|
-
Delimiting character for parsing the input file.
|
151
|
-
kwargs: additional keyword arguments that would be passed to underlying dataframe read API
|
152
|
-
based on the format of the dataset
|
153
|
-
|
154
|
-
Returns
|
155
|
-
-------
|
156
|
-
dataset : An instance of ADSDataset
|
157
|
-
(or)
|
158
|
-
dataset_with_target : An instance of ADSDatasetWithTarget
|
159
|
-
|
160
|
-
Examples
|
161
|
-
--------
|
162
|
-
>>> ds = DatasetFactory.open("/path/to/data.data", format='csv', delimiter=" ",
|
163
|
-
... na_values="n/a", skipinitialspace=True)
|
164
|
-
|
165
|
-
>>> ds = DatasetFactory.open("/path/to/data.csv", target="col_1", prefix="col_",
|
166
|
-
... skiprows=1, encoding="ISO-8859-1")
|
167
|
-
|
168
|
-
>>> ds = DatasetFactory.open("oci://bucket@namespace/path/to/data.tsv",
|
169
|
-
... column_names=["col1", "col2", "col3"], header=0)
|
170
|
-
|
171
|
-
>>> ds = DatasetFactory.open("oci://bucket@namespace/path/to/data.csv",
|
172
|
-
... storage_options={"config": "~/.oci/config",
|
173
|
-
... "profile": "USER_2"}, delimiter = ';')
|
174
|
-
|
175
|
-
>>> ds = DatasetFactory.open("/path/to/data.parquet", engine='pyarrow',
|
176
|
-
... types={"col1": "ordinal",
|
177
|
-
... "col2": "categorical",
|
178
|
-
... "col3" : "continuous",
|
179
|
-
... "col4" : "float64"})
|
180
|
-
|
181
|
-
>>> ds = DatasetFactory.open(df, target="class", sample_max_rows=5000,
|
182
|
-
... positive_class="yes")
|
183
|
-
|
184
|
-
>>> ds = DatasetFactory.open("s3://path/to/data.json.gz", format="json",
|
185
|
-
... compression="gzip", orient="records")
|
186
|
-
"""
|
187
|
-
if npartitions:
|
188
|
-
warnings.warn(
|
189
|
-
"Variable `npartitions` is deprecated and will not be used",
|
190
|
-
DeprecationWarning,
|
191
|
-
stacklevel=2,
|
192
|
-
)
|
193
|
-
if (
|
194
|
-
"storage_options" not in kwargs
|
195
|
-
and type(source) is str
|
196
|
-
and len(source) > 6
|
197
|
-
and source[:6] == "oci://"
|
198
|
-
):
|
199
|
-
kwargs["storage_options"] = {"config": {}}
|
200
|
-
|
201
|
-
if isinstance(source, str) or isinstance(source, list):
|
202
|
-
progress = utils.get_progress_bar(4)
|
203
|
-
progress.update("Opening data")
|
204
|
-
path = ElaboratedPath(source, format=format, **kwargs)
|
205
|
-
reader_fn = (
|
206
|
-
get_format_reader(path=path, **kwargs)
|
207
|
-
if reader_fn is None
|
208
|
-
else reader_fn
|
209
|
-
)
|
210
|
-
df = load_dataset(path=path, reader_fn=reader_fn, **kwargs)
|
211
|
-
name = path.name
|
212
|
-
elif isinstance(source, pd.DataFrame):
|
213
|
-
progress = utils.get_progress_bar(4)
|
214
|
-
progress.update("Partitioning data")
|
215
|
-
df = source
|
216
|
-
name = "User Provided DataFrame" if name is None else name
|
217
|
-
else:
|
218
|
-
raise TypeError(
|
219
|
-
f"The Source type: {type(source)} is not supported for DatasetFactory."
|
220
|
-
)
|
221
|
-
shape = df.shape
|
222
|
-
return DatasetFactory._build_dataset(
|
223
|
-
df=df,
|
224
|
-
shape=shape,
|
225
|
-
target=target,
|
226
|
-
sample_max_rows=sample_max_rows,
|
227
|
-
type_discovery=type_discovery,
|
228
|
-
types=types,
|
229
|
-
positive_class=positive_class,
|
230
|
-
name=name,
|
231
|
-
transformer_pipeline=transformer_pipeline,
|
232
|
-
description=description,
|
233
|
-
progress=progress,
|
234
|
-
**utils.inject_and_copy_kwargs(
|
235
|
-
kwargs,
|
236
|
-
**{"html_table_index": html_table_index, "column_names": column_names},
|
237
|
-
),
|
238
|
-
)
|
239
|
-
|
240
|
-
@staticmethod
|
241
|
-
def open_to_pandas(
|
242
|
-
source: str, format: str = None, reader_fn: Callable = None, **kwargs
|
243
|
-
) -> pd.DataFrame:
|
244
|
-
path = ElaboratedPath(source, format=format, **kwargs)
|
245
|
-
reader_fn = (
|
246
|
-
get_format_reader(path=path, **kwargs) if reader_fn is None else reader_fn
|
247
|
-
)
|
248
|
-
df = load_dataset(path=path, reader_fn=reader_fn, **kwargs)
|
249
|
-
return df
|
250
|
-
|
251
|
-
@staticmethod
|
252
|
-
def from_dataframe(df, target: str = None, **kwargs):
|
253
|
-
"""
|
254
|
-
Returns an object of ADSDatasetWithTarget or ADSDataset given a pandas.DataFrame
|
255
|
-
|
256
|
-
Parameters
|
257
|
-
----------
|
258
|
-
df: pandas.DataFrame
|
259
|
-
target: str
|
260
|
-
kwargs: dict
|
261
|
-
See DatasetFactory.open() for supported kwargs
|
262
|
-
|
263
|
-
Returns
|
264
|
-
-------
|
265
|
-
dataset: an object of ADSDataset target is not specified, otherwise an object of ADSDatasetWithTarget tagged
|
266
|
-
according to the type of target
|
267
|
-
|
268
|
-
Examples
|
269
|
-
--------
|
270
|
-
>>> df = pd.DataFrame(data)
|
271
|
-
>>> ds = from_dataframe(df)
|
272
|
-
"""
|
273
|
-
return DatasetFactory.open(df, target=target, **kwargs)
|
274
|
-
|
275
|
-
@staticmethod
|
276
|
-
@runtime_dependency(module="IPython", install_from=OptionalDependency.NOTEBOOK)
|
277
|
-
@runtime_dependency(
|
278
|
-
module="ipywidgets",
|
279
|
-
object="HTML",
|
280
|
-
is_for_notebook_only=True,
|
281
|
-
install_from=OptionalDependency.NOTEBOOK,
|
282
|
-
)
|
283
|
-
def list_snapshots(snapshot_dir=None, name="", storage_options=None, **kwargs):
|
284
|
-
"""
|
285
|
-
Displays the URIs for dataset snapshots under the given directory path.
|
286
|
-
|
287
|
-
Parameters
|
288
|
-
----------
|
289
|
-
snapshot_dir: str
|
290
|
-
Return all dataset snapshots created using ADSDataset.snapshot() within this directory.
|
291
|
-
The path can contain protocols such as oci, s3.
|
292
|
-
name: str, optional
|
293
|
-
The list of snapshots in the directory gets filtered by the name. Accepts glob expressions.
|
294
|
-
default = `"ads_"`
|
295
|
-
storage_options: dict
|
296
|
-
Parameters passed on to the backend filesystem class.
|
297
|
-
|
298
|
-
Example
|
299
|
-
--------
|
300
|
-
>>> DatasetFactory.list_snapshots(snapshot_dir="oci://my_bucket/snapshots_dir",
|
301
|
-
... name="ads_iris_")
|
302
|
-
|
303
|
-
Returns a list of all snapshots (recursively) saved to obj storage bucket `"my_bucket"` with prefix
|
304
|
-
`"/snapshots_dir/ads_iris_**"` sorted by time created.
|
305
|
-
"""
|
306
|
-
if snapshot_dir is None:
|
307
|
-
snapshot_dir = default_snapshots_dir
|
308
|
-
if snapshot_dir is None:
|
309
|
-
raise ValueError(
|
310
|
-
"Specify snapshot_dir or use DatasetFactory.set_default_storage() to set default \
|
311
|
-
storage options"
|
312
|
-
)
|
313
|
-
else:
|
314
|
-
logger.info("Using default snapshots dir %s" % snapshot_dir)
|
315
|
-
if storage_options is None:
|
316
|
-
if default_storage_options is not None:
|
317
|
-
storage_options = default_storage_options
|
318
|
-
logger.info("Using default storage options")
|
319
|
-
else:
|
320
|
-
storage_options = dict()
|
321
|
-
assert isinstance(storage_options, dict), (
|
322
|
-
"The storage options parameter must be a dictionary. You can set "
|
323
|
-
"this gloabally by calling DatasetFactory.set_default_storage("
|
324
|
-
"storage_options={'config': 'location'}). "
|
325
|
-
)
|
326
|
-
url_options = infer_storage_options(snapshot_dir)
|
327
|
-
protocol = url_options.pop("protocol", None)
|
328
|
-
|
329
|
-
fs = OCIFileSystem(config=storage_options.get("config", None))
|
330
|
-
kwargs.update({"refresh": True})
|
331
|
-
obj_list = [
|
332
|
-
(k, v.get("timeCreated", mindate).strftime("%Y-%m-%d %H:%M:%S"))
|
333
|
-
for k, v in fs.glob(
|
334
|
-
os.path.join(snapshot_dir, name + "**"), detail=True, **kwargs
|
335
|
-
).items()
|
336
|
-
if v["type"] == "file"
|
337
|
-
]
|
338
|
-
|
339
|
-
files = []
|
340
|
-
for file, file_time in obj_list:
|
341
|
-
if protocol in ["oci"]:
|
342
|
-
r1 = re.compile(r"/part\.[0-9]{1,6}\.parquet$")
|
343
|
-
parquet_part = r1.search(file)
|
344
|
-
if parquet_part is not None:
|
345
|
-
parquet_filename = file[: parquet_part.start()]
|
346
|
-
elif file.endswith("/_common_metadata"):
|
347
|
-
parquet_filename = file[: -len("/_common_metadata")]
|
348
|
-
elif file.endswith("/_metadata"):
|
349
|
-
parquet_filename = file[: -len("/_metadata")]
|
350
|
-
else:
|
351
|
-
parquet_filename = file
|
352
|
-
else:
|
353
|
-
parquet_filename = file
|
354
|
-
parent_path = "%s://" % protocol
|
355
|
-
files.append((parent_path + parquet_filename, file_time))
|
356
|
-
files.sort(key=lambda x: x[1] or mindate, reverse=True)
|
357
|
-
list_df = pd.DataFrame(files, columns=["Name", "Created Time"])
|
358
|
-
list_df = list_df.drop_duplicates(subset=["Name"]).reset_index()
|
359
|
-
if len(list_df) == 0:
|
360
|
-
print(f"No snapshots found at: {os.path.join(snapshot_dir, name)}")
|
361
|
-
|
362
|
-
# display in HTML format if sdk is run in notebook mode
|
363
|
-
if utils.is_notebook():
|
364
|
-
from IPython.core.display import display
|
365
|
-
|
366
|
-
display(
|
367
|
-
HTML(
|
368
|
-
list_df.style.set_table_attributes("class=table")
|
369
|
-
.hide()
|
370
|
-
.to_html()
|
371
|
-
)
|
372
|
-
)
|
373
|
-
return list_df
|
374
|
-
|
375
|
-
@staticmethod
|
376
|
-
def download(remote_path, local_path, storage=None, overwrite=False):
|
377
|
-
"""
|
378
|
-
Download a remote file or directory to local storage.
|
379
|
-
|
380
|
-
Parameters
|
381
|
-
---------
|
382
|
-
remote_path: str
|
383
|
-
Supports protocols like oci, s3, also supports glob expressions
|
384
|
-
local_path: str
|
385
|
-
Supports glob expressions
|
386
|
-
storage: dict
|
387
|
-
Parameters passed on to the backend remote filesystem class.
|
388
|
-
overwrite: bool, default False
|
389
|
-
If True, the method will overwrite any existing files in the local_path
|
390
|
-
|
391
|
-
Examples
|
392
|
-
---------
|
393
|
-
>>> DatasetFactory.download("oci://Bucket/prefix/to/data/*.csv",
|
394
|
-
... "/home/datascience/data/")
|
395
|
-
"""
|
396
|
-
if storage is None:
|
397
|
-
if default_storage_options is not None:
|
398
|
-
storage = default_storage_options
|
399
|
-
logger.info("Using default storage options")
|
400
|
-
else:
|
401
|
-
storage = dict()
|
402
|
-
|
403
|
-
remote_files = fsspec.open_files(
|
404
|
-
remote_path, mode="rb", name_function=lambda i: "", **storage
|
405
|
-
)
|
406
|
-
if len(remote_files) < 1:
|
407
|
-
raise FileNotFoundError(remote_path)
|
408
|
-
display_error, error_msg = DatasetFactory._download_files(
|
409
|
-
remote_files=remote_files, local_path=local_path, overwrite=overwrite
|
410
|
-
)
|
411
|
-
if display_error:
|
412
|
-
logger.error(error_msg)
|
413
|
-
else:
|
414
|
-
logger.info(f"Download {remote_path} to {local_path}.")
|
415
|
-
|
416
|
-
@staticmethod
|
417
|
-
def _download_files(remote_files, local_path, overwrite=False):
|
418
|
-
display_error, error_msg = False, ""
|
419
|
-
for remote_file in remote_files:
|
420
|
-
bucket_idx = remote_file.path.find("/")
|
421
|
-
suffix = remote_file.path[bucket_idx + 1 :]
|
422
|
-
try:
|
423
|
-
with remote_file as f1:
|
424
|
-
local_filepath = (
|
425
|
-
os.path.join(local_path, suffix) if suffix else local_path
|
426
|
-
)
|
427
|
-
if os.path.exists(local_filepath) and not overwrite:
|
428
|
-
raise FileExistsError(
|
429
|
-
f"Trying to overwrite files in {local_filepath}. If you'd like to "
|
430
|
-
f"overwrite these files, set force_overwrite to True."
|
431
|
-
)
|
432
|
-
os.makedirs(os.path.dirname(local_filepath), exist_ok=True)
|
433
|
-
with open(local_filepath, "wb") as f2:
|
434
|
-
f2.write(f1.read())
|
435
|
-
except oci.exceptions.ServiceError as e:
|
436
|
-
raise FileNotFoundError(f"Unable to open file: {remote_file.path}")
|
437
|
-
return display_error, error_msg
|
438
|
-
|
439
|
-
@staticmethod
|
440
|
-
def upload(local_file_or_dir, remote_file_or_dir, storage_options=None):
|
441
|
-
"""
|
442
|
-
Upload local file or directory to remote storage
|
443
|
-
|
444
|
-
Parameters
|
445
|
-
---------
|
446
|
-
local_file_or_dir: str
|
447
|
-
Supports glob expressions
|
448
|
-
remote_file_or_dir: str
|
449
|
-
Supports protocols like oci, s3, also supports glob expressions
|
450
|
-
storage_options: dict
|
451
|
-
Parameters passed on to the backend remote filesystem class.
|
452
|
-
"""
|
453
|
-
if not os.path.exists(local_file_or_dir):
|
454
|
-
raise ValueError("File/Directory does not exist: %s" % local_file_or_dir)
|
455
|
-
if storage_options is None and default_storage_options is not None:
|
456
|
-
storage_options = default_storage_options
|
457
|
-
logger.info("Using default storage options")
|
458
|
-
|
459
|
-
if os.path.isdir(local_file_or_dir):
|
460
|
-
for subdir, dirs, files in os.walk(local_file_or_dir):
|
461
|
-
for file in files:
|
462
|
-
if os.path.abspath(subdir) == os.path.abspath(local_file_or_dir):
|
463
|
-
path = file
|
464
|
-
else:
|
465
|
-
path = os.path.join(
|
466
|
-
os.path.abspath(subdir).split("/", 2)[2], file
|
467
|
-
)
|
468
|
-
DatasetFactory._upload_file(
|
469
|
-
os.path.join(subdir, file),
|
470
|
-
os.path.join(remote_file_or_dir, path),
|
471
|
-
storage_options=storage_options,
|
472
|
-
)
|
473
|
-
else:
|
474
|
-
DatasetFactory._upload_file(
|
475
|
-
local_file_or_dir, remote_file_or_dir, storage_options=storage_options
|
476
|
-
)
|
477
|
-
|
478
|
-
@staticmethod
def set_default_storage(snapshots_dir=None, storage_options=None):
    """
    Configure the module-wide default snapshots directory and storage options.

    Both values can still be overridden per call at the API scope.

    Parameters
    ----------
    snapshots_dir: str
        Path for the snapshots directory. Can contain protocols such as oci, s3.
        The default is always replaced, including with ``None``.
    storage_options: dict, optional
        Parameters passed on to the backend filesystem class. The stored
        default is only replaced when a value is given.
    """
    global default_snapshots_dir, default_storage_options

    default_snapshots_dir = snapshots_dir
    if storage_options is None:
        # Nothing to update: keep whatever default options were set earlier.
        return

    assert isinstance(storage_options, dict), (
        f"The storage options parameter must be a dictionary. Instead "
        f"we got the type: {type(storage_options)} "
    )
    default_storage_options = storage_options
|
501
|
-
|
502
|
-
@classmethod
def _upload_file(cls, local_file, remote_file, storage_options=None):
    """
    Copy one local file to ``remote_file`` through fsspec and report it.

    Parameters
    ----------
    local_file: str
        Path of the local file, opened in binary mode.
    remote_file: str
        Destination path understood by fsspec.
    storage_options: dict, optional
        When given, forwarded to ``fsspec.open_files`` as the
        ``storage_options`` keyword.
    """
    import shutil

    kwargs = {} if storage_options is None else {"storage_options": storage_options}
    # The trailing "*" is substituted through name_function, which returns an
    # empty string, so the single resulting target is ``remote_file`` itself.
    remote_file_handler = fsspec.open_files(
        remote_file + "*", mode="wb", name_function=lambda i: "", **kwargs
    )[0]
    with remote_file_handler as f1, open(local_file, "rb") as f2:
        # Copy in fixed-size chunks. Iterating a binary file "by line" buffers
        # an arbitrarily large piece when the data holds no newline bytes.
        shutil.copyfileobj(f2, f1)
    print("Uploaded %s to %s" % (local_file, remote_file))
|
515
|
-
|
516
|
-
@classmethod
def _build_dataset(
    cls,
    df: pd.DataFrame,
    shape: Tuple[int, int],
    target: str = None,
    progress=None,
    **kwargs,
):
    """
    Sample ``df`` and wrap it in a dataset object.

    Without a target an ``ADSDataset`` is returned. With a target, its type is
    taken from ``kwargs["types"]`` when declared there, otherwise inferred, and
    the matching dataset is obtained from ``DatasetFactory._get_dataset``.

    ``kwargs`` is forwarded to the sampler and to the dataset constructors;
    when a target is given it must contain ``type_discovery`` and ``types``.
    ``progress``, when truthy, receives status messages through ``update``.
    """

    def report(message):
        # Progress reporting is optional.
        if progress:
            progress.update(message)

    report("Generating data sample")
    sampled_df = generate_sample(
        df,
        shape[0],
        DatasetDefaults.sampling_confidence_level,
        DatasetDefaults.sampling_confidence_interval,
        **kwargs,
    )

    if target is None:
        report("Building the dataset with no target.")
        untyped = ADSDataset(df=df, sampled_df=sampled_df, shape=shape, **kwargs)
        report("Done")
        logger.info(
            "Use `set_target()` to type the dataset for a particular learning task."
        )
        return untyped

    report("Building dataset")

    discover_target_type = kwargs["type_discovery"]
    declared_types = kwargs["types"]
    if target in declared_types:
        # An explicitly declared type wins over discovery.
        sampled_df[target] = sampled_df[target].astype(declared_types[target])
        discover_target_type = False

    # if type discovery is turned off, infer type from pandas dtype
    target_type = DatasetFactory.infer_target_type(
        target, sampled_df[target], discover_target_type
    )

    typed = DatasetFactory._get_dataset(
        df=df,
        sampled_df=sampled_df,
        target=target,
        target_type=target_type,
        shape=shape,
        **kwargs,
    )
    report("Done")
    logger.info(
        "Use `suggest_recommendations()` to view and apply recommendations for dataset optimization."
    )
    return typed
|
575
|
-
|
576
|
-
@classmethod
def infer_target_type(cls, target, target_series, discover_target_type=True):
    """
    Return the feature type of the target column.

    With ``discover_target_type`` truthy the type comes from
    ``TypeDiscoveryDriver().discover(..., is_target=True)``; otherwise it is
    taken from ``get_feature_type`` (i.e. inferred from the pandas dtype).
    """
    if not discover_target_type:
        return get_feature_type(target, target_series)
    return TypeDiscoveryDriver().discover(target, target_series, is_target=True)
|
586
|
-
|
587
|
-
@classmethod
def _get_dataset(
    cls,
    df: pd.DataFrame,
    sampled_df: pd.DataFrame,
    target: str,
    target_type: TypedFeature,
    shape: Tuple[int, int],
    positive_class=None,
    **init_kwargs,
):
    """
    Pick and build the dataset class that matches the target's feature type.

    Selection order: continuous -> regression; datetime target or datetime
    index -> forecasting; categorical/ordinal -> binary or multi-class
    (text variants when ``is_text_data`` says so); document/text and
    GIS/coordinate targets are rejected; a boolean low-level type falls back
    to binary classification.

    Raises
    ------
    ValueError
        If the target column has no non-null values, is a text/document or
        coordinate column, or its problem type cannot be identified.
    """
    if len(df[target].dropna()) == 0:
        logger.warning(
            "It is not recommended to use an empty column as the target variable."
        )
        raise ValueError(
            "We do not support using empty columns as the chosen target"
        )

    # Arguments shared by every dataset constructor below.
    common = dict(
        df=df,
        sampled_df=sampled_df,
        target=target,
        target_type=target_type,
        shape=shape,
        **init_kwargs,
    )

    if is_same_class(target_type, ContinuousTypedFeature):
        return RegressionDataset(**common)

    if is_same_class(
        target_type, DateTimeTypedFeature
    ) or df.index.dtype.name.startswith("datetime"):
        return ForecastingDataset(**common)

    # Adding ordinal typed feature, but ultimately we should rethink how we want to model this type
    if is_same_class(target_type, CategoricalTypedFeature) or is_same_class(
        target_type, OrdinalTypedFeature
    ):
        is_text = is_text_data(sampled_df, target)
        if target_type.meta_data["internal"]["unique"] == 2:
            binary_cls = (
                BinaryTextClassificationDataset
                if is_text
                else BinaryClassificationDataset
            )
            return binary_cls(positive_class=positive_class, **common)
        multi_cls = (
            MultiClassTextClassificationDataset
            if is_text
            else MultiClassClassificationDataset
        )
        return multi_cls(**common)

    # The class check must inspect the discovered type, not the column name
    # (the name was passed here before, unlike every other branch).
    if (
        is_same_class(target_type, DocumentTypedFeature)
        or "text" in target_type["type"]
        or "text" in target
    ):
        raise ValueError(
            f"The column {target} cannot be used as the target column."
        )

    if (
        is_same_class(target_type, GISTypedFeature)
        or "coord" in target_type["type"]
        or "coord" in target
    ):
        raise ValueError(
            f"The column {target} cannot be used as the target column."
        )

    # This is to catch constant columns that are boolean. Added as a fix for pd.isnull(), and datasets with a
    # binary target, but only data on one instance
    if target_type["low_level_type"] == "bool":
        return BinaryClassificationDataset(positive_class=positive_class, **common)

    raise ValueError(
        f"Unable to identify problem type. Specify the data type of {target} using 'types'. "
        f"For example, types = {{{target}: 'category'}}"
    )
|
701
|
-
|
702
|
-
|
703
|
-
class CustomFormatReaders:
    """Reader functions for formats that need more than a direct pandas call.

    Every reader takes a path (or connection URL) plus keyword arguments and
    returns a ``pandas.DataFrame``.
    """

    @staticmethod
    def read_tsv(path: str, **kwargs) -> pd.DataFrame:
        """Read a tab-separated file with ``pd.read_csv`` and ``sep="\\t"`` injected."""
        return pd.read_csv(
            path, **utils.inject_and_copy_kwargs(kwargs, **{"sep": "\t"})
        )

    @staticmethod
    def read_json(path: str, **kwargs) -> pd.DataFrame:
        """Read JSON; on ``ValueError`` retry with ``lines=True`` injected."""
        try:
            return pd.read_json(path, **kwargs)
        except ValueError:
            return pd.read_json(
                path, **utils.inject_and_copy_kwargs(kwargs, **{"lines": True})
            )

    @staticmethod
    def read_libsvm(path: str, **kwargs) -> pd.DataFrame:
        """Load an svmlight/libsvm file; labels go into a ``target`` column.

        The load is memoized with joblib under ``./mycache``. ``kwargs`` is
        accepted for interface parity but not used.
        """
        from sklearn.datasets import load_svmlight_file
        from joblib import Memory

        mem = Memory("./mycache")

        @mem.cache
        def get_data(path):
            X, y = load_svmlight_file(path)
            df = pd.DataFrame(X.todense())
            df["target"] = y
            return df

        return get_data(path)

    @staticmethod
    @runtime_dependency(
        module="pandavro", object="read_avro", install_from=OptionalDependency.DATA
    )
    def read_avro(path: str, **kwargs) -> pd.DataFrame:
        """Read an Avro file.

        The inner ``read_avro`` name is resolved at call time; presumably it is
        the pandavro function made available by ``runtime_dependency`` --
        TODO confirm against the decorator.
        """
        return read_avro(path, **kwargs)

    DEFAULT_SQL_CHUNKSIZE = 12007
    DEFAULT_SQL_ARRAYSIZE = 50000
    DEFAULT_SQL_MIL = 128
    DEFAULT_SQL_CTU = False

    @classmethod
    def read_sql(cls, path: str, table: str = None, **kwargs) -> pd.DataFrame:
        """
        Read a table or the result of a query into a DataFrame, chunk by chunk.

        :param path: str
            This is the connection URL that gets passed to sqlalchemy's create_engine method
        :param table: str
            This is either the name of a table to select * from or a sql query to be run
        :param kwargs:
            Forwarded to the engine factory; the subset accepted by
            ``pd.read_sql_query`` is forwarded to it as well.
        :return: pd.DataFrame
        :raises ValueError: if ``table`` is not given
        :raises Exception: if the query returns no rows
        """
        if table is None:
            raise ValueError(
                "In order to read from a database you need to specify the table using the `table` "
                "argument."
            )
        # check if it's oracle dialect
        if str(path).lower().startswith("oracle"):
            kwargs = utils.inject_and_copy_kwargs(
                kwargs,
                **{
                    "arraysize": cls.DEFAULT_SQL_ARRAYSIZE,
                    "max_identifier_length": cls.DEFAULT_SQL_MIL,
                    "coerce_to_unicode": cls.DEFAULT_SQL_CTU,
                },
            )
        engine = utils.get_sqlalchemy_engine(path, **kwargs)

        table_name = table.strip()
        with engine.connect() as connection:
            # if it's a query expression:
            if table_name.lower().startswith("select"):
                sql_query = table_name
            else:
                # NOTE(review): the table name is interpolated into the SQL
                # text; it must come from a trusted caller.
                sql_query = f"select * from {table_name}"

            chunks = pd.read_sql_query(
                sql_query,
                con=connection,
                **_validate_kwargs(
                    pd.read_sql_query,
                    utils.inject_and_copy_kwargs(
                        kwargs, **{"chunksize": cls.DEFAULT_SQL_CHUNKSIZE}
                    ),
                ),
            )
            from tqdm import tqdm

            # Collect the chunks and concatenate once; growing a DataFrame
            # inside the loop re-copies all previous rows on every chunk.
            frames = []
            with tqdm(chunks, unit=" rows") as t:
                for chunk in chunks:
                    frames.append(chunk)
                    t.update(len(chunk))

            df = pd.concat(frames) if frames else pd.DataFrame()
            df = df.reset_index(drop=True)
            if df.shape[0] == 0:
                logger.warning(
                    "The SQL expression returned zero rows. Therefore, no `ADSdataset` object was created."
                )
                raise Exception("The SQL expression returned no rows")
            return df

    @staticmethod
    def read_log(path, **kwargs):
        """Parse an Apache-style access log into a DataFrame with fixed column names."""
        from ads.dataset.helper import parse_apache_log_str, parse_apache_log_datetime

        df = pd.read_csv(
            path,
            # assume_missing=True,
            sep=r'\s(?=(?:[^"]*"[^"]*")*[^"]*$)(?![^\[]*\])',
            engine="python",
            na_values="-",
            header=None,
            names=[
                "host",
                "identity",
                "user",
                "time",
                "request",
                "http_code",
                "response_bytes",
                "referer",
                "user_agent",
                "unknown",
            ],
            # NOTE(review): "status" and "size" do not match any name in
            # `names` above (those columns are "http_code" and
            # "response_bytes"). Left unchanged on purpose -- confirm intent.
            converters={
                "time": parse_apache_log_datetime,
                "request": parse_apache_log_str,
                "status": int,
                "size": int,
                "referer": parse_apache_log_str,
                "user_agent": parse_apache_log_str,
            },
            **kwargs,
        )
        return df

    @staticmethod
    def read_html(path, html_table_index: int = None, **kwargs):
        """Read HTML tables: all of them concatenated, or only the one at ``html_table_index``."""
        tables = pd.read_html(path, **kwargs)
        if html_table_index is None:
            return pd.concat(tables)
        return tables[html_table_index]

    @staticmethod
    @runtime_dependency(module="scipy", install_from=OptionalDependency.VIZ)
    def read_arff(path, **kwargs):
        """Load an ARFF file from a local path, or download it when ``path`` is not a local file.

        When the download does not return an OK status the result is an empty
        DataFrame. ``kwargs`` is accepted but not used.
        """
        from scipy.io import arff
        import requests
        from io import BytesIO, TextIOWrapper

        data = None
        if os.path.isfile(path):
            data, _ = arff.loadarff(path)
        else:
            with requests.get(path) as r:
                if r.status_code == requests.codes.ok:
                    f = TextIOWrapper(BytesIO(r.content))
                    data, _ = arff.loadarff(f)
        return pd.DataFrame(data)

    @staticmethod
    def read_xml(path: str, **kwargs) -> pd.DataFrame:
        """
        Load data from xml file.

        Each child of a document root becomes one row. Attributes and leaf
        elements become columns named by their ``/``-joined tag path.

        Parameters
        ----------
        path: str
            Path to XML file
        storage_options: dict, optional
            Storage options forwarded to ``fsspec.open_files``.

        Returns
        -------
        dataframe : pandas.DataFrame
        """
        # xml.etree.cElementTree was removed in Python 3.9; ElementTree is the
        # supported module and exposes the same API.
        import xml.etree.ElementTree as et

        def get_children(df, node, parent, i):
            for name in node.attrib.keys():
                df.at[i, parent + name] = node.attrib[name]
            for child in list(node):
                if len(list(child)) > 0:
                    get_children(df, child, parent + child.tag + "/", i)
                else:
                    df.at[i, parent + child.tag] = child.text

        # Tolerate an explicit storage_options=None.
        storage_options = kwargs.get("storage_options") or {}

        file_handles = fsspec.open_files(path, mode="rb", **storage_options)
        ret_df = pd.DataFrame()
        row_offset = 0
        for file_handle in file_handles:
            with file_handle as xml_file:
                # Parse the opened handle (not the raw path) so that remote
                # and multi-file paths are actually read through fsspec.
                records = list(et.parse(xml_file).getroot())
            for i, node in enumerate(records):
                get_children(ret_df, node, node.tag + "/", row_offset + i)
            # Continue numbering after this file's rows so that rows from
            # later files do not overwrite earlier ones.
            row_offset += len(records)
        return ret_df
|
907
|
-
|
908
|
-
|
909
|
-
# Maps a format key to the function that reads it. Aliases of one format are
# grouped on a single row; the resulting key order matches the grouping order.
reader_fns = {
    alias: reader
    for aliases, reader in (
        (("csv",), pd.read_csv),
        (("tsv",), CustomFormatReaders.read_tsv),
        (("json", "jsonl"), CustomFormatReaders.read_json),
        (("excel", "xls", "xlsx"), pd.read_excel),
        (("parquet",), pd.read_parquet),
        (("libsvm",), CustomFormatReaders.read_libsvm),
        # Todo: re.match(format, "hdf\d*") or format == "h5"
        (("hdf", "hdf3", "hdf4", "h5"), pd.read_hdf),
        (("avro", "avsc"), CustomFormatReaders.read_avro),
        (("sql", "db"), CustomFormatReaders.read_sql),
        (("log", "clf"), CustomFormatReaders.read_log),
        (("html",), CustomFormatReaders.read_html),
        (("arff",), CustomFormatReaders.read_arff),
        (("xml",), CustomFormatReaders.read_xml),
    )
    for alias in aliases
}
|
933
|
-
|
934
|
-
|
935
|
-
def _validate_kwargs(func: Callable, kwargs):
|
936
|
-
valid_params = inspect.signature(func).parameters
|
937
|
-
if "kwargs" in valid_params:
|
938
|
-
return kwargs
|
939
|
-
else:
|
940
|
-
return {k: v for k, v in kwargs.items() if k in valid_params}
|
941
|
-
|
942
|
-
|
943
|
-
def get_format_reader(path: ElaboratedPath, **kwargs) -> Callable:
    """
    Look up the reader function registered for ``path.format``.

    ``kwargs`` is accepted but not used.

    Raises
    ------
    ValueError
        If no reader is registered under the format key.
    """
    format_key = path.format
    try:
        reader_fn = reader_fns[format_key]
    except (KeyError, NameError):
        message = (
            "We were unable to load the specified dataset. We have interpreted the format "
            f"as {format_key}, if this is not correct, call again and set the `format` parameter = "
            "to the desired format. Read more here: https://docs.cloud.oracle.com/en-us/iaas/tools/ads"
            "-sdk/latest/user_guide/loading_data/loading_data.html#specify-data-types-in-load-dataset"
        )
        raise ValueError(message)
    else:
        return reader_fn
|
956
|
-
|
957
|
-
|
958
|
-
def load_dataset(path: ElaboratedPath, reader_fn: Callable, **kwargs) -> pd.DataFrame:
    """
    Read every file in ``path.paths`` with ``reader_fn`` and concatenate the results.

    Only the keyword arguments that ``reader_fn`` accepts (as decided by
    ``_validate_kwargs``) are forwarded to it.

    Raises
    ------
    ValueError
        If ``reader_fn`` returns something other than a DataFrame, or if
        ``path.paths`` yields no files.
    DatasetLoadException
        If the concatenated DataFrame is empty.
    """
    dfs = []
    for filename in path.paths:
        data = reader_fn(filename, **_validate_kwargs(reader_fn, kwargs))
        if not isinstance(data, pd.DataFrame):
            fn_name = f"{reader_fn.__module__}.{reader_fn.__qualname__}"
            raise ValueError(
                f"{fn_name} is used to load the data. "
                f"However, {fn_name} returned {type(data)} instead of pandas DataFrame. "
                f"Refer to the usage of {fn_name} to set the correct arguments."
            )
        dfs.append(data)

    if not dfs:
        raise ValueError(
            "We were unable to load the specified dataset. Read more here: "
            "https://docs.cloud.oracle.com/en-us/iaas/tools/ads"
            "-sdk/latest/user_guide/loading_data/loading_data.html#specify-data-types-in-load-dataset"
        )

    # pd.concat always returns a DataFrame here, so the former
    # ``if df is None`` check could never fire and has been dropped.
    df = pd.concat(dfs)

    if df.empty:
        raise DatasetLoadException("Empty DataFrame, not producing a ADSDataset")
    return df
|