oracle-ads 2.13.8__py3-none-any.whl → 2.13.9rc0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {oracle_ads-2.13.8.dist-info → oracle_ads-2.13.9rc0.dist-info}/METADATA +151 -151
- oracle_ads-2.13.9rc0.dist-info/RECORD +9 -0
- {oracle_ads-2.13.8.dist-info → oracle_ads-2.13.9rc0.dist-info}/WHEEL +2 -1
- {oracle_ads-2.13.8.dist-info → oracle_ads-2.13.9rc0.dist-info}/entry_points.txt +1 -2
- oracle_ads-2.13.9rc0.dist-info/top_level.txt +1 -0
- ads/aqua/__init__.py +0 -40
- ads/aqua/app.py +0 -506
- ads/aqua/cli.py +0 -96
- ads/aqua/client/__init__.py +0 -3
- ads/aqua/client/client.py +0 -836
- ads/aqua/client/openai_client.py +0 -305
- ads/aqua/common/__init__.py +0 -5
- ads/aqua/common/decorator.py +0 -125
- ads/aqua/common/entities.py +0 -266
- ads/aqua/common/enums.py +0 -122
- ads/aqua/common/errors.py +0 -109
- ads/aqua/common/utils.py +0 -1285
- ads/aqua/config/__init__.py +0 -4
- ads/aqua/config/container_config.py +0 -248
- ads/aqua/config/evaluation/__init__.py +0 -4
- ads/aqua/config/evaluation/evaluation_service_config.py +0 -147
- ads/aqua/config/utils/__init__.py +0 -4
- ads/aqua/config/utils/serializer.py +0 -339
- ads/aqua/constants.py +0 -114
- ads/aqua/data.py +0 -14
- ads/aqua/dummy_data/icon.txt +0 -1
- ads/aqua/dummy_data/oci_model_deployments.json +0 -56
- ads/aqua/dummy_data/oci_models.json +0 -1
- ads/aqua/dummy_data/readme.md +0 -26
- ads/aqua/evaluation/__init__.py +0 -8
- ads/aqua/evaluation/constants.py +0 -53
- ads/aqua/evaluation/entities.py +0 -186
- ads/aqua/evaluation/errors.py +0 -70
- ads/aqua/evaluation/evaluation.py +0 -1814
- ads/aqua/extension/__init__.py +0 -42
- ads/aqua/extension/aqua_ws_msg_handler.py +0 -76
- ads/aqua/extension/base_handler.py +0 -90
- ads/aqua/extension/common_handler.py +0 -121
- ads/aqua/extension/common_ws_msg_handler.py +0 -36
- ads/aqua/extension/deployment_handler.py +0 -298
- ads/aqua/extension/deployment_ws_msg_handler.py +0 -54
- ads/aqua/extension/errors.py +0 -30
- ads/aqua/extension/evaluation_handler.py +0 -129
- ads/aqua/extension/evaluation_ws_msg_handler.py +0 -61
- ads/aqua/extension/finetune_handler.py +0 -96
- ads/aqua/extension/model_handler.py +0 -390
- ads/aqua/extension/models/__init__.py +0 -0
- ads/aqua/extension/models/ws_models.py +0 -145
- ads/aqua/extension/models_ws_msg_handler.py +0 -50
- ads/aqua/extension/ui_handler.py +0 -282
- ads/aqua/extension/ui_websocket_handler.py +0 -130
- ads/aqua/extension/utils.py +0 -133
- ads/aqua/finetuning/__init__.py +0 -7
- ads/aqua/finetuning/constants.py +0 -23
- ads/aqua/finetuning/entities.py +0 -181
- ads/aqua/finetuning/finetuning.py +0 -731
- ads/aqua/model/__init__.py +0 -8
- ads/aqua/model/constants.py +0 -60
- ads/aqua/model/entities.py +0 -306
- ads/aqua/model/enums.py +0 -30
- ads/aqua/model/model.py +0 -2080
- ads/aqua/modeldeployment/__init__.py +0 -8
- ads/aqua/modeldeployment/constants.py +0 -10
- ads/aqua/modeldeployment/deployment.py +0 -1324
- ads/aqua/modeldeployment/entities.py +0 -653
- ads/aqua/modeldeployment/inference.py +0 -74
- ads/aqua/modeldeployment/utils.py +0 -543
- ads/aqua/resources/gpu_shapes_index.json +0 -94
- ads/aqua/server/__init__.py +0 -4
- ads/aqua/server/__main__.py +0 -24
- ads/aqua/server/app.py +0 -47
- ads/aqua/server/aqua_spec.yml +0 -1291
- ads/aqua/training/__init__.py +0 -4
- ads/aqua/training/exceptions.py +0 -476
- ads/aqua/ui.py +0 -499
- ads/automl/__init__.py +0 -9
- ads/automl/driver.py +0 -330
- ads/automl/provider.py +0 -975
- ads/bds/__init__.py +0 -5
- ads/bds/auth.py +0 -127
- ads/bds/big_data_service.py +0 -255
- ads/catalog/__init__.py +0 -19
- ads/catalog/model.py +0 -1576
- ads/catalog/notebook.py +0 -461
- ads/catalog/project.py +0 -468
- ads/catalog/summary.py +0 -178
- ads/common/__init__.py +0 -11
- ads/common/analyzer.py +0 -65
- ads/common/artifact/.model-ignore +0 -63
- ads/common/artifact/__init__.py +0 -10
- ads/common/auth.py +0 -1122
- ads/common/card_identifier.py +0 -83
- ads/common/config.py +0 -647
- ads/common/data.py +0 -165
- ads/common/decorator/__init__.py +0 -9
- ads/common/decorator/argument_to_case.py +0 -88
- ads/common/decorator/deprecate.py +0 -69
- ads/common/decorator/require_nonempty_arg.py +0 -65
- ads/common/decorator/runtime_dependency.py +0 -178
- ads/common/decorator/threaded.py +0 -97
- ads/common/decorator/utils.py +0 -35
- ads/common/dsc_file_system.py +0 -303
- ads/common/error.py +0 -14
- ads/common/extended_enum.py +0 -81
- ads/common/function/__init__.py +0 -5
- ads/common/function/fn_util.py +0 -142
- ads/common/function/func_conf.yaml +0 -25
- ads/common/ipython.py +0 -76
- ads/common/model.py +0 -679
- ads/common/model_artifact.py +0 -1759
- ads/common/model_artifact_schema.json +0 -107
- ads/common/model_export_util.py +0 -664
- ads/common/model_metadata.py +0 -24
- ads/common/object_storage_details.py +0 -296
- ads/common/oci_client.py +0 -175
- ads/common/oci_datascience.py +0 -46
- ads/common/oci_logging.py +0 -1144
- ads/common/oci_mixin.py +0 -957
- ads/common/oci_resource.py +0 -136
- ads/common/serializer.py +0 -559
- ads/common/utils.py +0 -1852
- ads/common/word_lists.py +0 -1491
- ads/common/work_request.py +0 -189
- ads/data_labeling/__init__.py +0 -13
- ads/data_labeling/boundingbox.py +0 -253
- ads/data_labeling/constants.py +0 -47
- ads/data_labeling/data_labeling_service.py +0 -244
- ads/data_labeling/interface/__init__.py +0 -5
- ads/data_labeling/interface/loader.py +0 -16
- ads/data_labeling/interface/parser.py +0 -16
- ads/data_labeling/interface/reader.py +0 -23
- ads/data_labeling/loader/__init__.py +0 -5
- ads/data_labeling/loader/file_loader.py +0 -241
- ads/data_labeling/metadata.py +0 -110
- ads/data_labeling/mixin/__init__.py +0 -5
- ads/data_labeling/mixin/data_labeling.py +0 -232
- ads/data_labeling/ner.py +0 -129
- ads/data_labeling/parser/__init__.py +0 -5
- ads/data_labeling/parser/dls_record_parser.py +0 -388
- ads/data_labeling/parser/export_metadata_parser.py +0 -94
- ads/data_labeling/parser/export_record_parser.py +0 -473
- ads/data_labeling/reader/__init__.py +0 -5
- ads/data_labeling/reader/dataset_reader.py +0 -574
- ads/data_labeling/reader/dls_record_reader.py +0 -121
- ads/data_labeling/reader/export_record_reader.py +0 -62
- ads/data_labeling/reader/jsonl_reader.py +0 -75
- ads/data_labeling/reader/metadata_reader.py +0 -203
- ads/data_labeling/reader/record_reader.py +0 -263
- ads/data_labeling/record.py +0 -52
- ads/data_labeling/visualizer/__init__.py +0 -5
- ads/data_labeling/visualizer/image_visualizer.py +0 -525
- ads/data_labeling/visualizer/text_visualizer.py +0 -357
- ads/database/__init__.py +0 -5
- ads/database/connection.py +0 -338
- ads/dataset/__init__.py +0 -10
- ads/dataset/capabilities.md +0 -51
- ads/dataset/classification_dataset.py +0 -339
- ads/dataset/correlation.py +0 -226
- ads/dataset/correlation_plot.py +0 -563
- ads/dataset/dask_series.py +0 -173
- ads/dataset/dataframe_transformer.py +0 -110
- ads/dataset/dataset.py +0 -1979
- ads/dataset/dataset_browser.py +0 -360
- ads/dataset/dataset_with_target.py +0 -995
- ads/dataset/exception.py +0 -25
- ads/dataset/factory.py +0 -987
- ads/dataset/feature_engineering_transformer.py +0 -35
- ads/dataset/feature_selection.py +0 -107
- ads/dataset/forecasting_dataset.py +0 -26
- ads/dataset/helper.py +0 -1450
- ads/dataset/label_encoder.py +0 -99
- ads/dataset/mixin/__init__.py +0 -5
- ads/dataset/mixin/dataset_accessor.py +0 -134
- ads/dataset/pipeline.py +0 -58
- ads/dataset/plot.py +0 -710
- ads/dataset/progress.py +0 -86
- ads/dataset/recommendation.py +0 -297
- ads/dataset/recommendation_transformer.py +0 -502
- ads/dataset/regression_dataset.py +0 -14
- ads/dataset/sampled_dataset.py +0 -1050
- ads/dataset/target.py +0 -98
- ads/dataset/timeseries.py +0 -18
- ads/dbmixin/__init__.py +0 -5
- ads/dbmixin/db_pandas_accessor.py +0 -153
- ads/environment/__init__.py +0 -9
- ads/environment/ml_runtime.py +0 -66
- ads/evaluations/README.md +0 -14
- ads/evaluations/__init__.py +0 -109
- ads/evaluations/evaluation_plot.py +0 -983
- ads/evaluations/evaluator.py +0 -1334
- ads/evaluations/statistical_metrics.py +0 -543
- ads/experiments/__init__.py +0 -9
- ads/experiments/capabilities.md +0 -0
- ads/explanations/__init__.py +0 -21
- ads/explanations/base_explainer.py +0 -142
- ads/explanations/capabilities.md +0 -83
- ads/explanations/explainer.py +0 -190
- ads/explanations/mlx_global_explainer.py +0 -1050
- ads/explanations/mlx_interface.py +0 -386
- ads/explanations/mlx_local_explainer.py +0 -287
- ads/explanations/mlx_whatif_explainer.py +0 -201
- ads/feature_engineering/__init__.py +0 -20
- ads/feature_engineering/accessor/__init__.py +0 -5
- ads/feature_engineering/accessor/dataframe_accessor.py +0 -535
- ads/feature_engineering/accessor/mixin/__init__.py +0 -5
- ads/feature_engineering/accessor/mixin/correlation.py +0 -166
- ads/feature_engineering/accessor/mixin/eda_mixin.py +0 -266
- ads/feature_engineering/accessor/mixin/eda_mixin_series.py +0 -85
- ads/feature_engineering/accessor/mixin/feature_types_mixin.py +0 -211
- ads/feature_engineering/accessor/mixin/utils.py +0 -65
- ads/feature_engineering/accessor/series_accessor.py +0 -431
- ads/feature_engineering/adsimage/__init__.py +0 -5
- ads/feature_engineering/adsimage/image.py +0 -192
- ads/feature_engineering/adsimage/image_reader.py +0 -170
- ads/feature_engineering/adsimage/interface/__init__.py +0 -5
- ads/feature_engineering/adsimage/interface/reader.py +0 -19
- ads/feature_engineering/adsstring/__init__.py +0 -7
- ads/feature_engineering/adsstring/oci_language/__init__.py +0 -8
- ads/feature_engineering/adsstring/string/__init__.py +0 -8
- ads/feature_engineering/data_schema.json +0 -57
- ads/feature_engineering/dataset/__init__.py +0 -5
- ads/feature_engineering/dataset/zip_code_data.py +0 -42062
- ads/feature_engineering/exceptions.py +0 -40
- ads/feature_engineering/feature_type/__init__.py +0 -133
- ads/feature_engineering/feature_type/address.py +0 -184
- ads/feature_engineering/feature_type/adsstring/__init__.py +0 -5
- ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +0 -164
- ads/feature_engineering/feature_type/adsstring/oci_language.py +0 -93
- ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +0 -5
- ads/feature_engineering/feature_type/adsstring/parsers/base.py +0 -47
- ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +0 -96
- ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +0 -221
- ads/feature_engineering/feature_type/adsstring/string.py +0 -258
- ads/feature_engineering/feature_type/base.py +0 -58
- ads/feature_engineering/feature_type/boolean.py +0 -183
- ads/feature_engineering/feature_type/category.py +0 -146
- ads/feature_engineering/feature_type/constant.py +0 -137
- ads/feature_engineering/feature_type/continuous.py +0 -151
- ads/feature_engineering/feature_type/creditcard.py +0 -314
- ads/feature_engineering/feature_type/datetime.py +0 -190
- ads/feature_engineering/feature_type/discrete.py +0 -134
- ads/feature_engineering/feature_type/document.py +0 -43
- ads/feature_engineering/feature_type/gis.py +0 -251
- ads/feature_engineering/feature_type/handler/__init__.py +0 -5
- ads/feature_engineering/feature_type/handler/feature_validator.py +0 -524
- ads/feature_engineering/feature_type/handler/feature_warning.py +0 -319
- ads/feature_engineering/feature_type/handler/warnings.py +0 -128
- ads/feature_engineering/feature_type/integer.py +0 -142
- ads/feature_engineering/feature_type/ip_address.py +0 -144
- ads/feature_engineering/feature_type/ip_address_v4.py +0 -138
- ads/feature_engineering/feature_type/ip_address_v6.py +0 -138
- ads/feature_engineering/feature_type/lat_long.py +0 -256
- ads/feature_engineering/feature_type/object.py +0 -43
- ads/feature_engineering/feature_type/ordinal.py +0 -132
- ads/feature_engineering/feature_type/phone_number.py +0 -135
- ads/feature_engineering/feature_type/string.py +0 -171
- ads/feature_engineering/feature_type/text.py +0 -93
- ads/feature_engineering/feature_type/unknown.py +0 -43
- ads/feature_engineering/feature_type/zip_code.py +0 -164
- ads/feature_engineering/feature_type_manager.py +0 -406
- ads/feature_engineering/schema.py +0 -795
- ads/feature_engineering/utils.py +0 -245
- ads/feature_store/.readthedocs.yaml +0 -19
- ads/feature_store/README.md +0 -65
- ads/feature_store/__init__.py +0 -9
- ads/feature_store/common/__init__.py +0 -0
- ads/feature_store/common/enums.py +0 -339
- ads/feature_store/common/exceptions.py +0 -18
- ads/feature_store/common/spark_session_singleton.py +0 -125
- ads/feature_store/common/utils/__init__.py +0 -0
- ads/feature_store/common/utils/base64_encoder_decoder.py +0 -72
- ads/feature_store/common/utils/feature_schema_mapper.py +0 -283
- ads/feature_store/common/utils/transformation_utils.py +0 -82
- ads/feature_store/common/utils/utility.py +0 -403
- ads/feature_store/data_validation/__init__.py +0 -0
- ads/feature_store/data_validation/great_expectation.py +0 -129
- ads/feature_store/dataset.py +0 -1230
- ads/feature_store/dataset_job.py +0 -530
- ads/feature_store/docs/Dockerfile +0 -7
- ads/feature_store/docs/Makefile +0 -44
- ads/feature_store/docs/conf.py +0 -28
- ads/feature_store/docs/requirements.txt +0 -14
- ads/feature_store/docs/source/ads.feature_store.query.rst +0 -20
- ads/feature_store/docs/source/cicd.rst +0 -137
- ads/feature_store/docs/source/conf.py +0 -86
- ads/feature_store/docs/source/data_versioning.rst +0 -33
- ads/feature_store/docs/source/dataset.rst +0 -388
- ads/feature_store/docs/source/dataset_job.rst +0 -27
- ads/feature_store/docs/source/demo.rst +0 -70
- ads/feature_store/docs/source/entity.rst +0 -78
- ads/feature_store/docs/source/feature_group.rst +0 -624
- ads/feature_store/docs/source/feature_group_job.rst +0 -29
- ads/feature_store/docs/source/feature_store.rst +0 -122
- ads/feature_store/docs/source/feature_store_class.rst +0 -123
- ads/feature_store/docs/source/feature_validation.rst +0 -66
- ads/feature_store/docs/source/figures/cicd.png +0 -0
- ads/feature_store/docs/source/figures/data_validation.png +0 -0
- ads/feature_store/docs/source/figures/data_versioning.png +0 -0
- ads/feature_store/docs/source/figures/dataset.gif +0 -0
- ads/feature_store/docs/source/figures/dataset.png +0 -0
- ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
- ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
- ads/feature_store/docs/source/figures/entity.png +0 -0
- ads/feature_store/docs/source/figures/feature_group.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
- ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
- ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
- ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
- ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
- ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
- ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
- ads/feature_store/docs/source/figures/overview.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
- ads/feature_store/docs/source/figures/stats_1.png +0 -0
- ads/feature_store/docs/source/figures/stats_2.png +0 -0
- ads/feature_store/docs/source/figures/stats_d.png +0 -0
- ads/feature_store/docs/source/figures/stats_fg.png +0 -0
- ads/feature_store/docs/source/figures/transformation.png +0 -0
- ads/feature_store/docs/source/figures/transformations.gif +0 -0
- ads/feature_store/docs/source/figures/validation.png +0 -0
- ads/feature_store/docs/source/figures/validation_fg.png +0 -0
- ads/feature_store/docs/source/figures/validation_results.png +0 -0
- ads/feature_store/docs/source/figures/validation_summary.png +0 -0
- ads/feature_store/docs/source/index.rst +0 -81
- ads/feature_store/docs/source/module.rst +0 -8
- ads/feature_store/docs/source/notebook.rst +0 -94
- ads/feature_store/docs/source/overview.rst +0 -47
- ads/feature_store/docs/source/quickstart.rst +0 -176
- ads/feature_store/docs/source/release_notes.rst +0 -194
- ads/feature_store/docs/source/setup_feature_store.rst +0 -81
- ads/feature_store/docs/source/statistics.rst +0 -58
- ads/feature_store/docs/source/transformation.rst +0 -199
- ads/feature_store/docs/source/ui.rst +0 -65
- ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +0 -66
- ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +0 -192
- ads/feature_store/docs/source/user_guides.setup.terraform.rst +0 -338
- ads/feature_store/entity.py +0 -718
- ads/feature_store/execution_strategy/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +0 -375
- ads/feature_store/execution_strategy/engine/__init__.py +0 -0
- ads/feature_store/execution_strategy/engine/spark_engine.py +0 -316
- ads/feature_store/execution_strategy/execution_strategy.py +0 -113
- ads/feature_store/execution_strategy/execution_strategy_provider.py +0 -47
- ads/feature_store/execution_strategy/spark/__init__.py +0 -0
- ads/feature_store/execution_strategy/spark/spark_execution.py +0 -618
- ads/feature_store/feature.py +0 -192
- ads/feature_store/feature_group.py +0 -1494
- ads/feature_store/feature_group_expectation.py +0 -346
- ads/feature_store/feature_group_job.py +0 -602
- ads/feature_store/feature_lineage/__init__.py +0 -0
- ads/feature_store/feature_lineage/graphviz_service.py +0 -180
- ads/feature_store/feature_option_details.py +0 -50
- ads/feature_store/feature_statistics/__init__.py +0 -0
- ads/feature_store/feature_statistics/statistics_service.py +0 -99
- ads/feature_store/feature_store.py +0 -699
- ads/feature_store/feature_store_registrar.py +0 -518
- ads/feature_store/input_feature_detail.py +0 -149
- ads/feature_store/mixin/__init__.py +0 -4
- ads/feature_store/mixin/oci_feature_store.py +0 -145
- ads/feature_store/model_details.py +0 -73
- ads/feature_store/query/__init__.py +0 -0
- ads/feature_store/query/filter.py +0 -266
- ads/feature_store/query/generator/__init__.py +0 -0
- ads/feature_store/query/generator/query_generator.py +0 -298
- ads/feature_store/query/join.py +0 -161
- ads/feature_store/query/query.py +0 -403
- ads/feature_store/query/validator/__init__.py +0 -0
- ads/feature_store/query/validator/query_validator.py +0 -57
- ads/feature_store/response/__init__.py +0 -0
- ads/feature_store/response/response_builder.py +0 -68
- ads/feature_store/service/__init__.py +0 -0
- ads/feature_store/service/oci_dataset.py +0 -139
- ads/feature_store/service/oci_dataset_job.py +0 -199
- ads/feature_store/service/oci_entity.py +0 -125
- ads/feature_store/service/oci_feature_group.py +0 -164
- ads/feature_store/service/oci_feature_group_job.py +0 -214
- ads/feature_store/service/oci_feature_store.py +0 -182
- ads/feature_store/service/oci_lineage.py +0 -87
- ads/feature_store/service/oci_transformation.py +0 -104
- ads/feature_store/statistics/__init__.py +0 -0
- ads/feature_store/statistics/abs_feature_value.py +0 -49
- ads/feature_store/statistics/charts/__init__.py +0 -0
- ads/feature_store/statistics/charts/abstract_feature_plot.py +0 -37
- ads/feature_store/statistics/charts/box_plot.py +0 -148
- ads/feature_store/statistics/charts/frequency_distribution.py +0 -65
- ads/feature_store/statistics/charts/probability_distribution.py +0 -68
- ads/feature_store/statistics/charts/top_k_frequent_elements.py +0 -98
- ads/feature_store/statistics/feature_stat.py +0 -126
- ads/feature_store/statistics/generic_feature_value.py +0 -33
- ads/feature_store/statistics/statistics.py +0 -41
- ads/feature_store/statistics_config.py +0 -101
- ads/feature_store/templates/feature_store_template.yaml +0 -45
- ads/feature_store/transformation.py +0 -499
- ads/feature_store/validation_output.py +0 -57
- ads/hpo/__init__.py +0 -9
- ads/hpo/_imports.py +0 -91
- ads/hpo/ads_search_space.py +0 -439
- ads/hpo/distributions.py +0 -325
- ads/hpo/objective.py +0 -280
- ads/hpo/search_cv.py +0 -1657
- ads/hpo/stopping_criterion.py +0 -75
- ads/hpo/tuner_artifact.py +0 -413
- ads/hpo/utils.py +0 -91
- ads/hpo/validation.py +0 -140
- ads/hpo/visualization/__init__.py +0 -5
- ads/hpo/visualization/_contour.py +0 -23
- ads/hpo/visualization/_edf.py +0 -20
- ads/hpo/visualization/_intermediate_values.py +0 -21
- ads/hpo/visualization/_optimization_history.py +0 -25
- ads/hpo/visualization/_parallel_coordinate.py +0 -169
- ads/hpo/visualization/_param_importances.py +0 -26
- ads/jobs/__init__.py +0 -53
- ads/jobs/ads_job.py +0 -663
- ads/jobs/builders/__init__.py +0 -5
- ads/jobs/builders/base.py +0 -156
- ads/jobs/builders/infrastructure/__init__.py +0 -6
- ads/jobs/builders/infrastructure/base.py +0 -165
- ads/jobs/builders/infrastructure/dataflow.py +0 -1252
- ads/jobs/builders/infrastructure/dsc_job.py +0 -1894
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +0 -1233
- ads/jobs/builders/infrastructure/utils.py +0 -65
- ads/jobs/builders/runtimes/__init__.py +0 -5
- ads/jobs/builders/runtimes/artifact.py +0 -338
- ads/jobs/builders/runtimes/base.py +0 -325
- ads/jobs/builders/runtimes/container_runtime.py +0 -242
- ads/jobs/builders/runtimes/python_runtime.py +0 -1016
- ads/jobs/builders/runtimes/pytorch_runtime.py +0 -204
- ads/jobs/cli.py +0 -104
- ads/jobs/env_var_parser.py +0 -131
- ads/jobs/extension.py +0 -160
- ads/jobs/schema/__init__.py +0 -5
- ads/jobs/schema/infrastructure_schema.json +0 -116
- ads/jobs/schema/job_schema.json +0 -42
- ads/jobs/schema/runtime_schema.json +0 -183
- ads/jobs/schema/validator.py +0 -141
- ads/jobs/serializer.py +0 -296
- ads/jobs/templates/__init__.py +0 -5
- ads/jobs/templates/container.py +0 -6
- ads/jobs/templates/driver_notebook.py +0 -177
- ads/jobs/templates/driver_oci.py +0 -500
- ads/jobs/templates/driver_python.py +0 -48
- ads/jobs/templates/driver_pytorch.py +0 -852
- ads/jobs/templates/driver_utils.py +0 -615
- ads/jobs/templates/hostname_from_env.c +0 -55
- ads/jobs/templates/oci_metrics.py +0 -181
- ads/jobs/utils.py +0 -104
- ads/llm/__init__.py +0 -28
- ads/llm/autogen/__init__.py +0 -2
- ads/llm/autogen/constants.py +0 -15
- ads/llm/autogen/reports/__init__.py +0 -2
- ads/llm/autogen/reports/base.py +0 -67
- ads/llm/autogen/reports/data.py +0 -103
- ads/llm/autogen/reports/session.py +0 -526
- ads/llm/autogen/reports/templates/chat_box.html +0 -13
- ads/llm/autogen/reports/templates/chat_box_lt.html +0 -5
- ads/llm/autogen/reports/templates/chat_box_rt.html +0 -6
- ads/llm/autogen/reports/utils.py +0 -56
- ads/llm/autogen/v02/__init__.py +0 -4
- ads/llm/autogen/v02/client.py +0 -295
- ads/llm/autogen/v02/log_handlers/__init__.py +0 -2
- ads/llm/autogen/v02/log_handlers/oci_file_handler.py +0 -83
- ads/llm/autogen/v02/loggers/__init__.py +0 -6
- ads/llm/autogen/v02/loggers/metric_logger.py +0 -320
- ads/llm/autogen/v02/loggers/session_logger.py +0 -580
- ads/llm/autogen/v02/loggers/utils.py +0 -86
- ads/llm/autogen/v02/runtime_logging.py +0 -163
- ads/llm/chain.py +0 -268
- ads/llm/chat_template.py +0 -31
- ads/llm/deploy.py +0 -63
- ads/llm/guardrails/__init__.py +0 -5
- ads/llm/guardrails/base.py +0 -442
- ads/llm/guardrails/huggingface.py +0 -44
- ads/llm/langchain/__init__.py +0 -5
- ads/llm/langchain/plugins/__init__.py +0 -5
- ads/llm/langchain/plugins/chat_models/__init__.py +0 -5
- ads/llm/langchain/plugins/chat_models/oci_data_science.py +0 -1027
- ads/llm/langchain/plugins/embeddings/__init__.py +0 -4
- ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +0 -184
- ads/llm/langchain/plugins/llms/__init__.py +0 -5
- ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +0 -979
- ads/llm/requirements.txt +0 -3
- ads/llm/serialize.py +0 -219
- ads/llm/serializers/__init__.py +0 -0
- ads/llm/serializers/retrieval_qa.py +0 -153
- ads/llm/serializers/runnable_parallel.py +0 -27
- ads/llm/templates/score_chain.jinja2 +0 -155
- ads/llm/templates/tool_chat_template_hermes.jinja +0 -130
- ads/llm/templates/tool_chat_template_mistral_parallel.jinja +0 -94
- ads/model/__init__.py +0 -52
- ads/model/artifact.py +0 -573
- ads/model/artifact_downloader.py +0 -254
- ads/model/artifact_uploader.py +0 -267
- ads/model/base_properties.py +0 -238
- ads/model/common/.model-ignore +0 -66
- ads/model/common/__init__.py +0 -5
- ads/model/common/utils.py +0 -142
- ads/model/datascience_model.py +0 -2635
- ads/model/deployment/__init__.py +0 -20
- ads/model/deployment/common/__init__.py +0 -5
- ads/model/deployment/common/utils.py +0 -308
- ads/model/deployment/model_deployer.py +0 -466
- ads/model/deployment/model_deployment.py +0 -1846
- ads/model/deployment/model_deployment_infrastructure.py +0 -671
- ads/model/deployment/model_deployment_properties.py +0 -493
- ads/model/deployment/model_deployment_runtime.py +0 -838
- ads/model/extractor/__init__.py +0 -5
- ads/model/extractor/automl_extractor.py +0 -74
- ads/model/extractor/embedding_onnx_extractor.py +0 -80
- ads/model/extractor/huggingface_extractor.py +0 -88
- ads/model/extractor/keras_extractor.py +0 -84
- ads/model/extractor/lightgbm_extractor.py +0 -93
- ads/model/extractor/model_info_extractor.py +0 -114
- ads/model/extractor/model_info_extractor_factory.py +0 -105
- ads/model/extractor/pytorch_extractor.py +0 -87
- ads/model/extractor/sklearn_extractor.py +0 -112
- ads/model/extractor/spark_extractor.py +0 -89
- ads/model/extractor/tensorflow_extractor.py +0 -85
- ads/model/extractor/xgboost_extractor.py +0 -94
- ads/model/framework/__init__.py +0 -5
- ads/model/framework/automl_model.py +0 -178
- ads/model/framework/embedding_onnx_model.py +0 -438
- ads/model/framework/huggingface_model.py +0 -399
- ads/model/framework/lightgbm_model.py +0 -266
- ads/model/framework/pytorch_model.py +0 -266
- ads/model/framework/sklearn_model.py +0 -250
- ads/model/framework/spark_model.py +0 -326
- ads/model/framework/tensorflow_model.py +0 -254
- ads/model/framework/xgboost_model.py +0 -258
- ads/model/generic_model.py +0 -3518
- ads/model/model_artifact_boilerplate/README.md +0 -381
- ads/model/model_artifact_boilerplate/__init__.py +0 -5
- ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +0 -5
- ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +0 -427
- ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +0 -2
- ads/model/model_artifact_boilerplate/runtime.yaml +0 -7
- ads/model/model_artifact_boilerplate/score.py +0 -61
- ads/model/model_file_description_schema.json +0 -68
- ads/model/model_introspect.py +0 -331
- ads/model/model_metadata.py +0 -1810
- ads/model/model_metadata_mixin.py +0 -460
- ads/model/model_properties.py +0 -63
- ads/model/model_version_set.py +0 -739
- ads/model/runtime/__init__.py +0 -5
- ads/model/runtime/env_info.py +0 -306
- ads/model/runtime/model_deployment_details.py +0 -37
- ads/model/runtime/model_provenance_details.py +0 -58
- ads/model/runtime/runtime_info.py +0 -81
- ads/model/runtime/schemas/inference_env_info_schema.yaml +0 -16
- ads/model/runtime/schemas/model_provenance_schema.yaml +0 -36
- ads/model/runtime/schemas/training_env_info_schema.yaml +0 -16
- ads/model/runtime/utils.py +0 -201
- ads/model/serde/__init__.py +0 -5
- ads/model/serde/common.py +0 -40
- ads/model/serde/model_input.py +0 -547
- ads/model/serde/model_serializer.py +0 -1184
- ads/model/service/__init__.py +0 -5
- ads/model/service/oci_datascience_model.py +0 -1076
- ads/model/service/oci_datascience_model_deployment.py +0 -500
- ads/model/service/oci_datascience_model_version_set.py +0 -176
- ads/model/transformer/__init__.py +0 -5
- ads/model/transformer/onnx_transformer.py +0 -324
- ads/mysqldb/__init__.py +0 -5
- ads/mysqldb/mysql_db.py +0 -227
- ads/opctl/__init__.py +0 -18
- ads/opctl/anomaly_detection.py +0 -11
- ads/opctl/backend/__init__.py +0 -5
- ads/opctl/backend/ads_dataflow.py +0 -353
- ads/opctl/backend/ads_ml_job.py +0 -710
- ads/opctl/backend/ads_ml_pipeline.py +0 -164
- ads/opctl/backend/ads_model_deployment.py +0 -209
- ads/opctl/backend/base.py +0 -146
- ads/opctl/backend/local.py +0 -1053
- ads/opctl/backend/marketplace/__init__.py +0 -9
- ads/opctl/backend/marketplace/helm_helper.py +0 -173
- ads/opctl/backend/marketplace/local_marketplace.py +0 -271
- ads/opctl/backend/marketplace/marketplace_backend_runner.py +0 -71
- ads/opctl/backend/marketplace/marketplace_operator_interface.py +0 -44
- ads/opctl/backend/marketplace/marketplace_operator_runner.py +0 -24
- ads/opctl/backend/marketplace/marketplace_utils.py +0 -212
- ads/opctl/backend/marketplace/models/__init__.py +0 -5
- ads/opctl/backend/marketplace/models/bearer_token.py +0 -94
- ads/opctl/backend/marketplace/models/marketplace_type.py +0 -70
- ads/opctl/backend/marketplace/models/ocir_details.py +0 -56
- ads/opctl/backend/marketplace/prerequisite_checker.py +0 -238
- ads/opctl/cli.py +0 -707
- ads/opctl/cmds.py +0 -869
- ads/opctl/conda/__init__.py +0 -5
- ads/opctl/conda/cli.py +0 -193
- ads/opctl/conda/cmds.py +0 -749
- ads/opctl/conda/config.yaml +0 -34
- ads/opctl/conda/manifest_template.yaml +0 -13
- ads/opctl/conda/multipart_uploader.py +0 -188
- ads/opctl/conda/pack.py +0 -89
- ads/opctl/config/__init__.py +0 -5
- ads/opctl/config/base.py +0 -57
- ads/opctl/config/diagnostics/__init__.py +0 -5
- ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +0 -62
- ads/opctl/config/merger.py +0 -255
- ads/opctl/config/resolver.py +0 -297
- ads/opctl/config/utils.py +0 -79
- ads/opctl/config/validator.py +0 -17
- ads/opctl/config/versioner.py +0 -68
- ads/opctl/config/yaml_parsers/__init__.py +0 -7
- ads/opctl/config/yaml_parsers/base.py +0 -58
- ads/opctl/config/yaml_parsers/distributed/__init__.py +0 -7
- ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +0 -201
- ads/opctl/constants.py +0 -66
- ads/opctl/decorator/__init__.py +0 -5
- ads/opctl/decorator/common.py +0 -129
- ads/opctl/diagnostics/__init__.py +0 -5
- ads/opctl/diagnostics/__main__.py +0 -25
- ads/opctl/diagnostics/check_distributed_job_requirements.py +0 -212
- ads/opctl/diagnostics/check_requirements.py +0 -144
- ads/opctl/diagnostics/requirement_exception.py +0 -9
- ads/opctl/distributed/README.md +0 -109
- ads/opctl/distributed/__init__.py +0 -5
- ads/opctl/distributed/certificates.py +0 -32
- ads/opctl/distributed/cli.py +0 -207
- ads/opctl/distributed/cmds.py +0 -731
- ads/opctl/distributed/common/__init__.py +0 -5
- ads/opctl/distributed/common/abstract_cluster_provider.py +0 -449
- ads/opctl/distributed/common/abstract_framework_spec_builder.py +0 -88
- ads/opctl/distributed/common/cluster_config_helper.py +0 -103
- ads/opctl/distributed/common/cluster_provider_factory.py +0 -21
- ads/opctl/distributed/common/cluster_runner.py +0 -54
- ads/opctl/distributed/common/framework_factory.py +0 -29
- ads/opctl/docker/Dockerfile.job +0 -103
- ads/opctl/docker/Dockerfile.job.arm +0 -107
- ads/opctl/docker/Dockerfile.job.gpu +0 -175
- ads/opctl/docker/base-env.yaml +0 -13
- ads/opctl/docker/cuda.repo +0 -6
- ads/opctl/docker/operator/.dockerignore +0 -0
- ads/opctl/docker/operator/Dockerfile +0 -41
- ads/opctl/docker/operator/Dockerfile.gpu +0 -85
- ads/opctl/docker/operator/cuda.repo +0 -6
- ads/opctl/docker/operator/environment.yaml +0 -8
- ads/opctl/forecast.py +0 -11
- ads/opctl/index.yaml +0 -3
- ads/opctl/model/__init__.py +0 -5
- ads/opctl/model/cli.py +0 -65
- ads/opctl/model/cmds.py +0 -73
- ads/opctl/operator/README.md +0 -4
- ads/opctl/operator/__init__.py +0 -31
- ads/opctl/operator/cli.py +0 -344
- ads/opctl/operator/cmd.py +0 -596
- ads/opctl/operator/common/__init__.py +0 -5
- ads/opctl/operator/common/backend_factory.py +0 -460
- ads/opctl/operator/common/const.py +0 -27
- ads/opctl/operator/common/data/synthetic.csv +0 -16001
- ads/opctl/operator/common/dictionary_merger.py +0 -148
- ads/opctl/operator/common/errors.py +0 -42
- ads/opctl/operator/common/operator_config.py +0 -99
- ads/opctl/operator/common/operator_loader.py +0 -811
- ads/opctl/operator/common/operator_schema.yaml +0 -130
- ads/opctl/operator/common/operator_yaml_generator.py +0 -152
- ads/opctl/operator/common/utils.py +0 -208
- ads/opctl/operator/lowcode/__init__.py +0 -5
- ads/opctl/operator/lowcode/anomaly/MLoperator +0 -16
- ads/opctl/operator/lowcode/anomaly/README.md +0 -207
- ads/opctl/operator/lowcode/anomaly/__init__.py +0 -5
- ads/opctl/operator/lowcode/anomaly/__main__.py +0 -103
- ads/opctl/operator/lowcode/anomaly/cmd.py +0 -35
- ads/opctl/operator/lowcode/anomaly/const.py +0 -167
- ads/opctl/operator/lowcode/anomaly/environment.yaml +0 -10
- ads/opctl/operator/lowcode/anomaly/model/__init__.py +0 -5
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +0 -146
- ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +0 -162
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +0 -99
- ads/opctl/operator/lowcode/anomaly/model/autots.py +0 -115
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +0 -404
- ads/opctl/operator/lowcode/anomaly/model/factory.py +0 -110
- ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +0 -78
- ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +0 -78
- ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +0 -120
- ads/opctl/operator/lowcode/anomaly/model/tods.py +0 -119
- ads/opctl/operator/lowcode/anomaly/operator_config.py +0 -127
- ads/opctl/operator/lowcode/anomaly/schema.yaml +0 -401
- ads/opctl/operator/lowcode/anomaly/utils.py +0 -88
- ads/opctl/operator/lowcode/common/__init__.py +0 -5
- ads/opctl/operator/lowcode/common/const.py +0 -10
- ads/opctl/operator/lowcode/common/data.py +0 -116
- ads/opctl/operator/lowcode/common/errors.py +0 -47
- ads/opctl/operator/lowcode/common/transformations.py +0 -296
- ads/opctl/operator/lowcode/common/utils.py +0 -293
- ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +0 -13
- ads/opctl/operator/lowcode/feature_store_marketplace/README.md +0 -30
- ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +0 -5
- ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +0 -116
- ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +0 -85
- ads/opctl/operator/lowcode/feature_store_marketplace/const.py +0 -15
- ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +0 -4
- ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +0 -32
- ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +0 -43
- ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +0 -120
- ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +0 -34
- ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +0 -386
- ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +0 -160
- ads/opctl/operator/lowcode/forecast/MLoperator +0 -25
- ads/opctl/operator/lowcode/forecast/README.md +0 -209
- ads/opctl/operator/lowcode/forecast/__init__.py +0 -5
- ads/opctl/operator/lowcode/forecast/__main__.py +0 -89
- ads/opctl/operator/lowcode/forecast/cmd.py +0 -40
- ads/opctl/operator/lowcode/forecast/const.py +0 -92
- ads/opctl/operator/lowcode/forecast/environment.yaml +0 -20
- ads/opctl/operator/lowcode/forecast/errors.py +0 -26
- ads/opctl/operator/lowcode/forecast/model/__init__.py +0 -5
- ads/opctl/operator/lowcode/forecast/model/arima.py +0 -279
- ads/opctl/operator/lowcode/forecast/model/automlx.py +0 -542
- ads/opctl/operator/lowcode/forecast/model/autots.py +0 -312
- ads/opctl/operator/lowcode/forecast/model/base_model.py +0 -863
- ads/opctl/operator/lowcode/forecast/model/factory.py +0 -106
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +0 -492
- ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +0 -243
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +0 -486
- ads/opctl/operator/lowcode/forecast/model/prophet.py +0 -445
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +0 -244
- ads/opctl/operator/lowcode/forecast/operator_config.py +0 -234
- ads/opctl/operator/lowcode/forecast/schema.yaml +0 -506
- ads/opctl/operator/lowcode/forecast/utils.py +0 -413
- ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +0 -7
- ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +0 -285
- ads/opctl/operator/lowcode/forecast/whatifserve/score.py +0 -246
- ads/opctl/operator/lowcode/pii/MLoperator +0 -17
- ads/opctl/operator/lowcode/pii/README.md +0 -208
- ads/opctl/operator/lowcode/pii/__init__.py +0 -5
- ads/opctl/operator/lowcode/pii/__main__.py +0 -78
- ads/opctl/operator/lowcode/pii/cmd.py +0 -39
- ads/opctl/operator/lowcode/pii/constant.py +0 -84
- ads/opctl/operator/lowcode/pii/environment.yaml +0 -17
- ads/opctl/operator/lowcode/pii/errors.py +0 -27
- ads/opctl/operator/lowcode/pii/model/__init__.py +0 -5
- ads/opctl/operator/lowcode/pii/model/factory.py +0 -82
- ads/opctl/operator/lowcode/pii/model/guardrails.py +0 -167
- ads/opctl/operator/lowcode/pii/model/pii.py +0 -145
- ads/opctl/operator/lowcode/pii/model/processor/__init__.py +0 -34
- ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +0 -34
- ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +0 -35
- ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +0 -225
- ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +0 -73
- ads/opctl/operator/lowcode/pii/model/processor/remover.py +0 -26
- ads/opctl/operator/lowcode/pii/model/report.py +0 -487
- ads/opctl/operator/lowcode/pii/operator_config.py +0 -95
- ads/opctl/operator/lowcode/pii/schema.yaml +0 -108
- ads/opctl/operator/lowcode/pii/utils.py +0 -43
- ads/opctl/operator/lowcode/recommender/MLoperator +0 -16
- ads/opctl/operator/lowcode/recommender/README.md +0 -206
- ads/opctl/operator/lowcode/recommender/__init__.py +0 -5
- ads/opctl/operator/lowcode/recommender/__main__.py +0 -82
- ads/opctl/operator/lowcode/recommender/cmd.py +0 -33
- ads/opctl/operator/lowcode/recommender/constant.py +0 -30
- ads/opctl/operator/lowcode/recommender/environment.yaml +0 -11
- ads/opctl/operator/lowcode/recommender/model/base_model.py +0 -212
- ads/opctl/operator/lowcode/recommender/model/factory.py +0 -56
- ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +0 -25
- ads/opctl/operator/lowcode/recommender/model/svd.py +0 -106
- ads/opctl/operator/lowcode/recommender/operator_config.py +0 -81
- ads/opctl/operator/lowcode/recommender/schema.yaml +0 -265
- ads/opctl/operator/lowcode/recommender/utils.py +0 -13
- ads/opctl/operator/runtime/__init__.py +0 -5
- ads/opctl/operator/runtime/const.py +0 -17
- ads/opctl/operator/runtime/container_runtime_schema.yaml +0 -50
- ads/opctl/operator/runtime/marketplace_runtime.py +0 -50
- ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +0 -21
- ads/opctl/operator/runtime/python_runtime_schema.yaml +0 -21
- ads/opctl/operator/runtime/runtime.py +0 -115
- ads/opctl/schema.yaml.yml +0 -36
- ads/opctl/script.py +0 -40
- ads/opctl/spark/__init__.py +0 -5
- ads/opctl/spark/cli.py +0 -43
- ads/opctl/spark/cmds.py +0 -147
- ads/opctl/templates/diagnostic_report_template.jinja2 +0 -102
- ads/opctl/utils.py +0 -344
- ads/oracledb/__init__.py +0 -5
- ads/oracledb/oracle_db.py +0 -346
- ads/pipeline/__init__.py +0 -39
- ads/pipeline/ads_pipeline.py +0 -2279
- ads/pipeline/ads_pipeline_run.py +0 -772
- ads/pipeline/ads_pipeline_step.py +0 -605
- ads/pipeline/builders/__init__.py +0 -5
- ads/pipeline/builders/infrastructure/__init__.py +0 -5
- ads/pipeline/builders/infrastructure/custom_script.py +0 -32
- ads/pipeline/cli.py +0 -119
- ads/pipeline/extension.py +0 -291
- ads/pipeline/schema/__init__.py +0 -5
- ads/pipeline/schema/cs_step_schema.json +0 -35
- ads/pipeline/schema/ml_step_schema.json +0 -31
- ads/pipeline/schema/pipeline_schema.json +0 -71
- ads/pipeline/visualizer/__init__.py +0 -5
- ads/pipeline/visualizer/base.py +0 -570
- ads/pipeline/visualizer/graph_renderer.py +0 -272
- ads/pipeline/visualizer/text_renderer.py +0 -84
- ads/secrets/__init__.py +0 -11
- ads/secrets/adb.py +0 -386
- ads/secrets/auth_token.py +0 -86
- ads/secrets/big_data_service.py +0 -365
- ads/secrets/mysqldb.py +0 -149
- ads/secrets/oracledb.py +0 -160
- ads/secrets/secrets.py +0 -407
- ads/telemetry/__init__.py +0 -7
- ads/telemetry/base.py +0 -69
- ads/telemetry/client.py +0 -125
- ads/telemetry/telemetry.py +0 -257
- ads/templates/dataflow_pyspark.jinja2 +0 -13
- ads/templates/dataflow_sparksql.jinja2 +0 -22
- ads/templates/func.jinja2 +0 -20
- ads/templates/schemas/openapi.json +0 -1740
- ads/templates/score-pkl.jinja2 +0 -173
- ads/templates/score.jinja2 +0 -322
- ads/templates/score_embedding_onnx.jinja2 +0 -202
- ads/templates/score_generic.jinja2 +0 -165
- ads/templates/score_huggingface_pipeline.jinja2 +0 -217
- ads/templates/score_lightgbm.jinja2 +0 -185
- ads/templates/score_onnx.jinja2 +0 -407
- ads/templates/score_onnx_new.jinja2 +0 -473
- ads/templates/score_oracle_automl.jinja2 +0 -185
- ads/templates/score_pyspark.jinja2 +0 -154
- ads/templates/score_pytorch.jinja2 +0 -219
- ads/templates/score_scikit-learn.jinja2 +0 -184
- ads/templates/score_tensorflow.jinja2 +0 -184
- ads/templates/score_xgboost.jinja2 +0 -178
- ads/text_dataset/__init__.py +0 -5
- ads/text_dataset/backends.py +0 -211
- ads/text_dataset/dataset.py +0 -445
- ads/text_dataset/extractor.py +0 -207
- ads/text_dataset/options.py +0 -53
- ads/text_dataset/udfs.py +0 -22
- ads/text_dataset/utils.py +0 -49
- ads/type_discovery/__init__.py +0 -9
- ads/type_discovery/abstract_detector.py +0 -21
- ads/type_discovery/constant_detector.py +0 -41
- ads/type_discovery/continuous_detector.py +0 -54
- ads/type_discovery/credit_card_detector.py +0 -99
- ads/type_discovery/datetime_detector.py +0 -92
- ads/type_discovery/discrete_detector.py +0 -118
- ads/type_discovery/document_detector.py +0 -146
- ads/type_discovery/ip_detector.py +0 -68
- ads/type_discovery/latlon_detector.py +0 -90
- ads/type_discovery/phone_number_detector.py +0 -63
- ads/type_discovery/type_discovery_driver.py +0 -87
- ads/type_discovery/typed_feature.py +0 -594
- ads/type_discovery/unknown_detector.py +0 -41
- ads/type_discovery/zipcode_detector.py +0 -48
- ads/vault/__init__.py +0 -7
- ads/vault/vault.py +0 -237
- oracle_ads-2.13.8.dist-info/RECORD +0 -858
- {oracle_ads-2.13.8.dist-info → oracle_ads-2.13.9rc0.dist-info}/licenses/LICENSE.txt +0 -0
ads/text_dataset/dataset.py
DELETED
@@ -1,445 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8; -*-
|
3
|
-
|
4
|
-
# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
|
5
|
-
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
-
|
7
|
-
import itertools
|
8
|
-
from typing import Any, Callable, Dict, Generator, List, Union
|
9
|
-
|
10
|
-
import ads
|
11
|
-
import ads.text_dataset.extractor as te
|
12
|
-
import fsspec
|
13
|
-
import pandas as pd
|
14
|
-
from ads.text_dataset import backends
|
15
|
-
from ads.text_dataset.options import OptionFactory, Options
|
16
|
-
from ads.text_dataset.udfs import UDF
|
17
|
-
from ads.text_dataset.utils import NotSupportedError
|
18
|
-
|
19
|
-
|
20
|
-
class DataLoader:
    r"""
    DataLoader binds engine, FileProcessor and File handler(in this case it is fsspec)
    together to produce a dataframe of parsed text from files.

    This class is expected to be used mainly from TextDatasetFactory class.

    Attributes
    ----------
    processor: `ads.text_dataset.extractor.FileProcessor`
        processor that is used for loading data.

    Examples
    --------
    >>> import os
    >>> import oci
    >>> from ads.text_dataset.dataset import TextDatasetFactory as textfactory
    >>> from ads.text_dataset.options import Options
    >>> df = textfactory.format('pdf').engine('pandas').read_line(
    ...     'oci://<bucket-name>@<namespace>/<path>/*.pdf',
    ...     storage_options={"config": oci.config.from_file(os.path.join("~/.oci", "config"))},
    ... )
    >>> data_gen = textfactory.format('pdf').option(Options.FILE_NAME).backend('pdfplumber').read_text(
    ...     'oci://<bucket-name>@<namespace>/<path>/*.pdf',
    ...     storage_options={"config": oci.config.from_file(os.path.join("~/.oci", "config"))},
    ... )
    >>> textfactory.format('docx').convert_to_text(
    ...     'oci://<bucket-name>@<namespace>/<path>/*.docx',
    ...     './extracted',
    ...     storage_options={"config": oci.config.from_file(os.path.join("~/.oci", "config"))},
    ... )
    >>> textfactory.format('docx').convert_to_text(
    ...     'oci://<bucket-name>@<namespace>/<path>/*.docx',
    ...     'oci://<bucket-name>@<namespace>/<out_path>',
    ...     storage_options={"config": oci.config.from_file(os.path.join("~/.oci", "config"))},
    ... )
    >>> meta_gen = textfactory.format('pdf').metadata_schema(
    ...     'oci://<bucket-name>@<namespace>/papers/*.pdf',
    ...     storage_options={"config": oci.config.from_file(os.path.join("~/.oci", "config"))},
    ... )
    >>> df = textfactory.format('pdf').engine('pandas').option(Options.FILE_METADATA, {'extract': ['Author']}).read_text(
    ...     'oci://<bucket-name>@<namespace>/<path>/*.pdf',
    ...     storage_options={"config": oci.config.from_file(os.path.join("~/.oci", "config"))},
    ...     total_files=10,
    ... )
    >>> df = textfactory.format('txt').engine('cudf').read_line(
    ...     'oci://<bucket-name>@<namespace>/<path>/*.log',
    ...     udf=r'^\[(\S+)\s(\S+)\s(\d+)\s(\d+\:\d+\:\d+)\s(\d+)]\s(\S+)\s(\S+)\s(\S+)\s(\S+)',
    ...     df_args={"columns":["day", "month", "date", "time", "year", "type", "method", "status", "file"]},
    ...     n_lines_per_file=10,
    ... )
    """

    def __init__(self, engine: str = None) -> None:
        """Initialize a DataLoader object.

        Parameters
        ----------
        engine : str, optional
            dataframe engine, by default None.

        Returns
        -------
        None
        """
        self.engine(engine)
        self.filemanager = fsspec
        # Instantiate the processor the same way `with_processor` does, so a
        # directly constructed loader holds a usable processor object rather
        # than the bare processor class.
        self.processor = te.FileProcessorFactory.get_processor("txt")()
        self.options = []
        self._data = None

    def with_processor(self, processor_type: str) -> "DataLoader":
        """Set file processor.

        Parameters
        ----------
        processor_type : str
            type of processor, which corresponds to format of the file.

        Returns
        -------
        DataLoader
            this loader, so that calls can be chained.
        """
        self.processor = te.FileProcessorFactory.get_processor(processor_type)()
        return self

    def engine(self, eng: str) -> "DataLoader":
        """Set engine for dataloader. Can be pandas or cudf.

        Parameters
        ----------
        eng : str
            name of engine. When None, the read methods hand back the raw
            data generator instead of a dataframe.

        Returns
        -------
        DataLoader
            this loader, so that calls can be chained.

        Raises
        ------
        NotSupportedError
            raises error if engine passed in is not supported.
        """
        if eng is None:
            self._engine = None
            # No engine: pass the generator through untouched (df_args ignored).
            self._format_output = lambda *args, **kwargs: args[0]
            return self

        if eng not in ["pandas", "cudf"]:
            raise NotSupportedError("Only pandas and cudf currently.")

        if eng == "pandas":
            import pandas

            self._engine = pandas
            self._format_output = pandas.DataFrame
        else:
            import cudf

            self._engine = cudf
            # cuDF cannot be initialized with a generator
            self._format_output = lambda data, **kwargs: cudf.DataFrame(
                list(data), **kwargs
            )
        return self

    def backend(self, backend: Union[str, backends.Base]) -> "DataLoader":
        """Set backend used for extracting text from files.

        Parameters
        ----------
        backend : (str | `ads.text_dataset.backends.Base`)
            backend for extracting text from raw files.

        Returns
        -------
        DataLoader
            this loader, so that calls can be chained.
        """
        self.processor.backend(backend)
        return self

    def option(self, opt: Options, spec: Any = None) -> "DataLoader":
        """Set extraction options.

        Parameters
        ----------
        opt : `ads.text_dataset.options.Options`
            an option defined in `ads.text_dataset.options.Options`
        spec : Any, optional
            specifications that will be passed to option handler, by default None

        Returns
        -------
        DataLoader
            this loader, so that calls can be chained.
        """
        self.options.append((OptionFactory.option_handler(opt), spec))
        return self

    def __load_data__(
        self,
        reader: Callable,
        path: str,
        udf: Union[str, Callable] = None,
        storage_options: Dict = None,
        encoding: str = "utf-8",
        n_rows_per_file: int = None,
        total_rows: int = None,
    ) -> Generator[Union[str, List[str]], None, None]:
        """Lazily produce one record per accepted row from every matching file.

        Parameters
        ----------
        reader : Callable
            callable that takes a file handler and returns an iterable of texts.
        path : str
            path to data files. can have glob pattern.
        udf : (callable | str), optional
            function (or regex, converted with `UDF.from_regex`) applied to each
            text; rows for which it returns None are dropped. by default None
        storage_options : dict, optional
            storage options for cloud storage, by default None
        encoding : str, optional
            encoding of files, by default 'utf-8'
        n_rows_per_file : int, optional
            max number of rows produced from each file, by default None
        total_rows : int, optional
            max number of rows produced from all files, by default None

        Returns
        -------
        Generator
            yields lists made of the option values followed by the row content.
        """
        storage_options = storage_options if storage_options is not None else {}
        fhs = self.filemanager.open_files(
            path, mode="rb", encoding=encoding, **storage_options
        )

        if udf is None:
            fn = lambda x: x
        elif isinstance(udf, str):
            fn = UDF.from_regex(udf)
        else:
            fn = udf

        # Rows produced so far across all files of this call.
        total_line_count = 0

        def cap_reached(line_count: int) -> bool:
            # True once the overall cap or the per-file cap has been hit.
            if total_rows is not None and total_line_count >= total_rows:
                return True
            return n_rows_per_file is not None and line_count >= n_rows_per_file

        # function to apply to each element
        def func(fh, reader):
            nonlocal total_line_count
            # Nothing more can be produced: skip the file without reading it.
            if cap_reached(0):
                return
            out = [option(self).handle(fh, spec) for option, spec in self.options]
            line_count = 0
            for text in reader(fh):
                content = fn(text)
                if content is None:
                    continue
                if isinstance(content, (list, tuple)):
                    yield out + list(content)
                else:
                    yield out + [content]
                line_count += 1
                total_line_count += 1
                # Stop pulling from the reader as soon as a cap is reached
                # instead of scanning the rest of the file for nothing.
                if cap_reached(line_count):
                    break

        return itertools.chain.from_iterable(func(fh, reader) for fh in fhs)

    def read_line(
        self,
        path: str,
        udf: Union[str, Callable] = None,
        n_lines_per_file: int = None,
        total_lines: int = None,
        df_args: Dict = None,
        storage_options: Dict = None,
        encoding: str = "utf-8",
    ) -> Union[Generator[Union[str, List[str]], None, None], "DataFrame"]:
        """Read each file into lines. If path matches multiple files, will combine lines from all files.

        Parameters
        ----------
        path : str
            path to data files. can have glob pattern.
        udf : (callable | str), optional
            user defined function for processing each line, can be a callable or regex, by default None
        n_lines_per_file : int, optional
            max number of lines read from each file, by default None
        total_lines : int, optional
            max number of lines read from all files, by default None
        df_args : dict, optional
            arguments passed to dataframe engine (e.g. pandas), by default None
        storage_options : dict, optional
            storage options for cloud storage, by default None
        encoding : str, optional
            encoding of files, by default 'utf-8'

        Returns
        -------
        (Generator | DataFrame)
            returns either a data generator or a dataframe.
        """
        df_args = df_args if df_args is not None else {}
        self._data = self.__load_data__(
            self.processor.read_line,
            path,
            udf,
            storage_options,
            encoding,
            n_lines_per_file,
            total_lines,
        )
        return self._format_output(self._data, **df_args)

    def read_text(
        self,
        path: str,
        udf: Union[str, Callable] = None,
        total_files: int = None,
        storage_options: Dict = None,
        df_args: Dict = None,
        encoding: str = "utf-8",
    ) -> Union[Generator[Union[str, List[str]], None, None], "DataFrame"]:
        """Read each file into a text string. If path matches multiple files, each file corresponds to one record.

        Parameters
        ----------
        path : str
            path to data files. can have glob pattern.
        udf : (callable | str), optional
            user defined function for processing the text of each file, can be a callable or regex, by default None
        total_files : int, optional
            max number of files to read, by default None
        df_args : dict, optional
            arguments passed to dataframe engine (e.g. pandas), by default None
        storage_options : dict, optional
            storage options for cloud storage, by default None
        encoding : str, optional
            encoding of files, by default 'utf-8'

        Returns
        -------
        (Generator | DataFrame)
            returns either a data generator or a dataframe.
        """
        df_args = df_args if df_args is not None else {}
        self._data = self.__load_data__(
            self.processor.read_text,
            path,
            udf,
            storage_options,
            encoding,
            1,  # at most one record per file
            total_files,
        )
        return self._format_output(self._data, **df_args)

    def convert_to_text(
        self,
        src_path: str,
        dst_path: str,
        encoding: str = "utf-8",
        storage_options: Dict = None,
    ) -> None:
        """Convert files to plain text files.

        Parameters
        ----------
        src_path : str
            path to source data file(s). can use glob pattern
        dst_path: str
            local folder or cloud storage (e.g., OCI object storage) prefix to save converted text files
        encoding: str, optional
            encoding for files, by default utf-8
        storage_options : Dict, optional
            storage options for cloud storage, by default None

        Returns
        -------
        None
        """
        storage_options = storage_options if storage_options is not None else {}
        fhs = self.filemanager.open_files(
            src_path, mode="rb", encoding=encoding, **storage_options
        )
        for fh in fhs:
            self.processor.convert_to_text(
                fh,
                dst_path,
                storage_options=storage_options,
            )

    def metadata_all(
        self, path: str, storage_options: Dict = None, encoding: str = "utf-8"
    ) -> Generator[Dict[str, Any], None, None]:
        """Get metadata of all files that matches the given path. Return a generator.

        Parameters
        ----------
        path : str
            path to data files. can use glob pattern.
        storage_options : Dict, optional
            storage options for cloud storage, by default None
        encoding : str, optional
            encoding of files, by default 'utf-8'

        Returns
        -------
        Generator
            generator of extracted metadata from files.
        """
        storage_options = storage_options if storage_options is not None else {}
        fhs = self.filemanager.open_files(
            path, mode="rb", encoding=encoding, **storage_options
        )
        return (self.processor.get_metadata(fh) for fh in fhs)

    def metadata_schema(
        self,
        path: str,
        n_files: int = 1,
        storage_options: Dict = None,
        encoding: str = "utf-8",
    ) -> List[str]:
        """
        Get available fields in metadata by looking at the first `n_files` that
        matches the given path.

        Parameters
        ----------
        path: str
            path to data files. can have glob pattern
        n_files: int, optional
            number of files to look up, default to be 1
        storage_options: dict, optional
            storage options for cloud storage, by default None
        encoding: str, optional
            encoding of files, by default utf-8

        Returns
        -------
        List[str]
            list of available fields in metadata (in no particular order)
        """
        metadata = self.metadata_all(
            path, storage_options=storage_options, encoding=encoding
        )
        fields = set()
        # islice simply stops when fewer than `n_files` files match; the
        # max() keeps a negative `n_files` meaning "look at no files".
        for file_metadata in itertools.islice(metadata, max(n_files, 0)):
            fields.update(file_metadata.keys())
        return list(fields)

    # ----- not currently used, but in case we want to consider chaining in the future -----
    def _transform(self, udf, udf_type="fn"):  # pragma: no cover
        """Apply `udf` to every row of the loaded data, dropping None results."""
        if udf_type == "fn":
            func = UDF.from_lambda(udf)
        elif udf_type == "regex":
            func = UDF.from_regex(udf)
        else:
            raise NotImplementedError("Other types of UDF not yet supported.")

        # convert df into iterator
        # NOTE(review): the Series branch also goes through `iterrows()`, which
        # looks DataFrame-only in pandas -- confirm before relying on it.
        if isinstance(self._data, pd.DataFrame) or isinstance(self._data, pd.Series):
            self._data = (
                row.values if len(row.values) > 1 else row.values[0]
                for i, row in self._data.iterrows()
            )

        self._data = (func(row) for row in self._data)
        self._data = (row for row in self._data if row is not None)
        return self
|
423
|
-
|
424
|
-
|
425
|
-
class TextDatasetFactory:
    """Entry point that hands out a `DataLoader` configured for a file format."""

    @staticmethod
    def format(format_name: str) -> DataLoader:
        """
        Build a fresh `DataLoader` and equip it with the file processor that
        matches `format_name` (for instance the PDF processor for 'pdf').

        Parameters
        ----------
        format_name : str
            name of format

        Returns
        -------
        `ads.text_dataset.dataset.DataLoader`
            a `DataLoader` object.
        """
        loader = DataLoader()
        return loader.with_processor(format_name)
|
ads/text_dataset/extractor.py
DELETED
@@ -1,207 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8; -*-
|
3
|
-
|
4
|
-
# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
|
5
|
-
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
-
|
7
|
-
import logging
|
8
|
-
import os
|
9
|
-
from typing import Dict, Generator, List, Union
|
10
|
-
|
11
|
-
from ads.text_dataset import backends
|
12
|
-
from ads.text_dataset.backends import OITCC, Base, PDFPlumber, Tika
|
13
|
-
from ads.text_dataset.utils import NotSupportedError
|
14
|
-
from fsspec.core import OpenFile
|
15
|
-
|
16
|
-
logger = logging.getLogger("ads.text_dataset")
|
17
|
-
|
18
|
-
|
19
|
-
class FileProcessor:
|
20
|
-
"""
|
21
|
-
Base class for all the file processor. Files are opened using fsspec library.
|
22
|
-
The default implementation in the base class assumes text files.
|
23
|
-
|
24
|
-
This class is expected to be used inside `ads.text_dataset.dataset.DataLoader`.
|
25
|
-
"""
|
26
|
-
|
27
|
-
backend_map = {"default": Base, "tika": Tika}
|
28
|
-
|
29
|
-
def __init__(self, backend: Union[str, backends.Base] = "default") -> None:
|
30
|
-
self.backend(backend)
|
31
|
-
|
32
|
-
def backend(self, backend: Union[str, backends.Base]) -> None:
|
33
|
-
"""Set backend for file processor.
|
34
|
-
|
35
|
-
Parameters
|
36
|
-
----------
|
37
|
-
backend : `ads.text_dataset.backends.Base`
|
38
|
-
a backend for file processor
|
39
|
-
|
40
|
-
Returns
|
41
|
-
-------
|
42
|
-
None
|
43
|
-
|
44
|
-
Raises
|
45
|
-
------
|
46
|
-
NotSupportedError
|
47
|
-
when specified backend is not supported.
|
48
|
-
"""
|
49
|
-
if isinstance(backend, str) and backend in self.backend_map:
|
50
|
-
self._backend = self.backend_map[backend]()
|
51
|
-
elif isinstance(backend, Base):
|
52
|
-
self._backend = backend
|
53
|
-
else:
|
54
|
-
raise NotSupportedError(
|
55
|
-
f"backend {backend} is not recognized or not a subclass of ads.text_dataset.backends.Base."
|
56
|
-
)
|
57
|
-
return self
|
58
|
-
|
59
|
-
def read_line(
|
60
|
-
self, fhandler: OpenFile, **format_reader_kwargs: Dict
|
61
|
-
) -> Generator[Union[str, List[str]], None, None]:
|
62
|
-
"""Yields lines from a file.
|
63
|
-
|
64
|
-
Parameters
|
65
|
-
----------
|
66
|
-
fhandler : `fsspec.core.OpenFile`
|
67
|
-
file handler returned by `fsspec`
|
68
|
-
|
69
|
-
Returns
|
70
|
-
-------
|
71
|
-
Generator
|
72
|
-
a generator that yields lines from a file
|
73
|
-
"""
|
74
|
-
return self._backend.read_line(fhandler, **format_reader_kwargs)
|
75
|
-
|
76
|
-
def read_text(
|
77
|
-
self, fhandler: OpenFile, **format_reader_kwargs: Dict
|
78
|
-
) -> Generator[Union[str, List[str]], None, None]:
|
79
|
-
"""Yield contents from the entire file.
|
80
|
-
|
81
|
-
Parameters
|
82
|
-
----------
|
83
|
-
fhandler : `fsspec.core.OpenFile`
|
84
|
-
a file handler returned by fsspec
|
85
|
-
|
86
|
-
Returns
|
87
|
-
-------
|
88
|
-
Generator
|
89
|
-
a generator that yield text from a file
|
90
|
-
"""
|
91
|
-
return self._backend.read_text(fhandler, **format_reader_kwargs)
|
92
|
-
|
93
|
-
def convert_to_text(
|
94
|
-
self,
|
95
|
-
fhandler: OpenFile,
|
96
|
-
dst_path: str,
|
97
|
-
fname: str = None,
|
98
|
-
storage_options: Dict = None,
|
99
|
-
) -> str:
|
100
|
-
"""Convert input file to a text file.
|
101
|
-
|
102
|
-
Parameters
|
103
|
-
----------
|
104
|
-
fhandler : `fsspec.core.OpenFile`
|
105
|
-
a file handler returned by `fsspec`
|
106
|
-
dst_path: str
|
107
|
-
local folder or cloud storage (e.g. OCI object storage) prefix to save converted text files
|
108
|
-
fname: str, optional
|
109
|
-
filename for converted output, relative to dirname or prefix, by default None
|
110
|
-
storage_options: dict, optional
|
111
|
-
storage options for cloud storage, by default None
|
112
|
-
|
113
|
-
Returns
|
114
|
-
-------
|
115
|
-
str
|
116
|
-
path to saved output
|
117
|
-
"""
|
118
|
-
return self._backend.convert_to_text(fhandler, dst_path, fname, storage_options)
|
119
|
-
|
120
|
-
def get_metadata(self, fhandler: OpenFile) -> Dict:
    """Fetch the metadata of a file from the configured backend.

    Parameters
    ----------
    fhandler : `fsspec.core.OpenFile`
        a file handler returned by fsspec

    Returns
    -------
    dict
        dictionary of metadata
    """
    backend = self._backend
    return backend.get_metadata(fhandler)
|
134
|
-
|
135
|
-
|
136
|
-
class PDFProcessor(FileProcessor):
    """Processor that extracts text content from PDF files."""

    # Backend name -> backend class; "default" points at the Tika backend.
    backend_map = {
        "tika": Tika,
        "pdfplumber": PDFPlumber,
        "default": Tika,
    }
|
142
|
-
|
143
|
-
|
144
|
-
class WordProcessor(FileProcessor):
    """Processor that extracts text content from doc or docx files."""

    # Backend name -> backend class; Tika is the only backend listed here.
    backend_map = {
        "default": Tika,
        "tika": Tika,
    }
|
150
|
-
|
151
|
-
|
152
|
-
class FileProcessorFactory:
    """Factory that manages all file processors.

    Provides functionality to get a processor corresponding to a given file type,
    or register custom processor for a specific file format.

    Examples
    --------
    >>> from ads.text_dataset.extractor import FileProcessor, FileProcessorFactory
    >>> FileProcessorFactory.get_processor('pdf')
    >>> class CustomProcessor(FileProcessor):
    ...     # custom logic here
    ...     pass
    >>> FileProcessorFactory.register('new_format', CustomProcessor)
    """

    # File format -> processor class used for that format.
    processor_map = {
        "pdf": PDFProcessor,
        "docx": WordProcessor,
        "doc": WordProcessor,
        "txt": FileProcessor,
    }

    @classmethod
    def register(cls, fmt: str, processor: FileProcessor) -> None:
        """Register custom file processor for a file format.

        Parameters
        ----------
        fmt : str
            file format
        processor : `FileProcessor`
            custom processor class; must be a subclass of `FileProcessor`

        Raises
        ------
        TypeError
            raised when processor is not a subclass of `FileProcessor`.
        """
        # `issubclass` raises its own "arg 1 must be a class" TypeError when
        # handed a non-class (e.g. an instance), so rule that out first and
        # always report the documented message.
        is_processor_class = isinstance(processor, type) and issubclass(
            processor, FileProcessor
        )
        if not is_processor_class:
            raise TypeError("Processor must inherit from FileProcessor class.")
        cls.processor_map[fmt] = processor

    @staticmethod
    def get_processor(format):
        """Look up the processor class registered for a file format.

        Parameters
        ----------
        format : str
            file format, e.g. ``"pdf"``

        Returns
        -------
        type
            the processor class registered for `format`; the generic
            `FileProcessor` (after logging a warning) when the format is
            not registered.
        """
        try:
            return FileProcessorFactory.processor_map[format]
        except KeyError:
            # Single-line message with lazy formatting: a triple-quoted
            # f-string would leak source indentation and newlines into the log.
            logger.warning(
                "Format %s is not supported natively. "
                "A generic FileProcessor is returned. "
                "You can define and register a custom processor.",
                format,
            )
            return FileProcessor
|
ads/text_dataset/options.py
DELETED
@@ -1,53 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8; -*-
|
3
|
-
|
4
|
-
# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
|
5
|
-
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
-
|
7
|
-
from enum import Enum, auto
|
8
|
-
from typing import Any, List, Dict
|
9
|
-
|
10
|
-
from fsspec.core import OpenFile
|
11
|
-
|
12
|
-
|
13
|
-
class Options(Enum):
    """Identifiers for the per-file options that have a registered handler."""

    # Mapped to `FileOption` in `OptionFactory.option_handlers`.
    FILE_NAME = auto()
    # Mapped to `MetadataOption` in `OptionFactory.option_handlers`.
    FILE_METADATA = auto()
|
16
|
-
|
17
|
-
|
18
|
-
class OptionHandler:
    """Base class for option handlers.

    Stores the data loader it was created with; concrete handlers are
    expected to override `handle`.
    """

    def __init__(self, dataloader: "ads.text_dataset.dataset.DataLoader") -> None:
        """Keep a reference to the data loader on the instance."""
        self.dataloader = dataloader

    def handle(self, fhandler: OpenFile, spec: Any) -> Any:
        """Process an option for one file; not implemented on the base class.

        Raises
        ------
        NotImplementedError
            always, on this base class.
        """
        raise NotImplementedError()
|
24
|
-
|
25
|
-
|
26
|
-
class FileOption(OptionHandler):
    """Option handler that reports the path of the file handler."""

    def handle(self, fhandler: OpenFile, spec: Any) -> Any:
        """Return `fhandler.path`; `spec` is not used."""
        file_path = fhandler.path
        return file_path
|
29
|
-
|
30
|
-
|
31
|
-
class MetadataOption(OptionHandler):
    """Option handler that extracts selected metadata fields of a file."""

    def handle(self, fhandler: OpenFile, spec: Dict) -> List:
        """Return the metadata values for the keys listed in `spec["extract"]`.

        Metadata comes from the data loader's processor. A key that is absent
        from the metadata yields None, and values are returned in the order
        of `spec["extract"]`.
        """
        file_metadata = self.dataloader.processor.get_metadata(fhandler)
        extracted = []
        for key in spec["extract"]:
            extracted.append(file_metadata.get(key, None))
        return extracted
|
35
|
-
|
36
|
-
|
37
|
-
class OptionFactory:
    """Registry that maps `Options` members to their handlers."""

    # Option -> handler registered for it (the handler classes themselves).
    option_handlers = {
        Options.FILE_NAME: FileOption,
        Options.FILE_METADATA: MetadataOption,
    }

    @staticmethod
    def option_handler(option: Options) -> OptionHandler:
        """Return the handler registered for `option`.

        The registered object is returned as stored in `option_handlers`.

        Raises
        ------
        RuntimeError
            when nothing is registered for `option`.
        """
        registered = OptionFactory.option_handlers.get(option, None)
        if registered is not None:
            return registered
        raise RuntimeError(f"Option {option} Not Recognized.")

    @classmethod
    def register_option(cls, option: Options, handler) -> None:
        """Register `handler` for `option`, replacing any existing entry."""
        cls.option_handlers[option] = handler
|