oracle-ads 2.13.9rc0__py3-none-any.whl → 2.13.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ads/aqua/__init__.py +40 -0
- ads/aqua/app.py +507 -0
- ads/aqua/cli.py +96 -0
- ads/aqua/client/__init__.py +3 -0
- ads/aqua/client/client.py +836 -0
- ads/aqua/client/openai_client.py +305 -0
- ads/aqua/common/__init__.py +5 -0
- ads/aqua/common/decorator.py +125 -0
- ads/aqua/common/entities.py +274 -0
- ads/aqua/common/enums.py +134 -0
- ads/aqua/common/errors.py +109 -0
- ads/aqua/common/utils.py +1295 -0
- ads/aqua/config/__init__.py +4 -0
- ads/aqua/config/container_config.py +246 -0
- ads/aqua/config/evaluation/__init__.py +4 -0
- ads/aqua/config/evaluation/evaluation_service_config.py +147 -0
- ads/aqua/config/utils/__init__.py +4 -0
- ads/aqua/config/utils/serializer.py +339 -0
- ads/aqua/constants.py +116 -0
- ads/aqua/data.py +14 -0
- ads/aqua/dummy_data/icon.txt +1 -0
- ads/aqua/dummy_data/oci_model_deployments.json +56 -0
- ads/aqua/dummy_data/oci_models.json +1 -0
- ads/aqua/dummy_data/readme.md +26 -0
- ads/aqua/evaluation/__init__.py +8 -0
- ads/aqua/evaluation/constants.py +53 -0
- ads/aqua/evaluation/entities.py +186 -0
- ads/aqua/evaluation/errors.py +70 -0
- ads/aqua/evaluation/evaluation.py +1814 -0
- ads/aqua/extension/__init__.py +42 -0
- ads/aqua/extension/aqua_ws_msg_handler.py +76 -0
- ads/aqua/extension/base_handler.py +90 -0
- ads/aqua/extension/common_handler.py +121 -0
- ads/aqua/extension/common_ws_msg_handler.py +36 -0
- ads/aqua/extension/deployment_handler.py +381 -0
- ads/aqua/extension/deployment_ws_msg_handler.py +54 -0
- ads/aqua/extension/errors.py +30 -0
- ads/aqua/extension/evaluation_handler.py +129 -0
- ads/aqua/extension/evaluation_ws_msg_handler.py +61 -0
- ads/aqua/extension/finetune_handler.py +96 -0
- ads/aqua/extension/model_handler.py +390 -0
- ads/aqua/extension/models/__init__.py +0 -0
- ads/aqua/extension/models/ws_models.py +145 -0
- ads/aqua/extension/models_ws_msg_handler.py +50 -0
- ads/aqua/extension/ui_handler.py +300 -0
- ads/aqua/extension/ui_websocket_handler.py +130 -0
- ads/aqua/extension/utils.py +133 -0
- ads/aqua/finetuning/__init__.py +7 -0
- ads/aqua/finetuning/constants.py +23 -0
- ads/aqua/finetuning/entities.py +181 -0
- ads/aqua/finetuning/finetuning.py +749 -0
- ads/aqua/model/__init__.py +8 -0
- ads/aqua/model/constants.py +60 -0
- ads/aqua/model/entities.py +385 -0
- ads/aqua/model/enums.py +32 -0
- ads/aqua/model/model.py +2134 -0
- ads/aqua/model/utils.py +52 -0
- ads/aqua/modeldeployment/__init__.py +6 -0
- ads/aqua/modeldeployment/constants.py +10 -0
- ads/aqua/modeldeployment/deployment.py +1315 -0
- ads/aqua/modeldeployment/entities.py +653 -0
- ads/aqua/modeldeployment/utils.py +543 -0
- ads/aqua/resources/gpu_shapes_index.json +94 -0
- ads/aqua/server/__init__.py +4 -0
- ads/aqua/server/__main__.py +24 -0
- ads/aqua/server/app.py +47 -0
- ads/aqua/server/aqua_spec.yml +1291 -0
- ads/aqua/training/__init__.py +4 -0
- ads/aqua/training/exceptions.py +476 -0
- ads/aqua/ui.py +519 -0
- ads/automl/__init__.py +9 -0
- ads/automl/driver.py +330 -0
- ads/automl/provider.py +975 -0
- ads/bds/__init__.py +5 -0
- ads/bds/auth.py +127 -0
- ads/bds/big_data_service.py +255 -0
- ads/catalog/__init__.py +19 -0
- ads/catalog/model.py +1576 -0
- ads/catalog/notebook.py +461 -0
- ads/catalog/project.py +468 -0
- ads/catalog/summary.py +178 -0
- ads/common/__init__.py +11 -0
- ads/common/analyzer.py +65 -0
- ads/common/artifact/.model-ignore +63 -0
- ads/common/artifact/__init__.py +10 -0
- ads/common/auth.py +1122 -0
- ads/common/card_identifier.py +83 -0
- ads/common/config.py +647 -0
- ads/common/data.py +165 -0
- ads/common/decorator/__init__.py +9 -0
- ads/common/decorator/argument_to_case.py +88 -0
- ads/common/decorator/deprecate.py +69 -0
- ads/common/decorator/require_nonempty_arg.py +65 -0
- ads/common/decorator/runtime_dependency.py +178 -0
- ads/common/decorator/threaded.py +97 -0
- ads/common/decorator/utils.py +35 -0
- ads/common/dsc_file_system.py +303 -0
- ads/common/error.py +14 -0
- ads/common/extended_enum.py +81 -0
- ads/common/function/__init__.py +5 -0
- ads/common/function/fn_util.py +142 -0
- ads/common/function/func_conf.yaml +25 -0
- ads/common/ipython.py +76 -0
- ads/common/model.py +679 -0
- ads/common/model_artifact.py +1759 -0
- ads/common/model_artifact_schema.json +107 -0
- ads/common/model_export_util.py +664 -0
- ads/common/model_metadata.py +24 -0
- ads/common/object_storage_details.py +296 -0
- ads/common/oci_client.py +179 -0
- ads/common/oci_datascience.py +46 -0
- ads/common/oci_logging.py +1144 -0
- ads/common/oci_mixin.py +957 -0
- ads/common/oci_resource.py +136 -0
- ads/common/serializer.py +559 -0
- ads/common/utils.py +1852 -0
- ads/common/word_lists.py +1491 -0
- ads/common/work_request.py +189 -0
- ads/config.py +1 -0
- ads/data_labeling/__init__.py +13 -0
- ads/data_labeling/boundingbox.py +253 -0
- ads/data_labeling/constants.py +47 -0
- ads/data_labeling/data_labeling_service.py +244 -0
- ads/data_labeling/interface/__init__.py +5 -0
- ads/data_labeling/interface/loader.py +16 -0
- ads/data_labeling/interface/parser.py +16 -0
- ads/data_labeling/interface/reader.py +23 -0
- ads/data_labeling/loader/__init__.py +5 -0
- ads/data_labeling/loader/file_loader.py +241 -0
- ads/data_labeling/metadata.py +110 -0
- ads/data_labeling/mixin/__init__.py +5 -0
- ads/data_labeling/mixin/data_labeling.py +232 -0
- ads/data_labeling/ner.py +129 -0
- ads/data_labeling/parser/__init__.py +5 -0
- ads/data_labeling/parser/dls_record_parser.py +388 -0
- ads/data_labeling/parser/export_metadata_parser.py +94 -0
- ads/data_labeling/parser/export_record_parser.py +473 -0
- ads/data_labeling/reader/__init__.py +5 -0
- ads/data_labeling/reader/dataset_reader.py +574 -0
- ads/data_labeling/reader/dls_record_reader.py +121 -0
- ads/data_labeling/reader/export_record_reader.py +62 -0
- ads/data_labeling/reader/jsonl_reader.py +75 -0
- ads/data_labeling/reader/metadata_reader.py +203 -0
- ads/data_labeling/reader/record_reader.py +263 -0
- ads/data_labeling/record.py +52 -0
- ads/data_labeling/visualizer/__init__.py +5 -0
- ads/data_labeling/visualizer/image_visualizer.py +525 -0
- ads/data_labeling/visualizer/text_visualizer.py +357 -0
- ads/database/__init__.py +5 -0
- ads/database/connection.py +338 -0
- ads/dataset/__init__.py +10 -0
- ads/dataset/capabilities.md +51 -0
- ads/dataset/classification_dataset.py +339 -0
- ads/dataset/correlation.py +226 -0
- ads/dataset/correlation_plot.py +563 -0
- ads/dataset/dask_series.py +173 -0
- ads/dataset/dataframe_transformer.py +110 -0
- ads/dataset/dataset.py +1979 -0
- ads/dataset/dataset_browser.py +360 -0
- ads/dataset/dataset_with_target.py +995 -0
- ads/dataset/exception.py +25 -0
- ads/dataset/factory.py +987 -0
- ads/dataset/feature_engineering_transformer.py +35 -0
- ads/dataset/feature_selection.py +107 -0
- ads/dataset/forecasting_dataset.py +26 -0
- ads/dataset/helper.py +1450 -0
- ads/dataset/label_encoder.py +99 -0
- ads/dataset/mixin/__init__.py +5 -0
- ads/dataset/mixin/dataset_accessor.py +134 -0
- ads/dataset/pipeline.py +58 -0
- ads/dataset/plot.py +710 -0
- ads/dataset/progress.py +86 -0
- ads/dataset/recommendation.py +297 -0
- ads/dataset/recommendation_transformer.py +502 -0
- ads/dataset/regression_dataset.py +14 -0
- ads/dataset/sampled_dataset.py +1050 -0
- ads/dataset/target.py +98 -0
- ads/dataset/timeseries.py +18 -0
- ads/dbmixin/__init__.py +5 -0
- ads/dbmixin/db_pandas_accessor.py +153 -0
- ads/environment/__init__.py +9 -0
- ads/environment/ml_runtime.py +66 -0
- ads/evaluations/README.md +14 -0
- ads/evaluations/__init__.py +109 -0
- ads/evaluations/evaluation_plot.py +983 -0
- ads/evaluations/evaluator.py +1334 -0
- ads/evaluations/statistical_metrics.py +543 -0
- ads/experiments/__init__.py +9 -0
- ads/experiments/capabilities.md +0 -0
- ads/explanations/__init__.py +21 -0
- ads/explanations/base_explainer.py +142 -0
- ads/explanations/capabilities.md +83 -0
- ads/explanations/explainer.py +190 -0
- ads/explanations/mlx_global_explainer.py +1050 -0
- ads/explanations/mlx_interface.py +386 -0
- ads/explanations/mlx_local_explainer.py +287 -0
- ads/explanations/mlx_whatif_explainer.py +201 -0
- ads/feature_engineering/__init__.py +20 -0
- ads/feature_engineering/accessor/__init__.py +5 -0
- ads/feature_engineering/accessor/dataframe_accessor.py +535 -0
- ads/feature_engineering/accessor/mixin/__init__.py +5 -0
- ads/feature_engineering/accessor/mixin/correlation.py +166 -0
- ads/feature_engineering/accessor/mixin/eda_mixin.py +266 -0
- ads/feature_engineering/accessor/mixin/eda_mixin_series.py +85 -0
- ads/feature_engineering/accessor/mixin/feature_types_mixin.py +211 -0
- ads/feature_engineering/accessor/mixin/utils.py +65 -0
- ads/feature_engineering/accessor/series_accessor.py +431 -0
- ads/feature_engineering/adsimage/__init__.py +5 -0
- ads/feature_engineering/adsimage/image.py +192 -0
- ads/feature_engineering/adsimage/image_reader.py +170 -0
- ads/feature_engineering/adsimage/interface/__init__.py +5 -0
- ads/feature_engineering/adsimage/interface/reader.py +19 -0
- ads/feature_engineering/adsstring/__init__.py +7 -0
- ads/feature_engineering/adsstring/oci_language/__init__.py +8 -0
- ads/feature_engineering/adsstring/string/__init__.py +8 -0
- ads/feature_engineering/data_schema.json +57 -0
- ads/feature_engineering/dataset/__init__.py +5 -0
- ads/feature_engineering/dataset/zip_code_data.py +42062 -0
- ads/feature_engineering/exceptions.py +40 -0
- ads/feature_engineering/feature_type/__init__.py +133 -0
- ads/feature_engineering/feature_type/address.py +184 -0
- ads/feature_engineering/feature_type/adsstring/__init__.py +5 -0
- ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +164 -0
- ads/feature_engineering/feature_type/adsstring/oci_language.py +93 -0
- ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +5 -0
- ads/feature_engineering/feature_type/adsstring/parsers/base.py +47 -0
- ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +96 -0
- ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +221 -0
- ads/feature_engineering/feature_type/adsstring/string.py +258 -0
- ads/feature_engineering/feature_type/base.py +58 -0
- ads/feature_engineering/feature_type/boolean.py +183 -0
- ads/feature_engineering/feature_type/category.py +146 -0
- ads/feature_engineering/feature_type/constant.py +137 -0
- ads/feature_engineering/feature_type/continuous.py +151 -0
- ads/feature_engineering/feature_type/creditcard.py +314 -0
- ads/feature_engineering/feature_type/datetime.py +190 -0
- ads/feature_engineering/feature_type/discrete.py +134 -0
- ads/feature_engineering/feature_type/document.py +43 -0
- ads/feature_engineering/feature_type/gis.py +251 -0
- ads/feature_engineering/feature_type/handler/__init__.py +5 -0
- ads/feature_engineering/feature_type/handler/feature_validator.py +524 -0
- ads/feature_engineering/feature_type/handler/feature_warning.py +319 -0
- ads/feature_engineering/feature_type/handler/warnings.py +128 -0
- ads/feature_engineering/feature_type/integer.py +142 -0
- ads/feature_engineering/feature_type/ip_address.py +144 -0
- ads/feature_engineering/feature_type/ip_address_v4.py +138 -0
- ads/feature_engineering/feature_type/ip_address_v6.py +138 -0
- ads/feature_engineering/feature_type/lat_long.py +256 -0
- ads/feature_engineering/feature_type/object.py +43 -0
- ads/feature_engineering/feature_type/ordinal.py +132 -0
- ads/feature_engineering/feature_type/phone_number.py +135 -0
- ads/feature_engineering/feature_type/string.py +171 -0
- ads/feature_engineering/feature_type/text.py +93 -0
- ads/feature_engineering/feature_type/unknown.py +43 -0
- ads/feature_engineering/feature_type/zip_code.py +164 -0
- ads/feature_engineering/feature_type_manager.py +406 -0
- ads/feature_engineering/schema.py +795 -0
- ads/feature_engineering/utils.py +245 -0
- ads/feature_store/.readthedocs.yaml +19 -0
- ads/feature_store/README.md +65 -0
- ads/feature_store/__init__.py +9 -0
- ads/feature_store/common/__init__.py +0 -0
- ads/feature_store/common/enums.py +339 -0
- ads/feature_store/common/exceptions.py +18 -0
- ads/feature_store/common/spark_session_singleton.py +125 -0
- ads/feature_store/common/utils/__init__.py +0 -0
- ads/feature_store/common/utils/base64_encoder_decoder.py +72 -0
- ads/feature_store/common/utils/feature_schema_mapper.py +283 -0
- ads/feature_store/common/utils/transformation_utils.py +82 -0
- ads/feature_store/common/utils/utility.py +403 -0
- ads/feature_store/data_validation/__init__.py +0 -0
- ads/feature_store/data_validation/great_expectation.py +129 -0
- ads/feature_store/dataset.py +1230 -0
- ads/feature_store/dataset_job.py +530 -0
- ads/feature_store/docs/Dockerfile +7 -0
- ads/feature_store/docs/Makefile +44 -0
- ads/feature_store/docs/conf.py +28 -0
- ads/feature_store/docs/requirements.txt +14 -0
- ads/feature_store/docs/source/ads.feature_store.query.rst +20 -0
- ads/feature_store/docs/source/cicd.rst +137 -0
- ads/feature_store/docs/source/conf.py +86 -0
- ads/feature_store/docs/source/data_versioning.rst +33 -0
- ads/feature_store/docs/source/dataset.rst +388 -0
- ads/feature_store/docs/source/dataset_job.rst +27 -0
- ads/feature_store/docs/source/demo.rst +70 -0
- ads/feature_store/docs/source/entity.rst +78 -0
- ads/feature_store/docs/source/feature_group.rst +624 -0
- ads/feature_store/docs/source/feature_group_job.rst +29 -0
- ads/feature_store/docs/source/feature_store.rst +122 -0
- ads/feature_store/docs/source/feature_store_class.rst +123 -0
- ads/feature_store/docs/source/feature_validation.rst +66 -0
- ads/feature_store/docs/source/figures/cicd.png +0 -0
- ads/feature_store/docs/source/figures/data_validation.png +0 -0
- ads/feature_store/docs/source/figures/data_versioning.png +0 -0
- ads/feature_store/docs/source/figures/dataset.gif +0 -0
- ads/feature_store/docs/source/figures/dataset.png +0 -0
- ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
- ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
- ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
- ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
- ads/feature_store/docs/source/figures/entity.png +0 -0
- ads/feature_store/docs/source/figures/feature_group.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
- ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
- ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
- ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
- ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
- ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
- ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
- ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
- ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
- ads/feature_store/docs/source/figures/overview.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
- ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
- ads/feature_store/docs/source/figures/stats_1.png +0 -0
- ads/feature_store/docs/source/figures/stats_2.png +0 -0
- ads/feature_store/docs/source/figures/stats_d.png +0 -0
- ads/feature_store/docs/source/figures/stats_fg.png +0 -0
- ads/feature_store/docs/source/figures/transformation.png +0 -0
- ads/feature_store/docs/source/figures/transformations.gif +0 -0
- ads/feature_store/docs/source/figures/validation.png +0 -0
- ads/feature_store/docs/source/figures/validation_fg.png +0 -0
- ads/feature_store/docs/source/figures/validation_results.png +0 -0
- ads/feature_store/docs/source/figures/validation_summary.png +0 -0
- ads/feature_store/docs/source/index.rst +81 -0
- ads/feature_store/docs/source/module.rst +8 -0
- ads/feature_store/docs/source/notebook.rst +94 -0
- ads/feature_store/docs/source/overview.rst +47 -0
- ads/feature_store/docs/source/quickstart.rst +176 -0
- ads/feature_store/docs/source/release_notes.rst +194 -0
- ads/feature_store/docs/source/setup_feature_store.rst +81 -0
- ads/feature_store/docs/source/statistics.rst +58 -0
- ads/feature_store/docs/source/transformation.rst +199 -0
- ads/feature_store/docs/source/ui.rst +65 -0
- ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +66 -0
- ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +192 -0
- ads/feature_store/docs/source/user_guides.setup.terraform.rst +338 -0
- ads/feature_store/entity.py +718 -0
- ads/feature_store/execution_strategy/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
- ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +375 -0
- ads/feature_store/execution_strategy/engine/__init__.py +0 -0
- ads/feature_store/execution_strategy/engine/spark_engine.py +316 -0
- ads/feature_store/execution_strategy/execution_strategy.py +113 -0
- ads/feature_store/execution_strategy/execution_strategy_provider.py +47 -0
- ads/feature_store/execution_strategy/spark/__init__.py +0 -0
- ads/feature_store/execution_strategy/spark/spark_execution.py +618 -0
- ads/feature_store/feature.py +192 -0
- ads/feature_store/feature_group.py +1494 -0
- ads/feature_store/feature_group_expectation.py +346 -0
- ads/feature_store/feature_group_job.py +602 -0
- ads/feature_store/feature_lineage/__init__.py +0 -0
- ads/feature_store/feature_lineage/graphviz_service.py +180 -0
- ads/feature_store/feature_option_details.py +50 -0
- ads/feature_store/feature_statistics/__init__.py +0 -0
- ads/feature_store/feature_statistics/statistics_service.py +99 -0
- ads/feature_store/feature_store.py +699 -0
- ads/feature_store/feature_store_registrar.py +518 -0
- ads/feature_store/input_feature_detail.py +149 -0
- ads/feature_store/mixin/__init__.py +4 -0
- ads/feature_store/mixin/oci_feature_store.py +145 -0
- ads/feature_store/model_details.py +73 -0
- ads/feature_store/query/__init__.py +0 -0
- ads/feature_store/query/filter.py +266 -0
- ads/feature_store/query/generator/__init__.py +0 -0
- ads/feature_store/query/generator/query_generator.py +298 -0
- ads/feature_store/query/join.py +161 -0
- ads/feature_store/query/query.py +403 -0
- ads/feature_store/query/validator/__init__.py +0 -0
- ads/feature_store/query/validator/query_validator.py +57 -0
- ads/feature_store/response/__init__.py +0 -0
- ads/feature_store/response/response_builder.py +68 -0
- ads/feature_store/service/__init__.py +0 -0
- ads/feature_store/service/oci_dataset.py +139 -0
- ads/feature_store/service/oci_dataset_job.py +199 -0
- ads/feature_store/service/oci_entity.py +125 -0
- ads/feature_store/service/oci_feature_group.py +164 -0
- ads/feature_store/service/oci_feature_group_job.py +214 -0
- ads/feature_store/service/oci_feature_store.py +182 -0
- ads/feature_store/service/oci_lineage.py +87 -0
- ads/feature_store/service/oci_transformation.py +104 -0
- ads/feature_store/statistics/__init__.py +0 -0
- ads/feature_store/statistics/abs_feature_value.py +49 -0
- ads/feature_store/statistics/charts/__init__.py +0 -0
- ads/feature_store/statistics/charts/abstract_feature_plot.py +37 -0
- ads/feature_store/statistics/charts/box_plot.py +148 -0
- ads/feature_store/statistics/charts/frequency_distribution.py +65 -0
- ads/feature_store/statistics/charts/probability_distribution.py +68 -0
- ads/feature_store/statistics/charts/top_k_frequent_elements.py +98 -0
- ads/feature_store/statistics/feature_stat.py +126 -0
- ads/feature_store/statistics/generic_feature_value.py +33 -0
- ads/feature_store/statistics/statistics.py +41 -0
- ads/feature_store/statistics_config.py +101 -0
- ads/feature_store/templates/feature_store_template.yaml +45 -0
- ads/feature_store/transformation.py +499 -0
- ads/feature_store/validation_output.py +57 -0
- ads/hpo/__init__.py +9 -0
- ads/hpo/_imports.py +91 -0
- ads/hpo/ads_search_space.py +439 -0
- ads/hpo/distributions.py +325 -0
- ads/hpo/objective.py +280 -0
- ads/hpo/search_cv.py +1657 -0
- ads/hpo/stopping_criterion.py +75 -0
- ads/hpo/tuner_artifact.py +413 -0
- ads/hpo/utils.py +91 -0
- ads/hpo/validation.py +140 -0
- ads/hpo/visualization/__init__.py +5 -0
- ads/hpo/visualization/_contour.py +23 -0
- ads/hpo/visualization/_edf.py +20 -0
- ads/hpo/visualization/_intermediate_values.py +21 -0
- ads/hpo/visualization/_optimization_history.py +25 -0
- ads/hpo/visualization/_parallel_coordinate.py +169 -0
- ads/hpo/visualization/_param_importances.py +26 -0
- ads/jobs/__init__.py +53 -0
- ads/jobs/ads_job.py +663 -0
- ads/jobs/builders/__init__.py +5 -0
- ads/jobs/builders/base.py +156 -0
- ads/jobs/builders/infrastructure/__init__.py +6 -0
- ads/jobs/builders/infrastructure/base.py +165 -0
- ads/jobs/builders/infrastructure/dataflow.py +1252 -0
- ads/jobs/builders/infrastructure/dsc_job.py +1894 -0
- ads/jobs/builders/infrastructure/dsc_job_runtime.py +1233 -0
- ads/jobs/builders/infrastructure/utils.py +65 -0
- ads/jobs/builders/runtimes/__init__.py +5 -0
- ads/jobs/builders/runtimes/artifact.py +338 -0
- ads/jobs/builders/runtimes/base.py +325 -0
- ads/jobs/builders/runtimes/container_runtime.py +242 -0
- ads/jobs/builders/runtimes/python_runtime.py +1016 -0
- ads/jobs/builders/runtimes/pytorch_runtime.py +204 -0
- ads/jobs/cli.py +104 -0
- ads/jobs/env_var_parser.py +131 -0
- ads/jobs/extension.py +160 -0
- ads/jobs/schema/__init__.py +5 -0
- ads/jobs/schema/infrastructure_schema.json +116 -0
- ads/jobs/schema/job_schema.json +42 -0
- ads/jobs/schema/runtime_schema.json +183 -0
- ads/jobs/schema/validator.py +141 -0
- ads/jobs/serializer.py +296 -0
- ads/jobs/templates/__init__.py +5 -0
- ads/jobs/templates/container.py +6 -0
- ads/jobs/templates/driver_notebook.py +177 -0
- ads/jobs/templates/driver_oci.py +500 -0
- ads/jobs/templates/driver_python.py +48 -0
- ads/jobs/templates/driver_pytorch.py +852 -0
- ads/jobs/templates/driver_utils.py +615 -0
- ads/jobs/templates/hostname_from_env.c +55 -0
- ads/jobs/templates/oci_metrics.py +181 -0
- ads/jobs/utils.py +104 -0
- ads/llm/__init__.py +28 -0
- ads/llm/autogen/__init__.py +2 -0
- ads/llm/autogen/constants.py +15 -0
- ads/llm/autogen/reports/__init__.py +2 -0
- ads/llm/autogen/reports/base.py +67 -0
- ads/llm/autogen/reports/data.py +103 -0
- ads/llm/autogen/reports/session.py +526 -0
- ads/llm/autogen/reports/templates/chat_box.html +13 -0
- ads/llm/autogen/reports/templates/chat_box_lt.html +5 -0
- ads/llm/autogen/reports/templates/chat_box_rt.html +6 -0
- ads/llm/autogen/reports/utils.py +56 -0
- ads/llm/autogen/v02/__init__.py +4 -0
- ads/llm/autogen/v02/client.py +295 -0
- ads/llm/autogen/v02/log_handlers/__init__.py +2 -0
- ads/llm/autogen/v02/log_handlers/oci_file_handler.py +83 -0
- ads/llm/autogen/v02/loggers/__init__.py +6 -0
- ads/llm/autogen/v02/loggers/metric_logger.py +320 -0
- ads/llm/autogen/v02/loggers/session_logger.py +580 -0
- ads/llm/autogen/v02/loggers/utils.py +86 -0
- ads/llm/autogen/v02/runtime_logging.py +163 -0
- ads/llm/chain.py +268 -0
- ads/llm/chat_template.py +31 -0
- ads/llm/deploy.py +63 -0
- ads/llm/guardrails/__init__.py +5 -0
- ads/llm/guardrails/base.py +442 -0
- ads/llm/guardrails/huggingface.py +44 -0
- ads/llm/langchain/__init__.py +5 -0
- ads/llm/langchain/plugins/__init__.py +5 -0
- ads/llm/langchain/plugins/chat_models/__init__.py +5 -0
- ads/llm/langchain/plugins/chat_models/oci_data_science.py +1027 -0
- ads/llm/langchain/plugins/embeddings/__init__.py +4 -0
- ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +184 -0
- ads/llm/langchain/plugins/llms/__init__.py +5 -0
- ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +979 -0
- ads/llm/requirements.txt +3 -0
- ads/llm/serialize.py +219 -0
- ads/llm/serializers/__init__.py +0 -0
- ads/llm/serializers/retrieval_qa.py +153 -0
- ads/llm/serializers/runnable_parallel.py +27 -0
- ads/llm/templates/score_chain.jinja2 +155 -0
- ads/llm/templates/tool_chat_template_hermes.jinja +130 -0
- ads/llm/templates/tool_chat_template_mistral_parallel.jinja +94 -0
- ads/model/__init__.py +52 -0
- ads/model/artifact.py +573 -0
- ads/model/artifact_downloader.py +254 -0
- ads/model/artifact_uploader.py +267 -0
- ads/model/base_properties.py +238 -0
- ads/model/common/.model-ignore +66 -0
- ads/model/common/__init__.py +5 -0
- ads/model/common/utils.py +142 -0
- ads/model/datascience_model.py +2635 -0
- ads/model/deployment/__init__.py +20 -0
- ads/model/deployment/common/__init__.py +5 -0
- ads/model/deployment/common/utils.py +308 -0
- ads/model/deployment/model_deployer.py +466 -0
- ads/model/deployment/model_deployment.py +1846 -0
- ads/model/deployment/model_deployment_infrastructure.py +671 -0
- ads/model/deployment/model_deployment_properties.py +493 -0
- ads/model/deployment/model_deployment_runtime.py +838 -0
- ads/model/extractor/__init__.py +5 -0
- ads/model/extractor/automl_extractor.py +74 -0
- ads/model/extractor/embedding_onnx_extractor.py +80 -0
- ads/model/extractor/huggingface_extractor.py +88 -0
- ads/model/extractor/keras_extractor.py +84 -0
- ads/model/extractor/lightgbm_extractor.py +93 -0
- ads/model/extractor/model_info_extractor.py +114 -0
- ads/model/extractor/model_info_extractor_factory.py +105 -0
- ads/model/extractor/pytorch_extractor.py +87 -0
- ads/model/extractor/sklearn_extractor.py +112 -0
- ads/model/extractor/spark_extractor.py +89 -0
- ads/model/extractor/tensorflow_extractor.py +85 -0
- ads/model/extractor/xgboost_extractor.py +94 -0
- ads/model/framework/__init__.py +5 -0
- ads/model/framework/automl_model.py +178 -0
- ads/model/framework/embedding_onnx_model.py +438 -0
- ads/model/framework/huggingface_model.py +399 -0
- ads/model/framework/lightgbm_model.py +266 -0
- ads/model/framework/pytorch_model.py +266 -0
- ads/model/framework/sklearn_model.py +250 -0
- ads/model/framework/spark_model.py +326 -0
- ads/model/framework/tensorflow_model.py +254 -0
- ads/model/framework/xgboost_model.py +258 -0
- ads/model/generic_model.py +3518 -0
- ads/model/model_artifact_boilerplate/README.md +381 -0
- ads/model/model_artifact_boilerplate/__init__.py +5 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +5 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +427 -0
- ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +2 -0
- ads/model/model_artifact_boilerplate/runtime.yaml +7 -0
- ads/model/model_artifact_boilerplate/score.py +61 -0
- ads/model/model_file_description_schema.json +68 -0
- ads/model/model_introspect.py +331 -0
- ads/model/model_metadata.py +1810 -0
- ads/model/model_metadata_mixin.py +460 -0
- ads/model/model_properties.py +63 -0
- ads/model/model_version_set.py +739 -0
- ads/model/runtime/__init__.py +5 -0
- ads/model/runtime/env_info.py +306 -0
- ads/model/runtime/model_deployment_details.py +37 -0
- ads/model/runtime/model_provenance_details.py +58 -0
- ads/model/runtime/runtime_info.py +81 -0
- ads/model/runtime/schemas/inference_env_info_schema.yaml +16 -0
- ads/model/runtime/schemas/model_provenance_schema.yaml +36 -0
- ads/model/runtime/schemas/training_env_info_schema.yaml +16 -0
- ads/model/runtime/utils.py +201 -0
- ads/model/serde/__init__.py +5 -0
- ads/model/serde/common.py +40 -0
- ads/model/serde/model_input.py +547 -0
- ads/model/serde/model_serializer.py +1184 -0
- ads/model/service/__init__.py +5 -0
- ads/model/service/oci_datascience_model.py +1076 -0
- ads/model/service/oci_datascience_model_deployment.py +500 -0
- ads/model/service/oci_datascience_model_version_set.py +176 -0
- ads/model/transformer/__init__.py +5 -0
- ads/model/transformer/onnx_transformer.py +324 -0
- ads/mysqldb/__init__.py +5 -0
- ads/mysqldb/mysql_db.py +227 -0
- ads/opctl/__init__.py +18 -0
- ads/opctl/anomaly_detection.py +11 -0
- ads/opctl/backend/__init__.py +5 -0
- ads/opctl/backend/ads_dataflow.py +353 -0
- ads/opctl/backend/ads_ml_job.py +710 -0
- ads/opctl/backend/ads_ml_pipeline.py +164 -0
- ads/opctl/backend/ads_model_deployment.py +209 -0
- ads/opctl/backend/base.py +146 -0
- ads/opctl/backend/local.py +1053 -0
- ads/opctl/backend/marketplace/__init__.py +9 -0
- ads/opctl/backend/marketplace/helm_helper.py +173 -0
- ads/opctl/backend/marketplace/local_marketplace.py +271 -0
- ads/opctl/backend/marketplace/marketplace_backend_runner.py +71 -0
- ads/opctl/backend/marketplace/marketplace_operator_interface.py +44 -0
- ads/opctl/backend/marketplace/marketplace_operator_runner.py +24 -0
- ads/opctl/backend/marketplace/marketplace_utils.py +212 -0
- ads/opctl/backend/marketplace/models/__init__.py +5 -0
- ads/opctl/backend/marketplace/models/bearer_token.py +94 -0
- ads/opctl/backend/marketplace/models/marketplace_type.py +70 -0
- ads/opctl/backend/marketplace/models/ocir_details.py +56 -0
- ads/opctl/backend/marketplace/prerequisite_checker.py +238 -0
- ads/opctl/cli.py +707 -0
- ads/opctl/cmds.py +869 -0
- ads/opctl/conda/__init__.py +5 -0
- ads/opctl/conda/cli.py +193 -0
- ads/opctl/conda/cmds.py +749 -0
- ads/opctl/conda/config.yaml +34 -0
- ads/opctl/conda/manifest_template.yaml +13 -0
- ads/opctl/conda/multipart_uploader.py +188 -0
- ads/opctl/conda/pack.py +89 -0
- ads/opctl/config/__init__.py +5 -0
- ads/opctl/config/base.py +57 -0
- ads/opctl/config/diagnostics/__init__.py +5 -0
- ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +62 -0
- ads/opctl/config/merger.py +255 -0
- ads/opctl/config/resolver.py +297 -0
- ads/opctl/config/utils.py +79 -0
- ads/opctl/config/validator.py +17 -0
- ads/opctl/config/versioner.py +68 -0
- ads/opctl/config/yaml_parsers/__init__.py +7 -0
- ads/opctl/config/yaml_parsers/base.py +58 -0
- ads/opctl/config/yaml_parsers/distributed/__init__.py +7 -0
- ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +201 -0
- ads/opctl/constants.py +66 -0
- ads/opctl/decorator/__init__.py +5 -0
- ads/opctl/decorator/common.py +129 -0
- ads/opctl/diagnostics/__init__.py +5 -0
- ads/opctl/diagnostics/__main__.py +25 -0
- ads/opctl/diagnostics/check_distributed_job_requirements.py +212 -0
- ads/opctl/diagnostics/check_requirements.py +144 -0
- ads/opctl/diagnostics/requirement_exception.py +9 -0
- ads/opctl/distributed/README.md +109 -0
- ads/opctl/distributed/__init__.py +5 -0
- ads/opctl/distributed/certificates.py +32 -0
- ads/opctl/distributed/cli.py +207 -0
- ads/opctl/distributed/cmds.py +731 -0
- ads/opctl/distributed/common/__init__.py +5 -0
- ads/opctl/distributed/common/abstract_cluster_provider.py +449 -0
- ads/opctl/distributed/common/abstract_framework_spec_builder.py +88 -0
- ads/opctl/distributed/common/cluster_config_helper.py +103 -0
- ads/opctl/distributed/common/cluster_provider_factory.py +21 -0
- ads/opctl/distributed/common/cluster_runner.py +54 -0
- ads/opctl/distributed/common/framework_factory.py +29 -0
- ads/opctl/docker/Dockerfile.job +103 -0
- ads/opctl/docker/Dockerfile.job.arm +107 -0
- ads/opctl/docker/Dockerfile.job.gpu +175 -0
- ads/opctl/docker/base-env.yaml +13 -0
- ads/opctl/docker/cuda.repo +6 -0
- ads/opctl/docker/operator/.dockerignore +0 -0
- ads/opctl/docker/operator/Dockerfile +41 -0
- ads/opctl/docker/operator/Dockerfile.gpu +85 -0
- ads/opctl/docker/operator/cuda.repo +6 -0
- ads/opctl/docker/operator/environment.yaml +8 -0
- ads/opctl/forecast.py +11 -0
- ads/opctl/index.yaml +3 -0
- ads/opctl/model/__init__.py +5 -0
- ads/opctl/model/cli.py +65 -0
- ads/opctl/model/cmds.py +73 -0
- ads/opctl/operator/README.md +4 -0
- ads/opctl/operator/__init__.py +31 -0
- ads/opctl/operator/cli.py +344 -0
- ads/opctl/operator/cmd.py +596 -0
- ads/opctl/operator/common/__init__.py +5 -0
- ads/opctl/operator/common/backend_factory.py +460 -0
- ads/opctl/operator/common/const.py +27 -0
- ads/opctl/operator/common/data/synthetic.csv +16001 -0
- ads/opctl/operator/common/dictionary_merger.py +148 -0
- ads/opctl/operator/common/errors.py +42 -0
- ads/opctl/operator/common/operator_config.py +99 -0
- ads/opctl/operator/common/operator_loader.py +811 -0
- ads/opctl/operator/common/operator_schema.yaml +130 -0
- ads/opctl/operator/common/operator_yaml_generator.py +152 -0
- ads/opctl/operator/common/utils.py +208 -0
- ads/opctl/operator/lowcode/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/MLoperator +16 -0
- ads/opctl/operator/lowcode/anomaly/README.md +207 -0
- ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/__main__.py +103 -0
- ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
- ads/opctl/operator/lowcode/anomaly/const.py +167 -0
- ads/opctl/operator/lowcode/anomaly/environment.yaml +10 -0
- ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +146 -0
- ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +162 -0
- ads/opctl/operator/lowcode/anomaly/model/automlx.py +99 -0
- ads/opctl/operator/lowcode/anomaly/model/autots.py +115 -0
- ads/opctl/operator/lowcode/anomaly/model/base_model.py +404 -0
- ads/opctl/operator/lowcode/anomaly/model/factory.py +110 -0
- ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +78 -0
- ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +78 -0
- ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +120 -0
- ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
- ads/opctl/operator/lowcode/anomaly/operator_config.py +127 -0
- ads/opctl/operator/lowcode/anomaly/schema.yaml +401 -0
- ads/opctl/operator/lowcode/anomaly/utils.py +88 -0
- ads/opctl/operator/lowcode/common/__init__.py +5 -0
- ads/opctl/operator/lowcode/common/const.py +10 -0
- ads/opctl/operator/lowcode/common/data.py +116 -0
- ads/opctl/operator/lowcode/common/errors.py +47 -0
- ads/opctl/operator/lowcode/common/transformations.py +296 -0
- ads/opctl/operator/lowcode/common/utils.py +384 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +13 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/README.md +30 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +5 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +116 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +85 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/const.py +15 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +4 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +32 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +43 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +120 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +34 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +386 -0
- ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +160 -0
- ads/opctl/operator/lowcode/forecast/MLoperator +25 -0
- ads/opctl/operator/lowcode/forecast/README.md +209 -0
- ads/opctl/operator/lowcode/forecast/__init__.py +5 -0
- ads/opctl/operator/lowcode/forecast/__main__.py +89 -0
- ads/opctl/operator/lowcode/forecast/cmd.py +40 -0
- ads/opctl/operator/lowcode/forecast/const.py +92 -0
- ads/opctl/operator/lowcode/forecast/environment.yaml +20 -0
- ads/opctl/operator/lowcode/forecast/errors.py +26 -0
- ads/opctl/operator/lowcode/forecast/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/forecast/model/arima.py +279 -0
- ads/opctl/operator/lowcode/forecast/model/automlx.py +553 -0
- ads/opctl/operator/lowcode/forecast/model/autots.py +312 -0
- ads/opctl/operator/lowcode/forecast/model/base_model.py +875 -0
- ads/opctl/operator/lowcode/forecast/model/factory.py +106 -0
- ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +492 -0
- ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +243 -0
- ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +482 -0
- ads/opctl/operator/lowcode/forecast/model/prophet.py +450 -0
- ads/opctl/operator/lowcode/forecast/model_evaluator.py +244 -0
- ads/opctl/operator/lowcode/forecast/operator_config.py +234 -0
- ads/opctl/operator/lowcode/forecast/schema.yaml +506 -0
- ads/opctl/operator/lowcode/forecast/utils.py +397 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +7 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +285 -0
- ads/opctl/operator/lowcode/forecast/whatifserve/score.py +246 -0
- ads/opctl/operator/lowcode/pii/MLoperator +17 -0
- ads/opctl/operator/lowcode/pii/README.md +208 -0
- ads/opctl/operator/lowcode/pii/__init__.py +5 -0
- ads/opctl/operator/lowcode/pii/__main__.py +78 -0
- ads/opctl/operator/lowcode/pii/cmd.py +39 -0
- ads/opctl/operator/lowcode/pii/constant.py +84 -0
- ads/opctl/operator/lowcode/pii/environment.yaml +17 -0
- ads/opctl/operator/lowcode/pii/errors.py +27 -0
- ads/opctl/operator/lowcode/pii/model/__init__.py +5 -0
- ads/opctl/operator/lowcode/pii/model/factory.py +82 -0
- ads/opctl/operator/lowcode/pii/model/guardrails.py +167 -0
- ads/opctl/operator/lowcode/pii/model/pii.py +145 -0
- ads/opctl/operator/lowcode/pii/model/processor/__init__.py +34 -0
- ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +34 -0
- ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +35 -0
- ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +225 -0
- ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +73 -0
- ads/opctl/operator/lowcode/pii/model/processor/remover.py +26 -0
- ads/opctl/operator/lowcode/pii/model/report.py +487 -0
- ads/opctl/operator/lowcode/pii/operator_config.py +95 -0
- ads/opctl/operator/lowcode/pii/schema.yaml +108 -0
- ads/opctl/operator/lowcode/pii/utils.py +43 -0
- ads/opctl/operator/lowcode/recommender/MLoperator +16 -0
- ads/opctl/operator/lowcode/recommender/README.md +206 -0
- ads/opctl/operator/lowcode/recommender/__init__.py +5 -0
- ads/opctl/operator/lowcode/recommender/__main__.py +82 -0
- ads/opctl/operator/lowcode/recommender/cmd.py +33 -0
- ads/opctl/operator/lowcode/recommender/constant.py +30 -0
- ads/opctl/operator/lowcode/recommender/environment.yaml +11 -0
- ads/opctl/operator/lowcode/recommender/model/base_model.py +212 -0
- ads/opctl/operator/lowcode/recommender/model/factory.py +56 -0
- ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +25 -0
- ads/opctl/operator/lowcode/recommender/model/svd.py +106 -0
- ads/opctl/operator/lowcode/recommender/operator_config.py +81 -0
- ads/opctl/operator/lowcode/recommender/schema.yaml +265 -0
- ads/opctl/operator/lowcode/recommender/utils.py +13 -0
- ads/opctl/operator/runtime/__init__.py +5 -0
- ads/opctl/operator/runtime/const.py +17 -0
- ads/opctl/operator/runtime/container_runtime_schema.yaml +50 -0
- ads/opctl/operator/runtime/marketplace_runtime.py +50 -0
- ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +21 -0
- ads/opctl/operator/runtime/python_runtime_schema.yaml +21 -0
- ads/opctl/operator/runtime/runtime.py +115 -0
- ads/opctl/schema.yaml.yml +36 -0
- ads/opctl/script.py +40 -0
- ads/opctl/spark/__init__.py +5 -0
- ads/opctl/spark/cli.py +43 -0
- ads/opctl/spark/cmds.py +147 -0
- ads/opctl/templates/diagnostic_report_template.jinja2 +102 -0
- ads/opctl/utils.py +344 -0
- ads/oracledb/__init__.py +5 -0
- ads/oracledb/oracle_db.py +346 -0
- ads/pipeline/__init__.py +39 -0
- ads/pipeline/ads_pipeline.py +2279 -0
- ads/pipeline/ads_pipeline_run.py +772 -0
- ads/pipeline/ads_pipeline_step.py +605 -0
- ads/pipeline/builders/__init__.py +5 -0
- ads/pipeline/builders/infrastructure/__init__.py +5 -0
- ads/pipeline/builders/infrastructure/custom_script.py +32 -0
- ads/pipeline/cli.py +119 -0
- ads/pipeline/extension.py +291 -0
- ads/pipeline/schema/__init__.py +5 -0
- ads/pipeline/schema/cs_step_schema.json +35 -0
- ads/pipeline/schema/ml_step_schema.json +31 -0
- ads/pipeline/schema/pipeline_schema.json +71 -0
- ads/pipeline/visualizer/__init__.py +5 -0
- ads/pipeline/visualizer/base.py +570 -0
- ads/pipeline/visualizer/graph_renderer.py +272 -0
- ads/pipeline/visualizer/text_renderer.py +84 -0
- ads/secrets/__init__.py +11 -0
- ads/secrets/adb.py +386 -0
- ads/secrets/auth_token.py +86 -0
- ads/secrets/big_data_service.py +365 -0
- ads/secrets/mysqldb.py +149 -0
- ads/secrets/oracledb.py +160 -0
- ads/secrets/secrets.py +407 -0
- ads/telemetry/__init__.py +7 -0
- ads/telemetry/base.py +69 -0
- ads/telemetry/client.py +122 -0
- ads/telemetry/telemetry.py +257 -0
- ads/templates/dataflow_pyspark.jinja2 +13 -0
- ads/templates/dataflow_sparksql.jinja2 +22 -0
- ads/templates/func.jinja2 +20 -0
- ads/templates/schemas/openapi.json +1740 -0
- ads/templates/score-pkl.jinja2 +173 -0
- ads/templates/score.jinja2 +322 -0
- ads/templates/score_embedding_onnx.jinja2 +202 -0
- ads/templates/score_generic.jinja2 +165 -0
- ads/templates/score_huggingface_pipeline.jinja2 +217 -0
- ads/templates/score_lightgbm.jinja2 +185 -0
- ads/templates/score_onnx.jinja2 +407 -0
- ads/templates/score_onnx_new.jinja2 +473 -0
- ads/templates/score_oracle_automl.jinja2 +185 -0
- ads/templates/score_pyspark.jinja2 +154 -0
- ads/templates/score_pytorch.jinja2 +219 -0
- ads/templates/score_scikit-learn.jinja2 +184 -0
- ads/templates/score_tensorflow.jinja2 +184 -0
- ads/templates/score_xgboost.jinja2 +178 -0
- ads/text_dataset/__init__.py +5 -0
- ads/text_dataset/backends.py +211 -0
- ads/text_dataset/dataset.py +445 -0
- ads/text_dataset/extractor.py +207 -0
- ads/text_dataset/options.py +53 -0
- ads/text_dataset/udfs.py +22 -0
- ads/text_dataset/utils.py +49 -0
- ads/type_discovery/__init__.py +9 -0
- ads/type_discovery/abstract_detector.py +21 -0
- ads/type_discovery/constant_detector.py +41 -0
- ads/type_discovery/continuous_detector.py +54 -0
- ads/type_discovery/credit_card_detector.py +99 -0
- ads/type_discovery/datetime_detector.py +92 -0
- ads/type_discovery/discrete_detector.py +118 -0
- ads/type_discovery/document_detector.py +146 -0
- ads/type_discovery/ip_detector.py +68 -0
- ads/type_discovery/latlon_detector.py +90 -0
- ads/type_discovery/phone_number_detector.py +63 -0
- ads/type_discovery/type_discovery_driver.py +87 -0
- ads/type_discovery/typed_feature.py +594 -0
- ads/type_discovery/unknown_detector.py +41 -0
- ads/type_discovery/zipcode_detector.py +48 -0
- ads/vault/__init__.py +7 -0
- ads/vault/vault.py +237 -0
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/METADATA +150 -149
- oracle_ads-2.13.10.dist-info/RECORD +858 -0
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/WHEEL +1 -2
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/entry_points.txt +2 -1
- oracle_ads-2.13.9rc0.dist-info/RECORD +0 -9
- oracle_ads-2.13.9rc0.dist-info/top_level.txt +0 -1
- {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/licenses/LICENSE.txt +0 -0
ads/text_dataset/udfs.py
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8; -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
import re
|
8
|
+
from typing import Callable
|
9
|
+
|
10
|
+
|
11
|
+
class UDF:
    """Factory for user-defined functions that are applied to text content."""

    @staticmethod
    def from_regex(regex: str) -> Callable:
        """Build a callable that matches ``regex`` at the start of its input.

        The returned callable gives back ``None`` when the pattern does not
        match, a one-element list holding the whole match when the pattern has
        no capture groups, and the tuple of captured groups otherwise.
        """

        def function(content):
            found = re.match(regex, content)
            if not found:
                return None
            captured = found.groups()
            # no capture groups -> fall back to the full match, wrapped in a list
            return captured if captured else [found.group(0)]

        return function
|
@@ -0,0 +1,49 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8; -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2021, 2022 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
import functools
|
8
|
+
import os
|
9
|
+
|
10
|
+
|
11
|
+
class NotSupportedError(Exception):  # pragma: no cover
    """Signals that a requested capability is not supported."""
|
13
|
+
|
14
|
+
|
15
|
+
def experimental(cls):
    """Wrap ``cls`` so that every instantiation prints an experimental notice.

    The returned callable forwards its arguments to ``cls``, prints the notice
    once construction has succeeded, and returns the new instance. The
    wrapper carries the metadata of ``cls`` (name, docstring, ...).
    """

    def wrapper(*args, **kwargs):
        created = cls(*args, **kwargs)
        print(f"{cls.__name__} is experimental and may be removed in the future.")
        return created

    return functools.update_wrapper(wrapper, cls)
|
23
|
+
|
24
|
+
|
25
|
+
class PY4JGateway:
    """Context manager around a py4j ``JavaGateway`` for the text-extraction jar.

    Entering the context yields the connected gateway; leaving it shuts the
    gateway down.
    """

    def __init__(self) -> None:
        """Launch the Java gateway process and connect to it.

        Raises
        ------
        ModuleNotFoundError
            If ``py4j`` cannot be imported.
        NotSupportedError
            If ``CONDA_PREFIX`` is not set or ``text-extraction-tools.jar``
            does not exist under it.
        """
        try:
            from py4j.java_gateway import GatewayParameters, JavaGateway, launch_gateway
        except ModuleNotFoundError as err:
            # chain the original error so the failing import stays visible
            raise ModuleNotFoundError("py4j is not installed.") from err

        # resolve the jar location once and reuse it for the check and the launch
        conda_prefix = os.environ.get("CONDA_PREFIX")
        jar_path = (
            None
            if conda_prefix is None
            else os.path.join(conda_prefix, "text-extraction-tools.jar")
        )
        if jar_path is None or not os.path.exists(jar_path):
            raise NotSupportedError(
                "Tika is not supported in this distribution. Use alternatives such as pdfplumber."
            )

        port = launch_gateway(java_path="/usr/bin/java", classpath=jar_path)
        self.gateway = JavaGateway(gateway_parameters=GatewayParameters(port=port))

    def __enter__(self):
        """Return the connected gateway for use inside the ``with`` block."""
        return self.gateway

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Shut the gateway down; exceptions from the body are not suppressed."""
        self.gateway.shutdown()
|
@@ -0,0 +1,9 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
import logging
|
8
|
+
|
9
|
+
# Module-level logger; the detector modules import it via
# ``from ads.type_discovery import logger``.
logger = logging.getLogger(__name__)
|
@@ -0,0 +1,21 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8; -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
from __future__ import print_function, absolute_import
|
8
|
+
|
9
|
+
import abc
|
10
|
+
|
11
|
+
|
12
|
+
class AbstractTypeDiscoveryDetector(metaclass=abc.ABCMeta):
    """Abstract interface for type detectors; subclasses implement ``discover``."""

    @abc.abstractmethod
    def discover(self, name, series):
        """Inspect ``series`` (the column called ``name``); the base does nothing."""
        return None
|
16
|
+
|
17
|
+
|
18
|
+
class DiscreteDiscoveryDetector(AbstractTypeDiscoveryDetector, metaclass=abc.ABCMeta):
    """Abstract base for discrete-type detectors; ``discover`` stays abstract."""

    @abc.abstractmethod
    def discover(self, name, series):
        """Inspect ``series`` (the column called ``name``); the base does nothing."""
        return None
|
@@ -0,0 +1,41 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8; -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
from __future__ import print_function, absolute_import, division
|
8
|
+
|
9
|
+
import pandas as pd
|
10
|
+
|
11
|
+
from ads.type_discovery import logger
|
12
|
+
from ads.type_discovery.abstract_detector import AbstractTypeDiscoveryDetector
|
13
|
+
from ads.type_discovery.typed_feature import ConstantTypedFeature
|
14
|
+
|
15
|
+
|
16
|
+
class ConstantDetector(AbstractTypeDiscoveryDetector):
    """Detect columns whose non-null values are all the same (or all missing)."""

    def is_constant(self, name, values):
        """Return True when ``values`` is empty or has exactly one distinct value.

        ``values`` is expected to have had its nulls removed already, so an
        empty input means the column was entirely null and is treated as
        constant.
        """
        if values.size == 0:
            return True
        return values.nunique() == 1

    def discover(self, name, series):
        """Return a ``ConstantTypedFeature`` for a constant column, else ``False``."""
        non_null = series.loc[series.notnull()]

        if not self.is_constant(name, non_null):
            return False

        logger.debug("column [{}]/[{}] Constant".format(name, series.dtype))
        return ConstantTypedFeature.build(name, series)
|
32
|
+
|
33
|
+
|
34
|
+
if __name__ == "__main__":
    # ad-hoc demo: in-memory columns first, then one column of a remote CSV
    detector = ConstantDetector()
    print(detector.discover("zipcodes", pd.Series([None, "94065", "94065", "94065", None])))
    print(detector.discover("years", pd.Series([2008, 2008, 2008, 2008, 2008])))
    flights_url = (
        "https://artifacthub.oraclecorp.com/dsc-generic/advanced-ds/datasets/flights.csv"
    )
    flights = pd.read_csv(flights_url)
    print(detector.discover("flights_years", flights["Year"]))
|
@@ -0,0 +1,54 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8; -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
|
8
|
+
from __future__ import print_function, absolute_import, division
|
9
|
+
|
10
|
+
import pandas as pd
|
11
|
+
|
12
|
+
from ads.type_discovery import logger
|
13
|
+
from ads.type_discovery.abstract_detector import AbstractTypeDiscoveryDetector
|
14
|
+
from ads.type_discovery.typed_feature import ContinuousTypedFeature
|
15
|
+
from ads.common import utils
|
16
|
+
|
17
|
+
|
18
|
+
class ContinuousDetector(AbstractTypeDiscoveryDetector):
    """Detect columns that hold continuous (numeric) values."""

    @staticmethod
    def _target_is_continuous(series):
        """Return True when a target column should be treated as continuous.

        Float columns always qualify; int16/int32/int64 columns qualify only
        when they have at least 20 distinct values.
        """
        dtype_name = str(series.dtype)
        if dtype_name in ["float16", "float32", "float64"]:
            return True  # treat target variable as continuous
        if dtype_name in ["int16", "int32", "int64"]:
            return series.nunique() >= 20  # many distinct ints -> continuous
        return False

    def _is_continuous(self, series):
        """Return True when ``series`` is numeric or an object column castable to float."""
        if series.dtype.name in ["object"]:
            try:
                series.astype("float")
                return True
            except Exception:
                # Best effort: the cast failing just means "not continuous".
                # ``Exception`` (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate.
                pass

        if series.dtype.name in utils.numeric_pandas_dtypes():
            #
            # if the type is numeric we simply believe pandas and go with continuous
            #
            return True

        return False

    def discover(self, name, series):
        """Return a ``ContinuousTypedFeature`` for a continuous column, else ``False``."""
        if self._is_continuous(series):
            logger.debug("column [{}]/[{}] continuous".format(name, series.dtype))
            return ContinuousTypedFeature.build(name, series)

        return False
|
50
|
+
|
51
|
+
|
52
|
+
if __name__ == "__main__":
    # ad-hoc demo on a small mixed int/float column with nulls
    detector = ContinuousDetector()
    sample = pd.Series([None, 3.14, 12.0, 1, 2, 3, None])
    print(detector.discover("continuous", sample))
|
@@ -0,0 +1,99 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8; -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
"""
|
8
|
+
NOTE:
|
9
|
+
|
10
|
+
There's an opportunity here to generate a new feature: credit card numbers are not predictive because they
|
11
|
+
don't generalize; however, if the feature is replaced by the type of card, that might be predictive.
|
12
|
+
|
13
|
+
- Visa: ^4[0-9]{12}(?:[0-9]{3})?$ All Visa card numbers start with a 4. New cards have 16 digits. Old cards have 13.
|
14
|
+
- MasterCard: ^(?:5[1-5][0-9]{2}|222[1-9]|22[3-9][0-9]|2[3-6][0-9]{2}|27[01][0-9]|2720)[0-9]{12}$ MasterCard numbers
|
15
|
+
either start with the numbers 51 through 55 or with the numbers 2221 through 2720. All have 16 digits.
|
16
|
+
- American Express: ^3[47][0-9]{13}$ American Express card numbers start with 34 or 37 and have 15 digits.
|
17
|
+
- Diners Club: ^3(?:0[0-5]|[68][0-9])[0-9]{11}$ Diners Club card numbers begin with 300 through 305, 36 or 38.
|
18
|
+
All have 14 digits. There are Diners Club cards that begin with 5 and have 16 digits. These are a joint
|
19
|
+
venture between Diners Club and MasterCard, and should be processed like a MasterCard.
|
20
|
+
- Discover: ^6(?:011|5[0-9]{2})[0-9]{12}$ Discover card numbers begin with 6011 or 65. All have 16 digits.
|
21
|
+
- JCB: ^(?:2131|1800|35\d{3})\d{11}$ JCB cards beginning with 2131 or 1800 have 15 digits.
|
22
|
+
JCB cards beginning with 35 have 16 digits.
|
23
|
+
|
24
|
+
"""
|
25
|
+
|
26
|
+
from __future__ import print_function, absolute_import, division
|
27
|
+
|
28
|
+
import re
|
29
|
+
|
30
|
+
import pandas as pd
|
31
|
+
|
32
|
+
from ads.type_discovery import logger
|
33
|
+
from ads.type_discovery.abstract_detector import AbstractTypeDiscoveryDetector
|
34
|
+
from ads.type_discovery.typed_feature import CreditCardTypedFeature
|
35
|
+
|
36
|
+
|
37
|
+
class CreditCardDetector(AbstractTypeDiscoveryDetector):
    """Detect columns whose non-null values all look like credit card numbers.

    A column qualifies when every sampled value matches the card-number
    pattern and passes the Luhn checksum.
    """

    # upper bound on how many values are pattern-matched and Luhn-checked
    _max_sample_size_to_luhn_check = 1000
    # compiled with re.VERBOSE: whitespace and "#" comments inside are ignored
    _pattern_string = r"""^(?:4[0-9]{12}(?:[0-9]{3})? # Visa
    | (?:5[1-5][0-9]{2} # MasterCard
    | 222[1-9]|22[3-9][0-9]|2[3-6][0-9]{2}|27[01][0-9]|2720)[0-9]{12}
    | 3[47][0-9]{13} # American Express
    | 3(?:0[0-5]|[68][0-9])[0-9]{11} # Diners Club
    | 6(?:011|5[0-9]{2})[0-9]{12} # Discover
    | (?:2131|1800|35\d{3})\d{11} # JCB
    | (5018|5020|5038|5612|5893|6304|6759|6761|6762|6763|0604|6390)\d+$ # Maestro
    | ^(5[06789]|6)[0-9]{0,}$ # Maestro
    | ^4[0-9]{12}(?:[0-9]{6})?$ #Visa 19 digit
    )$"""

    def luhn_checksum(self, card_number):
        """Return the Luhn checksum (0-9) of ``card_number``; 0 means valid.

        ``card_number`` is converted with ``str`` and must consist of decimal
        digits only.
        """
        digits = [int(ch) for ch in str(card_number)]
        # counting from the right: odd positions are added as-is ...
        checksum = sum(digits[-1::-2])
        # ... even positions are doubled and the digits of the product added
        for digit in digits[-2::-2]:
            checksum += sum(int(ch) for ch in str(digit * 2))
        return checksum % 10

    def is_luhn_valid(self, card_number):
        """Return True when ``card_number`` passes the Luhn check."""
        return self.luhn_checksum(card_number) == 0

    def is_credit_card(self, name, values):
        """Return True when every sampled value is a plausible card number.

        ``values`` is expected to be free of nulls. At most
        ``_max_sample_size_to_luhn_check`` values are examined. An empty
        input yields ``False``.
        """
        cc = re.compile(CreditCardDetector._pattern_string, re.VERBOSE)
        limit = CreditCardDetector._max_sample_size_to_luhn_check
        # since the nulls have been previously filtered we can safely do "all"
        samp = values if values.size <= limit else values.sample(n=limit)

        # With no values both all() checks below would pass vacuously and an
        # empty column would be reported as credit cards.
        if samp.size == 0:
            return False

        # integral floats are turned into ints so they print without ".0"
        if samp.dtype.name in ["float16", "float32", "float64"]:
            if samp.apply(float.is_integer).all():
                samp = samp.fillna(0.0).astype(int)

        if samp.dtype.name in ["int16", "int32", "int64"]:
            samp = samp.astype(str)

        if all(cc.match(str(x)) for x in samp):
            #
            # iff the pattern matching succeeds do we try the luhn algorithm on a sample
            #
            return all(self.is_luhn_valid(x) for x in samp)

        return False

    def discover(self, name, series):
        """Return a ``CreditCardTypedFeature`` for a card-number column, else ``False``."""
        candidates = series.loc[~series.isnull()]

        if self.is_credit_card(name, candidates.head(1000)):
            logger.debug("column [{}]/[{}] credit card".format(name, series.dtype))
            return CreditCardTypedFeature.build(name, series)

        return False
|
@@ -0,0 +1,92 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8; -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
|
8
|
+
from __future__ import print_function, absolute_import, division
|
9
|
+
|
10
|
+
import pandas as pd
|
11
|
+
|
12
|
+
from ads.common.decorator.runtime_dependency import (
|
13
|
+
runtime_dependency,
|
14
|
+
OptionalDependency,
|
15
|
+
)
|
16
|
+
from ads.type_discovery import logger
|
17
|
+
from ads.type_discovery.abstract_detector import AbstractTypeDiscoveryDetector
|
18
|
+
from ads.type_discovery.typed_feature import DateTimeTypedFeature
|
19
|
+
|
20
|
+
|
21
|
+
class DateTimeDetector(AbstractTypeDiscoveryDetector):
    """Detect datetime columns: native datetime64, integer timestamps, or date strings."""

    @runtime_dependency(module="datefinder", install_from=OptionalDependency.DATA)
    def _is_date_time(self, name, values, low_level_type_name):
        """Return a converter for a datetime-like column, or ``None``.

        Parameters
        ----------
        name : str
            Column name; integer columns are only considered when it contains
            "timestamp" (case-insensitive).
        values : pandas.Series
            Sample of the column's values to probe.
        low_level_type_name : str
            Name of the column's dtype.

        Returns
        -------
        callable or None
            A function that converts the column to datetimes, or ``None``
            when the column is not recognised as datetime.
        """
        # NOTE(review): ``datefinder`` is not imported in this block; presumably
        # the ``runtime_dependency`` decorator makes it available -- confirm.
        if low_level_type_name.startswith("datetime64"):
            return lambda x: x

        #
        # if the column/feature contains the word "timestamp" then
        #
        if low_level_type_name.startswith("int") and "timestamp" in name.lower():
            # either s (max len 10) or ns (max len 19)
            unit = "s" if values.astype("str").str.len().max() <= 10 else "ns"
            try:
                pd.to_datetime(values, unit=unit)
                return lambda x: pd.to_datetime(x, unit=unit)
            except Exception:
                # best effort: not convertible with this unit, keep probing;
                # KeyboardInterrupt/SystemExit are no longer swallowed
                pass

        if values.dtype == "object":
            try:
                pd.to_datetime(values, infer_datetime_format=True)
                # pandas parsing alone is not enough: every value must also
                # yield at least one date from datefinder
                datefinder_result = all(
                    bool(list(datefinder.find_dates(str(x)))) for x in values
                )
                if datefinder_result:
                    return lambda x: pd.to_datetime(x, infer_datetime_format=True)
            except Exception:
                # best effort: unparseable values mean "not a datetime column"
                pass

        return None

    def discover(self, name, series):
        """Return a ``DateTimeTypedFeature`` for a datetime column, else ``False``.

        Only the first 500 non-null values are probed; the converter found is
        then applied to the whole ``series``.
        """
        candidates = series.loc[~series.isnull()]
        fn = self._is_date_time(name, candidates.head(500), series.dtype.name)
        if fn:
            logger.debug("column [{}]/[{}] datetime".format(name, series.dtype))
            return DateTimeTypedFeature.build(name, fn(series))

        return False
|
59
|
+
|
60
|
+
|
61
|
+
if __name__ == "__main__":
    # ad-hoc demo: one (column name, column) pair per datetime flavour
    detector = DateTimeDetector()
    samples = [
        (
            "date-range",
            pd.Series(pd.date_range(start="1/1/2018", end="1/08/2018", freq="H")),
        ),
        ("dates", pd.Series(["12/12/12", "12/12/13", None, "12/12/14"])),
        (
            "dates-with-other-values",
            pd.Series(["12/12/12", "Monday", None, "12/12/14"]),
        ),
        (
            "timestamp s",
            pd.Series(
                [
                    978300760,
                    978302109,
                    978301968,
                    978300275,
                    978824291,
                    978302268,
                    978302039,
                ]
            ),
        ),
        ("timestamp ns", pd.Series([1490195805433502912])),
    ]
    for column_name, column in samples:
        print(detector.discover(column_name, column))
|
@@ -0,0 +1,118 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8; -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
from __future__ import print_function, absolute_import, division
|
8
|
+
|
9
|
+
import pandas as pd
|
10
|
+
from sklearn.utils.multiclass import type_of_target
|
11
|
+
|
12
|
+
from ads.type_discovery import logger
|
13
|
+
from ads.type_discovery.abstract_detector import DiscreteDiscoveryDetector
|
14
|
+
from ads.type_discovery.typed_feature import (
|
15
|
+
OrdinalTypedFeature,
|
16
|
+
CategoricalTypedFeature,
|
17
|
+
)
|
18
|
+
from ads.common import utils
|
19
|
+
|
20
|
+
|
21
|
+
class DiscreteDetector(DiscreteDiscoveryDetector):
    """Classify a discrete column as categorical or ordinal."""

    # a multiclass column with more distinct values than this is not labelled
    # by the multiclass rule
    _max_categorical_values = 100

    def _get_categorical_or_ordinal(self, name, series):
        """Return ``"categorical"``, ``"ordinal"`` or ``False`` for ``series``.

        Categoricals are unordered discrete types; ordinals are ordered
        discrete (int) types.
        """
        discrete_dtypes = ("category", "bool")
        dtype_name = series.dtype.name
        if dtype_name in discrete_dtypes:
            return "categorical"

        #
        # after removing nulls the rebuilt Series might already be categorical
        #
        non_null = pd.Series(list(series.loc[~series.isna()]))
        if non_null.dtype.name in discrete_dtypes:
            return "categorical"

        distinct = series.nunique()
        # at most the first 2000 non-null values are handed to type_of_target
        target_kind = type_of_target(list(non_null.head(min(non_null.size, 2000))))

        if target_kind == "binary":
            return "categorical"

        if (
            target_kind == "multiclass"
            and distinct <= DiscreteDetector._max_categorical_values
        ):
            if dtype_name in utils.numeric_pandas_dtypes():
                return "ordinal"
            return "categorical"

        is_int = dtype_name.startswith("int")
        if is_int or dtype_name.startswith("float"):
            if non_null.min() >= 0:
                # by summing all the values and summing all the int values we
                # can know all the values are integers
                if is_int or non_null.sum() == non_null.astype("int64").sum():
                    return "ordinal"

        return False

    def discover(self, name, series):
        """Return the matching typed feature for a discrete column, else ``False``."""
        guessed_type = self._get_categorical_or_ordinal(
            name, series.loc[~series.isnull()]
        )

        if guessed_type == "categorical":
            logger.debug("column [{}]/[{}] categorical".format(name, series.dtype))
            return CategoricalTypedFeature.build(name, series)

        if guessed_type == "ordinal":
            logger.debug("column [{}]/[{}] ordinal".format(name, series.dtype))
            return OrdinalTypedFeature.build(name, series)

        return False
|
92
|
+
|
93
|
+
|
94
|
+
if __name__ == "__main__":
    # ad-hoc demo: one (column name, column) pair per discrete flavour
    detector = DiscreteDetector()
    samples = [
        (
            "str-categorical",
            pd.Series(["a", "a", "a", "b", "c", "a"], dtype="category"),
        ),
        ("bool-categorical", pd.Series([True, False, True, True, True, None, True])),
        ("continuous", pd.Series([None, 3.14, 12.0, 1, 2, 3, None])),
        ("int-1-categorical", pd.Series([1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 9])),
        ("int-2-categorical", pd.Series([1, 1, 1, 5, 9])),
        ("real-3-categorical", pd.Series([1.0, 2.0, 3.0, 1.0, 4.0, 5.0])),
        ("bool-categorical", pd.Series([True, False, True, True, True, None, True])),
    ]
    for column_name, column in samples:
        print(detector.discover(column_name, column))
|
@@ -0,0 +1,146 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8; -*-
|
3
|
+
|
4
|
+
# Copyright (c) 2020, 2022 Oracle and/or its affiliates.
|
5
|
+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
|
6
|
+
|
7
|
+
|
8
|
+
from __future__ import print_function, absolute_import, division
|
9
|
+
|
10
|
+
import re
|
11
|
+
|
12
|
+
import pandas as pd
|
13
|
+
|
14
|
+
from ads.type_discovery import logger
|
15
|
+
from ads.type_discovery.abstract_detector import AbstractTypeDiscoveryDetector
|
16
|
+
from ads.type_discovery.typed_feature import DocumentTypedFeature, AddressTypedFeature
|
17
|
+
|
18
|
+
|
19
|
+
class DocumentDetector(AbstractTypeDiscoveryDetector):
    """Detect columns that hold free-text documents (or address-like text).

    Only ``object`` dtype columns whose first non-null value is a ``str`` are
    considered. CJK text is judged on the character length of that first
    value; any other text is judged on the mean whitespace-token count of a
    sample of the longer-than-average rows.
    """

    # A CJK string must be at least this many characters long to be a document.
    _min_cjk_chars_for_document = 100
    # The mean token count of the sampled rows must exceed this value.
    _min_words = 10
    # A string needs MORE than this many tag-like matches to count as HTML.
    _min_html_tags = 5

    # Non-greedy match of anything shaped like a tag: "<...>".
    _html_pattern = re.compile(r"<.*?>")

    # Inclusive code point ranges that are treated as CJK.
    _unicode_ranges = [
        {"from": ord(u"\u3300"), "to": ord(u"\u33ff")},  # compatibility ideographs
        {"from": ord(u"\ufe30"), "to": ord(u"\ufe4f")},  # compatibility ideographs
        {"from": ord(u"\uf900"), "to": ord(u"\ufaff")},  # compatibility ideographs
        {
            "from": ord(u"\U0002F800"),
            "to": ord(u"\U0002fa1f"),
        },  # compatibility ideographs
        {"from": ord(u"\u3040"), "to": ord(u"\u309f")},  # Japanese Hiragana
        {"from": ord(u"\u30a0"), "to": ord(u"\u30ff")},  # Japanese Katakana
        {"from": ord(u"\u2e80"), "to": ord(u"\u2eff")},  # cjk radicals supplement
        {"from": ord(u"\u4e00"), "to": ord(u"\u9fff")},
        {"from": ord(u"\u3400"), "to": ord(u"\u4dbf")},
        {"from": ord(u"\U00020000"), "to": ord(u"\U0002a6df")},
        {"from": ord(u"\U0002a700"), "to": ord(u"\U0002b73f")},
        {"from": ord(u"\U0002b740"), "to": ord(u"\U0002b81f")},
        {
            "from": ord(u"\U0002b820"),
            "to": ord(u"\U0002ceaf"),
        },  # included as of Unicode 8.0
    ]

    def _is_cjk_char(self, char):
        """Return True if ``char`` falls inside any range in ``_unicode_ranges``."""
        code_point = ord(char)
        # Generator (not a list) so the scan stops at the first matching range.
        return any(
            bounds["from"] <= code_point <= bounds["to"]
            for bounds in DocumentDetector._unicode_ranges
        )

    def cjk_string(self, document):
        """Return True if at least 20% of the characters in ``document`` are CJK.

        An empty string has no characters to classify and is reported as
        not CJK (instead of failing on a division by zero).
        """
        if not document:
            return False
        cjk_char_count = sum(1 for c in document if self._is_cjk_char(c))
        return cjk_char_count / len(document) >= 0.2

    def html_document(self, document):
        """Return True if ``document`` has more than ``_min_html_tags`` tag matches."""
        tag_count = len(DocumentDetector._html_pattern.findall(document))
        return tag_count > DocumentDetector._min_html_tags

    def discover(self, name, series):
        """Classify ``series`` as a document or address column.

        Parameters
        ----------
        name:
            Column name, passed through to the typed feature builders and
            used in debug log messages.
        series:
            The pandas Series to inspect.

        Returns
        -------
        The result of ``DocumentTypedFeature.build`` or
        ``AddressTypedFeature.build`` when the column matches, otherwise
        ``False``.

        Notes
        -----
        The non-CJK path draws an unseeded random sample of up to 500 rows, so
        columns that sit right on a threshold may be classified differently
        between calls.
        """
        #
        # very basic detection of a document. If the document is CJK then we use
        # only the document length, otherwise we split on whitespace and confirm
        # that there are word-like strings
        #
        if series.dtype != "object":
            return False

        non_null_series = series.loc[~series.isnull()]
        if non_null_series.empty:
            # An empty or all-null column has no value to inspect; indexing
            # position 0 here would raise IndexError.
            return False

        first_non_null_document = non_null_series.iloc[0]
        if not isinstance(first_non_null_document, str):
            return False

        is_html = self.html_document(first_non_null_document)

        if self.cjk_string(first_non_null_document):
            if (
                len(first_non_null_document)
                < DocumentDetector._min_cjk_chars_for_document
            ):
                return False

            tf = DocumentTypedFeature.build(
                name, series, is_cjk=True, is_html=is_html
            )
            logger.debug(
                "type discovery on CJK column [{}]/[{}] found to be a document".format(
                    name, series.dtype
                )
            )
            return tf

        # find rows with above average length
        lengths = non_null_series.str.len()
        above_avg_series = non_null_series.loc[lengths >= lengths.mean()]

        # take a sample. max 500 docs
        above_avg_series_sample = above_avg_series.sample(
            n=min(500, len(above_avg_series))
        )

        # the sampled rows must average more than _min_words tokens.
        mean_number_of_words = above_avg_series_sample.str.split().str.len().mean()

        # Written as "not (a > b)" so that a NaN mean is also rejected.
        if not (mean_number_of_words > DocumentDetector._min_words):
            return False

        commas_per_word = (
            above_avg_series_sample.str.count(",").mean() / mean_number_of_words
        )
        if mean_number_of_words < 15 and commas_per_word > 0.1:
            # many commas probably means address type
            logger.debug(
                "type discovery on column [{}]/[{}] looks like an address type".format(
                    name, series.dtype
                )
            )
            return AddressTypedFeature.build(name, series)

        logger.debug(
            "type discovery on non-CJK column [{}]/[{}] found to be a document".format(
                name, series.dtype
            )
        )

        # previous check of first document for HTML is now refined using the
        # longer documents: every sampled row must look like HTML
        is_html = all(self.html_document(doc) for doc in above_avg_series_sample)

        return DocumentTypedFeature.build(name, series, is_cjk=False, is_html=is_html)
|
143
|
+
|
144
|
+
|
145
|
+
if __name__ == "__main__":
    # Running the module directly only constructs a detector; nothing else runs.
    detector = DocumentDetector()
|