oracle-ads 2.13.9rc0__py3-none-any.whl → 2.13.9rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (857) hide show
  1. ads/aqua/__init__.py +40 -0
  2. ads/aqua/app.py +506 -0
  3. ads/aqua/cli.py +96 -0
  4. ads/aqua/client/__init__.py +3 -0
  5. ads/aqua/client/client.py +836 -0
  6. ads/aqua/client/openai_client.py +305 -0
  7. ads/aqua/common/__init__.py +5 -0
  8. ads/aqua/common/decorator.py +125 -0
  9. ads/aqua/common/entities.py +269 -0
  10. ads/aqua/common/enums.py +122 -0
  11. ads/aqua/common/errors.py +109 -0
  12. ads/aqua/common/utils.py +1285 -0
  13. ads/aqua/config/__init__.py +4 -0
  14. ads/aqua/config/container_config.py +248 -0
  15. ads/aqua/config/evaluation/__init__.py +4 -0
  16. ads/aqua/config/evaluation/evaluation_service_config.py +147 -0
  17. ads/aqua/config/utils/__init__.py +4 -0
  18. ads/aqua/config/utils/serializer.py +339 -0
  19. ads/aqua/constants.py +116 -0
  20. ads/aqua/data.py +14 -0
  21. ads/aqua/dummy_data/icon.txt +1 -0
  22. ads/aqua/dummy_data/oci_model_deployments.json +56 -0
  23. ads/aqua/dummy_data/oci_models.json +1 -0
  24. ads/aqua/dummy_data/readme.md +26 -0
  25. ads/aqua/evaluation/__init__.py +8 -0
  26. ads/aqua/evaluation/constants.py +53 -0
  27. ads/aqua/evaluation/entities.py +186 -0
  28. ads/aqua/evaluation/errors.py +70 -0
  29. ads/aqua/evaluation/evaluation.py +1814 -0
  30. ads/aqua/extension/__init__.py +42 -0
  31. ads/aqua/extension/aqua_ws_msg_handler.py +76 -0
  32. ads/aqua/extension/base_handler.py +90 -0
  33. ads/aqua/extension/common_handler.py +121 -0
  34. ads/aqua/extension/common_ws_msg_handler.py +36 -0
  35. ads/aqua/extension/deployment_handler.py +298 -0
  36. ads/aqua/extension/deployment_ws_msg_handler.py +54 -0
  37. ads/aqua/extension/errors.py +30 -0
  38. ads/aqua/extension/evaluation_handler.py +129 -0
  39. ads/aqua/extension/evaluation_ws_msg_handler.py +61 -0
  40. ads/aqua/extension/finetune_handler.py +96 -0
  41. ads/aqua/extension/model_handler.py +390 -0
  42. ads/aqua/extension/models/__init__.py +0 -0
  43. ads/aqua/extension/models/ws_models.py +145 -0
  44. ads/aqua/extension/models_ws_msg_handler.py +50 -0
  45. ads/aqua/extension/ui_handler.py +282 -0
  46. ads/aqua/extension/ui_websocket_handler.py +130 -0
  47. ads/aqua/extension/utils.py +133 -0
  48. ads/aqua/finetuning/__init__.py +7 -0
  49. ads/aqua/finetuning/constants.py +23 -0
  50. ads/aqua/finetuning/entities.py +181 -0
  51. ads/aqua/finetuning/finetuning.py +749 -0
  52. ads/aqua/model/__init__.py +8 -0
  53. ads/aqua/model/constants.py +60 -0
  54. ads/aqua/model/entities.py +385 -0
  55. ads/aqua/model/enums.py +32 -0
  56. ads/aqua/model/model.py +2114 -0
  57. ads/aqua/modeldeployment/__init__.py +8 -0
  58. ads/aqua/modeldeployment/constants.py +10 -0
  59. ads/aqua/modeldeployment/deployment.py +1326 -0
  60. ads/aqua/modeldeployment/entities.py +653 -0
  61. ads/aqua/modeldeployment/inference.py +74 -0
  62. ads/aqua/modeldeployment/utils.py +543 -0
  63. ads/aqua/resources/gpu_shapes_index.json +94 -0
  64. ads/aqua/server/__init__.py +4 -0
  65. ads/aqua/server/__main__.py +24 -0
  66. ads/aqua/server/app.py +47 -0
  67. ads/aqua/server/aqua_spec.yml +1291 -0
  68. ads/aqua/training/__init__.py +4 -0
  69. ads/aqua/training/exceptions.py +476 -0
  70. ads/aqua/ui.py +499 -0
  71. ads/automl/__init__.py +9 -0
  72. ads/automl/driver.py +330 -0
  73. ads/automl/provider.py +975 -0
  74. ads/bds/__init__.py +5 -0
  75. ads/bds/auth.py +127 -0
  76. ads/bds/big_data_service.py +255 -0
  77. ads/catalog/__init__.py +19 -0
  78. ads/catalog/model.py +1576 -0
  79. ads/catalog/notebook.py +461 -0
  80. ads/catalog/project.py +468 -0
  81. ads/catalog/summary.py +178 -0
  82. ads/common/__init__.py +11 -0
  83. ads/common/analyzer.py +65 -0
  84. ads/common/artifact/.model-ignore +63 -0
  85. ads/common/artifact/__init__.py +10 -0
  86. ads/common/auth.py +1122 -0
  87. ads/common/card_identifier.py +83 -0
  88. ads/common/config.py +647 -0
  89. ads/common/data.py +165 -0
  90. ads/common/decorator/__init__.py +9 -0
  91. ads/common/decorator/argument_to_case.py +88 -0
  92. ads/common/decorator/deprecate.py +69 -0
  93. ads/common/decorator/require_nonempty_arg.py +65 -0
  94. ads/common/decorator/runtime_dependency.py +178 -0
  95. ads/common/decorator/threaded.py +97 -0
  96. ads/common/decorator/utils.py +35 -0
  97. ads/common/dsc_file_system.py +303 -0
  98. ads/common/error.py +14 -0
  99. ads/common/extended_enum.py +81 -0
  100. ads/common/function/__init__.py +5 -0
  101. ads/common/function/fn_util.py +142 -0
  102. ads/common/function/func_conf.yaml +25 -0
  103. ads/common/ipython.py +76 -0
  104. ads/common/model.py +679 -0
  105. ads/common/model_artifact.py +1759 -0
  106. ads/common/model_artifact_schema.json +107 -0
  107. ads/common/model_export_util.py +664 -0
  108. ads/common/model_metadata.py +24 -0
  109. ads/common/object_storage_details.py +296 -0
  110. ads/common/oci_client.py +175 -0
  111. ads/common/oci_datascience.py +46 -0
  112. ads/common/oci_logging.py +1144 -0
  113. ads/common/oci_mixin.py +957 -0
  114. ads/common/oci_resource.py +136 -0
  115. ads/common/serializer.py +559 -0
  116. ads/common/utils.py +1852 -0
  117. ads/common/word_lists.py +1491 -0
  118. ads/common/work_request.py +189 -0
  119. ads/data_labeling/__init__.py +13 -0
  120. ads/data_labeling/boundingbox.py +253 -0
  121. ads/data_labeling/constants.py +47 -0
  122. ads/data_labeling/data_labeling_service.py +244 -0
  123. ads/data_labeling/interface/__init__.py +5 -0
  124. ads/data_labeling/interface/loader.py +16 -0
  125. ads/data_labeling/interface/parser.py +16 -0
  126. ads/data_labeling/interface/reader.py +23 -0
  127. ads/data_labeling/loader/__init__.py +5 -0
  128. ads/data_labeling/loader/file_loader.py +241 -0
  129. ads/data_labeling/metadata.py +110 -0
  130. ads/data_labeling/mixin/__init__.py +5 -0
  131. ads/data_labeling/mixin/data_labeling.py +232 -0
  132. ads/data_labeling/ner.py +129 -0
  133. ads/data_labeling/parser/__init__.py +5 -0
  134. ads/data_labeling/parser/dls_record_parser.py +388 -0
  135. ads/data_labeling/parser/export_metadata_parser.py +94 -0
  136. ads/data_labeling/parser/export_record_parser.py +473 -0
  137. ads/data_labeling/reader/__init__.py +5 -0
  138. ads/data_labeling/reader/dataset_reader.py +574 -0
  139. ads/data_labeling/reader/dls_record_reader.py +121 -0
  140. ads/data_labeling/reader/export_record_reader.py +62 -0
  141. ads/data_labeling/reader/jsonl_reader.py +75 -0
  142. ads/data_labeling/reader/metadata_reader.py +203 -0
  143. ads/data_labeling/reader/record_reader.py +263 -0
  144. ads/data_labeling/record.py +52 -0
  145. ads/data_labeling/visualizer/__init__.py +5 -0
  146. ads/data_labeling/visualizer/image_visualizer.py +525 -0
  147. ads/data_labeling/visualizer/text_visualizer.py +357 -0
  148. ads/database/__init__.py +5 -0
  149. ads/database/connection.py +338 -0
  150. ads/dataset/__init__.py +10 -0
  151. ads/dataset/capabilities.md +51 -0
  152. ads/dataset/classification_dataset.py +339 -0
  153. ads/dataset/correlation.py +226 -0
  154. ads/dataset/correlation_plot.py +563 -0
  155. ads/dataset/dask_series.py +173 -0
  156. ads/dataset/dataframe_transformer.py +110 -0
  157. ads/dataset/dataset.py +1979 -0
  158. ads/dataset/dataset_browser.py +360 -0
  159. ads/dataset/dataset_with_target.py +995 -0
  160. ads/dataset/exception.py +25 -0
  161. ads/dataset/factory.py +987 -0
  162. ads/dataset/feature_engineering_transformer.py +35 -0
  163. ads/dataset/feature_selection.py +107 -0
  164. ads/dataset/forecasting_dataset.py +26 -0
  165. ads/dataset/helper.py +1450 -0
  166. ads/dataset/label_encoder.py +99 -0
  167. ads/dataset/mixin/__init__.py +5 -0
  168. ads/dataset/mixin/dataset_accessor.py +134 -0
  169. ads/dataset/pipeline.py +58 -0
  170. ads/dataset/plot.py +710 -0
  171. ads/dataset/progress.py +86 -0
  172. ads/dataset/recommendation.py +297 -0
  173. ads/dataset/recommendation_transformer.py +502 -0
  174. ads/dataset/regression_dataset.py +14 -0
  175. ads/dataset/sampled_dataset.py +1050 -0
  176. ads/dataset/target.py +98 -0
  177. ads/dataset/timeseries.py +18 -0
  178. ads/dbmixin/__init__.py +5 -0
  179. ads/dbmixin/db_pandas_accessor.py +153 -0
  180. ads/environment/__init__.py +9 -0
  181. ads/environment/ml_runtime.py +66 -0
  182. ads/evaluations/README.md +14 -0
  183. ads/evaluations/__init__.py +109 -0
  184. ads/evaluations/evaluation_plot.py +983 -0
  185. ads/evaluations/evaluator.py +1334 -0
  186. ads/evaluations/statistical_metrics.py +543 -0
  187. ads/experiments/__init__.py +9 -0
  188. ads/experiments/capabilities.md +0 -0
  189. ads/explanations/__init__.py +21 -0
  190. ads/explanations/base_explainer.py +142 -0
  191. ads/explanations/capabilities.md +83 -0
  192. ads/explanations/explainer.py +190 -0
  193. ads/explanations/mlx_global_explainer.py +1050 -0
  194. ads/explanations/mlx_interface.py +386 -0
  195. ads/explanations/mlx_local_explainer.py +287 -0
  196. ads/explanations/mlx_whatif_explainer.py +201 -0
  197. ads/feature_engineering/__init__.py +20 -0
  198. ads/feature_engineering/accessor/__init__.py +5 -0
  199. ads/feature_engineering/accessor/dataframe_accessor.py +535 -0
  200. ads/feature_engineering/accessor/mixin/__init__.py +5 -0
  201. ads/feature_engineering/accessor/mixin/correlation.py +166 -0
  202. ads/feature_engineering/accessor/mixin/eda_mixin.py +266 -0
  203. ads/feature_engineering/accessor/mixin/eda_mixin_series.py +85 -0
  204. ads/feature_engineering/accessor/mixin/feature_types_mixin.py +211 -0
  205. ads/feature_engineering/accessor/mixin/utils.py +65 -0
  206. ads/feature_engineering/accessor/series_accessor.py +431 -0
  207. ads/feature_engineering/adsimage/__init__.py +5 -0
  208. ads/feature_engineering/adsimage/image.py +192 -0
  209. ads/feature_engineering/adsimage/image_reader.py +170 -0
  210. ads/feature_engineering/adsimage/interface/__init__.py +5 -0
  211. ads/feature_engineering/adsimage/interface/reader.py +19 -0
  212. ads/feature_engineering/adsstring/__init__.py +7 -0
  213. ads/feature_engineering/adsstring/oci_language/__init__.py +8 -0
  214. ads/feature_engineering/adsstring/string/__init__.py +8 -0
  215. ads/feature_engineering/data_schema.json +57 -0
  216. ads/feature_engineering/dataset/__init__.py +5 -0
  217. ads/feature_engineering/dataset/zip_code_data.py +42062 -0
  218. ads/feature_engineering/exceptions.py +40 -0
  219. ads/feature_engineering/feature_type/__init__.py +133 -0
  220. ads/feature_engineering/feature_type/address.py +184 -0
  221. ads/feature_engineering/feature_type/adsstring/__init__.py +5 -0
  222. ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +164 -0
  223. ads/feature_engineering/feature_type/adsstring/oci_language.py +93 -0
  224. ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +5 -0
  225. ads/feature_engineering/feature_type/adsstring/parsers/base.py +47 -0
  226. ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +96 -0
  227. ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +221 -0
  228. ads/feature_engineering/feature_type/adsstring/string.py +258 -0
  229. ads/feature_engineering/feature_type/base.py +58 -0
  230. ads/feature_engineering/feature_type/boolean.py +183 -0
  231. ads/feature_engineering/feature_type/category.py +146 -0
  232. ads/feature_engineering/feature_type/constant.py +137 -0
  233. ads/feature_engineering/feature_type/continuous.py +151 -0
  234. ads/feature_engineering/feature_type/creditcard.py +314 -0
  235. ads/feature_engineering/feature_type/datetime.py +190 -0
  236. ads/feature_engineering/feature_type/discrete.py +134 -0
  237. ads/feature_engineering/feature_type/document.py +43 -0
  238. ads/feature_engineering/feature_type/gis.py +251 -0
  239. ads/feature_engineering/feature_type/handler/__init__.py +5 -0
  240. ads/feature_engineering/feature_type/handler/feature_validator.py +524 -0
  241. ads/feature_engineering/feature_type/handler/feature_warning.py +319 -0
  242. ads/feature_engineering/feature_type/handler/warnings.py +128 -0
  243. ads/feature_engineering/feature_type/integer.py +142 -0
  244. ads/feature_engineering/feature_type/ip_address.py +144 -0
  245. ads/feature_engineering/feature_type/ip_address_v4.py +138 -0
  246. ads/feature_engineering/feature_type/ip_address_v6.py +138 -0
  247. ads/feature_engineering/feature_type/lat_long.py +256 -0
  248. ads/feature_engineering/feature_type/object.py +43 -0
  249. ads/feature_engineering/feature_type/ordinal.py +132 -0
  250. ads/feature_engineering/feature_type/phone_number.py +135 -0
  251. ads/feature_engineering/feature_type/string.py +171 -0
  252. ads/feature_engineering/feature_type/text.py +93 -0
  253. ads/feature_engineering/feature_type/unknown.py +43 -0
  254. ads/feature_engineering/feature_type/zip_code.py +164 -0
  255. ads/feature_engineering/feature_type_manager.py +406 -0
  256. ads/feature_engineering/schema.py +795 -0
  257. ads/feature_engineering/utils.py +245 -0
  258. ads/feature_store/.readthedocs.yaml +19 -0
  259. ads/feature_store/README.md +65 -0
  260. ads/feature_store/__init__.py +9 -0
  261. ads/feature_store/common/__init__.py +0 -0
  262. ads/feature_store/common/enums.py +339 -0
  263. ads/feature_store/common/exceptions.py +18 -0
  264. ads/feature_store/common/spark_session_singleton.py +125 -0
  265. ads/feature_store/common/utils/__init__.py +0 -0
  266. ads/feature_store/common/utils/base64_encoder_decoder.py +72 -0
  267. ads/feature_store/common/utils/feature_schema_mapper.py +283 -0
  268. ads/feature_store/common/utils/transformation_utils.py +82 -0
  269. ads/feature_store/common/utils/utility.py +403 -0
  270. ads/feature_store/data_validation/__init__.py +0 -0
  271. ads/feature_store/data_validation/great_expectation.py +129 -0
  272. ads/feature_store/dataset.py +1230 -0
  273. ads/feature_store/dataset_job.py +530 -0
  274. ads/feature_store/docs/Dockerfile +7 -0
  275. ads/feature_store/docs/Makefile +44 -0
  276. ads/feature_store/docs/conf.py +28 -0
  277. ads/feature_store/docs/requirements.txt +14 -0
  278. ads/feature_store/docs/source/ads.feature_store.query.rst +20 -0
  279. ads/feature_store/docs/source/cicd.rst +137 -0
  280. ads/feature_store/docs/source/conf.py +86 -0
  281. ads/feature_store/docs/source/data_versioning.rst +33 -0
  282. ads/feature_store/docs/source/dataset.rst +388 -0
  283. ads/feature_store/docs/source/dataset_job.rst +27 -0
  284. ads/feature_store/docs/source/demo.rst +70 -0
  285. ads/feature_store/docs/source/entity.rst +78 -0
  286. ads/feature_store/docs/source/feature_group.rst +624 -0
  287. ads/feature_store/docs/source/feature_group_job.rst +29 -0
  288. ads/feature_store/docs/source/feature_store.rst +122 -0
  289. ads/feature_store/docs/source/feature_store_class.rst +123 -0
  290. ads/feature_store/docs/source/feature_validation.rst +66 -0
  291. ads/feature_store/docs/source/figures/cicd.png +0 -0
  292. ads/feature_store/docs/source/figures/data_validation.png +0 -0
  293. ads/feature_store/docs/source/figures/data_versioning.png +0 -0
  294. ads/feature_store/docs/source/figures/dataset.gif +0 -0
  295. ads/feature_store/docs/source/figures/dataset.png +0 -0
  296. ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
  297. ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
  298. ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
  299. ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
  300. ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
  301. ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
  302. ads/feature_store/docs/source/figures/entity.png +0 -0
  303. ads/feature_store/docs/source/figures/feature_group.png +0 -0
  304. ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
  305. ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
  306. ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
  307. ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
  308. ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
  309. ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
  310. ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
  311. ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
  312. ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
  313. ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
  314. ads/feature_store/docs/source/figures/overview.png +0 -0
  315. ads/feature_store/docs/source/figures/resource_manager.png +0 -0
  316. ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
  317. ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
  318. ads/feature_store/docs/source/figures/stats_1.png +0 -0
  319. ads/feature_store/docs/source/figures/stats_2.png +0 -0
  320. ads/feature_store/docs/source/figures/stats_d.png +0 -0
  321. ads/feature_store/docs/source/figures/stats_fg.png +0 -0
  322. ads/feature_store/docs/source/figures/transformation.png +0 -0
  323. ads/feature_store/docs/source/figures/transformations.gif +0 -0
  324. ads/feature_store/docs/source/figures/validation.png +0 -0
  325. ads/feature_store/docs/source/figures/validation_fg.png +0 -0
  326. ads/feature_store/docs/source/figures/validation_results.png +0 -0
  327. ads/feature_store/docs/source/figures/validation_summary.png +0 -0
  328. ads/feature_store/docs/source/index.rst +81 -0
  329. ads/feature_store/docs/source/module.rst +8 -0
  330. ads/feature_store/docs/source/notebook.rst +94 -0
  331. ads/feature_store/docs/source/overview.rst +47 -0
  332. ads/feature_store/docs/source/quickstart.rst +176 -0
  333. ads/feature_store/docs/source/release_notes.rst +194 -0
  334. ads/feature_store/docs/source/setup_feature_store.rst +81 -0
  335. ads/feature_store/docs/source/statistics.rst +58 -0
  336. ads/feature_store/docs/source/transformation.rst +199 -0
  337. ads/feature_store/docs/source/ui.rst +65 -0
  338. ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +66 -0
  339. ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +192 -0
  340. ads/feature_store/docs/source/user_guides.setup.terraform.rst +338 -0
  341. ads/feature_store/entity.py +718 -0
  342. ads/feature_store/execution_strategy/__init__.py +0 -0
  343. ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
  344. ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +375 -0
  345. ads/feature_store/execution_strategy/engine/__init__.py +0 -0
  346. ads/feature_store/execution_strategy/engine/spark_engine.py +316 -0
  347. ads/feature_store/execution_strategy/execution_strategy.py +113 -0
  348. ads/feature_store/execution_strategy/execution_strategy_provider.py +47 -0
  349. ads/feature_store/execution_strategy/spark/__init__.py +0 -0
  350. ads/feature_store/execution_strategy/spark/spark_execution.py +618 -0
  351. ads/feature_store/feature.py +192 -0
  352. ads/feature_store/feature_group.py +1494 -0
  353. ads/feature_store/feature_group_expectation.py +346 -0
  354. ads/feature_store/feature_group_job.py +602 -0
  355. ads/feature_store/feature_lineage/__init__.py +0 -0
  356. ads/feature_store/feature_lineage/graphviz_service.py +180 -0
  357. ads/feature_store/feature_option_details.py +50 -0
  358. ads/feature_store/feature_statistics/__init__.py +0 -0
  359. ads/feature_store/feature_statistics/statistics_service.py +99 -0
  360. ads/feature_store/feature_store.py +699 -0
  361. ads/feature_store/feature_store_registrar.py +518 -0
  362. ads/feature_store/input_feature_detail.py +149 -0
  363. ads/feature_store/mixin/__init__.py +4 -0
  364. ads/feature_store/mixin/oci_feature_store.py +145 -0
  365. ads/feature_store/model_details.py +73 -0
  366. ads/feature_store/query/__init__.py +0 -0
  367. ads/feature_store/query/filter.py +266 -0
  368. ads/feature_store/query/generator/__init__.py +0 -0
  369. ads/feature_store/query/generator/query_generator.py +298 -0
  370. ads/feature_store/query/join.py +161 -0
  371. ads/feature_store/query/query.py +403 -0
  372. ads/feature_store/query/validator/__init__.py +0 -0
  373. ads/feature_store/query/validator/query_validator.py +57 -0
  374. ads/feature_store/response/__init__.py +0 -0
  375. ads/feature_store/response/response_builder.py +68 -0
  376. ads/feature_store/service/__init__.py +0 -0
  377. ads/feature_store/service/oci_dataset.py +139 -0
  378. ads/feature_store/service/oci_dataset_job.py +199 -0
  379. ads/feature_store/service/oci_entity.py +125 -0
  380. ads/feature_store/service/oci_feature_group.py +164 -0
  381. ads/feature_store/service/oci_feature_group_job.py +214 -0
  382. ads/feature_store/service/oci_feature_store.py +182 -0
  383. ads/feature_store/service/oci_lineage.py +87 -0
  384. ads/feature_store/service/oci_transformation.py +104 -0
  385. ads/feature_store/statistics/__init__.py +0 -0
  386. ads/feature_store/statistics/abs_feature_value.py +49 -0
  387. ads/feature_store/statistics/charts/__init__.py +0 -0
  388. ads/feature_store/statistics/charts/abstract_feature_plot.py +37 -0
  389. ads/feature_store/statistics/charts/box_plot.py +148 -0
  390. ads/feature_store/statistics/charts/frequency_distribution.py +65 -0
  391. ads/feature_store/statistics/charts/probability_distribution.py +68 -0
  392. ads/feature_store/statistics/charts/top_k_frequent_elements.py +98 -0
  393. ads/feature_store/statistics/feature_stat.py +126 -0
  394. ads/feature_store/statistics/generic_feature_value.py +33 -0
  395. ads/feature_store/statistics/statistics.py +41 -0
  396. ads/feature_store/statistics_config.py +101 -0
  397. ads/feature_store/templates/feature_store_template.yaml +45 -0
  398. ads/feature_store/transformation.py +499 -0
  399. ads/feature_store/validation_output.py +57 -0
  400. ads/hpo/__init__.py +9 -0
  401. ads/hpo/_imports.py +91 -0
  402. ads/hpo/ads_search_space.py +439 -0
  403. ads/hpo/distributions.py +325 -0
  404. ads/hpo/objective.py +280 -0
  405. ads/hpo/search_cv.py +1657 -0
  406. ads/hpo/stopping_criterion.py +75 -0
  407. ads/hpo/tuner_artifact.py +413 -0
  408. ads/hpo/utils.py +91 -0
  409. ads/hpo/validation.py +140 -0
  410. ads/hpo/visualization/__init__.py +5 -0
  411. ads/hpo/visualization/_contour.py +23 -0
  412. ads/hpo/visualization/_edf.py +20 -0
  413. ads/hpo/visualization/_intermediate_values.py +21 -0
  414. ads/hpo/visualization/_optimization_history.py +25 -0
  415. ads/hpo/visualization/_parallel_coordinate.py +169 -0
  416. ads/hpo/visualization/_param_importances.py +26 -0
  417. ads/jobs/__init__.py +53 -0
  418. ads/jobs/ads_job.py +663 -0
  419. ads/jobs/builders/__init__.py +5 -0
  420. ads/jobs/builders/base.py +156 -0
  421. ads/jobs/builders/infrastructure/__init__.py +6 -0
  422. ads/jobs/builders/infrastructure/base.py +165 -0
  423. ads/jobs/builders/infrastructure/dataflow.py +1252 -0
  424. ads/jobs/builders/infrastructure/dsc_job.py +1894 -0
  425. ads/jobs/builders/infrastructure/dsc_job_runtime.py +1233 -0
  426. ads/jobs/builders/infrastructure/utils.py +65 -0
  427. ads/jobs/builders/runtimes/__init__.py +5 -0
  428. ads/jobs/builders/runtimes/artifact.py +338 -0
  429. ads/jobs/builders/runtimes/base.py +325 -0
  430. ads/jobs/builders/runtimes/container_runtime.py +242 -0
  431. ads/jobs/builders/runtimes/python_runtime.py +1016 -0
  432. ads/jobs/builders/runtimes/pytorch_runtime.py +204 -0
  433. ads/jobs/cli.py +104 -0
  434. ads/jobs/env_var_parser.py +131 -0
  435. ads/jobs/extension.py +160 -0
  436. ads/jobs/schema/__init__.py +5 -0
  437. ads/jobs/schema/infrastructure_schema.json +116 -0
  438. ads/jobs/schema/job_schema.json +42 -0
  439. ads/jobs/schema/runtime_schema.json +183 -0
  440. ads/jobs/schema/validator.py +141 -0
  441. ads/jobs/serializer.py +296 -0
  442. ads/jobs/templates/__init__.py +5 -0
  443. ads/jobs/templates/container.py +6 -0
  444. ads/jobs/templates/driver_notebook.py +177 -0
  445. ads/jobs/templates/driver_oci.py +500 -0
  446. ads/jobs/templates/driver_python.py +48 -0
  447. ads/jobs/templates/driver_pytorch.py +852 -0
  448. ads/jobs/templates/driver_utils.py +615 -0
  449. ads/jobs/templates/hostname_from_env.c +55 -0
  450. ads/jobs/templates/oci_metrics.py +181 -0
  451. ads/jobs/utils.py +104 -0
  452. ads/llm/__init__.py +28 -0
  453. ads/llm/autogen/__init__.py +2 -0
  454. ads/llm/autogen/constants.py +15 -0
  455. ads/llm/autogen/reports/__init__.py +2 -0
  456. ads/llm/autogen/reports/base.py +67 -0
  457. ads/llm/autogen/reports/data.py +103 -0
  458. ads/llm/autogen/reports/session.py +526 -0
  459. ads/llm/autogen/reports/templates/chat_box.html +13 -0
  460. ads/llm/autogen/reports/templates/chat_box_lt.html +5 -0
  461. ads/llm/autogen/reports/templates/chat_box_rt.html +6 -0
  462. ads/llm/autogen/reports/utils.py +56 -0
  463. ads/llm/autogen/v02/__init__.py +4 -0
  464. ads/llm/autogen/v02/client.py +295 -0
  465. ads/llm/autogen/v02/log_handlers/__init__.py +2 -0
  466. ads/llm/autogen/v02/log_handlers/oci_file_handler.py +83 -0
  467. ads/llm/autogen/v02/loggers/__init__.py +6 -0
  468. ads/llm/autogen/v02/loggers/metric_logger.py +320 -0
  469. ads/llm/autogen/v02/loggers/session_logger.py +580 -0
  470. ads/llm/autogen/v02/loggers/utils.py +86 -0
  471. ads/llm/autogen/v02/runtime_logging.py +163 -0
  472. ads/llm/chain.py +268 -0
  473. ads/llm/chat_template.py +31 -0
  474. ads/llm/deploy.py +63 -0
  475. ads/llm/guardrails/__init__.py +5 -0
  476. ads/llm/guardrails/base.py +442 -0
  477. ads/llm/guardrails/huggingface.py +44 -0
  478. ads/llm/langchain/__init__.py +5 -0
  479. ads/llm/langchain/plugins/__init__.py +5 -0
  480. ads/llm/langchain/plugins/chat_models/__init__.py +5 -0
  481. ads/llm/langchain/plugins/chat_models/oci_data_science.py +1027 -0
  482. ads/llm/langchain/plugins/embeddings/__init__.py +4 -0
  483. ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +184 -0
  484. ads/llm/langchain/plugins/llms/__init__.py +5 -0
  485. ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +979 -0
  486. ads/llm/requirements.txt +3 -0
  487. ads/llm/serialize.py +219 -0
  488. ads/llm/serializers/__init__.py +0 -0
  489. ads/llm/serializers/retrieval_qa.py +153 -0
  490. ads/llm/serializers/runnable_parallel.py +27 -0
  491. ads/llm/templates/score_chain.jinja2 +155 -0
  492. ads/llm/templates/tool_chat_template_hermes.jinja +130 -0
  493. ads/llm/templates/tool_chat_template_mistral_parallel.jinja +94 -0
  494. ads/model/__init__.py +52 -0
  495. ads/model/artifact.py +573 -0
  496. ads/model/artifact_downloader.py +254 -0
  497. ads/model/artifact_uploader.py +267 -0
  498. ads/model/base_properties.py +238 -0
  499. ads/model/common/.model-ignore +66 -0
  500. ads/model/common/__init__.py +5 -0
  501. ads/model/common/utils.py +142 -0
  502. ads/model/datascience_model.py +2635 -0
  503. ads/model/deployment/__init__.py +20 -0
  504. ads/model/deployment/common/__init__.py +5 -0
  505. ads/model/deployment/common/utils.py +308 -0
  506. ads/model/deployment/model_deployer.py +466 -0
  507. ads/model/deployment/model_deployment.py +1846 -0
  508. ads/model/deployment/model_deployment_infrastructure.py +671 -0
  509. ads/model/deployment/model_deployment_properties.py +493 -0
  510. ads/model/deployment/model_deployment_runtime.py +838 -0
  511. ads/model/extractor/__init__.py +5 -0
  512. ads/model/extractor/automl_extractor.py +74 -0
  513. ads/model/extractor/embedding_onnx_extractor.py +80 -0
  514. ads/model/extractor/huggingface_extractor.py +88 -0
  515. ads/model/extractor/keras_extractor.py +84 -0
  516. ads/model/extractor/lightgbm_extractor.py +93 -0
  517. ads/model/extractor/model_info_extractor.py +114 -0
  518. ads/model/extractor/model_info_extractor_factory.py +105 -0
  519. ads/model/extractor/pytorch_extractor.py +87 -0
  520. ads/model/extractor/sklearn_extractor.py +112 -0
  521. ads/model/extractor/spark_extractor.py +89 -0
  522. ads/model/extractor/tensorflow_extractor.py +85 -0
  523. ads/model/extractor/xgboost_extractor.py +94 -0
  524. ads/model/framework/__init__.py +5 -0
  525. ads/model/framework/automl_model.py +178 -0
  526. ads/model/framework/embedding_onnx_model.py +438 -0
  527. ads/model/framework/huggingface_model.py +399 -0
  528. ads/model/framework/lightgbm_model.py +266 -0
  529. ads/model/framework/pytorch_model.py +266 -0
  530. ads/model/framework/sklearn_model.py +250 -0
  531. ads/model/framework/spark_model.py +326 -0
  532. ads/model/framework/tensorflow_model.py +254 -0
  533. ads/model/framework/xgboost_model.py +258 -0
  534. ads/model/generic_model.py +3518 -0
  535. ads/model/model_artifact_boilerplate/README.md +381 -0
  536. ads/model/model_artifact_boilerplate/__init__.py +5 -0
  537. ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +5 -0
  538. ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +427 -0
  539. ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +2 -0
  540. ads/model/model_artifact_boilerplate/runtime.yaml +7 -0
  541. ads/model/model_artifact_boilerplate/score.py +61 -0
  542. ads/model/model_file_description_schema.json +68 -0
  543. ads/model/model_introspect.py +331 -0
  544. ads/model/model_metadata.py +1810 -0
  545. ads/model/model_metadata_mixin.py +460 -0
  546. ads/model/model_properties.py +63 -0
  547. ads/model/model_version_set.py +739 -0
  548. ads/model/runtime/__init__.py +5 -0
  549. ads/model/runtime/env_info.py +306 -0
  550. ads/model/runtime/model_deployment_details.py +37 -0
  551. ads/model/runtime/model_provenance_details.py +58 -0
  552. ads/model/runtime/runtime_info.py +81 -0
  553. ads/model/runtime/schemas/inference_env_info_schema.yaml +16 -0
  554. ads/model/runtime/schemas/model_provenance_schema.yaml +36 -0
  555. ads/model/runtime/schemas/training_env_info_schema.yaml +16 -0
  556. ads/model/runtime/utils.py +201 -0
  557. ads/model/serde/__init__.py +5 -0
  558. ads/model/serde/common.py +40 -0
  559. ads/model/serde/model_input.py +547 -0
  560. ads/model/serde/model_serializer.py +1184 -0
  561. ads/model/service/__init__.py +5 -0
  562. ads/model/service/oci_datascience_model.py +1076 -0
  563. ads/model/service/oci_datascience_model_deployment.py +500 -0
  564. ads/model/service/oci_datascience_model_version_set.py +176 -0
  565. ads/model/transformer/__init__.py +5 -0
  566. ads/model/transformer/onnx_transformer.py +324 -0
  567. ads/mysqldb/__init__.py +5 -0
  568. ads/mysqldb/mysql_db.py +227 -0
  569. ads/opctl/__init__.py +18 -0
  570. ads/opctl/anomaly_detection.py +11 -0
  571. ads/opctl/backend/__init__.py +5 -0
  572. ads/opctl/backend/ads_dataflow.py +353 -0
  573. ads/opctl/backend/ads_ml_job.py +710 -0
  574. ads/opctl/backend/ads_ml_pipeline.py +164 -0
  575. ads/opctl/backend/ads_model_deployment.py +209 -0
  576. ads/opctl/backend/base.py +146 -0
  577. ads/opctl/backend/local.py +1053 -0
  578. ads/opctl/backend/marketplace/__init__.py +9 -0
  579. ads/opctl/backend/marketplace/helm_helper.py +173 -0
  580. ads/opctl/backend/marketplace/local_marketplace.py +271 -0
  581. ads/opctl/backend/marketplace/marketplace_backend_runner.py +71 -0
  582. ads/opctl/backend/marketplace/marketplace_operator_interface.py +44 -0
  583. ads/opctl/backend/marketplace/marketplace_operator_runner.py +24 -0
  584. ads/opctl/backend/marketplace/marketplace_utils.py +212 -0
  585. ads/opctl/backend/marketplace/models/__init__.py +5 -0
  586. ads/opctl/backend/marketplace/models/bearer_token.py +94 -0
  587. ads/opctl/backend/marketplace/models/marketplace_type.py +70 -0
  588. ads/opctl/backend/marketplace/models/ocir_details.py +56 -0
  589. ads/opctl/backend/marketplace/prerequisite_checker.py +238 -0
  590. ads/opctl/cli.py +707 -0
  591. ads/opctl/cmds.py +869 -0
  592. ads/opctl/conda/__init__.py +5 -0
  593. ads/opctl/conda/cli.py +193 -0
  594. ads/opctl/conda/cmds.py +749 -0
  595. ads/opctl/conda/config.yaml +34 -0
  596. ads/opctl/conda/manifest_template.yaml +13 -0
  597. ads/opctl/conda/multipart_uploader.py +188 -0
  598. ads/opctl/conda/pack.py +89 -0
  599. ads/opctl/config/__init__.py +5 -0
  600. ads/opctl/config/base.py +57 -0
  601. ads/opctl/config/diagnostics/__init__.py +5 -0
  602. ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +62 -0
  603. ads/opctl/config/merger.py +255 -0
  604. ads/opctl/config/resolver.py +297 -0
  605. ads/opctl/config/utils.py +79 -0
  606. ads/opctl/config/validator.py +17 -0
  607. ads/opctl/config/versioner.py +68 -0
  608. ads/opctl/config/yaml_parsers/__init__.py +7 -0
  609. ads/opctl/config/yaml_parsers/base.py +58 -0
  610. ads/opctl/config/yaml_parsers/distributed/__init__.py +7 -0
  611. ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +201 -0
  612. ads/opctl/constants.py +66 -0
  613. ads/opctl/decorator/__init__.py +5 -0
  614. ads/opctl/decorator/common.py +129 -0
  615. ads/opctl/diagnostics/__init__.py +5 -0
  616. ads/opctl/diagnostics/__main__.py +25 -0
  617. ads/opctl/diagnostics/check_distributed_job_requirements.py +212 -0
  618. ads/opctl/diagnostics/check_requirements.py +144 -0
  619. ads/opctl/diagnostics/requirement_exception.py +9 -0
  620. ads/opctl/distributed/README.md +109 -0
  621. ads/opctl/distributed/__init__.py +5 -0
  622. ads/opctl/distributed/certificates.py +32 -0
  623. ads/opctl/distributed/cli.py +207 -0
  624. ads/opctl/distributed/cmds.py +731 -0
  625. ads/opctl/distributed/common/__init__.py +5 -0
  626. ads/opctl/distributed/common/abstract_cluster_provider.py +449 -0
  627. ads/opctl/distributed/common/abstract_framework_spec_builder.py +88 -0
  628. ads/opctl/distributed/common/cluster_config_helper.py +103 -0
  629. ads/opctl/distributed/common/cluster_provider_factory.py +21 -0
  630. ads/opctl/distributed/common/cluster_runner.py +54 -0
  631. ads/opctl/distributed/common/framework_factory.py +29 -0
  632. ads/opctl/docker/Dockerfile.job +103 -0
  633. ads/opctl/docker/Dockerfile.job.arm +107 -0
  634. ads/opctl/docker/Dockerfile.job.gpu +175 -0
  635. ads/opctl/docker/base-env.yaml +13 -0
  636. ads/opctl/docker/cuda.repo +6 -0
  637. ads/opctl/docker/operator/.dockerignore +0 -0
  638. ads/opctl/docker/operator/Dockerfile +41 -0
  639. ads/opctl/docker/operator/Dockerfile.gpu +85 -0
  640. ads/opctl/docker/operator/cuda.repo +6 -0
  641. ads/opctl/docker/operator/environment.yaml +8 -0
  642. ads/opctl/forecast.py +11 -0
  643. ads/opctl/index.yaml +3 -0
  644. ads/opctl/model/__init__.py +5 -0
  645. ads/opctl/model/cli.py +65 -0
  646. ads/opctl/model/cmds.py +73 -0
  647. ads/opctl/operator/README.md +4 -0
  648. ads/opctl/operator/__init__.py +31 -0
  649. ads/opctl/operator/cli.py +344 -0
  650. ads/opctl/operator/cmd.py +596 -0
  651. ads/opctl/operator/common/__init__.py +5 -0
  652. ads/opctl/operator/common/backend_factory.py +460 -0
  653. ads/opctl/operator/common/const.py +27 -0
  654. ads/opctl/operator/common/data/synthetic.csv +16001 -0
  655. ads/opctl/operator/common/dictionary_merger.py +148 -0
  656. ads/opctl/operator/common/errors.py +42 -0
  657. ads/opctl/operator/common/operator_config.py +99 -0
  658. ads/opctl/operator/common/operator_loader.py +811 -0
  659. ads/opctl/operator/common/operator_schema.yaml +130 -0
  660. ads/opctl/operator/common/operator_yaml_generator.py +152 -0
  661. ads/opctl/operator/common/utils.py +208 -0
  662. ads/opctl/operator/lowcode/__init__.py +5 -0
  663. ads/opctl/operator/lowcode/anomaly/MLoperator +16 -0
  664. ads/opctl/operator/lowcode/anomaly/README.md +207 -0
  665. ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
  666. ads/opctl/operator/lowcode/anomaly/__main__.py +103 -0
  667. ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
  668. ads/opctl/operator/lowcode/anomaly/const.py +167 -0
  669. ads/opctl/operator/lowcode/anomaly/environment.yaml +10 -0
  670. ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
  671. ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +146 -0
  672. ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +162 -0
  673. ads/opctl/operator/lowcode/anomaly/model/automlx.py +99 -0
  674. ads/opctl/operator/lowcode/anomaly/model/autots.py +115 -0
  675. ads/opctl/operator/lowcode/anomaly/model/base_model.py +404 -0
  676. ads/opctl/operator/lowcode/anomaly/model/factory.py +110 -0
  677. ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +78 -0
  678. ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +78 -0
  679. ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +120 -0
  680. ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
  681. ads/opctl/operator/lowcode/anomaly/operator_config.py +127 -0
  682. ads/opctl/operator/lowcode/anomaly/schema.yaml +401 -0
  683. ads/opctl/operator/lowcode/anomaly/utils.py +88 -0
  684. ads/opctl/operator/lowcode/common/__init__.py +5 -0
  685. ads/opctl/operator/lowcode/common/const.py +10 -0
  686. ads/opctl/operator/lowcode/common/data.py +116 -0
  687. ads/opctl/operator/lowcode/common/errors.py +47 -0
  688. ads/opctl/operator/lowcode/common/transformations.py +296 -0
  689. ads/opctl/operator/lowcode/common/utils.py +384 -0
  690. ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +13 -0
  691. ads/opctl/operator/lowcode/feature_store_marketplace/README.md +30 -0
  692. ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +5 -0
  693. ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +116 -0
  694. ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +85 -0
  695. ads/opctl/operator/lowcode/feature_store_marketplace/const.py +15 -0
  696. ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
  697. ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +4 -0
  698. ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +32 -0
  699. ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +43 -0
  700. ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +120 -0
  701. ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +34 -0
  702. ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +386 -0
  703. ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +160 -0
  704. ads/opctl/operator/lowcode/forecast/MLoperator +25 -0
  705. ads/opctl/operator/lowcode/forecast/README.md +209 -0
  706. ads/opctl/operator/lowcode/forecast/__init__.py +5 -0
  707. ads/opctl/operator/lowcode/forecast/__main__.py +89 -0
  708. ads/opctl/operator/lowcode/forecast/cmd.py +40 -0
  709. ads/opctl/operator/lowcode/forecast/const.py +92 -0
  710. ads/opctl/operator/lowcode/forecast/environment.yaml +20 -0
  711. ads/opctl/operator/lowcode/forecast/errors.py +26 -0
  712. ads/opctl/operator/lowcode/forecast/model/__init__.py +5 -0
  713. ads/opctl/operator/lowcode/forecast/model/arima.py +279 -0
  714. ads/opctl/operator/lowcode/forecast/model/automlx.py +553 -0
  715. ads/opctl/operator/lowcode/forecast/model/autots.py +312 -0
  716. ads/opctl/operator/lowcode/forecast/model/base_model.py +875 -0
  717. ads/opctl/operator/lowcode/forecast/model/factory.py +106 -0
  718. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +492 -0
  719. ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +243 -0
  720. ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +482 -0
  721. ads/opctl/operator/lowcode/forecast/model/prophet.py +445 -0
  722. ads/opctl/operator/lowcode/forecast/model_evaluator.py +244 -0
  723. ads/opctl/operator/lowcode/forecast/operator_config.py +234 -0
  724. ads/opctl/operator/lowcode/forecast/schema.yaml +506 -0
  725. ads/opctl/operator/lowcode/forecast/utils.py +397 -0
  726. ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +7 -0
  727. ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +285 -0
  728. ads/opctl/operator/lowcode/forecast/whatifserve/score.py +246 -0
  729. ads/opctl/operator/lowcode/pii/MLoperator +17 -0
  730. ads/opctl/operator/lowcode/pii/README.md +208 -0
  731. ads/opctl/operator/lowcode/pii/__init__.py +5 -0
  732. ads/opctl/operator/lowcode/pii/__main__.py +78 -0
  733. ads/opctl/operator/lowcode/pii/cmd.py +39 -0
  734. ads/opctl/operator/lowcode/pii/constant.py +84 -0
  735. ads/opctl/operator/lowcode/pii/environment.yaml +17 -0
  736. ads/opctl/operator/lowcode/pii/errors.py +27 -0
  737. ads/opctl/operator/lowcode/pii/model/__init__.py +5 -0
  738. ads/opctl/operator/lowcode/pii/model/factory.py +82 -0
  739. ads/opctl/operator/lowcode/pii/model/guardrails.py +167 -0
  740. ads/opctl/operator/lowcode/pii/model/pii.py +145 -0
  741. ads/opctl/operator/lowcode/pii/model/processor/__init__.py +34 -0
  742. ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +34 -0
  743. ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +35 -0
  744. ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +225 -0
  745. ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +73 -0
  746. ads/opctl/operator/lowcode/pii/model/processor/remover.py +26 -0
  747. ads/opctl/operator/lowcode/pii/model/report.py +487 -0
  748. ads/opctl/operator/lowcode/pii/operator_config.py +95 -0
  749. ads/opctl/operator/lowcode/pii/schema.yaml +108 -0
  750. ads/opctl/operator/lowcode/pii/utils.py +43 -0
  751. ads/opctl/operator/lowcode/recommender/MLoperator +16 -0
  752. ads/opctl/operator/lowcode/recommender/README.md +206 -0
  753. ads/opctl/operator/lowcode/recommender/__init__.py +5 -0
  754. ads/opctl/operator/lowcode/recommender/__main__.py +82 -0
  755. ads/opctl/operator/lowcode/recommender/cmd.py +33 -0
  756. ads/opctl/operator/lowcode/recommender/constant.py +30 -0
  757. ads/opctl/operator/lowcode/recommender/environment.yaml +11 -0
  758. ads/opctl/operator/lowcode/recommender/model/base_model.py +212 -0
  759. ads/opctl/operator/lowcode/recommender/model/factory.py +56 -0
  760. ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +25 -0
  761. ads/opctl/operator/lowcode/recommender/model/svd.py +106 -0
  762. ads/opctl/operator/lowcode/recommender/operator_config.py +81 -0
  763. ads/opctl/operator/lowcode/recommender/schema.yaml +265 -0
  764. ads/opctl/operator/lowcode/recommender/utils.py +13 -0
  765. ads/opctl/operator/runtime/__init__.py +5 -0
  766. ads/opctl/operator/runtime/const.py +17 -0
  767. ads/opctl/operator/runtime/container_runtime_schema.yaml +50 -0
  768. ads/opctl/operator/runtime/marketplace_runtime.py +50 -0
  769. ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +21 -0
  770. ads/opctl/operator/runtime/python_runtime_schema.yaml +21 -0
  771. ads/opctl/operator/runtime/runtime.py +115 -0
  772. ads/opctl/schema.yaml.yml +36 -0
  773. ads/opctl/script.py +40 -0
  774. ads/opctl/spark/__init__.py +5 -0
  775. ads/opctl/spark/cli.py +43 -0
  776. ads/opctl/spark/cmds.py +147 -0
  777. ads/opctl/templates/diagnostic_report_template.jinja2 +102 -0
  778. ads/opctl/utils.py +344 -0
  779. ads/oracledb/__init__.py +5 -0
  780. ads/oracledb/oracle_db.py +346 -0
  781. ads/pipeline/__init__.py +39 -0
  782. ads/pipeline/ads_pipeline.py +2279 -0
  783. ads/pipeline/ads_pipeline_run.py +772 -0
  784. ads/pipeline/ads_pipeline_step.py +605 -0
  785. ads/pipeline/builders/__init__.py +5 -0
  786. ads/pipeline/builders/infrastructure/__init__.py +5 -0
  787. ads/pipeline/builders/infrastructure/custom_script.py +32 -0
  788. ads/pipeline/cli.py +119 -0
  789. ads/pipeline/extension.py +291 -0
  790. ads/pipeline/schema/__init__.py +5 -0
  791. ads/pipeline/schema/cs_step_schema.json +35 -0
  792. ads/pipeline/schema/ml_step_schema.json +31 -0
  793. ads/pipeline/schema/pipeline_schema.json +71 -0
  794. ads/pipeline/visualizer/__init__.py +5 -0
  795. ads/pipeline/visualizer/base.py +570 -0
  796. ads/pipeline/visualizer/graph_renderer.py +272 -0
  797. ads/pipeline/visualizer/text_renderer.py +84 -0
  798. ads/secrets/__init__.py +11 -0
  799. ads/secrets/adb.py +386 -0
  800. ads/secrets/auth_token.py +86 -0
  801. ads/secrets/big_data_service.py +365 -0
  802. ads/secrets/mysqldb.py +149 -0
  803. ads/secrets/oracledb.py +160 -0
  804. ads/secrets/secrets.py +407 -0
  805. ads/telemetry/__init__.py +7 -0
  806. ads/telemetry/base.py +69 -0
  807. ads/telemetry/client.py +125 -0
  808. ads/telemetry/telemetry.py +257 -0
  809. ads/templates/dataflow_pyspark.jinja2 +13 -0
  810. ads/templates/dataflow_sparksql.jinja2 +22 -0
  811. ads/templates/func.jinja2 +20 -0
  812. ads/templates/schemas/openapi.json +1740 -0
  813. ads/templates/score-pkl.jinja2 +173 -0
  814. ads/templates/score.jinja2 +322 -0
  815. ads/templates/score_embedding_onnx.jinja2 +202 -0
  816. ads/templates/score_generic.jinja2 +165 -0
  817. ads/templates/score_huggingface_pipeline.jinja2 +217 -0
  818. ads/templates/score_lightgbm.jinja2 +185 -0
  819. ads/templates/score_onnx.jinja2 +407 -0
  820. ads/templates/score_onnx_new.jinja2 +473 -0
  821. ads/templates/score_oracle_automl.jinja2 +185 -0
  822. ads/templates/score_pyspark.jinja2 +154 -0
  823. ads/templates/score_pytorch.jinja2 +219 -0
  824. ads/templates/score_scikit-learn.jinja2 +184 -0
  825. ads/templates/score_tensorflow.jinja2 +184 -0
  826. ads/templates/score_xgboost.jinja2 +178 -0
  827. ads/text_dataset/__init__.py +5 -0
  828. ads/text_dataset/backends.py +211 -0
  829. ads/text_dataset/dataset.py +445 -0
  830. ads/text_dataset/extractor.py +207 -0
  831. ads/text_dataset/options.py +53 -0
  832. ads/text_dataset/udfs.py +22 -0
  833. ads/text_dataset/utils.py +49 -0
  834. ads/type_discovery/__init__.py +9 -0
  835. ads/type_discovery/abstract_detector.py +21 -0
  836. ads/type_discovery/constant_detector.py +41 -0
  837. ads/type_discovery/continuous_detector.py +54 -0
  838. ads/type_discovery/credit_card_detector.py +99 -0
  839. ads/type_discovery/datetime_detector.py +92 -0
  840. ads/type_discovery/discrete_detector.py +118 -0
  841. ads/type_discovery/document_detector.py +146 -0
  842. ads/type_discovery/ip_detector.py +68 -0
  843. ads/type_discovery/latlon_detector.py +90 -0
  844. ads/type_discovery/phone_number_detector.py +63 -0
  845. ads/type_discovery/type_discovery_driver.py +87 -0
  846. ads/type_discovery/typed_feature.py +594 -0
  847. ads/type_discovery/unknown_detector.py +41 -0
  848. ads/type_discovery/zipcode_detector.py +48 -0
  849. ads/vault/__init__.py +7 -0
  850. ads/vault/vault.py +237 -0
  851. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.9rc1.dist-info}/METADATA +150 -150
  852. oracle_ads-2.13.9rc1.dist-info/RECORD +858 -0
  853. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.9rc1.dist-info}/WHEEL +1 -2
  854. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.9rc1.dist-info}/entry_points.txt +2 -1
  855. oracle_ads-2.13.9rc0.dist-info/RECORD +0 -9
  856. oracle_ads-2.13.9rc0.dist-info/top_level.txt +0 -1
  857. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.9rc1.dist-info}/licenses/LICENSE.txt +0 -0
ads/automl/provider.py ADDED
@@ -0,0 +1,975 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8; -*-
3
+
4
+ # Copyright (c) 2020, 2023 Oracle and/or its affiliates.
5
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
+
7
+ import logging
8
+ import time
9
+ import sys
10
+ import warnings
11
+ from abc import ABC, abstractmethod, abstractproperty
12
+ import math
13
+ import pandas as pd
14
+ import numpy as np
15
+ from sklearn import set_config
16
+ from sklearn.dummy import DummyClassifier, DummyRegressor
17
+
18
+ import matplotlib.pyplot as plt
19
+
20
+ import ads
21
+ from ads.common.utils import (
22
+ ml_task_types,
23
+ wrap_lines,
24
+ is_documentation_mode,
25
+ is_notebook,
26
+ )
27
+ from ads.common.decorator.runtime_dependency import (
28
+ runtime_dependency,
29
+ OptionalDependency,
30
+ )
31
+ from ads.common.decorator.deprecate import deprecated
32
+ from ads.dataset.label_encoder import DataFrameLabelEncoder
33
+ from ads.dataset.helper import is_text_data
34
+
35
+ from ads.common import logger, utils
36
+
37
+
38
class AutoMLProvider(ABC):
    """
    Abstract interface for an AutoML solution.

    Concrete providers must implement ``train()`` and
    ``get_transformer_pipeline()``; both are declared abstract below.
    """

    # NOTE(review): `raise_error=True` presumably makes calling this
    # constructor fail with the deprecation message -- confirm against
    # ads.common.decorator.deprecate.
    @deprecated(
        details="Working with AutoML has moved from within ADS to working directly with the AutoMLx library. AutoMLx are preinstalled in conda pack automlx_p38_cpu_v2 and later, and can now be updated independently of ADS. AutoMLx documentation may be found at https://docs.oracle.com/en-us/iaas/tools/automlx/latest/html/multiversion/v23.1.1/index.html. Notebook examples are in Oracle's samples repository: https://github.com/oracle-samples/oci-data-science-ai-samples/tree/master/notebook_examples and a migration tutorial can be found at https://accelerated-data-science.readthedocs.io/en/latest/user_guide/model_training/automl/quick_start.html .",
        raise_error=True,
    )
    def __init__(self):
        # Every piece of state starts out unset; `setup()` and `train()`
        # are the methods that populate it.
        self.X_train = self.y_train = None
        self.X_valid = self.y_valid = None
        self.client = None
        self.ml_task_type = None
        self.class_names = None
        self.transformer_pipeline = None
        self.est = None  # goes through the `est` property setter

    def setup(
        self,
        X_train,
        y_train,
        ml_task_type,
        X_valid=None,
        y_valid=None,
        class_names=None,
        client=None,
    ):
        """
        Store the data and task description on the AutoML instance.

        Parameters
        ----------
        X_train : DataFrame
            Training features
        y_train : DataFrame
            Training labels
        ml_task_type : One of ml_task_type.{REGRESSION,BINARY_CLASSIFICATION,
            MULTI_CLASS_CLASSIFICATION,BINARY_TEXT_CLASSIFICATION,MULTI_CLASS_TEXT_CLASSIFICATION}
        X_valid : DataFrame
            Validation features
        y_valid : DataFrame
            Validation labels
        class_names : list
            Unique values in y_train
        client : object
            Dask client instance for distributed execution
        """
        self.X_train, self.y_train = X_train, y_train
        self.X_valid, self.y_valid = X_valid, y_valid
        self.ml_task_type = ml_task_type
        self.client = client
        self.class_names = class_names

    @property
    def est(self):
        """
        The estimator held by this provider.

        It can be a standard sklearn estimator or any object that implements the
        methods of (BaseEstimator, RegressorMixin) for regression or
        (BaseEstimator, ClassifierMixin) for classification.

        Returns
        -------
        est : An instance of estimator
        """
        return self.__est

    @est.setter
    def est(self, est):
        # Stored under the name-mangled private attribute read by the getter.
        self.__est = est

    @abstractmethod
    def train(self, **kwargs):
        """
        Call fit on the estimator.

        Implementations are expected to set the 'est' property.

        Parameters
        ----------
        kwargs: dict, optional
            kwargs to decide the estimator and arguments for the fit method
        """

    @abstractmethod
    def get_transformer_pipeline(self):
        """
        Return the transformers applied to the data before model prediction.

        Declared abstract, so concrete subclasses must provide it. The result
        is used for visualizing transformations on data through
        ADSModel#visualize_transforms().

        Returns
        -------
        transformers_list : list of transformers implementing fit and transform
        """
142
+
143
+
144
class BaselineModel(object):
    """
    Thin wrapper around a baseline estimator exposing the
    fit/predict/predict_proba/transform interface.

    Attributes that are not defined on the wrapper are looked up on the
    wrapped estimator. A ``DataFrameLabelEncoder`` is created and stored on
    ``df_label_encoder``; nothing in this class applies it.
    """

    # NOTE(review): `raise_error=True` presumably makes calling this
    # constructor fail with the deprecation message -- confirm against
    # ads.common.decorator.deprecate.
    @deprecated(
        details="Working with AutoML has moved from within ADS to working directly with the AutoMLx library. AutoMLx are preinstalled in conda pack automlx_p38_cpu_v2 and later, and can now be updated independently of ADS. AutoMLx documentation may be found at https://docs.oracle.com/en-us/iaas/tools/automlx/latest/html/multiversion/v23.1.1/index.html. Notebook examples are in Oracle's samples repository: https://github.com/oracle-samples/oci-data-science-ai-samples/tree/master/notebook_examples and a migration tutorial can be found at https://accelerated-data-science.readthedocs.io/en/latest/user_guide/model_training/automl/quick_start.html .",
        raise_error=True,
    )
    def __init__(self, est):
        self.est = est
        self.df_label_encoder = DataFrameLabelEncoder()

    def predict(self, X):
        """
        Run the wrapped estimator's predict on the (transformed) input.

        Parameters
        ----------
        X: Dataframe or list-like
            A Dataframe or list-like object holding data to be predicted on

        Returns
        -------
        List: A list of predictions performed on the input data.
        """
        return self.est.predict(self.transform(X))

    def predict_proba(self, X):
        """
        Run the wrapped estimator's predict_proba on the (transformed) input.

        Parameters
        ----------
        X: Dataframe or list-like
            A Dataframe or list-like object holding data to be predicted on

        Returns
        -------
        List: A list of probabilities of being part of a class
        """
        return self.est.predict_proba(self.transform(X))

    def fit(self, X, y):
        """
        Fit the wrapped baseline estimator.

        Parameters
        ----------
        X: Dataframe or list-like
            A Dataframe or list-like object holding the training data
        y: Dataframe, Series, or list-like
            A Dataframe, series, or list-like object holding the labels

        Returns
        -------
        BaselineModel: this wrapper, whose estimator has been fitted
        """
        self.est.fit(X, y)
        return self

    def transform(self, X):
        """
        Return the input unchanged.

        Parameters
        ----------
        X: Dataframe or list-like
            A Dataframe or list-like object holding data to be transformed

        Returns
        -------
        Dataframe or list-like: The same object that was passed in; no
        transformation is performed by the default Baseline Estimator.
        """
        return X

    def __getattr__(self, item):
        """
        Delegate lookups of unknown attributes to the wrapped estimator.

        Raises
        ------
        AttributeError
            If the wrapper has no ``est`` yet, or the estimator lacks `item`.
        """
        # Read `est` straight from the instance dict: going through
        # `self.est` would re-enter __getattr__ and recurse without bound
        # when the attribute has not been set (e.g. on an instance created
        # with __new__ before its state is restored).
        try:
            wrapped = self.__dict__["est"]
        except KeyError:
            raise AttributeError(item) from None
        return getattr(wrapped, item)

    def __getstate__(self):
        """Return the instance dict as the pickled state."""
        return self.__dict__

    def __setstate__(self, state):
        """Restore the instance dict from the pickled state."""
        self.__dict__ = state

    def __repr__(self):
        """
        Return the estimator's string form without a trailing empty ``()``.

        Only an empty argument list is stripped; a representation that shows
        parameters (or anything else not ending in ``()``) is returned whole
        instead of losing its last two characters.
        """
        # Called with no arguments, exactly as before.
        # NOTE(review): presumably meant to touch sklearn's display
        # configuration -- confirm the intent.
        set_config()
        text = str(self.est)
        return text[:-2] if text.endswith("()") else text
244
+
245
+
246
class BaselineAutoMLProvider(AutoMLProvider):
    """AutoML provider that trains a simple baseline estimator."""

    def get_transformer_pipeline(self):
        """
        Returns a list of transformers representing the transformations done on data before model prediction.

        This method is used only for visualizing transformations on data using
        ADSModel#visualize_transforms().

        Returns
        -------
        transformers_list : list of transformers implementing fit and transform
        """
        msg = "Baseline"
        return [("automl_preprocessing", AutoMLPreprocessingTransformer(msg))]

    # NOTE(review): `raise_error=True` presumably makes calling this
    # constructor fail with the deprecation message -- confirm against
    # ads.common.decorator.deprecate.
    @deprecated(
        details="Working with AutoML has moved from within ADS to working directly with the AutoMLx library. AutoMLx are preinstalled in conda pack automlx_p38_cpu_v2 and later, and can now be updated independently of ADS. AutoMLx documentation may be found at https://docs.oracle.com/en-us/iaas/tools/automlx/latest/html/multiversion/v23.1.1/index.html. Notebook examples are in Oracle's samples repository: https://github.com/oracle-samples/oci-data-science-ai-samples/tree/master/notebook_examples and a migration tutorial can be found at https://accelerated-data-science.readthedocs.io/en/latest/user_guide/model_training/automl/quick_start.html .",
        raise_error=True,
    )
    def __init__(self, est):
        """
        Generates a baseline model using the Zero Rule algorithm by default. For a classification
        predictive modeling problem where a categorical value is predicted, the Zero
        Rule algorithm predicts the class value that has the most observations in the training dataset.

        Parameters
        ----------
        est : BaselineModel
            An estimator that supports the fit/predict/predict_proba interface.
            By default, DummyClassifier/DummyRegressor are used as estimators
        """
        super(BaselineAutoMLProvider, self).__init__()
        self.est = est

    def __repr__(self):
        """
        Return the estimator's string form without a trailing empty ``()``.

        Only an empty argument list is stripped; any other representation
        (including that of ``None``) is returned whole instead of losing its
        last two characters.
        """
        # Called with no arguments, exactly as before.
        # NOTE(review): presumably meant to touch sklearn's display
        # configuration -- confirm the intent.
        set_config()
        text = str(self.est)
        return text[:-2] if text.endswith("()") else text

    def train(self, **kwargs):
        """
        Pick the baseline estimator and fit it on the training data.

        A failure inside ``fit`` is logged as a warning and not re-raised.

        Parameters
        ----------
        kwargs: dict, optional
            Forwarded to ``decide_estimator``.

        Raises
        ------
        ValueError
            If no baseline estimator exists for the configured ML task type.
        """
        self.est = self.decide_estimator(**kwargs)
        if self.est is None:
            # `!s` keeps the str() rendering of the old "%s" formatting while
            # also coping with tuple-valued task types.
            raise ValueError(
                f"Baseline model for ({self.ml_task_type!s}) is not supported"
            )
        try:
            self.est.fit(self.X_train, self.y_train)
        except Exception as e:
            # Deliberate best effort: report the failure and carry on.
            warning_message = f"The baseline estimator failed to fit the data. It could not evaluate {self.est} and gave the exception {e}."
            logger.warning(warning_message)

    def decide_estimator(self, **kwargs):
        """
        Decides which type of BaselineModel to generate.

        Returns
        -------
        Model: BaselineModel or None
            The estimator already set on this provider if there is one;
            otherwise a baseline model for the ML task being performed, or
            None when the task type is not supported.
        """
        if self.est is not None:
            return self.est
        if self.ml_task_type == ml_task_types.REGRESSION:
            return BaselineModel(DummyRegressor())
        if self.ml_task_type in [
            ml_task_types.BINARY_CLASSIFICATION,
            ml_task_types.MULTI_CLASS_CLASSIFICATION,
            ml_task_types.BINARY_TEXT_CLASSIFICATION,
            ml_task_types.MULTI_CLASS_TEXT_CLASSIFICATION,
        ]:
            return BaselineModel(DummyClassifier())
        # Unsupported task type: `train` turns this into a ValueError.
        return None
317
+
318
+
319
+ # An installation of oracle labs automl is required only for this class
320
+ class OracleAutoMLProvider(AutoMLProvider, ABC):
321
@deprecated(
    "2.6.7",
    details="Oracle AutoML is recommended to be directly instantiated by importing automlx package",
    raise_error=True,
)
def __init__(
    self, n_jobs=-1, loglevel=None, logger_override=None, model_n_jobs: int = 1
):
    """
    Import the Oracle AutoML backend and initialise its local engine.

    The Oracle AutoML Provider automatically provides a tuned ML pipeline that
    best models the given training dataset and prediction task at hand.

    Parameters
    ----------
    n_jobs : int
        Specifies the degree of parallelism for Oracle AutoML. -1 (default) means that AutoML will use all
        available cores.
    loglevel : int
        The verbosity of output for Oracle AutoML. Can be specified using the Python logging module
        (https://docs.python.org/3/library/logging.html#logging-levels).
        When None, logging.DEBUG is used if ``ads.debug_mode`` is set and
        logging.ERROR otherwise.
    logger_override : object, optional
        Passed through as the ``logger`` argument of ``automl.init``.
    model_n_jobs: (optional, int). Defaults to 1.
        Specifies the model parallelism used by AutoML.
        This will be passed to the underlying model it is training.

    Raises
    ------
    ModuleNotFoundError
        If the ``automl`` or ``cpuinfo`` module cannot be imported.
    """
    try:
        self.automl = __import__("automl")
        self.cpuinfo = __import__("cpuinfo")
    except ModuleNotFoundError as missing:
        # Log the installation hint, then let the import error propagate.
        utils._log_missing_module("automl", "ads[labs]")
        raise missing
    super(OracleAutoMLProvider, self).__init__()

    if loglevel is None:
        if ads.debug_mode:
            loglevel = logging.DEBUG
        else:
            loglevel = logging.ERROR

    engine_options = {"n_jobs": n_jobs, "model_n_jobs": model_n_jobs}
    self.automl.init(
        engine="local",
        engine_opts=engine_options,
        logger=logger_override,
        loglevel=loglevel,
    )
361
+
362
def __repr__(self):
    """
    Return the string representation provided by the parent class.

    The parent's result is returned explicitly: a ``__repr__`` that yields
    ``None`` makes ``repr()`` raise ``TypeError``.
    """
    return super(OracleAutoMLProvider, self).__repr__()
364
+
365
def get_transformer_pipeline(self):
    """
    Describe the transformations done on data before model prediction.

    This method is used only for visualizing transformations on data using
    ADSModel#visualize_transforms().

    When the estimator has a ``text`` attribute that is falsy, the result is
    a single feature-selection step listing the selected feature names. In
    every other case (attribute missing or truthy) it is a single
    preprocessing step with a fixed description.

    Returns
    -------
    transformers_list : list of transformers implementing fit and transform
    """
    text_pipeline = not hasattr(self.est, "text") or self.est.text
    if text_pipeline:
        description = (
            "Apply Tfidf Vectorization\n"
            "Normalize features\n"
            "Label encode target"
        )
        return [
            ("automl_preprocessing", AutoMLPreprocessingTransformer(description))
        ]

    selected_features = wrap_lines(
        self.est.selected_features_names_, heading="Select features:"
    )
    return [("automl_feature_selection", AutoMLFeatureSelection(selected_features))]
386
+
387
def selected_model_name(self):
    """
    Return the name of the model selected by AutoML.

    The value is read from the estimator's ``selected_model_`` attribute.
    """
    estimator = self.est
    return estimator.selected_model_
392
+
393
@runtime_dependency(module="IPython", install_from=OptionalDependency.NOTEBOOK)
def print_summary(
    self,
    max_rows=None,
    sort_column="Mean Validation Score",
    ranking_table_only=False,
):
    """
    Prints a summary of the Oracle AutoML Pipeline in the last train() call.

    Two HTML tables are displayed: an overview of the run (dataset sizes,
    selected algorithm, hyperparameters, versions) and a ranked table of
    the remaining trials.

    Parameters
    ----------
    max_rows : int
        Number of trials to print. Pass in None to print all trials
    sort_column: string
        Column to sort results by. Must be one of ['Algorithm', '#Samples', '#Features', 'Mean Validation Score',
        'Hyperparameters', 'All Validation Scores', 'CPU Time']. Sorting
        happens before columns are hidden, so a column that is not
        displayed can still be used as the sort key.
    ranking_table_only: bool
        Table to be displayed. Pass in False to display the complete table.
        Pass in True to display the ranking table only.

    """
    if is_notebook():  # pragma: no cover
        # NOTE(review): this measures time elapsed since training started,
        # not `train_end_time - train_start_time` -- confirm the intent.
        logger.info(
            f"Training time was ({(time.time() - self.train_start_time):.2f} seconds.)"
        )

    if len(self.est.tuning_trials_) == 0 or len(self.est.train_shape_) == 0:
        logger.error(
            "Unfortunately, there were no trials found, so we cannot visualize it."
        )
        return

    info = [
        ["Training Dataset size", self.X_train.shape],
        [
            "Validation Dataset size",
            self.X_valid.shape if self.X_valid is not None else None,
        ],
        ["CV", self.est.num_cv_folds_],
        ["Target variable", self.y_train.name],
        ["Optimization Metric", self.est.inferred_score_metric],
        ["Initial number of Features", self.est.train_shape_[1]],
        ["Selected number of Features", len(self.est.selected_features_names_)],
        ["Selected Features", self.est.selected_features_names_],
        ["Selected Algorithm", self.est.selected_model_],
        [
            "End-to-end Elapsed Time (seconds)",
            self.train_end_time - self.train_start_time,
        ],
        ["Selected Hyperparameters", self.est.selected_model_params_],
        ["Mean Validation Score", self.est.tuning_trials_[0][3]],
        ["AutoML n_jobs", self.est.n_jobs_],
        ["AutoML version", self.automl.__version__],
        ["Python version", sys.version],
    ]
    info_df = pd.DataFrame(info)

    # Remove the selected model and its params from the trials since it already shows up in the summary table
    all_trials_ = (
        self.est.model_selection_trials_
        + self.est.adaptive_sampling_trials_
        + self.est.feature_selection_trials_
        + self.est.tuning_trials_[1:]
    )
    col_names = [
        "Algorithm",
        "#Samples",
        "#Features",
        "Mean Validation Score",
        "Hyperparameters",
        "All Validation Scores",
        "CPU Time",
        "Memory Usage",
    ]
    if ranking_table_only:
        dropped_cols = [
            "#Samples",
            "#Features",
            "All Validation Scores",
            "CPU Time",
        ]
    else:
        dropped_cols = ["All Validation Scores"]

    # Sort while every column is still present, then hide the unwanted
    # ones; dropping first made documented sort keys raise KeyError.
    sorted_summary_df = (
        pd.DataFrame(all_trials_, columns=col_names)
        .sort_values(sort_column, ascending=False)
        .drop(columns=dropped_cols)
    )
    # Add a rank column at the front. Ranks start at 2 because the selected
    # model (tuning_trials_[0]) is reported in the overview table instead.
    sorted_summary_df.insert(
        0, "Rank based on Performance", np.arange(2, len(sorted_summary_df) + 2)
    )

    # `IPython.display` is the public import location; `display` is no
    # longer importable from `IPython.core.display` on recent IPython.
    from IPython.display import HTML, display

    with pd.option_context(
        "display.max_colwidth",
        1000,
        "display.width",
        None,
        "display.precision",
        4,
    ):
        display(HTML(info_df.to_html(index=False, header=False)))
        # max_rows=None is to_html's own default, i.e. "show all rows".
        display(HTML(sorted_summary_df.to_html(index=False, max_rows=max_rows)))
503
+
504
def train(self, **kwargs):
    """
    Train the Oracle AutoML Pipeline. This looks at the training data, and
    identifies the best set of features, the best algorithm and the best
    set of hyperparameters for this data. The fitted pipeline is stored on
    ``self.est`` and a summary of the run is printed.

    Parameters
    ----------
    score_metric : str, callable
        Score function (or loss function) with signature ``score_func(y, y_pred, **kwargs)`` or string specified as
        https://scikit-learn.org/stable/modules/model_evaluation.html#common-cases-predefined-values
    random_state : int
        Random seed used by AutoML
    model_list : list of str
        Models that will be evaluated by the Pipeline. Supported models:
        - Classification: AdaBoostClassifier, DecisionTreeClassifier,
        ExtraTreesClassifier, KNeighborsClassifier,
        LGBMClassifier, LinearSVC, LogisticRegression,
        RandomForestClassifier, SVC, XGBClassifier
        - Regression: AdaBoostRegressor, DecisionTreeRegressor,
        ExtraTreesRegressor, KNeighborsRegressor,
        LGBMRegressor, LinearSVR, LinearRegression, RandomForestRegressor,
        SVR, XGBRegressor
    time_budget : float, optional
        Time budget in seconds where 0 means no time budget constraint (best effort)
    col_types : list, optional
        Removed from ``kwargs``, stored on ``self.col_types`` and forwarded
        to the pipeline's ``fit`` call.
    min_features : int, float, list, optional (default: 1)
        Minimum number of features to keep. Acceptable values:
        - If int, 0 < min_features <= n_features
        - If float, 0 < min_features <= 1.0
        - If list, names of features to keep, for example ['a', 'b'] means keep features 'a' and 'b'

    Returns
    -------
    None
        The fitted estimator is available as ``self.est``.

    Raises
    ------
    ValueError
        If the training set has more than 10000 rows and ``model_list``
        contains ``SVC`` or ``KNeighborsClassifier``.
    """
    # SVC and KNeighborsClassifier are disabled for large datasets. Names are
    # compared exactly: a substring test would also reject the supported
    # "LinearSVC", because it contains "SVC".
    if len(self.X_train) > 10000 and "model_list" in kwargs:
        disabled_models = ("SVC", "KNeighborsClassifier")
        if any(model in disabled_models for model in kwargs["model_list"]):
            raise ValueError(
                "SVC, KNeighborsClassifier are disabled for datasets with > 10K samples"
            )

    self.train_start_time = time.time()

    # These two options are consumed here and passed to fit(); everything
    # left in kwargs configures the pipeline itself.
    self.time_budget = kwargs.pop("time_budget", 0)  # 0 means unlimited
    self.col_types = kwargs.pop("col_types", None)

    self.est = self._decide_estimator(**kwargs)
    with warnings.catch_warnings():
        # Silence every warning emitted while the pipeline is fitting.
        warnings.simplefilter("ignore")
        self.est.fit(
            self.X_train,
            self.y_train,
            X_valid=self.X_valid,
            y_valid=self.y_valid,
            time_budget=self.time_budget,
            col_types=self.col_types,
        )
    self.train_end_time = time.time()
    self.print_summary(max_rows=10)
571
+
572
def print_trials(self, max_rows=None, sort_column="Mean Validation Score"):
    """
    Print the trials run by the Oracle AutoML Pipeline during the last
    train() call by delegating to the fitted estimator.

    Parameters
    ----------
    max_rows : int
        How many trials to print. None prints every trial.
    sort_column: string
        Column used to order the results. One of ['Algorithm', '#Samples',
        '#Features', 'Mean Validation Score', 'Hyperparameters',
        'All Validation Scores', 'CPU Time']

    """
    display_options = {"max_rows": max_rows, "sort_column": sort_column}
    self.est.print_trials(**display_options)
586
+
587
def _decide_estimator(self, **kwargs):
    """
    Build the Oracle AutoML pipeline for this provider's task type from the
    user provided keyword arguments.

    ``score_metric`` is taken out of ``kwargs`` (None when absent) and passed
    explicitly; every remaining keyword is forwarded to
    ``self.automl.Pipeline``. For text classification tasks without
    ``self.col_types`` a default is derived for one or two column inputs.

    Raises
    ------
    ValueError
        If the task type is not supported, or if a text classification task
        has more than two columns and no ``col_types`` were supplied.
    """
    task_type = self.ml_task_type
    tabular_classification = (
        ml_task_types.BINARY_CLASSIFICATION,
        ml_task_types.MULTI_CLASS_CLASSIFICATION,
    )
    text_classification = (
        ml_task_types.BINARY_TEXT_CLASSIFICATION,
        ml_task_types.MULTI_CLASS_TEXT_CLASSIFICATION,
    )

    # Pick the single model used in test mode; this also rejects unsupported
    # task types before any keyword argument is consumed.
    if task_type in tabular_classification or task_type in text_classification:
        test_model_list = ["LogisticRegression"]
    elif task_type == ml_task_types.REGRESSION:
        test_model_list = ["LinearRegression"]
    else:
        raise ValueError("AutoML for (%s) is not supported" % self.ml_task_type)

    # Respect the user provided scoring metric if given
    score_metric = kwargs.pop("score_metric", None)

    #
    # ***FOR TESTING PURPOSE ONLY***
    #
    # Ignore model_list for test mode
    if ads.test_mode:  # pragma: no cover
        kwargs.pop("model_list", None)
        kwargs["model_list"] = test_model_list

    def build_pipeline(task_name):
        return self.automl.Pipeline(
            task=task_name, score_metric=score_metric, **kwargs
        )

    if task_type in tabular_classification:
        return build_pipeline("classification")

    if task_type in text_classification:
        est = build_pipeline("classification")
        if not self.col_types:
            # Without explicit column types, only one or two column inputs
            # can be defaulted to all-text.
            column_count = len(self.X_train.columns)
            if column_count == 1:
                self.col_types = ["text"]
            elif column_count == 2:
                self.col_types = ["text", "text"]
            else:
                raise ValueError(
                    "We detected a text classification problem. Pass "
                    "in `col_types = [<type of column1>, <type of column2>, ...]`."
                    " Valid types are: ['categorical', 'numerical', 'text', 'datetime',"
                    " 'timedelta']."
                )
        return est

    if task_type == ml_task_types.REGRESSION:
        return build_pipeline("regression")

    raise ValueError("AutoML for (%s) is not supported" % self.ml_task_type)
657
+
658
def selected_score_label(self):
    """
    Return the name of the score_metric used in train, falling back to the
    estimator's inferred metric when no explicit metric is set.

    """
    explicit_metric = self.est.score_metric
    return self.est.inferred_score_metric if explicit_metric is None else explicit_metric
667
+
668
@runtime_dependency(module="scipy", install_from=OptionalDependency.VIZ)
def visualize_algorithm_selection_trials(self, ylabel=None):
    """
    Plot the scores predicted by Algorithm Selection for each algorithm. The
    horizontal line shows the average score across all algorithms. Algorithms
    below the line are colored turquoise, whereas those with a score at or
    above the mean are colored teal. The orange bar shows the selected
    algorithm. The error bar is +/- one standard error.

    If there are no algorithm selection trials, an error is logged and
    nothing is plotted.

    Parameters
    ----------
    ylabel : str,
        Label for the y-axis. Defaults to the scoring metric.
    """
    if ylabel is None:
        ylabel = self.selected_score_label().capitalize()
    trials = self.est.model_selection_trials_
    if not len(trials):
        # Log directly: the `_log_visualize_no_trials` helper is declared
        # inside AutoMLPreprocessingTransformer, so the bare name cannot be
        # resolved from this method.
        logger.error(
            "There are no trials. Therefore, the algorithm selection cannot be visualized."
        )
        return

    # Each trial is an 8-item record; only the algorithm name, the mean score
    # and the per-fold scores are needed for the plot.
    models, mean_scores, cvscores = [], [], []
    for (
        algorithm,
        _samples,
        _features,
        mean_score,
        _hyperparameters,
        all_scores,
        _runtime,
        _extra,
    ) in trials:
        models.append(algorithm)
        mean_scores.append(mean_score)
        cvscores.append(all_scores)

    mean_scores_ser = pd.Series(mean_scores, index=models).sort_values(
        ascending=False
    )
    scores_ser = pd.Series(cvscores, index=models)
    overall_mean = mean_scores_ser.mean()
    selected_label = "{}_AS".format(self.est.selected_model_)

    _fig, ax = plt.subplots(1, figsize=(6, 3))
    ax.set_title("Algorithm Selection Trials")
    ax.set_ylabel(ylabel)
    ax.set_xlabel("Algorithm")

    colors = []
    y_error = []
    for name, score in mean_scores_ser.items():
        y_error.append(scipy.stats.sem(scores_ser[name], ddof=1))
        if name == selected_label:
            colors.append("orange")
        elif score >= overall_mean:
            colors.append("teal")
        else:
            colors.append("turquoise")

    mean_scores_ser.plot.bar(ax=ax, color=colors, edgecolor="black", zorder=1)
    ax.errorbar(
        x=mean_scores_ser.index.values,
        y=mean_scores_ser.values,
        yerr=y_error,
        fmt="none",
        capsize=4,
        color="black",
        zorder=0,
    )
    ax.axhline(y=overall_mean, color="black", linewidth=0.5)
    ax.autoscale_view()
    plt.show()
734
+
735
def visualize_adaptive_sampling_trials(self):
    """
    Visualize the trials for Adaptive Sampling: the mean score of each trial
    plotted against the number of samples it used.

    If there are no adaptive sampling trials, an error is logged and nothing
    is plotted.
    """
    trials = self.est.adaptive_sampling_trials_
    if len(trials) == 0:
        # Log directly: the `_log_visualize_no_trials` helper is declared
        # inside AutoMLPreprocessingTransformer, so the bare name cannot be
        # resolved from this method.
        logger.error(
            "There are no trials. Therefore, the adaptive sampling cannot be visualized."
        )
        return

    # Collect both plotted series in a single pass over the 8-item records.
    n_samples, scores = [], []
    for (
        _algorithm,
        samples,
        _features,
        mean_score,
        _hyperparameters,
        _all_scores,
        _runtime,
        _extra,
    ) in trials:
        n_samples.append(samples)
        scores.append(mean_score)

    _fig, ax = plt.subplots(1, figsize=(6, 3))
    ax.set_title("Adaptive Sampling ({})".format(trials[0][0]))
    ax.set_xlabel("Dataset sample size")
    ax.set_ylabel(r"Predicted model score")

    lowest, highest = min(scores), max(scores)
    y_margin = 0.10 * (highest - lowest)
    ax.grid(color="g", linestyle="-", linewidth=0.1)
    ax.set_ylim(lowest - y_margin, highest + y_margin)
    # The dotted style is passed as a keyword rather than as the "k:" format
    # string: the format string also sets a colour, which Matplotlib reports
    # as redundant with the explicit color="teal".
    ax.plot(
        n_samples, scores, linestyle=":", marker="s", color="teal", markersize=3
    )
    plt.show()
778
+
779
def visualize_feature_selection_trials(self, ylabel=None):
    """
    Visualize the feature selection trials taken to arrive at optimal set of
    features. The orange line shows the optimal number of features chosen
    by Feature Selection.

    If there are no feature selection trials, an error is logged and nothing
    is plotted.

    Parameters
    ----------
    ylabel : str,
        Label for the y-axis. Defaults to the scoring metric.
    """
    if ylabel is None:
        ylabel = self.selected_score_label().capitalize()
    trials = self.est.feature_selection_trials_
    if len(trials) == 0:
        # Log directly: the `_log_visualize_no_trials` helper is declared
        # inside AutoMLPreprocessingTransformer, so the bare name cannot be
        # resolved from this method.
        logger.error(
            "There are no trials. Therefore, the feature selection cannot be visualized."
        )
        return

    # Collect both plotted series in a single pass over the 8-item records.
    n_features, scores = [], []
    for (
        _algorithm,
        _samples,
        features,
        mean_score,
        _hyperparameters,
        _all_scores,
        _runtime,
        _extra,
    ) in trials:
        n_features.append(features)
        scores.append(mean_score)

    _fig, ax = plt.subplots(1, figsize=(6, 3))
    ax.set_title("Feature Selection Trials")
    ax.set_xlabel("Number of Features")
    ax.set_ylabel(ylabel)

    lowest, highest = min(scores), max(scores)
    y_margin = 0.10 * (highest - lowest)
    ax.grid(color="g", linestyle="-", linewidth=0.1)
    ax.set_ylim(lowest - y_margin, highest + y_margin)
    # The dotted style is passed as a keyword rather than as the "k:" format
    # string: the format string also sets a colour, which Matplotlib reports
    # as redundant with the explicit color="teal".
    ax.plot(
        n_features, scores, linestyle=":", marker="s", color="teal", markersize=3
    )
    # Mark the number of features that was finally selected.
    ax.axvline(
        x=len(self.est.selected_features_names_), color="orange", linewidth=2.0
    )
    plt.show()
834
+
835
def visualize_tuning_trials(self, ylabel=None):
    """
    Visualize (plot) the hyperparameter tuning trials taken to arrive at the
    optimal hyperparameters. Each trial in the plot represents a particular
    hyperparameter combination.

    Parameters
    ----------
    ylabel : str,
        Label for the y-axis. Defaults to the scoring metric.

    Raises
    ------
    RuntimeError
        If fewer than two tuning trials have a usable (non-missing, non-NaN)
        mean score.
    """
    if ylabel is None:
        ylabel = self.selected_score_label().capitalize()
    # The trials are walked in reverse so the scores are plotted in the
    # opposite order to how they are stored.
    # Only missing (None) and NaN scores are discarded; a plain truthiness
    # test would also drop a legitimate score of exactly 0.
    scores = [
        mean_score
        for (
            _algorithm,
            _samples,
            _features,
            mean_score,
            _hyperparameters,
            _all_scores,
            _runtime,
            _extra,
        ) in reversed(self.est.tuning_trials_)
        if mean_score is not None and not np.isnan(mean_score)
    ]
    if len(scores) <= 1:
        raise RuntimeError("Insufficient tuning trials.")

    _fig, ax = plt.subplots(1, figsize=(6, 3))
    ax.set_title("Hyperparameter Tuning Trials")
    ax.set_xlabel("Iteration $n$")
    ax.set_ylabel(ylabel)
    lowest, highest = min(scores), max(scores)
    y_margin = 0.10 * (highest - lowest)
    ax.grid(color="g", linestyle="-", linewidth=0.1)
    ax.set_ylim(lowest - y_margin, highest + y_margin)
    # The dotted style is passed as a keyword rather than as the "k:" format
    # string: the format string also sets a colour, which Matplotlib reports
    # as redundant with the explicit color="teal".
    ax.plot(
        range(1, len(scores) + 1),
        scores,
        linestyle=":",
        marker="s",
        color="teal",
        markersize=3,
    )
    plt.show()
883
+
884
+
885
class AutoMLPreprocessingTransformer:  # pragma: no cover
    """
    Pass-through preprocessing transformer kept for the legacy AutoML API.

    ``fit`` returns the instance unchanged and ``transform`` returns its
    input unchanged. The constructor is wrapped in ``deprecated`` with
    ``raise_error=True``.
    NOTE(review): presumably this makes construction fail with the migration
    message - confirm against the ``deprecated`` decorator.
    """

    @deprecated(
        details="Working with AutoML has moved from within ADS to working directly with the AutoMLx library. AutoMLx are preinstalled in conda pack automlx_p38_cpu_v2 and later, and can now be updated independently of ADS. AutoMLx documentation may be found at https://docs.oracle.com/en-us/iaas/tools/automlx/latest/html/multiversion/v23.1.1/index.html. Notebook examples are in Oracle's samples repository: https://github.com/oracle-samples/oci-data-science-ai-samples/tree/master/notebook_examples and a migration tutorial can be found at https://accelerated-data-science.readthedocs.io/en/latest/user_guide/model_training/automl/quick_start.html .",
        raise_error=True,
    )
    def __init__(self, msg):
        # Text returned by __repr__.
        self.msg = msg

    def fit(self, X):
        """
        Fit the preprocessing transformer (no-op).

        Parameters
        ----------
        X: Dataframe or list-like
            Data to fit on; it is not inspected.

        Returns
        -------
        Self: Estimator
            This instance, unchanged.
        """
        return self

    def transform(self, X):
        """
        Apply the preprocessing transform (identity).

        Parameters
        ----------
        X: Dataframe or list-like
            Data to be transformed.

        Returns
        -------
        X: Dataframe or list-like
            The same object that was passed in.
        """
        return X

    # NOTE(review): declared without `self`; when reached through an instance
    # the instance itself is bound to `target`. Being a class attribute, it
    # is also not available as a bare module-level name.
    def _log_visualize_no_trials(target):
        message = f"There are no trials. Therefore, the {target} cannot be visualized."
        logger.error(message)

    def __repr__(self):
        return self.msg
932
+
933
+
934
class AutoMLFeatureSelection:  # pragma: no cover
    """
    Pass-through feature selection step kept for the legacy AutoML API.

    ``fit`` returns the instance unchanged and ``transform`` returns its
    input unchanged. The constructor is wrapped in ``deprecated`` with
    ``raise_error=True``.
    NOTE(review): presumably this makes construction fail with the migration
    message - confirm against the ``deprecated`` decorator.
    """

    @deprecated(
        details="Working with AutoML has moved from within ADS to working directly with the AutoMLx library. AutoMLx are preinstalled in conda pack automlx_p38_cpu_v2 and later, and can now be updated independently of ADS. AutoMLx documentation may be found at https://docs.oracle.com/en-us/iaas/tools/automlx/latest/html/multiversion/v23.1.1/index.html. Notebook examples are in Oracle's samples repository: https://github.com/oracle-samples/oci-data-science-ai-samples/tree/master/notebook_examples and a migration tutorial can be found at https://accelerated-data-science.readthedocs.io/en/latest/user_guide/model_training/automl/quick_start.html .",
        raise_error=True,
    )
    def __init__(self, msg):
        # Text returned by __repr__.
        self.msg = msg

    def fit(self, X):
        """
        Fit the feature selection step (no-op).

        Parameters
        ----------
        X: Dataframe or list-like
            Data to fit on; it is not inspected.

        Returns
        -------
        Self: Estimator
            This instance, unchanged.
        """
        return self

    def transform(self, X):
        """
        Apply the feature selection transform (identity).

        Parameters
        ----------
        X: Dataframe or list-like
            Data to be transformed.

        Returns
        -------
        X: Dataframe or list-like
            The same object that was passed in.
        """
        return X

    def __repr__(self):
        return self.msg