oracle-ads 2.13.9rc0__py3-none-any.whl → 2.13.10rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (858) hide show
  1. ads/aqua/__init__.py +40 -0
  2. ads/aqua/app.py +507 -0
  3. ads/aqua/cli.py +96 -0
  4. ads/aqua/client/__init__.py +3 -0
  5. ads/aqua/client/client.py +836 -0
  6. ads/aqua/client/openai_client.py +305 -0
  7. ads/aqua/common/__init__.py +5 -0
  8. ads/aqua/common/decorator.py +125 -0
  9. ads/aqua/common/entities.py +274 -0
  10. ads/aqua/common/enums.py +134 -0
  11. ads/aqua/common/errors.py +109 -0
  12. ads/aqua/common/utils.py +1295 -0
  13. ads/aqua/config/__init__.py +4 -0
  14. ads/aqua/config/container_config.py +247 -0
  15. ads/aqua/config/evaluation/__init__.py +4 -0
  16. ads/aqua/config/evaluation/evaluation_service_config.py +147 -0
  17. ads/aqua/config/utils/__init__.py +4 -0
  18. ads/aqua/config/utils/serializer.py +339 -0
  19. ads/aqua/constants.py +116 -0
  20. ads/aqua/data.py +14 -0
  21. ads/aqua/dummy_data/icon.txt +1 -0
  22. ads/aqua/dummy_data/oci_model_deployments.json +56 -0
  23. ads/aqua/dummy_data/oci_models.json +1 -0
  24. ads/aqua/dummy_data/readme.md +26 -0
  25. ads/aqua/evaluation/__init__.py +8 -0
  26. ads/aqua/evaluation/constants.py +53 -0
  27. ads/aqua/evaluation/entities.py +186 -0
  28. ads/aqua/evaluation/errors.py +70 -0
  29. ads/aqua/evaluation/evaluation.py +1814 -0
  30. ads/aqua/extension/__init__.py +42 -0
  31. ads/aqua/extension/aqua_ws_msg_handler.py +76 -0
  32. ads/aqua/extension/base_handler.py +90 -0
  33. ads/aqua/extension/common_handler.py +121 -0
  34. ads/aqua/extension/common_ws_msg_handler.py +36 -0
  35. ads/aqua/extension/deployment_handler.py +381 -0
  36. ads/aqua/extension/deployment_ws_msg_handler.py +54 -0
  37. ads/aqua/extension/errors.py +30 -0
  38. ads/aqua/extension/evaluation_handler.py +129 -0
  39. ads/aqua/extension/evaluation_ws_msg_handler.py +61 -0
  40. ads/aqua/extension/finetune_handler.py +96 -0
  41. ads/aqua/extension/model_handler.py +390 -0
  42. ads/aqua/extension/models/__init__.py +0 -0
  43. ads/aqua/extension/models/ws_models.py +145 -0
  44. ads/aqua/extension/models_ws_msg_handler.py +50 -0
  45. ads/aqua/extension/ui_handler.py +300 -0
  46. ads/aqua/extension/ui_websocket_handler.py +130 -0
  47. ads/aqua/extension/utils.py +133 -0
  48. ads/aqua/finetuning/__init__.py +7 -0
  49. ads/aqua/finetuning/constants.py +23 -0
  50. ads/aqua/finetuning/entities.py +181 -0
  51. ads/aqua/finetuning/finetuning.py +749 -0
  52. ads/aqua/model/__init__.py +8 -0
  53. ads/aqua/model/constants.py +60 -0
  54. ads/aqua/model/entities.py +385 -0
  55. ads/aqua/model/enums.py +32 -0
  56. ads/aqua/model/model.py +2134 -0
  57. ads/aqua/model/utils.py +52 -0
  58. ads/aqua/modeldeployment/__init__.py +6 -0
  59. ads/aqua/modeldeployment/constants.py +10 -0
  60. ads/aqua/modeldeployment/deployment.py +1315 -0
  61. ads/aqua/modeldeployment/entities.py +653 -0
  62. ads/aqua/modeldeployment/utils.py +543 -0
  63. ads/aqua/resources/gpu_shapes_index.json +94 -0
  64. ads/aqua/server/__init__.py +4 -0
  65. ads/aqua/server/__main__.py +24 -0
  66. ads/aqua/server/app.py +47 -0
  67. ads/aqua/server/aqua_spec.yml +1291 -0
  68. ads/aqua/training/__init__.py +4 -0
  69. ads/aqua/training/exceptions.py +476 -0
  70. ads/aqua/ui.py +519 -0
  71. ads/automl/__init__.py +9 -0
  72. ads/automl/driver.py +330 -0
  73. ads/automl/provider.py +975 -0
  74. ads/bds/__init__.py +5 -0
  75. ads/bds/auth.py +127 -0
  76. ads/bds/big_data_service.py +255 -0
  77. ads/catalog/__init__.py +19 -0
  78. ads/catalog/model.py +1576 -0
  79. ads/catalog/notebook.py +461 -0
  80. ads/catalog/project.py +468 -0
  81. ads/catalog/summary.py +178 -0
  82. ads/common/__init__.py +11 -0
  83. ads/common/analyzer.py +65 -0
  84. ads/common/artifact/.model-ignore +63 -0
  85. ads/common/artifact/__init__.py +10 -0
  86. ads/common/auth.py +1122 -0
  87. ads/common/card_identifier.py +83 -0
  88. ads/common/config.py +647 -0
  89. ads/common/data.py +165 -0
  90. ads/common/decorator/__init__.py +9 -0
  91. ads/common/decorator/argument_to_case.py +88 -0
  92. ads/common/decorator/deprecate.py +69 -0
  93. ads/common/decorator/require_nonempty_arg.py +65 -0
  94. ads/common/decorator/runtime_dependency.py +178 -0
  95. ads/common/decorator/threaded.py +97 -0
  96. ads/common/decorator/utils.py +35 -0
  97. ads/common/dsc_file_system.py +303 -0
  98. ads/common/error.py +14 -0
  99. ads/common/extended_enum.py +81 -0
  100. ads/common/function/__init__.py +5 -0
  101. ads/common/function/fn_util.py +142 -0
  102. ads/common/function/func_conf.yaml +25 -0
  103. ads/common/ipython.py +76 -0
  104. ads/common/model.py +679 -0
  105. ads/common/model_artifact.py +1759 -0
  106. ads/common/model_artifact_schema.json +107 -0
  107. ads/common/model_export_util.py +664 -0
  108. ads/common/model_metadata.py +24 -0
  109. ads/common/object_storage_details.py +296 -0
  110. ads/common/oci_client.py +179 -0
  111. ads/common/oci_datascience.py +46 -0
  112. ads/common/oci_logging.py +1144 -0
  113. ads/common/oci_mixin.py +957 -0
  114. ads/common/oci_resource.py +136 -0
  115. ads/common/serializer.py +559 -0
  116. ads/common/utils.py +1852 -0
  117. ads/common/word_lists.py +1491 -0
  118. ads/common/work_request.py +189 -0
  119. ads/config.py +1 -0
  120. ads/data_labeling/__init__.py +13 -0
  121. ads/data_labeling/boundingbox.py +253 -0
  122. ads/data_labeling/constants.py +47 -0
  123. ads/data_labeling/data_labeling_service.py +244 -0
  124. ads/data_labeling/interface/__init__.py +5 -0
  125. ads/data_labeling/interface/loader.py +16 -0
  126. ads/data_labeling/interface/parser.py +16 -0
  127. ads/data_labeling/interface/reader.py +23 -0
  128. ads/data_labeling/loader/__init__.py +5 -0
  129. ads/data_labeling/loader/file_loader.py +241 -0
  130. ads/data_labeling/metadata.py +110 -0
  131. ads/data_labeling/mixin/__init__.py +5 -0
  132. ads/data_labeling/mixin/data_labeling.py +232 -0
  133. ads/data_labeling/ner.py +129 -0
  134. ads/data_labeling/parser/__init__.py +5 -0
  135. ads/data_labeling/parser/dls_record_parser.py +388 -0
  136. ads/data_labeling/parser/export_metadata_parser.py +94 -0
  137. ads/data_labeling/parser/export_record_parser.py +473 -0
  138. ads/data_labeling/reader/__init__.py +5 -0
  139. ads/data_labeling/reader/dataset_reader.py +574 -0
  140. ads/data_labeling/reader/dls_record_reader.py +121 -0
  141. ads/data_labeling/reader/export_record_reader.py +62 -0
  142. ads/data_labeling/reader/jsonl_reader.py +75 -0
  143. ads/data_labeling/reader/metadata_reader.py +203 -0
  144. ads/data_labeling/reader/record_reader.py +263 -0
  145. ads/data_labeling/record.py +52 -0
  146. ads/data_labeling/visualizer/__init__.py +5 -0
  147. ads/data_labeling/visualizer/image_visualizer.py +525 -0
  148. ads/data_labeling/visualizer/text_visualizer.py +357 -0
  149. ads/database/__init__.py +5 -0
  150. ads/database/connection.py +338 -0
  151. ads/dataset/__init__.py +10 -0
  152. ads/dataset/capabilities.md +51 -0
  153. ads/dataset/classification_dataset.py +339 -0
  154. ads/dataset/correlation.py +226 -0
  155. ads/dataset/correlation_plot.py +563 -0
  156. ads/dataset/dask_series.py +173 -0
  157. ads/dataset/dataframe_transformer.py +110 -0
  158. ads/dataset/dataset.py +1979 -0
  159. ads/dataset/dataset_browser.py +360 -0
  160. ads/dataset/dataset_with_target.py +995 -0
  161. ads/dataset/exception.py +25 -0
  162. ads/dataset/factory.py +987 -0
  163. ads/dataset/feature_engineering_transformer.py +35 -0
  164. ads/dataset/feature_selection.py +107 -0
  165. ads/dataset/forecasting_dataset.py +26 -0
  166. ads/dataset/helper.py +1450 -0
  167. ads/dataset/label_encoder.py +99 -0
  168. ads/dataset/mixin/__init__.py +5 -0
  169. ads/dataset/mixin/dataset_accessor.py +134 -0
  170. ads/dataset/pipeline.py +58 -0
  171. ads/dataset/plot.py +710 -0
  172. ads/dataset/progress.py +86 -0
  173. ads/dataset/recommendation.py +297 -0
  174. ads/dataset/recommendation_transformer.py +502 -0
  175. ads/dataset/regression_dataset.py +14 -0
  176. ads/dataset/sampled_dataset.py +1050 -0
  177. ads/dataset/target.py +98 -0
  178. ads/dataset/timeseries.py +18 -0
  179. ads/dbmixin/__init__.py +5 -0
  180. ads/dbmixin/db_pandas_accessor.py +153 -0
  181. ads/environment/__init__.py +9 -0
  182. ads/environment/ml_runtime.py +66 -0
  183. ads/evaluations/README.md +14 -0
  184. ads/evaluations/__init__.py +109 -0
  185. ads/evaluations/evaluation_plot.py +983 -0
  186. ads/evaluations/evaluator.py +1334 -0
  187. ads/evaluations/statistical_metrics.py +543 -0
  188. ads/experiments/__init__.py +9 -0
  189. ads/experiments/capabilities.md +0 -0
  190. ads/explanations/__init__.py +21 -0
  191. ads/explanations/base_explainer.py +142 -0
  192. ads/explanations/capabilities.md +83 -0
  193. ads/explanations/explainer.py +190 -0
  194. ads/explanations/mlx_global_explainer.py +1050 -0
  195. ads/explanations/mlx_interface.py +386 -0
  196. ads/explanations/mlx_local_explainer.py +287 -0
  197. ads/explanations/mlx_whatif_explainer.py +201 -0
  198. ads/feature_engineering/__init__.py +20 -0
  199. ads/feature_engineering/accessor/__init__.py +5 -0
  200. ads/feature_engineering/accessor/dataframe_accessor.py +535 -0
  201. ads/feature_engineering/accessor/mixin/__init__.py +5 -0
  202. ads/feature_engineering/accessor/mixin/correlation.py +166 -0
  203. ads/feature_engineering/accessor/mixin/eda_mixin.py +266 -0
  204. ads/feature_engineering/accessor/mixin/eda_mixin_series.py +85 -0
  205. ads/feature_engineering/accessor/mixin/feature_types_mixin.py +211 -0
  206. ads/feature_engineering/accessor/mixin/utils.py +65 -0
  207. ads/feature_engineering/accessor/series_accessor.py +431 -0
  208. ads/feature_engineering/adsimage/__init__.py +5 -0
  209. ads/feature_engineering/adsimage/image.py +192 -0
  210. ads/feature_engineering/adsimage/image_reader.py +170 -0
  211. ads/feature_engineering/adsimage/interface/__init__.py +5 -0
  212. ads/feature_engineering/adsimage/interface/reader.py +19 -0
  213. ads/feature_engineering/adsstring/__init__.py +7 -0
  214. ads/feature_engineering/adsstring/oci_language/__init__.py +8 -0
  215. ads/feature_engineering/adsstring/string/__init__.py +8 -0
  216. ads/feature_engineering/data_schema.json +57 -0
  217. ads/feature_engineering/dataset/__init__.py +5 -0
  218. ads/feature_engineering/dataset/zip_code_data.py +42062 -0
  219. ads/feature_engineering/exceptions.py +40 -0
  220. ads/feature_engineering/feature_type/__init__.py +133 -0
  221. ads/feature_engineering/feature_type/address.py +184 -0
  222. ads/feature_engineering/feature_type/adsstring/__init__.py +5 -0
  223. ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +164 -0
  224. ads/feature_engineering/feature_type/adsstring/oci_language.py +93 -0
  225. ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +5 -0
  226. ads/feature_engineering/feature_type/adsstring/parsers/base.py +47 -0
  227. ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +96 -0
  228. ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +221 -0
  229. ads/feature_engineering/feature_type/adsstring/string.py +258 -0
  230. ads/feature_engineering/feature_type/base.py +58 -0
  231. ads/feature_engineering/feature_type/boolean.py +183 -0
  232. ads/feature_engineering/feature_type/category.py +146 -0
  233. ads/feature_engineering/feature_type/constant.py +137 -0
  234. ads/feature_engineering/feature_type/continuous.py +151 -0
  235. ads/feature_engineering/feature_type/creditcard.py +314 -0
  236. ads/feature_engineering/feature_type/datetime.py +190 -0
  237. ads/feature_engineering/feature_type/discrete.py +134 -0
  238. ads/feature_engineering/feature_type/document.py +43 -0
  239. ads/feature_engineering/feature_type/gis.py +251 -0
  240. ads/feature_engineering/feature_type/handler/__init__.py +5 -0
  241. ads/feature_engineering/feature_type/handler/feature_validator.py +524 -0
  242. ads/feature_engineering/feature_type/handler/feature_warning.py +319 -0
  243. ads/feature_engineering/feature_type/handler/warnings.py +128 -0
  244. ads/feature_engineering/feature_type/integer.py +142 -0
  245. ads/feature_engineering/feature_type/ip_address.py +144 -0
  246. ads/feature_engineering/feature_type/ip_address_v4.py +138 -0
  247. ads/feature_engineering/feature_type/ip_address_v6.py +138 -0
  248. ads/feature_engineering/feature_type/lat_long.py +256 -0
  249. ads/feature_engineering/feature_type/object.py +43 -0
  250. ads/feature_engineering/feature_type/ordinal.py +132 -0
  251. ads/feature_engineering/feature_type/phone_number.py +135 -0
  252. ads/feature_engineering/feature_type/string.py +171 -0
  253. ads/feature_engineering/feature_type/text.py +93 -0
  254. ads/feature_engineering/feature_type/unknown.py +43 -0
  255. ads/feature_engineering/feature_type/zip_code.py +164 -0
  256. ads/feature_engineering/feature_type_manager.py +406 -0
  257. ads/feature_engineering/schema.py +795 -0
  258. ads/feature_engineering/utils.py +245 -0
  259. ads/feature_store/.readthedocs.yaml +19 -0
  260. ads/feature_store/README.md +65 -0
  261. ads/feature_store/__init__.py +9 -0
  262. ads/feature_store/common/__init__.py +0 -0
  263. ads/feature_store/common/enums.py +339 -0
  264. ads/feature_store/common/exceptions.py +18 -0
  265. ads/feature_store/common/spark_session_singleton.py +125 -0
  266. ads/feature_store/common/utils/__init__.py +0 -0
  267. ads/feature_store/common/utils/base64_encoder_decoder.py +72 -0
  268. ads/feature_store/common/utils/feature_schema_mapper.py +283 -0
  269. ads/feature_store/common/utils/transformation_utils.py +82 -0
  270. ads/feature_store/common/utils/utility.py +403 -0
  271. ads/feature_store/data_validation/__init__.py +0 -0
  272. ads/feature_store/data_validation/great_expectation.py +129 -0
  273. ads/feature_store/dataset.py +1230 -0
  274. ads/feature_store/dataset_job.py +530 -0
  275. ads/feature_store/docs/Dockerfile +7 -0
  276. ads/feature_store/docs/Makefile +44 -0
  277. ads/feature_store/docs/conf.py +28 -0
  278. ads/feature_store/docs/requirements.txt +14 -0
  279. ads/feature_store/docs/source/ads.feature_store.query.rst +20 -0
  280. ads/feature_store/docs/source/cicd.rst +137 -0
  281. ads/feature_store/docs/source/conf.py +86 -0
  282. ads/feature_store/docs/source/data_versioning.rst +33 -0
  283. ads/feature_store/docs/source/dataset.rst +388 -0
  284. ads/feature_store/docs/source/dataset_job.rst +27 -0
  285. ads/feature_store/docs/source/demo.rst +70 -0
  286. ads/feature_store/docs/source/entity.rst +78 -0
  287. ads/feature_store/docs/source/feature_group.rst +624 -0
  288. ads/feature_store/docs/source/feature_group_job.rst +29 -0
  289. ads/feature_store/docs/source/feature_store.rst +122 -0
  290. ads/feature_store/docs/source/feature_store_class.rst +123 -0
  291. ads/feature_store/docs/source/feature_validation.rst +66 -0
  292. ads/feature_store/docs/source/figures/cicd.png +0 -0
  293. ads/feature_store/docs/source/figures/data_validation.png +0 -0
  294. ads/feature_store/docs/source/figures/data_versioning.png +0 -0
  295. ads/feature_store/docs/source/figures/dataset.gif +0 -0
  296. ads/feature_store/docs/source/figures/dataset.png +0 -0
  297. ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
  298. ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
  299. ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
  300. ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
  301. ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
  302. ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
  303. ads/feature_store/docs/source/figures/entity.png +0 -0
  304. ads/feature_store/docs/source/figures/feature_group.png +0 -0
  305. ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
  306. ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
  307. ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
  308. ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
  309. ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
  310. ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
  311. ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
  312. ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
  313. ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
  314. ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
  315. ads/feature_store/docs/source/figures/overview.png +0 -0
  316. ads/feature_store/docs/source/figures/resource_manager.png +0 -0
  317. ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
  318. ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
  319. ads/feature_store/docs/source/figures/stats_1.png +0 -0
  320. ads/feature_store/docs/source/figures/stats_2.png +0 -0
  321. ads/feature_store/docs/source/figures/stats_d.png +0 -0
  322. ads/feature_store/docs/source/figures/stats_fg.png +0 -0
  323. ads/feature_store/docs/source/figures/transformation.png +0 -0
  324. ads/feature_store/docs/source/figures/transformations.gif +0 -0
  325. ads/feature_store/docs/source/figures/validation.png +0 -0
  326. ads/feature_store/docs/source/figures/validation_fg.png +0 -0
  327. ads/feature_store/docs/source/figures/validation_results.png +0 -0
  328. ads/feature_store/docs/source/figures/validation_summary.png +0 -0
  329. ads/feature_store/docs/source/index.rst +81 -0
  330. ads/feature_store/docs/source/module.rst +8 -0
  331. ads/feature_store/docs/source/notebook.rst +94 -0
  332. ads/feature_store/docs/source/overview.rst +47 -0
  333. ads/feature_store/docs/source/quickstart.rst +176 -0
  334. ads/feature_store/docs/source/release_notes.rst +194 -0
  335. ads/feature_store/docs/source/setup_feature_store.rst +81 -0
  336. ads/feature_store/docs/source/statistics.rst +58 -0
  337. ads/feature_store/docs/source/transformation.rst +199 -0
  338. ads/feature_store/docs/source/ui.rst +65 -0
  339. ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +66 -0
  340. ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +192 -0
  341. ads/feature_store/docs/source/user_guides.setup.terraform.rst +338 -0
  342. ads/feature_store/entity.py +718 -0
  343. ads/feature_store/execution_strategy/__init__.py +0 -0
  344. ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
  345. ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +375 -0
  346. ads/feature_store/execution_strategy/engine/__init__.py +0 -0
  347. ads/feature_store/execution_strategy/engine/spark_engine.py +316 -0
  348. ads/feature_store/execution_strategy/execution_strategy.py +113 -0
  349. ads/feature_store/execution_strategy/execution_strategy_provider.py +47 -0
  350. ads/feature_store/execution_strategy/spark/__init__.py +0 -0
  351. ads/feature_store/execution_strategy/spark/spark_execution.py +618 -0
  352. ads/feature_store/feature.py +192 -0
  353. ads/feature_store/feature_group.py +1494 -0
  354. ads/feature_store/feature_group_expectation.py +346 -0
  355. ads/feature_store/feature_group_job.py +602 -0
  356. ads/feature_store/feature_lineage/__init__.py +0 -0
  357. ads/feature_store/feature_lineage/graphviz_service.py +180 -0
  358. ads/feature_store/feature_option_details.py +50 -0
  359. ads/feature_store/feature_statistics/__init__.py +0 -0
  360. ads/feature_store/feature_statistics/statistics_service.py +99 -0
  361. ads/feature_store/feature_store.py +699 -0
  362. ads/feature_store/feature_store_registrar.py +518 -0
  363. ads/feature_store/input_feature_detail.py +149 -0
  364. ads/feature_store/mixin/__init__.py +4 -0
  365. ads/feature_store/mixin/oci_feature_store.py +145 -0
  366. ads/feature_store/model_details.py +73 -0
  367. ads/feature_store/query/__init__.py +0 -0
  368. ads/feature_store/query/filter.py +266 -0
  369. ads/feature_store/query/generator/__init__.py +0 -0
  370. ads/feature_store/query/generator/query_generator.py +298 -0
  371. ads/feature_store/query/join.py +161 -0
  372. ads/feature_store/query/query.py +403 -0
  373. ads/feature_store/query/validator/__init__.py +0 -0
  374. ads/feature_store/query/validator/query_validator.py +57 -0
  375. ads/feature_store/response/__init__.py +0 -0
  376. ads/feature_store/response/response_builder.py +68 -0
  377. ads/feature_store/service/__init__.py +0 -0
  378. ads/feature_store/service/oci_dataset.py +139 -0
  379. ads/feature_store/service/oci_dataset_job.py +199 -0
  380. ads/feature_store/service/oci_entity.py +125 -0
  381. ads/feature_store/service/oci_feature_group.py +164 -0
  382. ads/feature_store/service/oci_feature_group_job.py +214 -0
  383. ads/feature_store/service/oci_feature_store.py +182 -0
  384. ads/feature_store/service/oci_lineage.py +87 -0
  385. ads/feature_store/service/oci_transformation.py +104 -0
  386. ads/feature_store/statistics/__init__.py +0 -0
  387. ads/feature_store/statistics/abs_feature_value.py +49 -0
  388. ads/feature_store/statistics/charts/__init__.py +0 -0
  389. ads/feature_store/statistics/charts/abstract_feature_plot.py +37 -0
  390. ads/feature_store/statistics/charts/box_plot.py +148 -0
  391. ads/feature_store/statistics/charts/frequency_distribution.py +65 -0
  392. ads/feature_store/statistics/charts/probability_distribution.py +68 -0
  393. ads/feature_store/statistics/charts/top_k_frequent_elements.py +98 -0
  394. ads/feature_store/statistics/feature_stat.py +126 -0
  395. ads/feature_store/statistics/generic_feature_value.py +33 -0
  396. ads/feature_store/statistics/statistics.py +41 -0
  397. ads/feature_store/statistics_config.py +101 -0
  398. ads/feature_store/templates/feature_store_template.yaml +45 -0
  399. ads/feature_store/transformation.py +499 -0
  400. ads/feature_store/validation_output.py +57 -0
  401. ads/hpo/__init__.py +9 -0
  402. ads/hpo/_imports.py +91 -0
  403. ads/hpo/ads_search_space.py +439 -0
  404. ads/hpo/distributions.py +325 -0
  405. ads/hpo/objective.py +280 -0
  406. ads/hpo/search_cv.py +1657 -0
  407. ads/hpo/stopping_criterion.py +75 -0
  408. ads/hpo/tuner_artifact.py +413 -0
  409. ads/hpo/utils.py +91 -0
  410. ads/hpo/validation.py +140 -0
  411. ads/hpo/visualization/__init__.py +5 -0
  412. ads/hpo/visualization/_contour.py +23 -0
  413. ads/hpo/visualization/_edf.py +20 -0
  414. ads/hpo/visualization/_intermediate_values.py +21 -0
  415. ads/hpo/visualization/_optimization_history.py +25 -0
  416. ads/hpo/visualization/_parallel_coordinate.py +169 -0
  417. ads/hpo/visualization/_param_importances.py +26 -0
  418. ads/jobs/__init__.py +53 -0
  419. ads/jobs/ads_job.py +663 -0
  420. ads/jobs/builders/__init__.py +5 -0
  421. ads/jobs/builders/base.py +156 -0
  422. ads/jobs/builders/infrastructure/__init__.py +6 -0
  423. ads/jobs/builders/infrastructure/base.py +165 -0
  424. ads/jobs/builders/infrastructure/dataflow.py +1252 -0
  425. ads/jobs/builders/infrastructure/dsc_job.py +1894 -0
  426. ads/jobs/builders/infrastructure/dsc_job_runtime.py +1233 -0
  427. ads/jobs/builders/infrastructure/utils.py +65 -0
  428. ads/jobs/builders/runtimes/__init__.py +5 -0
  429. ads/jobs/builders/runtimes/artifact.py +338 -0
  430. ads/jobs/builders/runtimes/base.py +325 -0
  431. ads/jobs/builders/runtimes/container_runtime.py +242 -0
  432. ads/jobs/builders/runtimes/python_runtime.py +1016 -0
  433. ads/jobs/builders/runtimes/pytorch_runtime.py +204 -0
  434. ads/jobs/cli.py +104 -0
  435. ads/jobs/env_var_parser.py +131 -0
  436. ads/jobs/extension.py +160 -0
  437. ads/jobs/schema/__init__.py +5 -0
  438. ads/jobs/schema/infrastructure_schema.json +116 -0
  439. ads/jobs/schema/job_schema.json +42 -0
  440. ads/jobs/schema/runtime_schema.json +183 -0
  441. ads/jobs/schema/validator.py +141 -0
  442. ads/jobs/serializer.py +296 -0
  443. ads/jobs/templates/__init__.py +5 -0
  444. ads/jobs/templates/container.py +6 -0
  445. ads/jobs/templates/driver_notebook.py +177 -0
  446. ads/jobs/templates/driver_oci.py +500 -0
  447. ads/jobs/templates/driver_python.py +48 -0
  448. ads/jobs/templates/driver_pytorch.py +852 -0
  449. ads/jobs/templates/driver_utils.py +615 -0
  450. ads/jobs/templates/hostname_from_env.c +55 -0
  451. ads/jobs/templates/oci_metrics.py +181 -0
  452. ads/jobs/utils.py +104 -0
  453. ads/llm/__init__.py +28 -0
  454. ads/llm/autogen/__init__.py +2 -0
  455. ads/llm/autogen/constants.py +15 -0
  456. ads/llm/autogen/reports/__init__.py +2 -0
  457. ads/llm/autogen/reports/base.py +67 -0
  458. ads/llm/autogen/reports/data.py +103 -0
  459. ads/llm/autogen/reports/session.py +526 -0
  460. ads/llm/autogen/reports/templates/chat_box.html +13 -0
  461. ads/llm/autogen/reports/templates/chat_box_lt.html +5 -0
  462. ads/llm/autogen/reports/templates/chat_box_rt.html +6 -0
  463. ads/llm/autogen/reports/utils.py +56 -0
  464. ads/llm/autogen/v02/__init__.py +4 -0
  465. ads/llm/autogen/v02/client.py +295 -0
  466. ads/llm/autogen/v02/log_handlers/__init__.py +2 -0
  467. ads/llm/autogen/v02/log_handlers/oci_file_handler.py +83 -0
  468. ads/llm/autogen/v02/loggers/__init__.py +6 -0
  469. ads/llm/autogen/v02/loggers/metric_logger.py +320 -0
  470. ads/llm/autogen/v02/loggers/session_logger.py +580 -0
  471. ads/llm/autogen/v02/loggers/utils.py +86 -0
  472. ads/llm/autogen/v02/runtime_logging.py +163 -0
  473. ads/llm/chain.py +268 -0
  474. ads/llm/chat_template.py +31 -0
  475. ads/llm/deploy.py +63 -0
  476. ads/llm/guardrails/__init__.py +5 -0
  477. ads/llm/guardrails/base.py +442 -0
  478. ads/llm/guardrails/huggingface.py +44 -0
  479. ads/llm/langchain/__init__.py +5 -0
  480. ads/llm/langchain/plugins/__init__.py +5 -0
  481. ads/llm/langchain/plugins/chat_models/__init__.py +5 -0
  482. ads/llm/langchain/plugins/chat_models/oci_data_science.py +1027 -0
  483. ads/llm/langchain/plugins/embeddings/__init__.py +4 -0
  484. ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +184 -0
  485. ads/llm/langchain/plugins/llms/__init__.py +5 -0
  486. ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +979 -0
  487. ads/llm/requirements.txt +3 -0
  488. ads/llm/serialize.py +219 -0
  489. ads/llm/serializers/__init__.py +0 -0
  490. ads/llm/serializers/retrieval_qa.py +153 -0
  491. ads/llm/serializers/runnable_parallel.py +27 -0
  492. ads/llm/templates/score_chain.jinja2 +155 -0
  493. ads/llm/templates/tool_chat_template_hermes.jinja +130 -0
  494. ads/llm/templates/tool_chat_template_mistral_parallel.jinja +94 -0
  495. ads/model/__init__.py +52 -0
  496. ads/model/artifact.py +573 -0
  497. ads/model/artifact_downloader.py +254 -0
  498. ads/model/artifact_uploader.py +267 -0
  499. ads/model/base_properties.py +238 -0
  500. ads/model/common/.model-ignore +66 -0
  501. ads/model/common/__init__.py +5 -0
  502. ads/model/common/utils.py +142 -0
  503. ads/model/datascience_model.py +2635 -0
  504. ads/model/deployment/__init__.py +20 -0
  505. ads/model/deployment/common/__init__.py +5 -0
  506. ads/model/deployment/common/utils.py +308 -0
  507. ads/model/deployment/model_deployer.py +466 -0
  508. ads/model/deployment/model_deployment.py +1846 -0
  509. ads/model/deployment/model_deployment_infrastructure.py +671 -0
  510. ads/model/deployment/model_deployment_properties.py +493 -0
  511. ads/model/deployment/model_deployment_runtime.py +838 -0
  512. ads/model/extractor/__init__.py +5 -0
  513. ads/model/extractor/automl_extractor.py +74 -0
  514. ads/model/extractor/embedding_onnx_extractor.py +80 -0
  515. ads/model/extractor/huggingface_extractor.py +88 -0
  516. ads/model/extractor/keras_extractor.py +84 -0
  517. ads/model/extractor/lightgbm_extractor.py +93 -0
  518. ads/model/extractor/model_info_extractor.py +114 -0
  519. ads/model/extractor/model_info_extractor_factory.py +105 -0
  520. ads/model/extractor/pytorch_extractor.py +87 -0
  521. ads/model/extractor/sklearn_extractor.py +112 -0
  522. ads/model/extractor/spark_extractor.py +89 -0
  523. ads/model/extractor/tensorflow_extractor.py +85 -0
  524. ads/model/extractor/xgboost_extractor.py +94 -0
  525. ads/model/framework/__init__.py +5 -0
  526. ads/model/framework/automl_model.py +178 -0
  527. ads/model/framework/embedding_onnx_model.py +438 -0
  528. ads/model/framework/huggingface_model.py +399 -0
  529. ads/model/framework/lightgbm_model.py +266 -0
  530. ads/model/framework/pytorch_model.py +266 -0
  531. ads/model/framework/sklearn_model.py +250 -0
  532. ads/model/framework/spark_model.py +326 -0
  533. ads/model/framework/tensorflow_model.py +254 -0
  534. ads/model/framework/xgboost_model.py +258 -0
  535. ads/model/generic_model.py +3518 -0
  536. ads/model/model_artifact_boilerplate/README.md +381 -0
  537. ads/model/model_artifact_boilerplate/__init__.py +5 -0
  538. ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +5 -0
  539. ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +427 -0
  540. ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +2 -0
  541. ads/model/model_artifact_boilerplate/runtime.yaml +7 -0
  542. ads/model/model_artifact_boilerplate/score.py +61 -0
  543. ads/model/model_file_description_schema.json +68 -0
  544. ads/model/model_introspect.py +331 -0
  545. ads/model/model_metadata.py +1810 -0
  546. ads/model/model_metadata_mixin.py +460 -0
  547. ads/model/model_properties.py +63 -0
  548. ads/model/model_version_set.py +739 -0
  549. ads/model/runtime/__init__.py +5 -0
  550. ads/model/runtime/env_info.py +306 -0
  551. ads/model/runtime/model_deployment_details.py +37 -0
  552. ads/model/runtime/model_provenance_details.py +58 -0
  553. ads/model/runtime/runtime_info.py +81 -0
  554. ads/model/runtime/schemas/inference_env_info_schema.yaml +16 -0
  555. ads/model/runtime/schemas/model_provenance_schema.yaml +36 -0
  556. ads/model/runtime/schemas/training_env_info_schema.yaml +16 -0
  557. ads/model/runtime/utils.py +201 -0
  558. ads/model/serde/__init__.py +5 -0
  559. ads/model/serde/common.py +40 -0
  560. ads/model/serde/model_input.py +547 -0
  561. ads/model/serde/model_serializer.py +1184 -0
  562. ads/model/service/__init__.py +5 -0
  563. ads/model/service/oci_datascience_model.py +1076 -0
  564. ads/model/service/oci_datascience_model_deployment.py +500 -0
  565. ads/model/service/oci_datascience_model_version_set.py +176 -0
  566. ads/model/transformer/__init__.py +5 -0
  567. ads/model/transformer/onnx_transformer.py +324 -0
  568. ads/mysqldb/__init__.py +5 -0
  569. ads/mysqldb/mysql_db.py +227 -0
  570. ads/opctl/__init__.py +18 -0
  571. ads/opctl/anomaly_detection.py +11 -0
  572. ads/opctl/backend/__init__.py +5 -0
  573. ads/opctl/backend/ads_dataflow.py +353 -0
  574. ads/opctl/backend/ads_ml_job.py +710 -0
  575. ads/opctl/backend/ads_ml_pipeline.py +164 -0
  576. ads/opctl/backend/ads_model_deployment.py +209 -0
  577. ads/opctl/backend/base.py +146 -0
  578. ads/opctl/backend/local.py +1053 -0
  579. ads/opctl/backend/marketplace/__init__.py +9 -0
  580. ads/opctl/backend/marketplace/helm_helper.py +173 -0
  581. ads/opctl/backend/marketplace/local_marketplace.py +271 -0
  582. ads/opctl/backend/marketplace/marketplace_backend_runner.py +71 -0
  583. ads/opctl/backend/marketplace/marketplace_operator_interface.py +44 -0
  584. ads/opctl/backend/marketplace/marketplace_operator_runner.py +24 -0
  585. ads/opctl/backend/marketplace/marketplace_utils.py +212 -0
  586. ads/opctl/backend/marketplace/models/__init__.py +5 -0
  587. ads/opctl/backend/marketplace/models/bearer_token.py +94 -0
  588. ads/opctl/backend/marketplace/models/marketplace_type.py +70 -0
  589. ads/opctl/backend/marketplace/models/ocir_details.py +56 -0
  590. ads/opctl/backend/marketplace/prerequisite_checker.py +238 -0
  591. ads/opctl/cli.py +707 -0
  592. ads/opctl/cmds.py +869 -0
  593. ads/opctl/conda/__init__.py +5 -0
  594. ads/opctl/conda/cli.py +193 -0
  595. ads/opctl/conda/cmds.py +749 -0
  596. ads/opctl/conda/config.yaml +34 -0
  597. ads/opctl/conda/manifest_template.yaml +13 -0
  598. ads/opctl/conda/multipart_uploader.py +188 -0
  599. ads/opctl/conda/pack.py +89 -0
  600. ads/opctl/config/__init__.py +5 -0
  601. ads/opctl/config/base.py +57 -0
  602. ads/opctl/config/diagnostics/__init__.py +5 -0
  603. ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +62 -0
  604. ads/opctl/config/merger.py +255 -0
  605. ads/opctl/config/resolver.py +297 -0
  606. ads/opctl/config/utils.py +79 -0
  607. ads/opctl/config/validator.py +17 -0
  608. ads/opctl/config/versioner.py +68 -0
  609. ads/opctl/config/yaml_parsers/__init__.py +7 -0
  610. ads/opctl/config/yaml_parsers/base.py +58 -0
  611. ads/opctl/config/yaml_parsers/distributed/__init__.py +7 -0
  612. ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +201 -0
  613. ads/opctl/constants.py +66 -0
  614. ads/opctl/decorator/__init__.py +5 -0
  615. ads/opctl/decorator/common.py +129 -0
  616. ads/opctl/diagnostics/__init__.py +5 -0
  617. ads/opctl/diagnostics/__main__.py +25 -0
  618. ads/opctl/diagnostics/check_distributed_job_requirements.py +212 -0
  619. ads/opctl/diagnostics/check_requirements.py +144 -0
  620. ads/opctl/diagnostics/requirement_exception.py +9 -0
  621. ads/opctl/distributed/README.md +109 -0
  622. ads/opctl/distributed/__init__.py +5 -0
  623. ads/opctl/distributed/certificates.py +32 -0
  624. ads/opctl/distributed/cli.py +207 -0
  625. ads/opctl/distributed/cmds.py +731 -0
  626. ads/opctl/distributed/common/__init__.py +5 -0
  627. ads/opctl/distributed/common/abstract_cluster_provider.py +449 -0
  628. ads/opctl/distributed/common/abstract_framework_spec_builder.py +88 -0
  629. ads/opctl/distributed/common/cluster_config_helper.py +103 -0
  630. ads/opctl/distributed/common/cluster_provider_factory.py +21 -0
  631. ads/opctl/distributed/common/cluster_runner.py +54 -0
  632. ads/opctl/distributed/common/framework_factory.py +29 -0
  633. ads/opctl/docker/Dockerfile.job +103 -0
  634. ads/opctl/docker/Dockerfile.job.arm +107 -0
  635. ads/opctl/docker/Dockerfile.job.gpu +175 -0
  636. ads/opctl/docker/base-env.yaml +13 -0
  637. ads/opctl/docker/cuda.repo +6 -0
  638. ads/opctl/docker/operator/.dockerignore +0 -0
  639. ads/opctl/docker/operator/Dockerfile +41 -0
  640. ads/opctl/docker/operator/Dockerfile.gpu +85 -0
  641. ads/opctl/docker/operator/cuda.repo +6 -0
  642. ads/opctl/docker/operator/environment.yaml +8 -0
  643. ads/opctl/forecast.py +11 -0
  644. ads/opctl/index.yaml +3 -0
  645. ads/opctl/model/__init__.py +5 -0
  646. ads/opctl/model/cli.py +65 -0
  647. ads/opctl/model/cmds.py +73 -0
  648. ads/opctl/operator/README.md +4 -0
  649. ads/opctl/operator/__init__.py +31 -0
  650. ads/opctl/operator/cli.py +344 -0
  651. ads/opctl/operator/cmd.py +596 -0
  652. ads/opctl/operator/common/__init__.py +5 -0
  653. ads/opctl/operator/common/backend_factory.py +460 -0
  654. ads/opctl/operator/common/const.py +27 -0
  655. ads/opctl/operator/common/data/synthetic.csv +16001 -0
  656. ads/opctl/operator/common/dictionary_merger.py +148 -0
  657. ads/opctl/operator/common/errors.py +42 -0
  658. ads/opctl/operator/common/operator_config.py +99 -0
  659. ads/opctl/operator/common/operator_loader.py +811 -0
  660. ads/opctl/operator/common/operator_schema.yaml +130 -0
  661. ads/opctl/operator/common/operator_yaml_generator.py +152 -0
  662. ads/opctl/operator/common/utils.py +208 -0
  663. ads/opctl/operator/lowcode/__init__.py +5 -0
  664. ads/opctl/operator/lowcode/anomaly/MLoperator +16 -0
  665. ads/opctl/operator/lowcode/anomaly/README.md +207 -0
  666. ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
  667. ads/opctl/operator/lowcode/anomaly/__main__.py +103 -0
  668. ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
  669. ads/opctl/operator/lowcode/anomaly/const.py +167 -0
  670. ads/opctl/operator/lowcode/anomaly/environment.yaml +10 -0
  671. ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
  672. ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +146 -0
  673. ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +162 -0
  674. ads/opctl/operator/lowcode/anomaly/model/automlx.py +99 -0
  675. ads/opctl/operator/lowcode/anomaly/model/autots.py +115 -0
  676. ads/opctl/operator/lowcode/anomaly/model/base_model.py +404 -0
  677. ads/opctl/operator/lowcode/anomaly/model/factory.py +110 -0
  678. ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +78 -0
  679. ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +78 -0
  680. ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +120 -0
  681. ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
  682. ads/opctl/operator/lowcode/anomaly/operator_config.py +127 -0
  683. ads/opctl/operator/lowcode/anomaly/schema.yaml +401 -0
  684. ads/opctl/operator/lowcode/anomaly/utils.py +88 -0
  685. ads/opctl/operator/lowcode/common/__init__.py +5 -0
  686. ads/opctl/operator/lowcode/common/const.py +10 -0
  687. ads/opctl/operator/lowcode/common/data.py +116 -0
  688. ads/opctl/operator/lowcode/common/errors.py +47 -0
  689. ads/opctl/operator/lowcode/common/transformations.py +296 -0
  690. ads/opctl/operator/lowcode/common/utils.py +384 -0
  691. ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +13 -0
  692. ads/opctl/operator/lowcode/feature_store_marketplace/README.md +30 -0
  693. ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +5 -0
  694. ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +116 -0
  695. ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +85 -0
  696. ads/opctl/operator/lowcode/feature_store_marketplace/const.py +15 -0
  697. ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
  698. ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +4 -0
  699. ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +32 -0
  700. ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +43 -0
  701. ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +120 -0
  702. ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +34 -0
  703. ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +386 -0
  704. ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +160 -0
  705. ads/opctl/operator/lowcode/forecast/MLoperator +25 -0
  706. ads/opctl/operator/lowcode/forecast/README.md +209 -0
  707. ads/opctl/operator/lowcode/forecast/__init__.py +5 -0
  708. ads/opctl/operator/lowcode/forecast/__main__.py +89 -0
  709. ads/opctl/operator/lowcode/forecast/cmd.py +40 -0
  710. ads/opctl/operator/lowcode/forecast/const.py +92 -0
  711. ads/opctl/operator/lowcode/forecast/environment.yaml +20 -0
  712. ads/opctl/operator/lowcode/forecast/errors.py +26 -0
  713. ads/opctl/operator/lowcode/forecast/model/__init__.py +5 -0
  714. ads/opctl/operator/lowcode/forecast/model/arima.py +279 -0
  715. ads/opctl/operator/lowcode/forecast/model/automlx.py +553 -0
  716. ads/opctl/operator/lowcode/forecast/model/autots.py +312 -0
  717. ads/opctl/operator/lowcode/forecast/model/base_model.py +875 -0
  718. ads/opctl/operator/lowcode/forecast/model/factory.py +106 -0
  719. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +492 -0
  720. ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +243 -0
  721. ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +482 -0
  722. ads/opctl/operator/lowcode/forecast/model/prophet.py +450 -0
  723. ads/opctl/operator/lowcode/forecast/model_evaluator.py +244 -0
  724. ads/opctl/operator/lowcode/forecast/operator_config.py +234 -0
  725. ads/opctl/operator/lowcode/forecast/schema.yaml +506 -0
  726. ads/opctl/operator/lowcode/forecast/utils.py +397 -0
  727. ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +7 -0
  728. ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +285 -0
  729. ads/opctl/operator/lowcode/forecast/whatifserve/score.py +246 -0
  730. ads/opctl/operator/lowcode/pii/MLoperator +17 -0
  731. ads/opctl/operator/lowcode/pii/README.md +208 -0
  732. ads/opctl/operator/lowcode/pii/__init__.py +5 -0
  733. ads/opctl/operator/lowcode/pii/__main__.py +78 -0
  734. ads/opctl/operator/lowcode/pii/cmd.py +39 -0
  735. ads/opctl/operator/lowcode/pii/constant.py +84 -0
  736. ads/opctl/operator/lowcode/pii/environment.yaml +17 -0
  737. ads/opctl/operator/lowcode/pii/errors.py +27 -0
  738. ads/opctl/operator/lowcode/pii/model/__init__.py +5 -0
  739. ads/opctl/operator/lowcode/pii/model/factory.py +82 -0
  740. ads/opctl/operator/lowcode/pii/model/guardrails.py +167 -0
  741. ads/opctl/operator/lowcode/pii/model/pii.py +145 -0
  742. ads/opctl/operator/lowcode/pii/model/processor/__init__.py +34 -0
  743. ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +34 -0
  744. ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +35 -0
  745. ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +225 -0
  746. ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +73 -0
  747. ads/opctl/operator/lowcode/pii/model/processor/remover.py +26 -0
  748. ads/opctl/operator/lowcode/pii/model/report.py +487 -0
  749. ads/opctl/operator/lowcode/pii/operator_config.py +95 -0
  750. ads/opctl/operator/lowcode/pii/schema.yaml +108 -0
  751. ads/opctl/operator/lowcode/pii/utils.py +43 -0
  752. ads/opctl/operator/lowcode/recommender/MLoperator +16 -0
  753. ads/opctl/operator/lowcode/recommender/README.md +206 -0
  754. ads/opctl/operator/lowcode/recommender/__init__.py +5 -0
  755. ads/opctl/operator/lowcode/recommender/__main__.py +82 -0
  756. ads/opctl/operator/lowcode/recommender/cmd.py +33 -0
  757. ads/opctl/operator/lowcode/recommender/constant.py +30 -0
  758. ads/opctl/operator/lowcode/recommender/environment.yaml +11 -0
  759. ads/opctl/operator/lowcode/recommender/model/base_model.py +212 -0
  760. ads/opctl/operator/lowcode/recommender/model/factory.py +56 -0
  761. ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +25 -0
  762. ads/opctl/operator/lowcode/recommender/model/svd.py +106 -0
  763. ads/opctl/operator/lowcode/recommender/operator_config.py +81 -0
  764. ads/opctl/operator/lowcode/recommender/schema.yaml +265 -0
  765. ads/opctl/operator/lowcode/recommender/utils.py +13 -0
  766. ads/opctl/operator/runtime/__init__.py +5 -0
  767. ads/opctl/operator/runtime/const.py +17 -0
  768. ads/opctl/operator/runtime/container_runtime_schema.yaml +50 -0
  769. ads/opctl/operator/runtime/marketplace_runtime.py +50 -0
  770. ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +21 -0
  771. ads/opctl/operator/runtime/python_runtime_schema.yaml +21 -0
  772. ads/opctl/operator/runtime/runtime.py +115 -0
  773. ads/opctl/schema.yaml.yml +36 -0
  774. ads/opctl/script.py +40 -0
  775. ads/opctl/spark/__init__.py +5 -0
  776. ads/opctl/spark/cli.py +43 -0
  777. ads/opctl/spark/cmds.py +147 -0
  778. ads/opctl/templates/diagnostic_report_template.jinja2 +102 -0
  779. ads/opctl/utils.py +344 -0
  780. ads/oracledb/__init__.py +5 -0
  781. ads/oracledb/oracle_db.py +346 -0
  782. ads/pipeline/__init__.py +39 -0
  783. ads/pipeline/ads_pipeline.py +2279 -0
  784. ads/pipeline/ads_pipeline_run.py +772 -0
  785. ads/pipeline/ads_pipeline_step.py +605 -0
  786. ads/pipeline/builders/__init__.py +5 -0
  787. ads/pipeline/builders/infrastructure/__init__.py +5 -0
  788. ads/pipeline/builders/infrastructure/custom_script.py +32 -0
  789. ads/pipeline/cli.py +119 -0
  790. ads/pipeline/extension.py +291 -0
  791. ads/pipeline/schema/__init__.py +5 -0
  792. ads/pipeline/schema/cs_step_schema.json +35 -0
  793. ads/pipeline/schema/ml_step_schema.json +31 -0
  794. ads/pipeline/schema/pipeline_schema.json +71 -0
  795. ads/pipeline/visualizer/__init__.py +5 -0
  796. ads/pipeline/visualizer/base.py +570 -0
  797. ads/pipeline/visualizer/graph_renderer.py +272 -0
  798. ads/pipeline/visualizer/text_renderer.py +84 -0
  799. ads/secrets/__init__.py +11 -0
  800. ads/secrets/adb.py +386 -0
  801. ads/secrets/auth_token.py +86 -0
  802. ads/secrets/big_data_service.py +365 -0
  803. ads/secrets/mysqldb.py +149 -0
  804. ads/secrets/oracledb.py +160 -0
  805. ads/secrets/secrets.py +407 -0
  806. ads/telemetry/__init__.py +7 -0
  807. ads/telemetry/base.py +69 -0
  808. ads/telemetry/client.py +122 -0
  809. ads/telemetry/telemetry.py +257 -0
  810. ads/templates/dataflow_pyspark.jinja2 +13 -0
  811. ads/templates/dataflow_sparksql.jinja2 +22 -0
  812. ads/templates/func.jinja2 +20 -0
  813. ads/templates/schemas/openapi.json +1740 -0
  814. ads/templates/score-pkl.jinja2 +173 -0
  815. ads/templates/score.jinja2 +322 -0
  816. ads/templates/score_embedding_onnx.jinja2 +202 -0
  817. ads/templates/score_generic.jinja2 +165 -0
  818. ads/templates/score_huggingface_pipeline.jinja2 +217 -0
  819. ads/templates/score_lightgbm.jinja2 +185 -0
  820. ads/templates/score_onnx.jinja2 +407 -0
  821. ads/templates/score_onnx_new.jinja2 +473 -0
  822. ads/templates/score_oracle_automl.jinja2 +185 -0
  823. ads/templates/score_pyspark.jinja2 +154 -0
  824. ads/templates/score_pytorch.jinja2 +219 -0
  825. ads/templates/score_scikit-learn.jinja2 +184 -0
  826. ads/templates/score_tensorflow.jinja2 +184 -0
  827. ads/templates/score_xgboost.jinja2 +178 -0
  828. ads/text_dataset/__init__.py +5 -0
  829. ads/text_dataset/backends.py +211 -0
  830. ads/text_dataset/dataset.py +445 -0
  831. ads/text_dataset/extractor.py +207 -0
  832. ads/text_dataset/options.py +53 -0
  833. ads/text_dataset/udfs.py +22 -0
  834. ads/text_dataset/utils.py +49 -0
  835. ads/type_discovery/__init__.py +9 -0
  836. ads/type_discovery/abstract_detector.py +21 -0
  837. ads/type_discovery/constant_detector.py +41 -0
  838. ads/type_discovery/continuous_detector.py +54 -0
  839. ads/type_discovery/credit_card_detector.py +99 -0
  840. ads/type_discovery/datetime_detector.py +92 -0
  841. ads/type_discovery/discrete_detector.py +118 -0
  842. ads/type_discovery/document_detector.py +146 -0
  843. ads/type_discovery/ip_detector.py +68 -0
  844. ads/type_discovery/latlon_detector.py +90 -0
  845. ads/type_discovery/phone_number_detector.py +63 -0
  846. ads/type_discovery/type_discovery_driver.py +87 -0
  847. ads/type_discovery/typed_feature.py +594 -0
  848. ads/type_discovery/unknown_detector.py +41 -0
  849. ads/type_discovery/zipcode_detector.py +48 -0
  850. ads/vault/__init__.py +7 -0
  851. ads/vault/vault.py +237 -0
  852. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10rc0.dist-info}/METADATA +150 -149
  853. oracle_ads-2.13.10rc0.dist-info/RECORD +858 -0
  854. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10rc0.dist-info}/WHEEL +1 -2
  855. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10rc0.dist-info}/entry_points.txt +2 -1
  856. oracle_ads-2.13.9rc0.dist-info/RECORD +0 -9
  857. oracle_ads-2.13.9rc0.dist-info/top_level.txt +0 -1
  858. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10rc0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,1334 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8; -*-
3
+
4
+ # Copyright (c) 2020, 2023 Oracle and/or its affiliates.
5
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
+
7
+ from cycler import cycler
8
+ import logging
9
+ import matplotlib as mpl
10
+ import numpy as np
11
+ from numpy.typing import ArrayLike
12
+ import pandas as pd
13
+ import re
14
+ from sklearn.preprocessing import LabelEncoder
15
+ import tempfile
16
+ from typing import List, Any
17
+
18
+ logging.getLogger("matplotlib").setLevel(logging.WARNING)
19
+ mpl.rcParams["image.cmap"] = "BuGn"
20
+ mpl.rcParams["axes.prop_cycle"] = cycler(
21
+ color=["teal", "blueviolet", "forestgreen", "peru", "y", "dodgerblue", "r"]
22
+ )
23
+
24
+ from ads.common.data import ADSData
25
+ from ads.common.decorator.runtime_dependency import (
26
+ runtime_dependency,
27
+ OptionalDependency,
28
+ )
29
+ from ads.common.decorator.deprecate import deprecated
30
+ from ads.common import logger
31
+ from ads.common.model import ADSModel
32
+ from ads.common.model_metadata import UseCaseType
33
+ from ads.dataset.dataset_with_target import ADSDatasetWithTarget
34
+ from ads.evaluations.evaluation_plot import EvaluationPlot
35
+ from ads.evaluations.statistical_metrics import (
36
+ ModelEvaluator,
37
+ DEFAULT_BIN_CLASS_METRICS,
38
+ DEFAULT_MULTI_CLASS_METRICS,
39
+ DEFAULT_REG_METRICS,
40
+ DEFAULT_BIN_CLASS_LABELS_MAP,
41
+ DEFAULT_MULTI_CLASS_LABELS_MAP,
42
+ DEFAULT_REG_LABELS_MAP,
43
+ )
44
+ from ads.model.generic_model import GenericModel, VERIFY_STATUS_NAME
45
+
46
+ METRICS_TO_MINIMIZE = ["hamming_loss", "hinge_loss", "mse", "mae"]
47
+ POSITIVE_CLASS_NAMES = ["yes", "y", "t", "true", "1"]
48
+
49
+
50
+ class Evaluator(object):
51
+ """
52
+ BETA FEATURE
53
+ Evaluator is the new and preferred way to evaluate a model or list of models.
54
+ It contains a superset of the features of the soon-to-be-deprecated ADSEvaluator.
55
+
56
+ Methods
57
+ -------
58
+ display()
59
+ Shows all plots and metrics within the jupyter notebook.
60
+ html()
61
+ Returns the raw string of the html report
62
+ save(filename)
63
+ Saves the html report to the provided file location.
64
+ add_model(model)
65
+ Adds a model to the existing report. See documentation for more details.
66
+ add_metric(metric_fn)
67
+ Adds a metric to the existing report. See documentation for more details.
68
+ add_plot(plotting_fn)
69
+ Adds a plot to the existing report. See documentation for more details.
70
+
71
+ """
72
+
73
def __init__(
    self,
    models: List[GenericModel],
    X: ArrayLike,
    y: ArrayLike,
    y_preds: List[ArrayLike] = None,
    y_scores: List[ArrayLike] = None,
    X_train: ArrayLike = None,
    y_train: ArrayLike = None,
    classes: List = None,
    positive_class: str = None,
    legend_labels: dict = None,
    use_case_type: UseCaseType = None,
):
    """Build an evaluation report for a list of prepared ADS models.

    The constructor validates the models, stores the data, resolves the
    use case type and the class labels, and then scores every model via
    `add_models`.

    Parameters
    ----------
    models : list of ads.model.GenericModel
        The models to evaluate. Must be a list; every entry must be a
        `GenericModel` whose `verify` step is available (i.e. the model
        has been prepared).
    X : DataFrame-like
        The data used to make a prediction.
        Can be set to None if `y_preds` is given. (And `y_scores` for more thorough analysis).
    y : array-like
        The true values corresponding to the input data.
    y_preds : list of array-like, optional
        The predictions from each model, in the same order as `models`.
    y_scores : list of array-like, optional
        The predict_probas from each model, in the same order as `models`.
    X_train : DataFrame-like, optional
        The data used to train the model.
    y_train : array-like, optional
        The true values corresponding to the input training data.
    classes : List or None, optional
        The possible labels for y, when evaluating a classification use case.
    positive_class : str or int, optional
        The class to report metrics for on a binary dataset. When omitted
        (or not one of the classes), a class whose lower-cased string form
        is in `POSITIVE_CLASS_NAMES` is preferred, otherwise the first
        class is used. Ignored for non-binary problems.
    legend_labels : dict, optional
        Mapping used to rename legend labels in plots. Defaults to `None`,
        in which case class names are used.
    use_case_type : str, optional
        The type of problem this model is solving. This can be set during `prepare()`.
        Examples: "binary_classification", "regression", "multinomial_classification"
        Full list of supported types can be found here: `ads.common.model_metadata.UseCaseType`

    Examples
    --------
    >>> import tempfile
    >>> from ads.evaluations.evaluator import Evaluator
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.model_selection import train_test_split
    >>> from ads.model.framework.sklearn_model import SklearnModel
    >>> from ads.common.model_metadata import UseCaseType
    >>>
    >>> X, y = make_classification(n_samples=1000)
    >>> X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
    >>> est = DecisionTreeClassifier().fit(X_train, y_train)
    >>> model = SklearnModel(estimator=est, artifact_dir=tempfile.mkdtemp())
    >>> model.prepare(
    ...     inference_conda_env="generalml_p38_cpu_v1",
    ...     training_conda_env="generalml_p38_cpu_v1",
    ...     X_sample=X_test,
    ...     y_sample=y_test,
    ...     use_case_type=UseCaseType.BINARY_CLASSIFICATION,
    ... )
    >>> report = Evaluator([model], X=X_test, y=y_test)
    >>> report.display()
    """
    # Fail fast on unprepared / unregistered models before touching state.
    self._verify_models(models)

    self.X = X
    self.y = y
    self.X_train = X_train
    self.y_train = y_train
    self.legend_labels = legend_labels
    self.positive_class = positive_class

    # Order matters: the class resolution depends on the problem type.
    self._determine_problem_type(models, use_case_type)
    self._determine_classes(classes)

    # Start from an empty report and score every supplied model into it.
    self.model_names = []
    self.evaluation = pd.DataFrame()
    self.add_models(models, y_preds=y_preds, y_scores=y_scores)
157
+
158
+ def _verify_models(self, models):
159
+ assert isinstance(
160
+ models, list
161
+ ), f"The `models` argument must be a list of models, instead got: {models}"
162
+ for m in models:
163
+ if not isinstance(m, GenericModel):
164
+ raise ValueError(
165
+ f"Please register and prepare model {m} with ads. More information here: https://accelerated-data-science.readthedocs.io/en/latest/user_guide/model_registration/introduction.html#register"
166
+ )
167
+ sum_stat = m.summary_status().reset_index()
168
+ if (
169
+ sum_stat.loc[sum_stat["Step"] == VERIFY_STATUS_NAME, "Status"]
170
+ == "Not Available"
171
+ ).any():
172
+ raise ValueError(
173
+ f"Model {m} has not been prepared, and `verify` cannot be run (including the pre and post processing from the score.py). This may cause issues. Prepare the model in accordance with the documentation: https://accelerated-data-science.readthedocs.io/en/latest/user_guide/model_registration/model_artifact.html#prepare-the-model-artifact"
174
+ )
175
+
176
+ def _determine_problem_type(self, models, use_case_type):
177
+ if use_case_type is not None:
178
+ self.problem_type = use_case_type
179
+ problem_type = models[0].metadata_taxonomy["UseCaseType"].value
180
+ if problem_type is not None:
181
+ for m in models:
182
+ assert (
183
+ problem_type == m.metadata_taxonomy["UseCaseType"].value
184
+ ), f"Cannot compare models of different Use Case types. The first model is of type {problem_type}, while model: {m} is of Use Case type: {m.metadata_taxonomy['UseCaseType'].value}"
185
+ self.problem_type = problem_type
186
+ else:
187
+ if not models[0].schema_output.keys:
188
+ raise ValueError(
189
+ f"The Use Case Type of this model, {models[0]}, is ambigious. Please re-run Evaluator with `use_case_type` set to a valid type (full list found here: ads.common.model_metadata.UseCaseType). To avoid setting this in the future, set the `use_case_type` when preparing the model. Or update your model's use_case_type attribute here: `model.metadata_taxonomy['UseCaseType'].value` More information here: https://accelerated-data-science.readthedocs.io/en/latest/user_guide/model_registration/model_metadata.html#taxonomy-metadata"
190
+ )
191
+ logger.warn(
192
+ f"The Use Case Type of this model, {models[0]}, is ambigious. Please set the `model.metadata_taxonomy['UseCaseType'].value` attribute to one of the options in ads.common.model_metadata.UseCaseType"
193
+ )
194
+
195
+ output_col = models[0].schema_output.keys[0]
196
+ if models[0].schema_output[output_col].feature_type != "Continuous":
197
+ if len(np.unique(self.y)) == 2:
198
+ self.problem_type = UseCaseType.BINARY_CLASSIFICATION
199
+ else:
200
+ self.problem_type = UseCaseType.MULTINOMIAL_CLASSIFICATION
201
+ else:
202
+ self.problem_type = UseCaseType.REGRESSION
203
+ logger.info(f"Set Use Case Type to: {self.problem_type}")
204
+
205
def _determine_classes(self, classes):
    """Resolve class labels, default metrics and the positive class.

    Sets `self.classes`, `self.metrics_to_show` and `self.is_classifier`.
    For classification it also sets `self.num_classes`, and for binary
    problems it picks `self.positive_class` when none (or an unknown one)
    was supplied.

    Class labels are taken, in order of precedence, from:

    1. the `classes` argument, when it is given and non-empty;
    2. the unique values of `self.y_train`, when training targets exist;
    3. the unique values of `self.y`.

    The previous one-line expression parsed as
    ``(classes or unique(y_train)) if y_train is not None else unique(y)``,
    so caller-supplied `classes` were ignored whenever `y_train` was None.
    It also evaluated the truthiness of `classes`, which fails for
    multi-element numpy arrays.

    Parameters
    ----------
    classes : list-like or None
        Caller-supplied class labels.
    """
    if self.problem_type in [UseCaseType.REGRESSION]:
        self.classes = []
        self.metrics_to_show = DEFAULT_REG_METRICS
        self.is_classifier = False
        return

    self.is_classifier = True
    if classes is not None and len(classes) > 0:
        self.classes = classes
    elif self.y_train is not None:
        self.classes = np.unique(self.y_train)
    else:
        self.classes = np.unique(self.y)
    self.num_classes = len(self.classes)

    if self.num_classes != 2:
        self.metrics_to_show = DEFAULT_MULTI_CLASS_METRICS
        return

    self.metrics_to_show = DEFAULT_BIN_CLASS_METRICS
    if self.positive_class is None or self.positive_class not in self.classes:
        # Prefer a label that reads as "positive" (yes/true/1/...);
        # otherwise fall back to the first class.
        self.positive_class = next(
            (
                x
                for x in list(self.classes)
                if str(x).lower() in POSITIVE_CLASS_NAMES
            ),
            self.classes[0],
        )
        logger.info(
            f"Using {self.positive_class} as the positive class. Use `positive_class` to set this value."
        )
237
+
238
+ def _get_model_name(self, model):
239
+ name = str(model.algorithm) + "_" + str(model.framework)
240
+ name_edit = re.sub(r" ?\([^)]+\)", "", name)
241
+ if name_edit in self.model_names:
242
+ name_edit += "_1"
243
+ num_tries = 1
244
+ while name_edit in self.model_names:
245
+ num_tries += 1
246
+ name_edit = name_edit[:-1] + str(num_tries)
247
+ self.model_names.append(name_edit)
248
+ return name_edit
249
+
250
+ def _score_data(self, model, X):
251
+ y_pred = model.verify(X)["prediction"]
252
+
253
+ y_score = None
254
+ # we will compute y_score only for binary classification cases because only for binary classification can
255
+ # we use it for ROC Curves and AUC
256
+ if self.is_classifier and hasattr(model.estimator, "predict_proba"):
257
+ if len(self.classes) == 2:
258
+ # positive label index is assumed to be 0 if the ADSModel does not have a positive class defined
259
+ positive_class_index = 0
260
+ # For prediction probability, we only consider the positive class.
261
+ if self.positive_class is not None:
262
+ if self.positive_class not in list(self.classes):
263
+ raise ValueError(
264
+ "Invalid positive class '%s' for model %s. Positive class should be one of %s."
265
+ % (
266
+ self.positive_class,
267
+ model.estimator.__class__.__name__,
268
+ list(self.classes),
269
+ )
270
+ )
271
+ positive_class_index = list(self.classes).index(self.positive_class)
272
+ y_score = model.estimator.predict_proba(X)[:, positive_class_index]
273
+ else:
274
+ y_score = model.estimator.predict_proba(X)
275
+ return y_pred, y_score
276
+
277
def add_models(
    self,
    models: List[GenericModel],
    y_preds: List[Any] = None,
    y_scores: List[Any] = None,
):
    """Score additional models and append their metrics to the report.

    Existing results in `self.evaluation` are kept; a new column of
    metrics is concatenated for every model in `models`.

    Parameters
    ----------
    models : List[ads.model.GenericModel]
        The models to add to the evaluation.
        The objects can be built using one of the frameworks supported in `ads.model.framework`
    y_preds : list of array-like, optional
        The predictions from each model in the same order as the models.
        When omitted, predictions (and scores) are computed from `self.X`.
    y_scores : list of array-like, optional
        The predict_probas from each model in the same order as the models.
        Only consulted when `y_preds` is given.

    Returns
    -------
    self

    Examples
    --------
    >>> evaluator = Evaluator(models = [model1, model2], X=X, y=y)
    >>> evaluator.add_models(models = [model3])
    """

    assert isinstance(models, List), "The `models` parameter must be of type list."

    # Classification metrics are computed on integer-encoded labels.
    if self.is_classifier:
        self._le = LabelEncoder().fit(self.y)

    for idx, candidate in enumerate(models):
        label = self._get_model_name(candidate)

        if y_preds is not None:
            # Caller supplied the predictions; scores are optional.
            predictions = y_preds[idx]
            scores = y_scores[idx] if isinstance(y_scores, list) else None
        else:
            predictions, scores = self._score_data(candidate, self.X)

        if not self.is_classifier:
            truth, encoded_classes, encoded_positive = self.y, None, None
        else:
            truth = self._le.transform(self.y)
            predictions = self._le.transform(predictions)
            encoded_classes = self._le.transform(self.classes)
            encoded_positive = None
            if len(self.classes) == 2:
                encoded_positive = self._le.transform([self.positive_class])[0]

        model_metrics = ModelEvaluator(
            y_true=truth,
            y_pred=predictions,
            model_name=label,
            classes=encoded_classes,
            y_score=scores,
            positive_class=encoded_positive,
        ).get_metrics()
        self.evaluation = pd.concat(
            [self.evaluation, model_metrics], axis=1, sort=False
        )
    return self
336
+
337
@runtime_dependency(module="IPython", install_from=OptionalDependency.NOTEBOOK)
@runtime_dependency(
    module="ipywidgets",
    object="HTML",
    install_from=OptionalDependency.NOTEBOOK,
)
def display(
    self,
    plots=None,
    perfect=False,
    baseline=True,
    legend_labels=None,
    precision=4,
    metrics_labels=None,
):
    """Visualize evaluation report.

    Parameters
    ----------
    plots : list, optional
        Filter the plots that are displayed. Defaults to None. The name of the plots are as below:

            - regression - residuals_qq, residuals_vs_fitted
            - binary classification - normalized_confusion_matrix, roc_curve, pr_curve
            - multi class classification - normalized_confusion_matrix, precision_by_label, recall_by_label, f1_by_label
    perfect: bool, optional (default False)
        If True, will show how a perfect classifier would perform.
    baseline: bool, optional (default True)
        If True, will show how a random classifier would perform.
    legend_labels : dict, optional
        Rename legend labels, that used for multi class classification plots. Defaults to None.
        legend_labels dict keys are the same as class names. legend_labels dict values are strings.
        When omitted, the `legend_labels` given to the constructor are used; if those
        are missing too and this is a classification problem, the class names are used.
    precision: int, optional (default 4)
        The number of decimal points to show for each score/loss value
    metrics_labels: List, optional
        The metrics that should be included in the html table.

    Returns
    -------
    None
        Nothing. Outputs several evaluation plots as specified by `plots`,
        followed by the metrics table.

    Examples
    --------

    >>> evaluator = Evaluator(models=[model1, model2], X=X, y=y)
    >>> evaluator.display()

    >>> legend_labels={'class_0': 'green', 'class_1': 'yellow', 'class_2': 'red'}
    >>> multi_evaluator = Evaluator(models=[model1, model2], X=X, y=y, legend_labels=legend_labels)
    >>> multi_evaluator.display(plots=["normalized_confusion_matrix",
    ... "precision_by_label", "recall_by_label", "f1_by_label"])
    """
    # `IPython.display` is the public home of `display`/`HTML`; importing
    # `display` from `IPython.core.display` is deprecated and fails on
    # newer IPython releases.
    from IPython.display import HTML, display

    if legend_labels is None:
        legend_labels = self.legend_labels
    if legend_labels is None and self.is_classifier:
        # Plots work on label-encoded classes: map each encoded value (as a
        # string) back to the original class name.
        encoded_classes = self._le.transform(self.classes)
        legend_labels = dict(zip([str(x) for x in encoded_classes], self.classes))

    # The returned HTML is not used here.
    # NOTE(review): presumably the plots are rendered as a side effect of
    # this call - confirm against `_get_plots_html`.
    self._get_plots_html(
        plots=plots, perfect=perfect, baseline=baseline, legend_labels=legend_labels
    )
    display(
        HTML(self._get_metrics_html(precision=precision, labels=metrics_labels))
    )
407
+
408
def html(
    self,
    plots=None,
    perfect=False,
    baseline=True,
    legend_labels=None,
    precision=4,
    metrics_labels=None,
):
    """Build the evaluation report as a raw HTML string.

    Parameters
    ----------
    plots : list, optional
        Filter the plots that are rendered. Defaults to None. The names of the plots are:

        - regression - residuals_qq, residuals_vs_fitted
        - binary classification - normalized_confusion_matrix, roc_curve, pr_curve
        - multi class classification - normalized_confusion_matrix, precision_by_label, recall_by_label, f1_by_label
    perfect : bool, optional (default False)
        If True, will show how a perfect classifier would perform.
    baseline : bool, optional (default True)
        If True, will show how a random classifier would perform.
    legend_labels : dict, optional
        Rename legend labels used for multi class classification plots.
        Keys are class names, values are strings. When omitted, the
        ``legend_labels`` stored on the evaluator are used.
    precision : int, optional (default 4)
        The number of decimal points to show for each score/loss value.
    metrics_labels : List, optional
        The metrics that should be included in the html table.

    Returns
    -------
    str
        HTML made of a report heading, the plot fragments and the metrics table.

    Examples
    --------

    >>> evaluator = Evaluator(models=[model1, model2], X=X, y=y)
    >>> raw_html = evaluator.html()
    """
    # Fall back to the labels given at construction time, as `display` does,
    # so both entry points render the same legends.
    if legend_labels is None:
        legend_labels = self.legend_labels
    html_plots = self._get_plots_html(
        plots=plots, perfect=perfect, baseline=baseline, legend_labels=legend_labels
    )
    html_metrics = self._get_metrics_html(
        precision=precision, labels=metrics_labels
    )
    # Explicit fragments: backslash-continued literals would embed the source
    # file's indentation in the generated report.
    return (
        "<h1>Evaluation Report</h1> <h2>Evaluation Plots</h2> "
        + " ".join(html_plots)
        + f"<h2>Evaluation Metrics</h2> <p> {html_metrics} </p>"
    )
468
+
469
def save(self, filename: str, **kwargs):
    """Write the HTML report produced by `html` to a file.

    Parameters
    ----------
    filename : str
        The name and path of where to save the html report.
    **kwargs
        Forwarded unchanged to `html`; see that method for the accepted
        options (``plots``, ``perfect``, ``baseline``, ``legend_labels``,
        ``precision``, ``metrics_labels``).

    Returns
    -------
    None
        Nothing. The report is written to `filename`.

    Examples
    --------

    >>> evaluator = Evaluator(models=[model1, model2], X=X, y=y)
    >>> evaluator.save("report.html")
    """
    raw_html = self.html(**kwargs)
    # Pin the encoding: the platform default may not be able to represent
    # non-ASCII class or metric labels contained in the report.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(raw_html)
509
+
510
def _get_plots_html(
    self,
    plots=None,
    perfect=False,
    baseline=True,
    legend_labels=None,
):
    """Hand the stored evaluation over to `EvaluationPlot.plot`.

    Parameters
    ----------
    plots : list, optional
        Names of the plots to render; passed through unchanged.
    perfect : bool, optional
        Passed through unchanged.
    baseline : bool, optional
        Passed through unchanged.
    legend_labels : dict, optional
        Passed through unchanged.

    Returns
    -------
    object
        Whatever `EvaluationPlot.plot` returns. `html` joins the result as
        strings, so it is presumably an iterable of HTML fragments.
    """
    evaluation = self.evaluation
    class_count = len(self.classes)
    return EvaluationPlot.plot(
        evaluation, plots, class_count, perfect, baseline, legend_labels
    )
520
+
521
def _get_metrics_html(self, precision=4, labels=None):
    """Render the metrics table as an HTML string.

    Parameters
    ----------
    precision : int, optional (default 4)
        Number of decimals passed to the pandas Styler formatter.
    labels : dict, optional
        Maps metric row names to the labels shown in the table. Only the
        rows named by its keys are rendered. When None, a default map is
        chosen from the problem type (regression, binary, multi class).

    Returns
    -------
    str
        HTML produced by the pandas Styler.
    """

    def highlight_max(s):
        """Give the best value(s) of a metric row a light-green background.

        Parameters
        ----------
        s : series object
            One row of the metrics table; `s.name` is the row label.

        Returns
        -------
        list
            One CSS string per cell, empty for cells that are not the best.
        """
        # Rows listed in METRICS_TO_MINIMIZE are best at their minimum.
        best = s.max() if s.name not in METRICS_TO_MINIMIZE else s.min()
        return ["background-color: lightgreen" if hit else "" for hit in s == best]

    def _pretty_label(df, labels, copy=True):
        """Select the rows named in `labels` and rename them to display labels.

        Parameters
        ----------
        df : dataframe
            Metrics indexed by metric name.
        labels : dictionary
            Maps metric names to the labels to display.
        copy : bool
            Copy the selected rows before renaming.

        Returns
        -------
        dataframe
            Selected rows with index names replaced according to `labels`.
        """
        selected = df.loc[list(labels.keys())]
        if copy:
            selected = selected.copy()
        # Renames are applied one at a time, in mapping order.
        for raw_name, shown_name in labels.items():
            selected.rename(index={raw_name: shown_name}, inplace=True)
        return selected

    if labels is None:
        if not self.is_classifier:
            labels = DEFAULT_REG_LABELS_MAP
        elif len(self.classes) == 2:
            labels = DEFAULT_BIN_CLASS_LABELS_MAP
        else:
            labels = DEFAULT_MULTI_CLASS_LABELS_MAP

    caption = (
        '<div align="left"><b style="font-size:20px;">'
        + "Evaluation Metrics:</b></div>"
    )
    styler = _pretty_label(self.evaluation, labels).style
    styler = styler.apply(highlight_max, axis=1)
    styler = styler.format(precision=precision)
    styler = styler.set_properties(**{"text-align": "center"})
    styler = styler.set_table_attributes("class=table")
    styler = styler.set_caption(caption)
    return styler.to_html()
585
+
586
+
587
+ class ADSEvaluator(object):
588
+ """ADS Evaluator class. This class holds field and methods for creating and using
589
+ ADS evaluator objects.
590
+
591
+ Attributes
592
+ ----------
593
+ evaluations : list[DataFrame]
594
+ list of evaluations.
595
+ is_classifier : bool
596
+ Whether the dataset looks like a classification problem (versus regression).
597
+ legend_labels : dict
598
+ Mapping from class name to the legend label shown in plots. Defaults to `None`.
599
+ metrics_to_show : list[str]
600
+ Names of metrics to show.
601
+ models : list[ads.common.model.ADSModel]
602
+ The object built using `ADSModel.from_estimator()`.
603
+ positive_class : str or int
604
+ The class to report metrics for binary dataset, assumed to be true.
605
+ show_full_name :bool
606
+ Whether to show the name of the evaluator in relevant contexts.
607
+ test_data : ads.common.data.ADSData
608
+ Test data to evaluate model on.
609
+ training_data : ads.common.data.ADSData
610
+ Training data to evaluate model.
611
+
612
+ Positive_Class_Names : list
613
+ Class attribute listing the ways to represent positive classes
614
+
615
+ Methods
616
+ -------
617
+ add_metrics(func, names)
618
+ Adds the listed metrics to the evaluator it is called on
619
+ del_metrics(names)
620
+ Removes listed metrics from the evaluator object it is called on
621
+ add_models(models, show_full_name)
622
+ Adds the listed models to the evaluator object
623
+ del_models(names)
624
+ Removes the listed models from the evaluator object
625
+ show_in_notebook(plots, use_training_data, perfect, baseline, legend_labels)
626
+ Visualize evaluation plots in the notebook
627
+ calculate_cost(tn_weight, fp_weight, fn_weight, tp_weight, use_training_data)
628
+ Returns a cost associated with the input weights
629
+ """
630
+
631
+ Positive_Class_Names = ["yes", "y", "t", "true", "1"]
632
+
633
def __init__(
    self,
    test_data,
    models,
    training_data=None,
    positive_class=None,
    legend_labels=None,
    show_full_name=False,
    classes=None,
    classification_threshold=50,
):
    """Creates an ads evaluator object.

    Parameters
    ----------
    test_data : ads.common.data.ADSData instance
        Test data to evaluate model on.
        The object can be built using `ADSData.build()`.
    models : list[ads.common.model.ADSModel]
        The object can be built using `ADSModel.from_estimator()`.
        The first model decides whether the problem is treated as
        classification (it exposes a non-None `classes_`) or regression.
    training_data : ads.common.data.ADSData instance, optional
        Training data to evaluate model on and compare metrics against test data.
        The object can be built using `ADSData.build()`
    positive_class : str or int, optional
        The class to report metrics for binary dataset. When omitted or not
        one of the model classes, the first class whose lower-cased string
        form appears in `Positive_Class_Names` is used, else the first class.
        Left as given for multi class and regression problems.
    legend_labels : dict, optional
        Mapping of class names to legend labels. Defaults to `None`.
        If legend_labels not specified class names will be used for plots.
    show_full_name : bool, optional
        Forwarded to `add_models`. Defaults to `False`.
    classes : List or None, optional
        Accepted for backward compatibility; not read by this constructor.
    classification_threshold : int, defaults to 50
        Accepted for backward compatibility; not read by this constructor.

    Raises
    ------
    ValueError
        If `test_data`, or a provided `training_data`, is not an `ADSData`.
    AssertionError
        If `models` is not a list.

    Examples
    --------

    >>> train, test = ds.train_test_split()
    >>> model1 = MyModelClass1.train(train)
    >>> model2 = MyModelClass2.train(train)
    >>> evaluator = ADSEvaluator(test, [model1, model2])

    >>> legend_labels={'class_0': 'one', 'class_1': 'two', 'class_2': 'three'}
    >>> multi_evaluator = ADSEvaluator(test, models=[model1, model2],
    ...             legend_labels=legend_labels)

    """
    if any(isinstance(m, ADSModel) for m in models):
        # `Logger.warn` is a deprecated alias of `Logger.warning`.
        logger.warning(
            "ADSModel is being deprecated. Users should instead use GenericModel or one of its subclasses. More information here: https://accelerated-data-science.readthedocs.io/en/latest/user_guide/model_registration/introduction.html#register"
        )
    self.evaluations = []
    if isinstance(training_data, ADSDatasetWithTarget):
        training_data, _ = training_data.train_test_split(test_size=0.0)
    if isinstance(test_data, ADSDatasetWithTarget):
        test_data, _ = test_data.train_test_split(test_size=0.0)

    if not isinstance(test_data, ADSData):
        raise ValueError(
            "Expected test_data to be of type ADSData. More information here: https://accelerated-data-science.readthedocs.io/en/latest/ads.common.html#ads.common.data.ADSData"
        )
    # Compare against None instead of relying on truthiness, so falsy or
    # truth-ambiguous objects (e.g. a DataFrame) get the intended message.
    if training_data is not None and not isinstance(training_data, ADSData):
        raise ValueError(
            "Expected training_data to be of type ADSData. More information here: https://accelerated-data-science.readthedocs.io/en/latest/ads.common.html#ads.common.data.ADSData"
        )
    # Raised explicitly rather than via `assert` so the check survives
    # `python -O`; the exception type callers see is unchanged.
    if not isinstance(models, list):
        raise AssertionError(
            "The `models` argument should be a list of GenericModels. More information here: https://accelerated-data-science.readthedocs.io/en/latest/ads.common.html#ads.common.data.ADSData"
        )

    self.test_data = test_data
    self.training_data = training_data
    self.classes = []
    self.is_classifier = (
        hasattr(models[0], "classes_") and models[0].classes_ is not None
    )
    pclass = positive_class
    if self.is_classifier:
        self.classes = list(models[0].classes_)
        if len(self.classes) == 2:
            self.metrics_to_show = [
                "accuracy",
                "hamming_loss",
                "precision",
                "recall",
                "f1",
                "auc",
            ]
            if positive_class is None or positive_class not in self.classes:
                pclass = next(
                    (
                        x
                        for x in self.classes
                        if str(x).lower() in ADSEvaluator.Positive_Class_Names
                    ),
                    self.classes[0],
                )
                logger.info(
                    f"Using {pclass} as the positive class. Use `positive_class` to set this value."
                )
        else:
            # Multi-class
            self.metrics_to_show = [
                "accuracy",
                "hamming_loss",
                "precision_weighted",
                "precision_micro",
                "recall_weighted",
                "recall_micro",
                "f1_weighted",
                "f1_micro",
            ]
    else:
        # Regression
        self.metrics_to_show = ["r2_score", "mse", "mae"]
    self.positive_class = pclass
    self.legend_labels = legend_labels

    # NOTE(review): the converted model is bound to the loop variable only
    # and never stored, so `models` is passed to `add_models` unconverted.
    # Kept as is because downstream code may rely on the original objects.
    for m in models:
        if not isinstance(m, ADSModel):
            try:
                m = ADSModel.from_estimator(m.est)
            except Exception:
                # Best effort; a bare `except` would also swallow
                # KeyboardInterrupt and SystemExit.
                logger.info("This model cannot be converted to an ADS Model.")
    self.evaluations = [pd.DataFrame(), pd.DataFrame()]
    self.model_names = []
    self.add_models(models, show_full_name=show_full_name)
764
+
765
def add_metrics(self, funcs, names):
    """Adds the listed metrics to the evaluator object it is called on.

    Parameters
    ----------
    funcs : list
        The list of metrics to be added. Each function is called with
        `y_true` and `y_pred`, the true and predicted values for each model.
    names : list[str]
        The list of metric names corresponding to the functions.

    Returns
    -------
    None
        The metric rows are appended to the stored evaluations in place.

    Raises
    ------
    ValueError
        If `funcs` and `names` differ in length.

    Examples
    --------
    >>> def f1(y_true, y_pred):
    ...    return np.max(y_true - y_pred)
    >>> evaluator = ADSEvaluator(test, [model1, model2])
    >>> evaluator.add_metrics([f1], ['Max Residual'])
    >>> evaluator.metrics
    Output table will include the desired metric
    """

    if len(funcs) != len(names):
        raise ValueError("Could not find 1 unique name for each function")

    def _with_metric_row(frame, metric_name, metric_func):
        """Return `frame` plus one row holding `metric_func` for every model column."""
        row = [
            metric_func(frame[model]["y_true"], frame[model]["y_pred"])
            for model in frame.columns
        ]
        extra = pd.DataFrame([row], columns=frame.columns, index=[metric_name])
        return pd.concat([frame, extra])

    for name, f in zip(names, funcs):
        self.evaluations[1] = _with_metric_row(self.evaluations[1], name, f)
        # An empty (0, 0) frame means no training data was evaluated.
        if self.evaluations[0].shape != (0, 0):
            self.evaluations[0] = _with_metric_row(self.evaluations[0], name, f)
        if name not in self.metrics_to_show:
            self.metrics_to_show.append(name)
    self.train_evaluations = self.evaluations[0]
    self.test_evaluations = self.evaluations[1]
819
+
820
def del_metrics(self, names):
    """Drop the named metric rows from the stored evaluations.

    Parameters
    ----------
    names : list[str]
        Names of the metrics to delete. Available names can be found by
        calling `evaluator.test_evaluations.index`.

    Returns
    -------
    None
        The evaluation frames are modified in place.

    Examples
    --------
    >>> evaluator = ADSEvaluator(test, [model1, model2])
    >>> evaluator.del_metrics(['mse'])
    >>> evaluator.metrics
    Output table will exclude the desired metric
    """
    test_frame = self.evaluations[1]
    test_frame.drop(index=names, inplace=True)

    train_frame = self.evaluations[0]
    # A (0, 0) shape marks the placeholder used when no training data exists.
    if train_frame.shape != (0, 0):
        train_frame.drop(index=names, inplace=True)

    remaining = []
    for metric in self.metrics_to_show:
        if metric not in names:
            remaining.append(metric)
    self.metrics_to_show = remaining
845
+
846
def add_models(self, models, show_full_name=False):
    """Adds the listed models to the evaluator object it is called on.

    Each model is scored on the test data, and on the training data when
    it was provided, and its metrics are appended as a new column.

    Parameters
    ----------
    models : list[ADSModel]
        The list of models to be added. Anything that is not a list is
        silently ignored.
    show_full_name : bool, optional
        Forwarded to `_get_model_name` when deriving the column name.
        Defaults to False.

    Returns
    -------
    None
        The stored evaluations are replaced with the extended frames.

    Examples
    --------
    >>> evaluator = ADSEvaluator(test, [model1, model2])
    >>> evaluator.add_models([model3])
    """
    if not isinstance(models, list):
        return

    total_train_metrics = self.evaluations[0]
    total_test_metrics = self.evaluations[1]
    for m in models:
        # Forward the flag: `_get_model_name` logs a hint telling users to
        # pass `show_full_name=True`, which otherwise could never take effect.
        m_name = self._get_model_name(m.name, show_full_name=show_full_name)
        model_classes = m.classes_ if self.is_classifier else None

        if self.training_data is not None:
            y_pred, y_score = self._score_data(m, self.training_data.X)
            train_metrics = ModelEvaluator(
                y_true=self.training_data.y,
                y_pred=y_pred,
                model_name=m_name,
                classes=model_classes,
                y_score=y_score,
                positive_class=self.positive_class,
            ).get_metrics()
            total_train_metrics = pd.concat(
                [total_train_metrics, train_metrics], axis=1
            )

        y_pred, y_score = self._score_data(m, self.test_data.X)
        test_metrics = ModelEvaluator(
            y_true=self.test_data.y,
            y_pred=y_pred,
            model_name=m_name,
            classes=model_classes,
            y_score=y_score,
            positive_class=self.positive_class,
        ).get_metrics()
        total_test_metrics = pd.concat(
            [total_test_metrics, test_metrics], axis=1, sort=False
        )

    self.evaluations = [total_train_metrics, total_test_metrics]
    self.train_evaluations = self.evaluations[0]
    self.test_evaluations = self.evaluations[1]
906
+
907
def del_models(self, names):
    """Drop the named model columns from the stored evaluations.

    Parameters
    ----------
    names : list[str]
        Names of the models to delete. Names are the model names by
        default, and are assigned internally when conflicts exist. Actual
        names can be found using `evaluator.test_evaluations.columns`.
        Anything that is not exactly a `list` is ignored.

    Returns
    -------
    None
        The evaluation frames are modified in place.

    Examples
    --------
    >>> model3.rename("model3")
    >>> evaluator = ADSEvaluator(test, [model1, model2, model3])
    >>> evaluator.del_models(["model3"])
    """
    if type(names) is not list:
        return

    kept_names = []
    for existing in self.model_names:
        if existing not in names:
            kept_names.append(existing)
    self.model_names = kept_names

    test_frame = self.evaluations[1]
    test_frame.drop(columns=names, inplace=True)
    train_frame = self.evaluations[0]
    # A (0, 0) shape marks the placeholder used when no training data exists.
    if train_frame.shape != (0, 0):
        train_frame.drop(columns=names, inplace=True)
933
+
934
def show_in_notebook(
    self,
    plots=None,
    use_training_data=False,
    perfect=False,
    baseline=True,
    legend_labels=None,
):
    """Render the evaluation plots for the stored models.

    Parameters
    ----------
    plots : list, optional
        Filter the plots that are displayed. Defaults to None. The names of the plots are:

        - regression - residuals_qq, residuals_vs_fitted
        - binary classification - normalized_confusion_matrix, roc_curve, pr_curve
        - multi class classification - normalized_confusion_matrix, precision_by_label, recall_by_label, f1_by_label

    use_training_data : bool, optional
        Plot the training evaluations instead of the test evaluations.
        Defaults to `False`.
    perfect : bool, optional
        Forwarded to `EvaluationPlot.plot`. Defaults to `False`.
    baseline : bool, optional
        Forwarded to `EvaluationPlot.plot`. Defaults to `True`.
    legend_labels : dict, optional
        Rename legend labels used for multi class classification plots.
        Keys are class names, values are strings. When omitted, the
        `legend_labels` given to the constructor are used.

    Returns
    -------
    None
        Nothing. Outputs several evaluation plots as specified by `plots`.

    Raises
    ------
    ValueError
        If `use_training_data` is set but no training data was provided.

    Examples
    --------

    >>> evaluator = ADSEvaluator(test, [model1, model2])
    >>> evaluator.show_in_notebook()

    >>> legend_labels={'class_0': 'green', 'class_1': 'yellow', 'class_2': 'red'}
    >>> multi_evaluator = ADSEvaluator(test, [model1, model2],
    ...             legend_labels=legend_labels)
    >>> multi_evaluator.show_in_notebook(plots=["normalized_confusion_matrix",
    ...             "precision_by_label", "recall_by_label", "f1_by_label"])
    """
    if use_training_data and self.training_data is None:
        raise ValueError(
            "Training data is not provided. Re-build ADSData with training and test data"
        )

    # Index 0 holds the training evaluations, index 1 the test evaluations.
    chosen = self.evaluations[0] if use_training_data else self.evaluations[1]

    if legend_labels is None:
        legend_labels = self.legend_labels

    class_count = len(self.classes)
    EvaluationPlot.plot(chosen, plots, class_count, perfect, baseline, legend_labels)
995
+
996
def calculate_cost(
    self, tn_weight, fp_weight, fn_weight, tp_weight, use_training_data=False
):
    """Compute a weighted confusion-matrix cost for every model.

    Parameters
    ----------
    tn_weight : int, float
        The weight to assign true negatives in calculating the cost
    fp_weight : int, float
        The weight to assign false positives in calculating the cost
    fn_weight : int, float
        The weight to assign false negatives in calculating the cost
    tp_weight : int, float
        The weight to assign true positives in calculating the cost
    use_training_data : bool, optional
        Use training data to pull the metrics. Defaults to False

    Returns
    -------
    :class:`pandas.DataFrame`
        DataFrame with a `model` column and the matching `cost` column.

    Raises
    ------
    ValueError
        If the evaluator does not hold exactly two classes, or if
        `use_training_data` is set but no training data was provided.

    Examples
    --------
    >>> evaluator = ADSEvaluator(test, [model1, model2])
    >>> costs_table = evaluator.calculate_cost(0, 10, 1000, 0)
    """
    if len(self.classes) != 2:
        raise ValueError(
            "The calculate_cost api is not supported for non-binary classification datasets."
        )
    if use_training_data and self.training_data is None:
        raise ValueError(
            "Training data is not provided. Re-build ADSData with training and test data."
        )

    # Index 0 holds the training evaluations, index 1 the test evaluations.
    frame = self.evaluations[0] if use_training_data else self.evaluations[1]
    model_names = frame.columns

    def weighted_cost(model):
        """Unpack the flattened confusion matrix of `model` and weight each cell."""
        tn, fp, fn, tp = frame[model]["raw_confusion_matrix"].ravel()
        return tn * tn_weight + fp * fp_weight + fn * fn_weight + tp * tp_weight

    costs = [weighted_cost(model) for model in model_names]
    return pd.DataFrame({"model": model_names, "cost": costs})
1046
+
1047
class EvaluationMetrics(object):
    """Class holding evaluation metrics.

    Attributes
    ----------
    ev_test : DataFrame
        evaluation test metrics, one row per metric and one column per model
    ev_train : DataFrame or None
        evaluation training metrics
    use_training : bool
        whether the training metrics are displayed as well
    less_is_more : list
        names of the metrics for which the smallest value is the best one
    precision : int
        number of decimals shown for each value

    Methods
    -------
    show_in_notebook()
        Shows visualization metrics as a color coded table

    """

    DEFAULT_LABELS_MAP = {
        "accuracy": "Accuracy",
        "hamming_loss": "Hamming distance",
        "kappa_score_": "Cohen's kappa coefficient",
        "precision": "Precision",
        "recall": "Recall",
        "f1": "F1",
        "auc": "ROC AUC",
    }

    def __init__(
        self, ev_test, ev_train, use_training=False, less_is_more=None, precision=4
    ):
        self.ev_test = ev_test
        self.ev_train = ev_train
        self.use_training = use_training
        self.precision = precision
        # Built-in "lower is better" metrics, extended by any caller-supplied list.
        self.less_is_more = ["hamming_loss", "hinge_loss", "mse", "mae"]
        if isinstance(less_is_more, list):
            self.less_is_more = self.less_is_more + less_is_more

    def __repr__(self):
        # Displaying is the side effect; the textual repr itself is empty.
        self.show_in_notebook()
        return ""

    @property
    def precision(self):
        return self._precision

    @precision.setter
    def precision(self, value):
        """
        Validate and store the display precision.

        Integral floats are converted to int.

        Raises
        ------
        TypeError
            If `value` is neither an int nor an integral float.
        ValueError
            If `value` is negative.
        """
        if not isinstance(value, int):
            if not (isinstance(value, float) and value.is_integer()):
                raise TypeError("'value' must be integer")
            value = int(value)
        if value < 0:
            raise ValueError("'value' must be non-negative")
        self._precision = value

    def _lower_is_better(self, row_name, labels):
        """Tell whether the metric shown as `row_name` is best at its minimum.

        Rows are renamed to their display labels before styling, so the
        display label is mapped back to its metric name through `labels`.
        """
        if row_name in self.less_is_more:
            return True
        return any(
            shown == row_name and metric in self.less_is_more
            for metric, shown in labels.items()
        )

    def show_in_notebook(self, labels=DEFAULT_LABELS_MAP):
        """
        Visualizes evaluation metrics as a color coded table.

        Parameters
        ----------
        labels : dictionary
            map printing specific labels for metrics display

        Returns
        -------
        Nothing
        """

        def highlight_max(s):
            """Highlight the best value(s) of a row in light green.

            Parameters
            ----------
            s : series object
                the series being evaluated

            Returns
            -------
            list
                containing background color data or empty if not the best
            """
            if self._lower_is_better(s.name, labels):
                is_best = s == s.min()
            else:
                is_best = s == s.max()
            return ["background-color: lightgreen" if v else "" for v in is_best]

        def _pretty_label(df, labels, copy=True):
            """
            Rename the metric rows to their display labels.

            Parameters
            ----------
            df : dataframe
                metrics indexed by metric name
            labels : dictionary
                map printing specific labels for metrics display
            copy : bool
                rename a copy so the stored metrics keep their raw names

            Returns
            -------
            dataframe
                dataframe with index names modified according to input labels
            """
            if copy:
                df = df.copy()
            for k, v in labels.items():
                df.rename(index={k: v}, inplace=True)
            return df

        @runtime_dependency(
            module="IPython", install_from=OptionalDependency.NOTEBOOK
        )
        @runtime_dependency(
            module="ipywidgets",
            object="HTML",
            install_from=OptionalDependency.NOTEBOOK,
        )
        def _display_metrics(df, data_name, labels, precision):
            """
            display metrics on web page

            Parameters
            ----------
            df : dataframe
                metrics in dataframe format
            data_name : string
                name of data given metrics df describe
            labels : dictionary
                map printing specific labels for metrics display
            precision : int
                precision for metrics display

            Returns
            -------
            Nothing
            """
            # `IPython.display` is the public location of these names;
            # importing `display` from `IPython.core.display` is deprecated.
            from IPython.display import HTML, display

            display(
                HTML(
                    _pretty_label(df, labels)
                    .style.apply(highlight_max, axis=1)
                    .format(precision=precision)
                    .set_properties(**{"text-align": "center"})
                    .set_table_attributes("class=table")
                    .set_caption(
                        '<div align="left"><b style="font-size:20px;">'
                        + "Evaluation Metrics ("
                        + data_name
                        + "):</b></div>"
                    )
                    .to_html()
                )
            )

        _display_metrics(self.ev_test, "testing data", labels, self.precision)
        if self.use_training:
            _display_metrics(self.ev_train, "training data", labels, self.precision)
1224
+
1225
@property
def raw_metrics(self, metrics=None, use_training_data=False):
    """Returns the raw metric numbers

    This is a property, so attribute access always uses the defaults
    below (all metrics, test data only).

    Parameters
    ----------
    metrics : list, optional
        Request metrics to pull. Defaults to all.
    use_training_data : bool, optional
        Also include the training metrics, keyed by the model name with a
        `_train` suffix. Defaults to False

    Returns
    -------
    dict
        Maps each model name to a dict of its raw metrics, with NumPy
        arrays converted to plain lists. If `metrics` is `None` return all.

    Examples
    --------
    >>> evaluator = ADSEvaluator(test, [model1, model2])
    >>> raw_metrics_dictionary = evaluator.raw_metrics
    """
    train_met, test_met = self.evaluations
    per_model = test_met.to_dict()
    if use_training_data and train_met is not None:
        per_model.update(train_met.add_suffix("_train").to_dict())

    def _plain(value):
        """Convert NumPy arrays to lists; leave every other value untouched."""
        return value.tolist() if isinstance(value, np.ndarray) else value

    return {
        model: {
            key: _plain(value)
            for key, value in data.items()
            if metrics is None or key in metrics
        }
        for model, data in per_model.items()
    }
1261
+
1262
@property
def metrics(self):
    """Wrap the metrics selected for display in an `EvaluationMetrics` object.

    Returns
    -------
    ADSEvaluator.EvaluationMetrics
        Holds the rows listed in `metrics_to_show` for the test
        evaluations and, when training evaluations exist, for those too.

    Examples
    --------
    >>> evaluator = ADSEvaluator(test, [model1, model2])
    >>> evaluator.metrics
    Outputs table displaying metrics.
    """
    shown = self.metrics_to_show
    ev_test = self.evaluations[1].loc[shown]

    train_frame = self.evaluations[0]
    # A (0, 0) shape marks the placeholder used when no training data exists.
    has_training = train_frame.shape != (0, 0)
    if has_training:
        ev_train = train_frame.loc[shown]
    else:
        ev_train = None
    return ADSEvaluator.EvaluationMetrics(ev_test, ev_train, has_training)
1284
+
1285
+ """
1286
+ Internal methods
1287
+ """
1288
+
1289
+ def _get_model_name(self, name, show_full_name=False):
1290
+ name_edit = re.sub(r" ?\([^)]+\)", "", name)
1291
+ ## if name only has '(' without ')', the code above wouldnt remove the argument followed by '('.
1292
+ if "(" in name_edit and not show_full_name:
1293
+ name_edit = name.split("(")[0]
1294
+ logger.info("Use `show_full_name=True` to show the full model name.")
1295
+ if name_edit in self.model_names:
1296
+ name_edit += "_1"
1297
+ num_tries = 1
1298
+ while name_edit in self.model_names:
1299
+ num_tries += 1
1300
+ name_edit = name_edit[:-1] + str(num_tries)
1301
+ if num_tries == 1:
1302
+ logger.info(
1303
+ f"The name '{name_edit[:-2]}' is used by multiple models. "
1304
+ f"Use the `rename()` method to change the name."
1305
+ )
1306
+ self.model_names.append(name_edit)
1307
+ return name_edit
1308
+
1309
+ def _score_data(self, est, X):
1310
+ y_pred = est.predict(X)
1311
+ y_score = None
1312
+
1313
+ # we will compute y_score only for binary classification cases because only for binary classification can
1314
+ # we use it for ROC Curves and AUC etc
1315
+ if self.is_classifier and hasattr(est.est, "predict_proba"):
1316
+ if len(est.classes_) == 2:
1317
+ # positive label index is assumed to be 0 if the ADSModel does not have a positive class defined
1318
+ positive_class_index = 0
1319
+ # For prediction probability, we only consider the positive class.
1320
+ if self.positive_class is not None:
1321
+ if self.positive_class not in list(est.classes_):
1322
+ raise ValueError(
1323
+ "Invalid positive class '%s' for model %s. Positive class should be one of %s."
1324
+ % (
1325
+ self.positive_class,
1326
+ est.est.__class__.__name__,
1327
+ list(est.classes_),
1328
+ )
1329
+ )
1330
+ positive_class_index = list(est.classes_).index(self.positive_class)
1331
+ y_score = est.predict_proba(X)[:, positive_class_index]
1332
+ else:
1333
+ y_score = est.predict_proba(X)
1334
+ return y_pred, y_score