oracle-ads 2.13.9rc0__py3-none-any.whl → 2.13.9rc1__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (857)
  1. ads/aqua/__init__.py +40 -0
  2. ads/aqua/app.py +506 -0
  3. ads/aqua/cli.py +96 -0
  4. ads/aqua/client/__init__.py +3 -0
  5. ads/aqua/client/client.py +836 -0
  6. ads/aqua/client/openai_client.py +305 -0
  7. ads/aqua/common/__init__.py +5 -0
  8. ads/aqua/common/decorator.py +125 -0
  9. ads/aqua/common/entities.py +269 -0
  10. ads/aqua/common/enums.py +122 -0
  11. ads/aqua/common/errors.py +109 -0
  12. ads/aqua/common/utils.py +1285 -0
  13. ads/aqua/config/__init__.py +4 -0
  14. ads/aqua/config/container_config.py +248 -0
  15. ads/aqua/config/evaluation/__init__.py +4 -0
  16. ads/aqua/config/evaluation/evaluation_service_config.py +147 -0
  17. ads/aqua/config/utils/__init__.py +4 -0
  18. ads/aqua/config/utils/serializer.py +339 -0
  19. ads/aqua/constants.py +116 -0
  20. ads/aqua/data.py +14 -0
  21. ads/aqua/dummy_data/icon.txt +1 -0
  22. ads/aqua/dummy_data/oci_model_deployments.json +56 -0
  23. ads/aqua/dummy_data/oci_models.json +1 -0
  24. ads/aqua/dummy_data/readme.md +26 -0
  25. ads/aqua/evaluation/__init__.py +8 -0
  26. ads/aqua/evaluation/constants.py +53 -0
  27. ads/aqua/evaluation/entities.py +186 -0
  28. ads/aqua/evaluation/errors.py +70 -0
  29. ads/aqua/evaluation/evaluation.py +1814 -0
  30. ads/aqua/extension/__init__.py +42 -0
  31. ads/aqua/extension/aqua_ws_msg_handler.py +76 -0
  32. ads/aqua/extension/base_handler.py +90 -0
  33. ads/aqua/extension/common_handler.py +121 -0
  34. ads/aqua/extension/common_ws_msg_handler.py +36 -0
  35. ads/aqua/extension/deployment_handler.py +298 -0
  36. ads/aqua/extension/deployment_ws_msg_handler.py +54 -0
  37. ads/aqua/extension/errors.py +30 -0
  38. ads/aqua/extension/evaluation_handler.py +129 -0
  39. ads/aqua/extension/evaluation_ws_msg_handler.py +61 -0
  40. ads/aqua/extension/finetune_handler.py +96 -0
  41. ads/aqua/extension/model_handler.py +390 -0
  42. ads/aqua/extension/models/__init__.py +0 -0
  43. ads/aqua/extension/models/ws_models.py +145 -0
  44. ads/aqua/extension/models_ws_msg_handler.py +50 -0
  45. ads/aqua/extension/ui_handler.py +282 -0
  46. ads/aqua/extension/ui_websocket_handler.py +130 -0
  47. ads/aqua/extension/utils.py +133 -0
  48. ads/aqua/finetuning/__init__.py +7 -0
  49. ads/aqua/finetuning/constants.py +23 -0
  50. ads/aqua/finetuning/entities.py +181 -0
  51. ads/aqua/finetuning/finetuning.py +749 -0
  52. ads/aqua/model/__init__.py +8 -0
  53. ads/aqua/model/constants.py +60 -0
  54. ads/aqua/model/entities.py +385 -0
  55. ads/aqua/model/enums.py +32 -0
  56. ads/aqua/model/model.py +2114 -0
  57. ads/aqua/modeldeployment/__init__.py +8 -0
  58. ads/aqua/modeldeployment/constants.py +10 -0
  59. ads/aqua/modeldeployment/deployment.py +1326 -0
  60. ads/aqua/modeldeployment/entities.py +653 -0
  61. ads/aqua/modeldeployment/inference.py +74 -0
  62. ads/aqua/modeldeployment/utils.py +543 -0
  63. ads/aqua/resources/gpu_shapes_index.json +94 -0
  64. ads/aqua/server/__init__.py +4 -0
  65. ads/aqua/server/__main__.py +24 -0
  66. ads/aqua/server/app.py +47 -0
  67. ads/aqua/server/aqua_spec.yml +1291 -0
  68. ads/aqua/training/__init__.py +4 -0
  69. ads/aqua/training/exceptions.py +476 -0
  70. ads/aqua/ui.py +499 -0
  71. ads/automl/__init__.py +9 -0
  72. ads/automl/driver.py +330 -0
  73. ads/automl/provider.py +975 -0
  74. ads/bds/__init__.py +5 -0
  75. ads/bds/auth.py +127 -0
  76. ads/bds/big_data_service.py +255 -0
  77. ads/catalog/__init__.py +19 -0
  78. ads/catalog/model.py +1576 -0
  79. ads/catalog/notebook.py +461 -0
  80. ads/catalog/project.py +468 -0
  81. ads/catalog/summary.py +178 -0
  82. ads/common/__init__.py +11 -0
  83. ads/common/analyzer.py +65 -0
  84. ads/common/artifact/.model-ignore +63 -0
  85. ads/common/artifact/__init__.py +10 -0
  86. ads/common/auth.py +1122 -0
  87. ads/common/card_identifier.py +83 -0
  88. ads/common/config.py +647 -0
  89. ads/common/data.py +165 -0
  90. ads/common/decorator/__init__.py +9 -0
  91. ads/common/decorator/argument_to_case.py +88 -0
  92. ads/common/decorator/deprecate.py +69 -0
  93. ads/common/decorator/require_nonempty_arg.py +65 -0
  94. ads/common/decorator/runtime_dependency.py +178 -0
  95. ads/common/decorator/threaded.py +97 -0
  96. ads/common/decorator/utils.py +35 -0
  97. ads/common/dsc_file_system.py +303 -0
  98. ads/common/error.py +14 -0
  99. ads/common/extended_enum.py +81 -0
  100. ads/common/function/__init__.py +5 -0
  101. ads/common/function/fn_util.py +142 -0
  102. ads/common/function/func_conf.yaml +25 -0
  103. ads/common/ipython.py +76 -0
  104. ads/common/model.py +679 -0
  105. ads/common/model_artifact.py +1759 -0
  106. ads/common/model_artifact_schema.json +107 -0
  107. ads/common/model_export_util.py +664 -0
  108. ads/common/model_metadata.py +24 -0
  109. ads/common/object_storage_details.py +296 -0
  110. ads/common/oci_client.py +175 -0
  111. ads/common/oci_datascience.py +46 -0
  112. ads/common/oci_logging.py +1144 -0
  113. ads/common/oci_mixin.py +957 -0
  114. ads/common/oci_resource.py +136 -0
  115. ads/common/serializer.py +559 -0
  116. ads/common/utils.py +1852 -0
  117. ads/common/word_lists.py +1491 -0
  118. ads/common/work_request.py +189 -0
  119. ads/data_labeling/__init__.py +13 -0
  120. ads/data_labeling/boundingbox.py +253 -0
  121. ads/data_labeling/constants.py +47 -0
  122. ads/data_labeling/data_labeling_service.py +244 -0
  123. ads/data_labeling/interface/__init__.py +5 -0
  124. ads/data_labeling/interface/loader.py +16 -0
  125. ads/data_labeling/interface/parser.py +16 -0
  126. ads/data_labeling/interface/reader.py +23 -0
  127. ads/data_labeling/loader/__init__.py +5 -0
  128. ads/data_labeling/loader/file_loader.py +241 -0
  129. ads/data_labeling/metadata.py +110 -0
  130. ads/data_labeling/mixin/__init__.py +5 -0
  131. ads/data_labeling/mixin/data_labeling.py +232 -0
  132. ads/data_labeling/ner.py +129 -0
  133. ads/data_labeling/parser/__init__.py +5 -0
  134. ads/data_labeling/parser/dls_record_parser.py +388 -0
  135. ads/data_labeling/parser/export_metadata_parser.py +94 -0
  136. ads/data_labeling/parser/export_record_parser.py +473 -0
  137. ads/data_labeling/reader/__init__.py +5 -0
  138. ads/data_labeling/reader/dataset_reader.py +574 -0
  139. ads/data_labeling/reader/dls_record_reader.py +121 -0
  140. ads/data_labeling/reader/export_record_reader.py +62 -0
  141. ads/data_labeling/reader/jsonl_reader.py +75 -0
  142. ads/data_labeling/reader/metadata_reader.py +203 -0
  143. ads/data_labeling/reader/record_reader.py +263 -0
  144. ads/data_labeling/record.py +52 -0
  145. ads/data_labeling/visualizer/__init__.py +5 -0
  146. ads/data_labeling/visualizer/image_visualizer.py +525 -0
  147. ads/data_labeling/visualizer/text_visualizer.py +357 -0
  148. ads/database/__init__.py +5 -0
  149. ads/database/connection.py +338 -0
  150. ads/dataset/__init__.py +10 -0
  151. ads/dataset/capabilities.md +51 -0
  152. ads/dataset/classification_dataset.py +339 -0
  153. ads/dataset/correlation.py +226 -0
  154. ads/dataset/correlation_plot.py +563 -0
  155. ads/dataset/dask_series.py +173 -0
  156. ads/dataset/dataframe_transformer.py +110 -0
  157. ads/dataset/dataset.py +1979 -0
  158. ads/dataset/dataset_browser.py +360 -0
  159. ads/dataset/dataset_with_target.py +995 -0
  160. ads/dataset/exception.py +25 -0
  161. ads/dataset/factory.py +987 -0
  162. ads/dataset/feature_engineering_transformer.py +35 -0
  163. ads/dataset/feature_selection.py +107 -0
  164. ads/dataset/forecasting_dataset.py +26 -0
  165. ads/dataset/helper.py +1450 -0
  166. ads/dataset/label_encoder.py +99 -0
  167. ads/dataset/mixin/__init__.py +5 -0
  168. ads/dataset/mixin/dataset_accessor.py +134 -0
  169. ads/dataset/pipeline.py +58 -0
  170. ads/dataset/plot.py +710 -0
  171. ads/dataset/progress.py +86 -0
  172. ads/dataset/recommendation.py +297 -0
  173. ads/dataset/recommendation_transformer.py +502 -0
  174. ads/dataset/regression_dataset.py +14 -0
  175. ads/dataset/sampled_dataset.py +1050 -0
  176. ads/dataset/target.py +98 -0
  177. ads/dataset/timeseries.py +18 -0
  178. ads/dbmixin/__init__.py +5 -0
  179. ads/dbmixin/db_pandas_accessor.py +153 -0
  180. ads/environment/__init__.py +9 -0
  181. ads/environment/ml_runtime.py +66 -0
  182. ads/evaluations/README.md +14 -0
  183. ads/evaluations/__init__.py +109 -0
  184. ads/evaluations/evaluation_plot.py +983 -0
  185. ads/evaluations/evaluator.py +1334 -0
  186. ads/evaluations/statistical_metrics.py +543 -0
  187. ads/experiments/__init__.py +9 -0
  188. ads/experiments/capabilities.md +0 -0
  189. ads/explanations/__init__.py +21 -0
  190. ads/explanations/base_explainer.py +142 -0
  191. ads/explanations/capabilities.md +83 -0
  192. ads/explanations/explainer.py +190 -0
  193. ads/explanations/mlx_global_explainer.py +1050 -0
  194. ads/explanations/mlx_interface.py +386 -0
  195. ads/explanations/mlx_local_explainer.py +287 -0
  196. ads/explanations/mlx_whatif_explainer.py +201 -0
  197. ads/feature_engineering/__init__.py +20 -0
  198. ads/feature_engineering/accessor/__init__.py +5 -0
  199. ads/feature_engineering/accessor/dataframe_accessor.py +535 -0
  200. ads/feature_engineering/accessor/mixin/__init__.py +5 -0
  201. ads/feature_engineering/accessor/mixin/correlation.py +166 -0
  202. ads/feature_engineering/accessor/mixin/eda_mixin.py +266 -0
  203. ads/feature_engineering/accessor/mixin/eda_mixin_series.py +85 -0
  204. ads/feature_engineering/accessor/mixin/feature_types_mixin.py +211 -0
  205. ads/feature_engineering/accessor/mixin/utils.py +65 -0
  206. ads/feature_engineering/accessor/series_accessor.py +431 -0
  207. ads/feature_engineering/adsimage/__init__.py +5 -0
  208. ads/feature_engineering/adsimage/image.py +192 -0
  209. ads/feature_engineering/adsimage/image_reader.py +170 -0
  210. ads/feature_engineering/adsimage/interface/__init__.py +5 -0
  211. ads/feature_engineering/adsimage/interface/reader.py +19 -0
  212. ads/feature_engineering/adsstring/__init__.py +7 -0
  213. ads/feature_engineering/adsstring/oci_language/__init__.py +8 -0
  214. ads/feature_engineering/adsstring/string/__init__.py +8 -0
  215. ads/feature_engineering/data_schema.json +57 -0
  216. ads/feature_engineering/dataset/__init__.py +5 -0
  217. ads/feature_engineering/dataset/zip_code_data.py +42062 -0
  218. ads/feature_engineering/exceptions.py +40 -0
  219. ads/feature_engineering/feature_type/__init__.py +133 -0
  220. ads/feature_engineering/feature_type/address.py +184 -0
  221. ads/feature_engineering/feature_type/adsstring/__init__.py +5 -0
  222. ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +164 -0
  223. ads/feature_engineering/feature_type/adsstring/oci_language.py +93 -0
  224. ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +5 -0
  225. ads/feature_engineering/feature_type/adsstring/parsers/base.py +47 -0
  226. ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +96 -0
  227. ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +221 -0
  228. ads/feature_engineering/feature_type/adsstring/string.py +258 -0
  229. ads/feature_engineering/feature_type/base.py +58 -0
  230. ads/feature_engineering/feature_type/boolean.py +183 -0
  231. ads/feature_engineering/feature_type/category.py +146 -0
  232. ads/feature_engineering/feature_type/constant.py +137 -0
  233. ads/feature_engineering/feature_type/continuous.py +151 -0
  234. ads/feature_engineering/feature_type/creditcard.py +314 -0
  235. ads/feature_engineering/feature_type/datetime.py +190 -0
  236. ads/feature_engineering/feature_type/discrete.py +134 -0
  237. ads/feature_engineering/feature_type/document.py +43 -0
  238. ads/feature_engineering/feature_type/gis.py +251 -0
  239. ads/feature_engineering/feature_type/handler/__init__.py +5 -0
  240. ads/feature_engineering/feature_type/handler/feature_validator.py +524 -0
  241. ads/feature_engineering/feature_type/handler/feature_warning.py +319 -0
  242. ads/feature_engineering/feature_type/handler/warnings.py +128 -0
  243. ads/feature_engineering/feature_type/integer.py +142 -0
  244. ads/feature_engineering/feature_type/ip_address.py +144 -0
  245. ads/feature_engineering/feature_type/ip_address_v4.py +138 -0
  246. ads/feature_engineering/feature_type/ip_address_v6.py +138 -0
  247. ads/feature_engineering/feature_type/lat_long.py +256 -0
  248. ads/feature_engineering/feature_type/object.py +43 -0
  249. ads/feature_engineering/feature_type/ordinal.py +132 -0
  250. ads/feature_engineering/feature_type/phone_number.py +135 -0
  251. ads/feature_engineering/feature_type/string.py +171 -0
  252. ads/feature_engineering/feature_type/text.py +93 -0
  253. ads/feature_engineering/feature_type/unknown.py +43 -0
  254. ads/feature_engineering/feature_type/zip_code.py +164 -0
  255. ads/feature_engineering/feature_type_manager.py +406 -0
  256. ads/feature_engineering/schema.py +795 -0
  257. ads/feature_engineering/utils.py +245 -0
  258. ads/feature_store/.readthedocs.yaml +19 -0
  259. ads/feature_store/README.md +65 -0
  260. ads/feature_store/__init__.py +9 -0
  261. ads/feature_store/common/__init__.py +0 -0
  262. ads/feature_store/common/enums.py +339 -0
  263. ads/feature_store/common/exceptions.py +18 -0
  264. ads/feature_store/common/spark_session_singleton.py +125 -0
  265. ads/feature_store/common/utils/__init__.py +0 -0
  266. ads/feature_store/common/utils/base64_encoder_decoder.py +72 -0
  267. ads/feature_store/common/utils/feature_schema_mapper.py +283 -0
  268. ads/feature_store/common/utils/transformation_utils.py +82 -0
  269. ads/feature_store/common/utils/utility.py +403 -0
  270. ads/feature_store/data_validation/__init__.py +0 -0
  271. ads/feature_store/data_validation/great_expectation.py +129 -0
  272. ads/feature_store/dataset.py +1230 -0
  273. ads/feature_store/dataset_job.py +530 -0
  274. ads/feature_store/docs/Dockerfile +7 -0
  275. ads/feature_store/docs/Makefile +44 -0
  276. ads/feature_store/docs/conf.py +28 -0
  277. ads/feature_store/docs/requirements.txt +14 -0
  278. ads/feature_store/docs/source/ads.feature_store.query.rst +20 -0
  279. ads/feature_store/docs/source/cicd.rst +137 -0
  280. ads/feature_store/docs/source/conf.py +86 -0
  281. ads/feature_store/docs/source/data_versioning.rst +33 -0
  282. ads/feature_store/docs/source/dataset.rst +388 -0
  283. ads/feature_store/docs/source/dataset_job.rst +27 -0
  284. ads/feature_store/docs/source/demo.rst +70 -0
  285. ads/feature_store/docs/source/entity.rst +78 -0
  286. ads/feature_store/docs/source/feature_group.rst +624 -0
  287. ads/feature_store/docs/source/feature_group_job.rst +29 -0
  288. ads/feature_store/docs/source/feature_store.rst +122 -0
  289. ads/feature_store/docs/source/feature_store_class.rst +123 -0
  290. ads/feature_store/docs/source/feature_validation.rst +66 -0
  291. ads/feature_store/docs/source/figures/cicd.png +0 -0
  292. ads/feature_store/docs/source/figures/data_validation.png +0 -0
  293. ads/feature_store/docs/source/figures/data_versioning.png +0 -0
  294. ads/feature_store/docs/source/figures/dataset.gif +0 -0
  295. ads/feature_store/docs/source/figures/dataset.png +0 -0
  296. ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
  297. ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
  298. ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
  299. ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
  300. ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
  301. ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
  302. ads/feature_store/docs/source/figures/entity.png +0 -0
  303. ads/feature_store/docs/source/figures/feature_group.png +0 -0
  304. ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
  305. ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
  306. ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
  307. ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
  308. ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
  309. ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
  310. ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
  311. ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
  312. ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
  313. ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
  314. ads/feature_store/docs/source/figures/overview.png +0 -0
  315. ads/feature_store/docs/source/figures/resource_manager.png +0 -0
  316. ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
  317. ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
  318. ads/feature_store/docs/source/figures/stats_1.png +0 -0
  319. ads/feature_store/docs/source/figures/stats_2.png +0 -0
  320. ads/feature_store/docs/source/figures/stats_d.png +0 -0
  321. ads/feature_store/docs/source/figures/stats_fg.png +0 -0
  322. ads/feature_store/docs/source/figures/transformation.png +0 -0
  323. ads/feature_store/docs/source/figures/transformations.gif +0 -0
  324. ads/feature_store/docs/source/figures/validation.png +0 -0
  325. ads/feature_store/docs/source/figures/validation_fg.png +0 -0
  326. ads/feature_store/docs/source/figures/validation_results.png +0 -0
  327. ads/feature_store/docs/source/figures/validation_summary.png +0 -0
  328. ads/feature_store/docs/source/index.rst +81 -0
  329. ads/feature_store/docs/source/module.rst +8 -0
  330. ads/feature_store/docs/source/notebook.rst +94 -0
  331. ads/feature_store/docs/source/overview.rst +47 -0
  332. ads/feature_store/docs/source/quickstart.rst +176 -0
  333. ads/feature_store/docs/source/release_notes.rst +194 -0
  334. ads/feature_store/docs/source/setup_feature_store.rst +81 -0
  335. ads/feature_store/docs/source/statistics.rst +58 -0
  336. ads/feature_store/docs/source/transformation.rst +199 -0
  337. ads/feature_store/docs/source/ui.rst +65 -0
  338. ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +66 -0
  339. ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +192 -0
  340. ads/feature_store/docs/source/user_guides.setup.terraform.rst +338 -0
  341. ads/feature_store/entity.py +718 -0
  342. ads/feature_store/execution_strategy/__init__.py +0 -0
  343. ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
  344. ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +375 -0
  345. ads/feature_store/execution_strategy/engine/__init__.py +0 -0
  346. ads/feature_store/execution_strategy/engine/spark_engine.py +316 -0
  347. ads/feature_store/execution_strategy/execution_strategy.py +113 -0
  348. ads/feature_store/execution_strategy/execution_strategy_provider.py +47 -0
  349. ads/feature_store/execution_strategy/spark/__init__.py +0 -0
  350. ads/feature_store/execution_strategy/spark/spark_execution.py +618 -0
  351. ads/feature_store/feature.py +192 -0
  352. ads/feature_store/feature_group.py +1494 -0
  353. ads/feature_store/feature_group_expectation.py +346 -0
  354. ads/feature_store/feature_group_job.py +602 -0
  355. ads/feature_store/feature_lineage/__init__.py +0 -0
  356. ads/feature_store/feature_lineage/graphviz_service.py +180 -0
  357. ads/feature_store/feature_option_details.py +50 -0
  358. ads/feature_store/feature_statistics/__init__.py +0 -0
  359. ads/feature_store/feature_statistics/statistics_service.py +99 -0
  360. ads/feature_store/feature_store.py +699 -0
  361. ads/feature_store/feature_store_registrar.py +518 -0
  362. ads/feature_store/input_feature_detail.py +149 -0
  363. ads/feature_store/mixin/__init__.py +4 -0
  364. ads/feature_store/mixin/oci_feature_store.py +145 -0
  365. ads/feature_store/model_details.py +73 -0
  366. ads/feature_store/query/__init__.py +0 -0
  367. ads/feature_store/query/filter.py +266 -0
  368. ads/feature_store/query/generator/__init__.py +0 -0
  369. ads/feature_store/query/generator/query_generator.py +298 -0
  370. ads/feature_store/query/join.py +161 -0
  371. ads/feature_store/query/query.py +403 -0
  372. ads/feature_store/query/validator/__init__.py +0 -0
  373. ads/feature_store/query/validator/query_validator.py +57 -0
  374. ads/feature_store/response/__init__.py +0 -0
  375. ads/feature_store/response/response_builder.py +68 -0
  376. ads/feature_store/service/__init__.py +0 -0
  377. ads/feature_store/service/oci_dataset.py +139 -0
  378. ads/feature_store/service/oci_dataset_job.py +199 -0
  379. ads/feature_store/service/oci_entity.py +125 -0
  380. ads/feature_store/service/oci_feature_group.py +164 -0
  381. ads/feature_store/service/oci_feature_group_job.py +214 -0
  382. ads/feature_store/service/oci_feature_store.py +182 -0
  383. ads/feature_store/service/oci_lineage.py +87 -0
  384. ads/feature_store/service/oci_transformation.py +104 -0
  385. ads/feature_store/statistics/__init__.py +0 -0
  386. ads/feature_store/statistics/abs_feature_value.py +49 -0
  387. ads/feature_store/statistics/charts/__init__.py +0 -0
  388. ads/feature_store/statistics/charts/abstract_feature_plot.py +37 -0
  389. ads/feature_store/statistics/charts/box_plot.py +148 -0
  390. ads/feature_store/statistics/charts/frequency_distribution.py +65 -0
  391. ads/feature_store/statistics/charts/probability_distribution.py +68 -0
  392. ads/feature_store/statistics/charts/top_k_frequent_elements.py +98 -0
  393. ads/feature_store/statistics/feature_stat.py +126 -0
  394. ads/feature_store/statistics/generic_feature_value.py +33 -0
  395. ads/feature_store/statistics/statistics.py +41 -0
  396. ads/feature_store/statistics_config.py +101 -0
  397. ads/feature_store/templates/feature_store_template.yaml +45 -0
  398. ads/feature_store/transformation.py +499 -0
  399. ads/feature_store/validation_output.py +57 -0
  400. ads/hpo/__init__.py +9 -0
  401. ads/hpo/_imports.py +91 -0
  402. ads/hpo/ads_search_space.py +439 -0
  403. ads/hpo/distributions.py +325 -0
  404. ads/hpo/objective.py +280 -0
  405. ads/hpo/search_cv.py +1657 -0
  406. ads/hpo/stopping_criterion.py +75 -0
  407. ads/hpo/tuner_artifact.py +413 -0
  408. ads/hpo/utils.py +91 -0
  409. ads/hpo/validation.py +140 -0
  410. ads/hpo/visualization/__init__.py +5 -0
  411. ads/hpo/visualization/_contour.py +23 -0
  412. ads/hpo/visualization/_edf.py +20 -0
  413. ads/hpo/visualization/_intermediate_values.py +21 -0
  414. ads/hpo/visualization/_optimization_history.py +25 -0
  415. ads/hpo/visualization/_parallel_coordinate.py +169 -0
  416. ads/hpo/visualization/_param_importances.py +26 -0
  417. ads/jobs/__init__.py +53 -0
  418. ads/jobs/ads_job.py +663 -0
  419. ads/jobs/builders/__init__.py +5 -0
  420. ads/jobs/builders/base.py +156 -0
  421. ads/jobs/builders/infrastructure/__init__.py +6 -0
  422. ads/jobs/builders/infrastructure/base.py +165 -0
  423. ads/jobs/builders/infrastructure/dataflow.py +1252 -0
  424. ads/jobs/builders/infrastructure/dsc_job.py +1894 -0
  425. ads/jobs/builders/infrastructure/dsc_job_runtime.py +1233 -0
  426. ads/jobs/builders/infrastructure/utils.py +65 -0
  427. ads/jobs/builders/runtimes/__init__.py +5 -0
  428. ads/jobs/builders/runtimes/artifact.py +338 -0
  429. ads/jobs/builders/runtimes/base.py +325 -0
  430. ads/jobs/builders/runtimes/container_runtime.py +242 -0
  431. ads/jobs/builders/runtimes/python_runtime.py +1016 -0
  432. ads/jobs/builders/runtimes/pytorch_runtime.py +204 -0
  433. ads/jobs/cli.py +104 -0
  434. ads/jobs/env_var_parser.py +131 -0
  435. ads/jobs/extension.py +160 -0
  436. ads/jobs/schema/__init__.py +5 -0
  437. ads/jobs/schema/infrastructure_schema.json +116 -0
  438. ads/jobs/schema/job_schema.json +42 -0
  439. ads/jobs/schema/runtime_schema.json +183 -0
  440. ads/jobs/schema/validator.py +141 -0
  441. ads/jobs/serializer.py +296 -0
  442. ads/jobs/templates/__init__.py +5 -0
  443. ads/jobs/templates/container.py +6 -0
  444. ads/jobs/templates/driver_notebook.py +177 -0
  445. ads/jobs/templates/driver_oci.py +500 -0
  446. ads/jobs/templates/driver_python.py +48 -0
  447. ads/jobs/templates/driver_pytorch.py +852 -0
  448. ads/jobs/templates/driver_utils.py +615 -0
  449. ads/jobs/templates/hostname_from_env.c +55 -0
  450. ads/jobs/templates/oci_metrics.py +181 -0
  451. ads/jobs/utils.py +104 -0
  452. ads/llm/__init__.py +28 -0
  453. ads/llm/autogen/__init__.py +2 -0
  454. ads/llm/autogen/constants.py +15 -0
  455. ads/llm/autogen/reports/__init__.py +2 -0
  456. ads/llm/autogen/reports/base.py +67 -0
  457. ads/llm/autogen/reports/data.py +103 -0
  458. ads/llm/autogen/reports/session.py +526 -0
  459. ads/llm/autogen/reports/templates/chat_box.html +13 -0
  460. ads/llm/autogen/reports/templates/chat_box_lt.html +5 -0
  461. ads/llm/autogen/reports/templates/chat_box_rt.html +6 -0
  462. ads/llm/autogen/reports/utils.py +56 -0
  463. ads/llm/autogen/v02/__init__.py +4 -0
  464. ads/llm/autogen/v02/client.py +295 -0
  465. ads/llm/autogen/v02/log_handlers/__init__.py +2 -0
  466. ads/llm/autogen/v02/log_handlers/oci_file_handler.py +83 -0
  467. ads/llm/autogen/v02/loggers/__init__.py +6 -0
  468. ads/llm/autogen/v02/loggers/metric_logger.py +320 -0
  469. ads/llm/autogen/v02/loggers/session_logger.py +580 -0
  470. ads/llm/autogen/v02/loggers/utils.py +86 -0
  471. ads/llm/autogen/v02/runtime_logging.py +163 -0
  472. ads/llm/chain.py +268 -0
  473. ads/llm/chat_template.py +31 -0
  474. ads/llm/deploy.py +63 -0
  475. ads/llm/guardrails/__init__.py +5 -0
  476. ads/llm/guardrails/base.py +442 -0
  477. ads/llm/guardrails/huggingface.py +44 -0
  478. ads/llm/langchain/__init__.py +5 -0
  479. ads/llm/langchain/plugins/__init__.py +5 -0
  480. ads/llm/langchain/plugins/chat_models/__init__.py +5 -0
  481. ads/llm/langchain/plugins/chat_models/oci_data_science.py +1027 -0
  482. ads/llm/langchain/plugins/embeddings/__init__.py +4 -0
  483. ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +184 -0
  484. ads/llm/langchain/plugins/llms/__init__.py +5 -0
  485. ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +979 -0
  486. ads/llm/requirements.txt +3 -0
  487. ads/llm/serialize.py +219 -0
  488. ads/llm/serializers/__init__.py +0 -0
  489. ads/llm/serializers/retrieval_qa.py +153 -0
  490. ads/llm/serializers/runnable_parallel.py +27 -0
  491. ads/llm/templates/score_chain.jinja2 +155 -0
  492. ads/llm/templates/tool_chat_template_hermes.jinja +130 -0
  493. ads/llm/templates/tool_chat_template_mistral_parallel.jinja +94 -0
  494. ads/model/__init__.py +52 -0
  495. ads/model/artifact.py +573 -0
  496. ads/model/artifact_downloader.py +254 -0
  497. ads/model/artifact_uploader.py +267 -0
  498. ads/model/base_properties.py +238 -0
  499. ads/model/common/.model-ignore +66 -0
  500. ads/model/common/__init__.py +5 -0
  501. ads/model/common/utils.py +142 -0
  502. ads/model/datascience_model.py +2635 -0
  503. ads/model/deployment/__init__.py +20 -0
  504. ads/model/deployment/common/__init__.py +5 -0
  505. ads/model/deployment/common/utils.py +308 -0
  506. ads/model/deployment/model_deployer.py +466 -0
  507. ads/model/deployment/model_deployment.py +1846 -0
  508. ads/model/deployment/model_deployment_infrastructure.py +671 -0
  509. ads/model/deployment/model_deployment_properties.py +493 -0
  510. ads/model/deployment/model_deployment_runtime.py +838 -0
  511. ads/model/extractor/__init__.py +5 -0
  512. ads/model/extractor/automl_extractor.py +74 -0
  513. ads/model/extractor/embedding_onnx_extractor.py +80 -0
  514. ads/model/extractor/huggingface_extractor.py +88 -0
  515. ads/model/extractor/keras_extractor.py +84 -0
  516. ads/model/extractor/lightgbm_extractor.py +93 -0
  517. ads/model/extractor/model_info_extractor.py +114 -0
  518. ads/model/extractor/model_info_extractor_factory.py +105 -0
  519. ads/model/extractor/pytorch_extractor.py +87 -0
  520. ads/model/extractor/sklearn_extractor.py +112 -0
  521. ads/model/extractor/spark_extractor.py +89 -0
  522. ads/model/extractor/tensorflow_extractor.py +85 -0
  523. ads/model/extractor/xgboost_extractor.py +94 -0
  524. ads/model/framework/__init__.py +5 -0
  525. ads/model/framework/automl_model.py +178 -0
  526. ads/model/framework/embedding_onnx_model.py +438 -0
  527. ads/model/framework/huggingface_model.py +399 -0
  528. ads/model/framework/lightgbm_model.py +266 -0
  529. ads/model/framework/pytorch_model.py +266 -0
  530. ads/model/framework/sklearn_model.py +250 -0
  531. ads/model/framework/spark_model.py +326 -0
  532. ads/model/framework/tensorflow_model.py +254 -0
  533. ads/model/framework/xgboost_model.py +258 -0
  534. ads/model/generic_model.py +3518 -0
  535. ads/model/model_artifact_boilerplate/README.md +381 -0
  536. ads/model/model_artifact_boilerplate/__init__.py +5 -0
  537. ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +5 -0
  538. ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +427 -0
  539. ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +2 -0
  540. ads/model/model_artifact_boilerplate/runtime.yaml +7 -0
  541. ads/model/model_artifact_boilerplate/score.py +61 -0
  542. ads/model/model_file_description_schema.json +68 -0
  543. ads/model/model_introspect.py +331 -0
  544. ads/model/model_metadata.py +1810 -0
  545. ads/model/model_metadata_mixin.py +460 -0
  546. ads/model/model_properties.py +63 -0
  547. ads/model/model_version_set.py +739 -0
  548. ads/model/runtime/__init__.py +5 -0
  549. ads/model/runtime/env_info.py +306 -0
  550. ads/model/runtime/model_deployment_details.py +37 -0
  551. ads/model/runtime/model_provenance_details.py +58 -0
  552. ads/model/runtime/runtime_info.py +81 -0
  553. ads/model/runtime/schemas/inference_env_info_schema.yaml +16 -0
  554. ads/model/runtime/schemas/model_provenance_schema.yaml +36 -0
  555. ads/model/runtime/schemas/training_env_info_schema.yaml +16 -0
  556. ads/model/runtime/utils.py +201 -0
  557. ads/model/serde/__init__.py +5 -0
  558. ads/model/serde/common.py +40 -0
  559. ads/model/serde/model_input.py +547 -0
  560. ads/model/serde/model_serializer.py +1184 -0
  561. ads/model/service/__init__.py +5 -0
  562. ads/model/service/oci_datascience_model.py +1076 -0
  563. ads/model/service/oci_datascience_model_deployment.py +500 -0
  564. ads/model/service/oci_datascience_model_version_set.py +176 -0
  565. ads/model/transformer/__init__.py +5 -0
  566. ads/model/transformer/onnx_transformer.py +324 -0
  567. ads/mysqldb/__init__.py +5 -0
  568. ads/mysqldb/mysql_db.py +227 -0
  569. ads/opctl/__init__.py +18 -0
  570. ads/opctl/anomaly_detection.py +11 -0
  571. ads/opctl/backend/__init__.py +5 -0
  572. ads/opctl/backend/ads_dataflow.py +353 -0
  573. ads/opctl/backend/ads_ml_job.py +710 -0
  574. ads/opctl/backend/ads_ml_pipeline.py +164 -0
  575. ads/opctl/backend/ads_model_deployment.py +209 -0
  576. ads/opctl/backend/base.py +146 -0
  577. ads/opctl/backend/local.py +1053 -0
  578. ads/opctl/backend/marketplace/__init__.py +9 -0
  579. ads/opctl/backend/marketplace/helm_helper.py +173 -0
  580. ads/opctl/backend/marketplace/local_marketplace.py +271 -0
  581. ads/opctl/backend/marketplace/marketplace_backend_runner.py +71 -0
  582. ads/opctl/backend/marketplace/marketplace_operator_interface.py +44 -0
  583. ads/opctl/backend/marketplace/marketplace_operator_runner.py +24 -0
  584. ads/opctl/backend/marketplace/marketplace_utils.py +212 -0
  585. ads/opctl/backend/marketplace/models/__init__.py +5 -0
  586. ads/opctl/backend/marketplace/models/bearer_token.py +94 -0
  587. ads/opctl/backend/marketplace/models/marketplace_type.py +70 -0
  588. ads/opctl/backend/marketplace/models/ocir_details.py +56 -0
  589. ads/opctl/backend/marketplace/prerequisite_checker.py +238 -0
  590. ads/opctl/cli.py +707 -0
  591. ads/opctl/cmds.py +869 -0
  592. ads/opctl/conda/__init__.py +5 -0
  593. ads/opctl/conda/cli.py +193 -0
  594. ads/opctl/conda/cmds.py +749 -0
  595. ads/opctl/conda/config.yaml +34 -0
  596. ads/opctl/conda/manifest_template.yaml +13 -0
  597. ads/opctl/conda/multipart_uploader.py +188 -0
  598. ads/opctl/conda/pack.py +89 -0
  599. ads/opctl/config/__init__.py +5 -0
  600. ads/opctl/config/base.py +57 -0
  601. ads/opctl/config/diagnostics/__init__.py +5 -0
  602. ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +62 -0
  603. ads/opctl/config/merger.py +255 -0
  604. ads/opctl/config/resolver.py +297 -0
  605. ads/opctl/config/utils.py +79 -0
  606. ads/opctl/config/validator.py +17 -0
  607. ads/opctl/config/versioner.py +68 -0
  608. ads/opctl/config/yaml_parsers/__init__.py +7 -0
  609. ads/opctl/config/yaml_parsers/base.py +58 -0
  610. ads/opctl/config/yaml_parsers/distributed/__init__.py +7 -0
  611. ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +201 -0
  612. ads/opctl/constants.py +66 -0
  613. ads/opctl/decorator/__init__.py +5 -0
  614. ads/opctl/decorator/common.py +129 -0
  615. ads/opctl/diagnostics/__init__.py +5 -0
  616. ads/opctl/diagnostics/__main__.py +25 -0
  617. ads/opctl/diagnostics/check_distributed_job_requirements.py +212 -0
  618. ads/opctl/diagnostics/check_requirements.py +144 -0
  619. ads/opctl/diagnostics/requirement_exception.py +9 -0
  620. ads/opctl/distributed/README.md +109 -0
  621. ads/opctl/distributed/__init__.py +5 -0
  622. ads/opctl/distributed/certificates.py +32 -0
  623. ads/opctl/distributed/cli.py +207 -0
  624. ads/opctl/distributed/cmds.py +731 -0
  625. ads/opctl/distributed/common/__init__.py +5 -0
  626. ads/opctl/distributed/common/abstract_cluster_provider.py +449 -0
  627. ads/opctl/distributed/common/abstract_framework_spec_builder.py +88 -0
  628. ads/opctl/distributed/common/cluster_config_helper.py +103 -0
  629. ads/opctl/distributed/common/cluster_provider_factory.py +21 -0
  630. ads/opctl/distributed/common/cluster_runner.py +54 -0
  631. ads/opctl/distributed/common/framework_factory.py +29 -0
  632. ads/opctl/docker/Dockerfile.job +103 -0
  633. ads/opctl/docker/Dockerfile.job.arm +107 -0
  634. ads/opctl/docker/Dockerfile.job.gpu +175 -0
  635. ads/opctl/docker/base-env.yaml +13 -0
  636. ads/opctl/docker/cuda.repo +6 -0
  637. ads/opctl/docker/operator/.dockerignore +0 -0
  638. ads/opctl/docker/operator/Dockerfile +41 -0
  639. ads/opctl/docker/operator/Dockerfile.gpu +85 -0
  640. ads/opctl/docker/operator/cuda.repo +6 -0
  641. ads/opctl/docker/operator/environment.yaml +8 -0
  642. ads/opctl/forecast.py +11 -0
  643. ads/opctl/index.yaml +3 -0
  644. ads/opctl/model/__init__.py +5 -0
  645. ads/opctl/model/cli.py +65 -0
  646. ads/opctl/model/cmds.py +73 -0
  647. ads/opctl/operator/README.md +4 -0
  648. ads/opctl/operator/__init__.py +31 -0
  649. ads/opctl/operator/cli.py +344 -0
  650. ads/opctl/operator/cmd.py +596 -0
  651. ads/opctl/operator/common/__init__.py +5 -0
  652. ads/opctl/operator/common/backend_factory.py +460 -0
  653. ads/opctl/operator/common/const.py +27 -0
  654. ads/opctl/operator/common/data/synthetic.csv +16001 -0
  655. ads/opctl/operator/common/dictionary_merger.py +148 -0
  656. ads/opctl/operator/common/errors.py +42 -0
  657. ads/opctl/operator/common/operator_config.py +99 -0
  658. ads/opctl/operator/common/operator_loader.py +811 -0
  659. ads/opctl/operator/common/operator_schema.yaml +130 -0
  660. ads/opctl/operator/common/operator_yaml_generator.py +152 -0
  661. ads/opctl/operator/common/utils.py +208 -0
  662. ads/opctl/operator/lowcode/__init__.py +5 -0
  663. ads/opctl/operator/lowcode/anomaly/MLoperator +16 -0
  664. ads/opctl/operator/lowcode/anomaly/README.md +207 -0
  665. ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
  666. ads/opctl/operator/lowcode/anomaly/__main__.py +103 -0
  667. ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
  668. ads/opctl/operator/lowcode/anomaly/const.py +167 -0
  669. ads/opctl/operator/lowcode/anomaly/environment.yaml +10 -0
  670. ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
  671. ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +146 -0
  672. ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +162 -0
  673. ads/opctl/operator/lowcode/anomaly/model/automlx.py +99 -0
  674. ads/opctl/operator/lowcode/anomaly/model/autots.py +115 -0
  675. ads/opctl/operator/lowcode/anomaly/model/base_model.py +404 -0
  676. ads/opctl/operator/lowcode/anomaly/model/factory.py +110 -0
  677. ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +78 -0
  678. ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +78 -0
  679. ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +120 -0
  680. ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
  681. ads/opctl/operator/lowcode/anomaly/operator_config.py +127 -0
  682. ads/opctl/operator/lowcode/anomaly/schema.yaml +401 -0
  683. ads/opctl/operator/lowcode/anomaly/utils.py +88 -0
  684. ads/opctl/operator/lowcode/common/__init__.py +5 -0
  685. ads/opctl/operator/lowcode/common/const.py +10 -0
  686. ads/opctl/operator/lowcode/common/data.py +116 -0
  687. ads/opctl/operator/lowcode/common/errors.py +47 -0
  688. ads/opctl/operator/lowcode/common/transformations.py +296 -0
  689. ads/opctl/operator/lowcode/common/utils.py +384 -0
  690. ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +13 -0
  691. ads/opctl/operator/lowcode/feature_store_marketplace/README.md +30 -0
  692. ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +5 -0
  693. ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +116 -0
  694. ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +85 -0
  695. ads/opctl/operator/lowcode/feature_store_marketplace/const.py +15 -0
  696. ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
  697. ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +4 -0
  698. ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +32 -0
  699. ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +43 -0
  700. ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +120 -0
  701. ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +34 -0
  702. ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +386 -0
  703. ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +160 -0
  704. ads/opctl/operator/lowcode/forecast/MLoperator +25 -0
  705. ads/opctl/operator/lowcode/forecast/README.md +209 -0
  706. ads/opctl/operator/lowcode/forecast/__init__.py +5 -0
  707. ads/opctl/operator/lowcode/forecast/__main__.py +89 -0
  708. ads/opctl/operator/lowcode/forecast/cmd.py +40 -0
  709. ads/opctl/operator/lowcode/forecast/const.py +92 -0
  710. ads/opctl/operator/lowcode/forecast/environment.yaml +20 -0
  711. ads/opctl/operator/lowcode/forecast/errors.py +26 -0
  712. ads/opctl/operator/lowcode/forecast/model/__init__.py +5 -0
  713. ads/opctl/operator/lowcode/forecast/model/arima.py +279 -0
  714. ads/opctl/operator/lowcode/forecast/model/automlx.py +553 -0
  715. ads/opctl/operator/lowcode/forecast/model/autots.py +312 -0
  716. ads/opctl/operator/lowcode/forecast/model/base_model.py +875 -0
  717. ads/opctl/operator/lowcode/forecast/model/factory.py +106 -0
  718. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +492 -0
  719. ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +243 -0
  720. ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +482 -0
  721. ads/opctl/operator/lowcode/forecast/model/prophet.py +445 -0
  722. ads/opctl/operator/lowcode/forecast/model_evaluator.py +244 -0
  723. ads/opctl/operator/lowcode/forecast/operator_config.py +234 -0
  724. ads/opctl/operator/lowcode/forecast/schema.yaml +506 -0
  725. ads/opctl/operator/lowcode/forecast/utils.py +397 -0
  726. ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +7 -0
  727. ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +285 -0
  728. ads/opctl/operator/lowcode/forecast/whatifserve/score.py +246 -0
  729. ads/opctl/operator/lowcode/pii/MLoperator +17 -0
  730. ads/opctl/operator/lowcode/pii/README.md +208 -0
  731. ads/opctl/operator/lowcode/pii/__init__.py +5 -0
  732. ads/opctl/operator/lowcode/pii/__main__.py +78 -0
  733. ads/opctl/operator/lowcode/pii/cmd.py +39 -0
  734. ads/opctl/operator/lowcode/pii/constant.py +84 -0
  735. ads/opctl/operator/lowcode/pii/environment.yaml +17 -0
  736. ads/opctl/operator/lowcode/pii/errors.py +27 -0
  737. ads/opctl/operator/lowcode/pii/model/__init__.py +5 -0
  738. ads/opctl/operator/lowcode/pii/model/factory.py +82 -0
  739. ads/opctl/operator/lowcode/pii/model/guardrails.py +167 -0
  740. ads/opctl/operator/lowcode/pii/model/pii.py +145 -0
  741. ads/opctl/operator/lowcode/pii/model/processor/__init__.py +34 -0
  742. ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +34 -0
  743. ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +35 -0
  744. ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +225 -0
  745. ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +73 -0
  746. ads/opctl/operator/lowcode/pii/model/processor/remover.py +26 -0
  747. ads/opctl/operator/lowcode/pii/model/report.py +487 -0
  748. ads/opctl/operator/lowcode/pii/operator_config.py +95 -0
  749. ads/opctl/operator/lowcode/pii/schema.yaml +108 -0
  750. ads/opctl/operator/lowcode/pii/utils.py +43 -0
  751. ads/opctl/operator/lowcode/recommender/MLoperator +16 -0
  752. ads/opctl/operator/lowcode/recommender/README.md +206 -0
  753. ads/opctl/operator/lowcode/recommender/__init__.py +5 -0
  754. ads/opctl/operator/lowcode/recommender/__main__.py +82 -0
  755. ads/opctl/operator/lowcode/recommender/cmd.py +33 -0
  756. ads/opctl/operator/lowcode/recommender/constant.py +30 -0
  757. ads/opctl/operator/lowcode/recommender/environment.yaml +11 -0
  758. ads/opctl/operator/lowcode/recommender/model/base_model.py +212 -0
  759. ads/opctl/operator/lowcode/recommender/model/factory.py +56 -0
  760. ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +25 -0
  761. ads/opctl/operator/lowcode/recommender/model/svd.py +106 -0
  762. ads/opctl/operator/lowcode/recommender/operator_config.py +81 -0
  763. ads/opctl/operator/lowcode/recommender/schema.yaml +265 -0
  764. ads/opctl/operator/lowcode/recommender/utils.py +13 -0
  765. ads/opctl/operator/runtime/__init__.py +5 -0
  766. ads/opctl/operator/runtime/const.py +17 -0
  767. ads/opctl/operator/runtime/container_runtime_schema.yaml +50 -0
  768. ads/opctl/operator/runtime/marketplace_runtime.py +50 -0
  769. ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +21 -0
  770. ads/opctl/operator/runtime/python_runtime_schema.yaml +21 -0
  771. ads/opctl/operator/runtime/runtime.py +115 -0
  772. ads/opctl/schema.yaml.yml +36 -0
  773. ads/opctl/script.py +40 -0
  774. ads/opctl/spark/__init__.py +5 -0
  775. ads/opctl/spark/cli.py +43 -0
  776. ads/opctl/spark/cmds.py +147 -0
  777. ads/opctl/templates/diagnostic_report_template.jinja2 +102 -0
  778. ads/opctl/utils.py +344 -0
  779. ads/oracledb/__init__.py +5 -0
  780. ads/oracledb/oracle_db.py +346 -0
  781. ads/pipeline/__init__.py +39 -0
  782. ads/pipeline/ads_pipeline.py +2279 -0
  783. ads/pipeline/ads_pipeline_run.py +772 -0
  784. ads/pipeline/ads_pipeline_step.py +605 -0
  785. ads/pipeline/builders/__init__.py +5 -0
  786. ads/pipeline/builders/infrastructure/__init__.py +5 -0
  787. ads/pipeline/builders/infrastructure/custom_script.py +32 -0
  788. ads/pipeline/cli.py +119 -0
  789. ads/pipeline/extension.py +291 -0
  790. ads/pipeline/schema/__init__.py +5 -0
  791. ads/pipeline/schema/cs_step_schema.json +35 -0
  792. ads/pipeline/schema/ml_step_schema.json +31 -0
  793. ads/pipeline/schema/pipeline_schema.json +71 -0
  794. ads/pipeline/visualizer/__init__.py +5 -0
  795. ads/pipeline/visualizer/base.py +570 -0
  796. ads/pipeline/visualizer/graph_renderer.py +272 -0
  797. ads/pipeline/visualizer/text_renderer.py +84 -0
  798. ads/secrets/__init__.py +11 -0
  799. ads/secrets/adb.py +386 -0
  800. ads/secrets/auth_token.py +86 -0
  801. ads/secrets/big_data_service.py +365 -0
  802. ads/secrets/mysqldb.py +149 -0
  803. ads/secrets/oracledb.py +160 -0
  804. ads/secrets/secrets.py +407 -0
  805. ads/telemetry/__init__.py +7 -0
  806. ads/telemetry/base.py +69 -0
  807. ads/telemetry/client.py +125 -0
  808. ads/telemetry/telemetry.py +257 -0
  809. ads/templates/dataflow_pyspark.jinja2 +13 -0
  810. ads/templates/dataflow_sparksql.jinja2 +22 -0
  811. ads/templates/func.jinja2 +20 -0
  812. ads/templates/schemas/openapi.json +1740 -0
  813. ads/templates/score-pkl.jinja2 +173 -0
  814. ads/templates/score.jinja2 +322 -0
  815. ads/templates/score_embedding_onnx.jinja2 +202 -0
  816. ads/templates/score_generic.jinja2 +165 -0
  817. ads/templates/score_huggingface_pipeline.jinja2 +217 -0
  818. ads/templates/score_lightgbm.jinja2 +185 -0
  819. ads/templates/score_onnx.jinja2 +407 -0
  820. ads/templates/score_onnx_new.jinja2 +473 -0
  821. ads/templates/score_oracle_automl.jinja2 +185 -0
  822. ads/templates/score_pyspark.jinja2 +154 -0
  823. ads/templates/score_pytorch.jinja2 +219 -0
  824. ads/templates/score_scikit-learn.jinja2 +184 -0
  825. ads/templates/score_tensorflow.jinja2 +184 -0
  826. ads/templates/score_xgboost.jinja2 +178 -0
  827. ads/text_dataset/__init__.py +5 -0
  828. ads/text_dataset/backends.py +211 -0
  829. ads/text_dataset/dataset.py +445 -0
  830. ads/text_dataset/extractor.py +207 -0
  831. ads/text_dataset/options.py +53 -0
  832. ads/text_dataset/udfs.py +22 -0
  833. ads/text_dataset/utils.py +49 -0
  834. ads/type_discovery/__init__.py +9 -0
  835. ads/type_discovery/abstract_detector.py +21 -0
  836. ads/type_discovery/constant_detector.py +41 -0
  837. ads/type_discovery/continuous_detector.py +54 -0
  838. ads/type_discovery/credit_card_detector.py +99 -0
  839. ads/type_discovery/datetime_detector.py +92 -0
  840. ads/type_discovery/discrete_detector.py +118 -0
  841. ads/type_discovery/document_detector.py +146 -0
  842. ads/type_discovery/ip_detector.py +68 -0
  843. ads/type_discovery/latlon_detector.py +90 -0
  844. ads/type_discovery/phone_number_detector.py +63 -0
  845. ads/type_discovery/type_discovery_driver.py +87 -0
  846. ads/type_discovery/typed_feature.py +594 -0
  847. ads/type_discovery/unknown_detector.py +41 -0
  848. ads/type_discovery/zipcode_detector.py +48 -0
  849. ads/vault/__init__.py +7 -0
  850. ads/vault/vault.py +237 -0
  851. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.9rc1.dist-info}/METADATA +150 -150
  852. oracle_ads-2.13.9rc1.dist-info/RECORD +858 -0
  853. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.9rc1.dist-info}/WHEEL +1 -2
  854. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.9rc1.dist-info}/entry_points.txt +2 -1
  855. oracle_ads-2.13.9rc0.dist-info/RECORD +0 -9
  856. oracle_ads-2.13.9rc0.dist-info/top_level.txt +0 -1
  857. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.9rc1.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,995 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8; -*-
3
+
4
+ # Copyright (c) 2020, 2023 Oracle and/or its affiliates.
5
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
+
7
+ from __future__ import absolute_import, print_function
8
+
9
+ import abc
10
+ import importlib
11
+ from collections import defaultdict
12
+ from numbers import Number
13
+ from typing import Tuple, Union
14
+
15
+ import pandas as pd
16
+ from ads.common import utils, logger
17
+ from ads.common.data import ADSData
18
+ from ads.common.decorator.runtime_dependency import (
19
+ runtime_dependency,
20
+ OptionalDependency,
21
+ )
22
+ from ads.dataset import helper
23
+ from ads.dataset.dataset import ADSDataset
24
+ from ads.dataset.feature_engineering_transformer import FeatureEngineeringTransformer
25
+ from ads.dataset.feature_selection import FeatureImportance
26
+ from ads.dataset.helper import (
27
+ DatasetDefaults,
28
+ deprecate_default_value,
29
+ deprecate_variable,
30
+ generate_sample,
31
+ get_target_type,
32
+ is_text_data,
33
+ )
34
+ from ads.dataset.label_encoder import DataFrameLabelEncoder
35
+ from ads.dataset.pipeline import TransformerPipeline
36
+ from ads.dataset.progress import DummyProgressBar
37
+ from ads.dataset.recommendation import Recommendation
38
+ from ads.dataset.recommendation_transformer import RecommendationTransformer
39
+ from ads.dataset.target import TargetVariable
40
+ from ads.type_discovery.typed_feature import (
41
+ CategoricalTypedFeature,
42
+ ContinuousTypedFeature,
43
+ DocumentTypedFeature,
44
+ GISTypedFeature,
45
+ OrdinalTypedFeature,
46
+ TypedFeature,
47
+ DateTimeTypedFeature,
48
+ TypedFeature
49
+ )
50
+ from sklearn.model_selection import train_test_split
51
+ from pandas.io.formats.printing import pprint_thing
52
+ from sklearn.preprocessing import FunctionTransformer
53
+ from abc import ABCMeta
54
+
55
+
56
+ class ADSDatasetWithTarget(ADSDataset, metaclass=ABCMeta):
57
+ """
58
+ This class provides APIs for preparing dataset for modeling.
59
+ """
60
+
61
def __init__(
    self,
    df,
    target,
    sampled_df=None,
    shape=None,
    target_type=None,
    sample_max_rows=-1,
    type_discovery=True,
    types=None,
    parent=None,
    name="",
    metadata=None,
    transformer_pipeline=None,
    description=None,
    progress=DummyProgressBar(),
    **kwargs,
):
    """
    Build a dataset in which ``target`` is tracked as the target variable.

    Parameters
    ----------
    df:
        The full data frame.
    target: str
        Name of the target column. It is moved to the first position of the
        sampled data frame.
    sampled_df: optional
        Sample of ``df``. Generated with ``generate_sample`` when omitted.
    shape: optional
        Shape of the data. Defaults to ``df.shape``.
    target_type: optional
        Type of the target. Inferred with ``get_target_type`` when omitted.
    sample_max_rows, type_discovery, types, name, metadata,
    transformer_pipeline, description, progress:
        Forwarded to ``ADSDataset.__init__`` when no ``parent`` is given.
        ``types`` defaults to a fresh empty dict on every call.
    parent: optional
        An existing dataset. When given, its ``__dict__`` is copied into this
        instance instead of calling ``ADSDataset.__init__``.
    kwargs:
        Forwarded to ``generate_sample`` and ``get_target_type``.
    """
    # A fresh dict per call avoids sharing one mutable default between instances.
    if types is None:
        types = {}

    self.recommendation_transformer = None
    if shape is None:
        shape = df.shape
    if sampled_df is None:
        sampled_df = generate_sample(
            df,
            shape[0],
            DatasetDefaults.sampling_confidence_level,
            DatasetDefaults.sampling_confidence_interval,
            **kwargs,
        )

    if parent is None:
        cols = sampled_df.columns.tolist()
        cols.insert(0, cols.pop(cols.index(target)))
        ADSDataset.__init__(
            self,
            df,
            sampled_df[[*cols]],
            shape,
            name=name,
            description=description,
            type_discovery=type_discovery,
            types=types,
            progress=progress,
            metadata=metadata,
            transformer_pipeline=transformer_pipeline,
            sample_max_rows=sample_max_rows,
        )
    else:
        self.__dict__ = parent.__dict__.copy()
        cols = self.sampled_df.columns.tolist()
        cols.insert(0, cols.pop(cols.index(target)))

        self.sampled_df = parent.sampled_df[[*cols]]

        # if parent has already been built, just reorder the columns to display
        # the plot for target at beginning
        if parent.correlation is None:
            self.corr_futures = parent.corr_futures
        else:
            corr_cols = parent.sampled_df.select_dtypes(
                exclude=["object"]
            ).columns.values.tolist()
            corr_cols.insert(0, corr_cols.pop(corr_cols.index(target)))
            # Reorder rows and columns alike. The column selector must be the
            # flat label list; wrapping it in another list is not a valid
            # column selection.
            self.correlation = parent.correlation.reindex(corr_cols)[corr_cols]
        self.feature_types = parent.feature_types
        self.feature_dist_html_dict = {}
        if len(parent.feature_dist_html_dict) > 0:
            # Rebuild the dict so that the target entry comes first.
            parent_feature_dist_html_dict = parent.feature_dist_html_dict.copy()
            self.feature_dist_html_dict = {
                target: parent_feature_dist_html_dict.pop(target)
            }
            self.feature_dist_html_dict.update(parent_feature_dist_html_dict)

    # From here on the target is addressed by its normalized name
    # (surrounding whitespace stripped, inner spaces replaced by underscores).
    target = target.strip().replace(" ", "_")

    # As an optimization, only drop rows and regenerate the sample when the
    # target actually has missing values.
    if self.df[target].isna().sum():
        # remove rows for which the target is null
        self.df = self.df.dropna(subset=[target])

        # we cannot simply drop null values from the sampled_df after a change
        # to the df - we must rebuild the sample from the new df
        self.sampled_df = helper.generate_sample(
            self.df,
            sampled_df.shape[0],
            helper.DatasetDefaults.sampling_confidence_level,
            helper.DatasetDefaults.sampling_confidence_interval,
        )
        # after regenerating the sample we need to move the target back to the head
        cols = self.sampled_df.columns.tolist()
        cols.insert(0, cols.pop(cols.index(target)))
        self.sampled_df = self.sampled_df[[*cols]]

    if target_type is None:
        # NOTE(review): this inspects the caller-side ``sampled_df`` rather than
        # ``self.sampled_df``, so rows dropped above are still part of the
        # sample used for inference - confirm this is intended.
        target_type = get_target_type(target, sampled_df, **kwargs)
    self.target = TargetVariable(self, target, target_type)

    # remove target from type discovery conversion
    for step in self.transformer_pipeline.steps:
        if (
            step[0] == "type_discovery"
            and self.target.name in step[1].kw_args["dtypes"]
        ):
            step[1].kw_args["dtypes"].pop(self.target.name)
175
+
176
@staticmethod
def from_dataframe(
    df: pd.DataFrame,
    target: str,
    sampled_df: pd.DataFrame = None,
    shape: Tuple[int, int] = None,
    target_type: TypedFeature = None,
    positive_class=None,
    **init_kwargs,
):
    """
    Create the dataset subclass that matches the type of the target column.

    Parameters
    ----------
    df: pd.DataFrame
        The full data frame.
    target: str
        Name of the target column. It must exist in ``df`` and must contain
        at least one non-null value.
    sampled_df: pd.DataFrame, optional
        Sample of ``df``. Generated with ``generate_sample`` when omitted.
    shape: Tuple[int, int], optional
        Shape of the data. ``df.shape`` is used for sampling when omitted.
    target_type: TypedFeature, optional
        Type of the target. Inferred with ``get_target_type`` when omitted.
    positive_class: optional
        Forwarded to the binary classification datasets only.
    init_kwargs:
        Forwarded to ``generate_sample``, ``get_target_type`` and the
        constructor of the selected dataset class.

    Returns
    -------
    A ``RegressionDataset``, ``ForecastingDataset`` or one of the binary /
    multi-class (text) classification datasets.

    Raises
    ------
    ValueError
        If the target column is missing or empty, if its type cannot serve
        as a target (text or coordinate data), or if no problem type can be
        identified.
    """
    from ads.dataset.classification_dataset import (
        BinaryClassificationDataset,
        BinaryTextClassificationDataset,
        MultiClassClassificationDataset,
        MultiClassTextClassificationDataset,
    )
    from ads.dataset.forecasting_dataset import ForecastingDataset
    from ads.dataset.regression_dataset import RegressionDataset

    # Validate the target first so that no sampling or type inference is
    # spent on input that is rejected anyway.
    if target not in df:
        raise ValueError(
            f"{target} column doesn't exist in data frame. Specify a valid one instead."
        )
    if len(df[target].dropna()) == 0:
        logger.warning(
            "It is not recommended to use an empty column as the target variable."
        )
        raise ValueError(
            "We do not support using empty columns as the chosen target"
        )

    if sampled_df is None:
        sampled_df = generate_sample(
            df,
            (shape or df.shape)[0],
            DatasetDefaults.sampling_confidence_level,
            DatasetDefaults.sampling_confidence_interval,
            **init_kwargs,
        )

    if target_type is None:
        target_type = get_target_type(target, sampled_df, **init_kwargs)

    def _build(dataset_cls, **extra):
        # Every dataset class receives the same core arguments.
        return dataset_cls(
            df=df,
            sampled_df=sampled_df,
            target=target,
            target_type=target_type,
            shape=shape,
            **extra,
            **init_kwargs,
        )

    if utils.is_same_class(target_type, ContinuousTypedFeature):
        return _build(RegressionDataset)

    if utils.is_same_class(
        target_type, DateTimeTypedFeature
    ) or df.index.dtype.name.startswith("datetime"):
        return _build(ForecastingDataset)

    # Adding ordinal typed feature, but ultimately we should rethink how we
    # want to model this type
    if utils.is_same_class(
        target_type, CategoricalTypedFeature
    ) or utils.is_same_class(target_type, OrdinalTypedFeature):
        is_binary = target_type.meta_data["internal"]["unique"] == 2
        is_text = is_text_data(sampled_df, target)
        if is_binary:
            dataset_cls = (
                BinaryTextClassificationDataset
                if is_text
                else BinaryClassificationDataset
            )
            return _build(dataset_cls, positive_class=positive_class)
        dataset_cls = (
            MultiClassTextClassificationDataset
            if is_text
            else MultiClassClassificationDataset
        )
        return _build(dataset_cls)

    # The class check must inspect the detected type, not the column name.
    if (
        utils.is_same_class(target_type, DocumentTypedFeature)
        or "text" in target_type["type"]
        or "text" in target
    ):
        raise ValueError(
            f"The column {target} cannot be used as the target column."
        )

    if (
        utils.is_same_class(target_type, GISTypedFeature)
        or "coord" in target_type["type"]
        or "coord" in target
    ):
        raise ValueError(
            f"The column {target} cannot be used as the target column."
        )

    # This is to catch constant columns that are boolean. Added as a fix for
    # pd.isnull(), and datasets with a binary target, but only data on one
    # instance
    if target_type and target_type["low_level_type"] == "bool":
        return _build(BinaryClassificationDataset, positive_class=positive_class)

    raise ValueError(
        f"Unable to identify problem type. Specify the data type of {target} using 'types'. "
        f"For example, types = {{{target}: 'category'}}"
    )
315
+
316
def rename_columns(self, columns):
    """
    Return a dataset with its columns renamed.

    ``columns`` is either a dict mapping current names to new names, or a
    list holding one new name per existing column, in column order. When the
    target column is part of the mapping, its new name is handed to
    ``rename`` as ``_new_target``; otherwise ``_new_target`` is ``None``.
    """
    if isinstance(columns, list):
        existing = self.columns.values
        assert len(columns) == len(existing), "columns length do not match the dataset"
        columns = dict(zip(existing, columns))
    assert isinstance(columns, dict)

    target_name = self.target.name
    renamed_target = columns[target_name] if target_name in columns else None
    return self.rename(columns=columns, _new_target=renamed_target)
330
+
331
def select_best_features(self, score_func=None, k=12):
    """
    Return a new dataset that keeps only the top k features.

    Parameters
    ----------
    k: int, default 12
        The number of top features to keep.
    score_func: function
        Scoring function used to rank the features. It should take a 2d array
        X (features) and an array like y (target) and return a numeric score
        for each feature, in the same order as X.

    Notes
    -----
    See also https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.f_regression.html
    and https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.f_classif.html

    Examples
    --------
    >>> ds = DatasetBrowser("sklearn").open("iris")
    >>> ds_small = ds.select_best_features(k=2)
    """
    step = self._get_best_features_transformer(score_func=score_func, k=k)
    # The second element of the step is the object that performs the transform.
    selector = step[1]
    reduced_df = selector.transform(self.df)
    reduced_sample = selector.transform(self.sampled_df)
    return self._build_new_dataset(reduced_df, reduced_sample, transformers=step)
357
+
358
def auto_transform(
    self,
    correlation_threshold: float = 0.7,
    frac: float = 1.0,
    sample_size=1.0,
    correlation_methods: Union[str, list] = "pearson",
):
    """
    Return a transformed dataset with several optimizations applied automatically.

    The optimizations are:

    - Dropping constant and primary key columns, which have no predictive quality,
    - Imputation, to fill in missing values in noisy data:

        - For continuous variables, fill with mean if less than 40% is missing, else drop,
        - For categorical variables, fill with most frequent if less than 40% is missing, else drop,

    - Dropping strongly co-correlated columns that tend to produce less generalizable models.

    Parameters
    ----------
    correlation_threshold: float, defaults to 0.7. It must be between 0 and 1, inclusive
        Columns whose correlation is higher than this threshold are considered
        strongly co-correlated and recommended to be taken care of.
    frac: Is superseded by sample_size
    sample_size: float, defaults to 1.0. Float, Range -> (0, 1]
        What fraction of the data should be used in the calculation?
    correlation_methods: Union[list, str], defaults to 'pearson'

        - 'pearson': Use Pearson's Correlation between continuous features,
        - 'cramers v': Use Cramer's V correlations between categorical features,
        - 'correlation ratio': Use Correlation Ratio Correlation between categorical and continuous features,
        - 'all': Is equivalent to ['pearson', 'cramers v', 'correlation ratio'].

        Or a list containing any combination of these methods, for example, ['pearson', 'cramers v'].

    Returns
    -------
    transformed_dataset : ADSDatasetWithTarget

    Raises
    ------
    AssertionError
        If ``correlation_threshold`` is greater than 1 or less than 0.

    Examples
    --------
    >>> ds_clean = ds.auto_transform()
    """
    frac = deprecate_default_value(
        frac,
        None,
        1,
        "<code>frac=None</code> is deprecated. Use <code>sample_size=1.0</code> instead.",
        FutureWarning,
    )

    if frac != 1.0:
        superseded_size = deprecate_variable(
            frac,
            sample_size,
            "<code>frac</code> is superseded by <code>sample_size</code>.",
            DeprecationWarning,
        )
        # Only fall back to the deprecated value when sample_size was left at
        # its default.
        if sample_size == 1.0:
            sample_size = superseded_size

    threshold_out_of_range = correlation_threshold > 1 or correlation_threshold < 0
    if threshold_out_of_range:
        raise AssertionError("correlation_threshold has to be between 0 and 1.")

    with utils.get_progress_bar(5) as progress:
        transformed = self._transform(
            progress=progress,
            correlation_threshold=correlation_threshold,
            frac=sample_size,
            correlation_methods=correlation_methods,
        )
        df, sampled_df, transformer_pipeline = transformed
        return self._build_new_dataset(
            df,
            sampled_df=sampled_df,
            transformers=transformer_pipeline.steps,
            progress=progress,
        )
435
+
436
def visualize_transforms(self):
    """
    Render a representation of the dataset's transform DAG.

    The dataset shape is passed along as a rows/columns label.
    """
    label_template = "- rows: {}\\l- columns: {}\\l"
    helper.visualize_transformation(
        self.transformer_pipeline,
        text=label_template.format(*self.shape),
    )
445
+
446
+ def _suggested_code(self, action, recommendation_type, variable):
447
+ if action == "Drop":
448
+ return ".drop_columns([{}])".format('"' + variable + '"')
449
+ if action == "Do nothing":
450
+ return ""
451
+ if "Drop " in action:
452
+ return ".drop_columns([{}])".format('"' + action.split(" ")[1] + '"')
453
+ if action == "Down-sample":
454
+ return ".down_sample()"
455
+ if action == "Up-sample":
456
+ if importlib.util.find_spec("imblearn") is None:
457
+ return ".up_sample(sampler='default') \\n `pip install imbalanced-learn` to use default up-sampler."
458
+ else:
459
+ return ".up_sample(sampler='default')"
460
+ if recommendation_type == "positive_class" and action != "Do nothing":
461
+ return ".set_positive_class({}, missing_value=False)".format(
462
+ '"' + action + '"'
463
+ )
464
+ if recommendation_type == "imputation":
465
+ fill_val = helper.get_fill_val(
466
+ self.feature_types, variable, action, constant="constant"
467
+ )
468
+
469
+ fill_val = (
470
+ fill_val if isinstance(fill_val, Number) else '"' + fill_val + '"'
471
+ )
472
+ return ".fillna({}{}: {}{})".format(
473
+ "{", '"' + variable + '"', fill_val, "}"
474
+ )
475
+ else:
476
+ return ""
477
+
478
def suggest_recommendations(
    self,
    correlation_methods: Union[str, list] = "pearson",
    print_code: bool = True,
    correlation_threshold: float = 0.7,
    overwrite: bool = None,
    force_recompute: bool = False,
    frac: float = 1.0,
    sample_size: float = 1.0,
    **kwargs,
):
    """
    Build a pandas dataframe of suggested dataset optimizations. These cover:

    - Constant and primary key columns, which have no predictive quality,
    - Imputation, to fill in missing values in noisy data:

        - For continuous variables, fill with mean if less than 40% is missing, else drop,
        - For categorical variables, fill with most frequent if less than 40% is missing, else drop,

    - Strongly co-correlated columns that tend to produce less generalizable models,
    - Balancing the dataset for classification problems using up or down sampling.

    Parameters
    ----------
    correlation_methods: Union[list, str], default to 'pearson'

        - 'pearson': Use Pearson's Correlation between continuous features,
        - 'cramers v': Use Cramer's V correlations between categorical features,
        - 'correlation ratio': Use Correlation Ratio Correlation between categorical and continuous features,
        - 'all': Is equivalent to ['pearson', 'cramers v', 'correlation ratio'].

        Or a list containing any combination of these methods, for example, ['pearson', 'cramers v']
    print_code: bool, Defaults to True
        Log a consolidated summary of the suggested actions (columns to drop,
        columns to impute and any remaining code).
    correlation_threshold: float. Defaults to 0.7. It must be between 0 and 1, inclusive
        Columns whose correlation exceeds this threshold are considered
        strongly co-correlated and are recommended to be taken care of.
    frac: Is superseded by sample_size
    sample_size: float, defaults to 1.0. Float, Range -> (0, 1]
        What fraction of the data should be used in the calculation?
    overwrite:
        Is deprecated and replaced by force_recompute.
    force_recompute: bool, default to be False

        - If False, it calculates the correlation matrix if there is no cached correlation matrix. Otherwise,
          it returns the cached correlation matrix.
        - If True, it calculates the correlation matrix regardless whether there is cached result or not.

    Returns
    -------
    suggestion dataframe : pandas.DataFrame
        Indexed by message, variable, suggested action and action, with a
        "Code" column. Empty when there are no recommendations.

    Examples
    --------
    >>> suggestion_df = ds.suggest_recommendations(correlation_threshold=0.7)
    """
    # Reconcile the legacy ``frac`` / ``overwrite`` arguments with their
    # replacements before doing any work.
    frac = deprecate_default_value(
        frac,
        None,
        1,
        "<code>frac=None</code> is deprecated. Use <code>sample_size=1.0</code>.",
        FutureWarning,
    )
    if frac != 1.0:
        legacy_fraction = deprecate_variable(
            frac,
            sample_size,
            "<code>frac</code> is superseded by <code>sample_size</code>.",
            DeprecationWarning,
        )
        if sample_size == 1.0:
            sample_size = legacy_fraction

    force_recompute = deprecate_variable(
        overwrite,
        force_recompute,
        "<code>overwrite=None</code> is deprecated. Use <code>force_recompute</code> instead.",
        DeprecationWarning,
    )

    transformer = self._get_recommendations_transformer(
        auto_transform=False,
        correlation_threshold=correlation_threshold,
        correlation_methods=correlation_methods,
        force_recompute=force_recompute,
        frac=sample_size,
        **kwargs,
    )
    recommended = transformer.fit(self.sampled_df)

    if len(recommended.reco_dict_) == 0:
        logger.info("No recommendations.")
        return pd.DataFrame()

    ordered_columns = [
        "Message",
        "Variables",
        "Action",
        "Selected Action",
        "Recommendation Type",
    ]

    # Flatten the nested recommendation dictionary into column-wise lists.
    collected = defaultdict(list)
    for reco_type, per_column in recommended.reco_dict_.items():
        if reco_type == "constant_column":
            # Constant columns carry no per-column details: always "Drop".
            count = len(per_column)
            collected["Recommendation Type"].extend([reco_type] * count)
            collected["Variables"].extend(per_column)
            collected["Message"].extend(["Constant Column"] * count)
            collected["Action"].extend(["Drop"] * count)
            collected["Selected Action"].extend(["Drop"] * count)
            continue

        for column, details in per_column.items():
            n_actions = len(details["Action"])
            for field, value in details.items():
                # Scalars are repeated so every field has one entry per action.
                entries = value if isinstance(value, list) else [value] * n_actions
                collected[field].extend(entries)
            collected["Recommendation Type"].extend([reco_type] * n_actions)
            collected["Variables"].extend([column] * n_actions)

    suggestions_df = pd.DataFrame.from_dict(collected)[ordered_columns]

    def _row_to_code(row):
        return self._suggested_code(
            row["Action"], row["Recommendation Type"], row["Variables"]
        )

    suggestions_df["Code"] = suggestions_df.apply(_row_to_code, axis=1)

    without_type = suggestions_df.drop(columns=["Recommendation Type"])
    renamed = without_type.rename(columns={"Selected Action": "Suggested"})
    indexed = renamed.set_index(["Message", "Variables", "Suggested", "Action"])
    suggestion_df = indexed.fillna("")

    if not print_code:
        return suggestion_df

    # Summarize only the rows whose action is the suggested one.
    flat_df = suggestion_df.reset_index()
    chosen_rows = flat_df.loc[flat_df.Suggested == flat_df.Action]

    columns_to_drop = []
    columns_to_impute = {}
    other_snippets = []
    for code in chosen_rows.Code.unique():
        if ".drop_columns" in code:
            bracket_content = code.split("[")[1].split("]")[0]
            # Strip the surrounding double quotes from the column name.
            columns_to_drop.append(bracket_content[1:-1])
        elif ".fillna" in code:
            impute_pair = code.split("{")[1].split("}")[0]
            pair_parts = impute_pair.split(":")
            raw_value = pair_parts[1].strip()
            if raw_value.replace(".", "").isdigit():
                parsed_value = float(raw_value)
            else:
                parsed_value = raw_value.replace('"', "")
            columns_to_impute[pair_parts[0].replace('"', "")] = parsed_value
        else:
            other_snippets.append(code)

    consolidated_code = "".join(other_snippets)
    if len(consolidated_code) == 0:
        consolidated_code = "No more!"

    logger.info(f"Suggested columns to drop: {columns_to_drop}.")
    logger.info(f"Suggested columns to impute: {columns_to_impute}.")
    logger.info(f"Others: {consolidated_code}.")

    return suggestion_df
649
+
650
@runtime_dependency(module="IPython", install_from=OptionalDependency.NOTEBOOK)
def get_recommendations(
    self,
    correlation_methods: Union[str, list] = "pearson",
    correlation_threshold: float = 0.7,
    frac: float = 1.0,
    sample_size: float = 1.0,
    overwrite: bool = None,
    force_recompute: bool = False,
    display_format: str = "widget",
):
    """
    Generate recommendations for dataset optimization. This includes:

    - Identifying constant and primary key columns, which have no predictive quality,
    - Imputation, to fill in missing values in noisy data:

        - For continuous variables, fill with mean if less than 40% is missing, else drop,
        - For categorical variables, fill with most frequent if less than 40% is missing, else drop,

    - Identifying strongly co-correlated columns that tend to produce less generalizable models,
    - Automatically balancing dataset for classification problems using up or down sampling.

    Parameters
    ----------
    correlation_methods: Union[list, str], default to 'pearson'

        - 'pearson': Use Pearson's Correlation between continuous features,
        - 'cramers v': Use Cramer's V correlations between categorical features,
        - 'correlation ratio': Use Correlation Ratio Correlation between categorical and continuous features,
        - 'all': Is equivalent to ['pearson', 'cramers v', 'correlation ratio'].

        Or a list containing any combination of these methods, for example, ['pearson', 'cramers v'].
    correlation_threshold: float, defaults to 0.7. It must be between 0 and 1, inclusive
        The correlation threshold where columns with correlation higher than the threshold will
        be considered as strongly co-correlated and recommended to be taken care of.
    frac: Is superseded by sample_size
    sample_size: float, defaults to 1.0. Float, Range -> (0, 1]
        What fraction of the data should be used in the calculation?
    overwrite:
        Is deprecated and replaced by force_recompute.
    force_recompute: bool, default to be False

        - If False, it calculates the correlation matrix if there is no cached correlation matrix. Otherwise,
          it returns the cached correlation matrix.
        - If True, it calculates the correlation matrix regardless whether there is cached result or not.

    display_format: string, defaults to 'widget'.
        Should be either 'widget' or 'table'. If 'widget',
        a GUI style interface is popped out; if 'table', a table of suggestions is shown.

    Returns
    -------
    Recommendation or None
        The ``Recommendation`` object when ``display_format`` is 'widget'.
        For 'table' the suggestions are displayed and nothing is returned;
        any other value is ignored and nothing is returned.
    """
    frac = deprecate_default_value(
        frac,
        None,
        1,
        "<code>frac=None</code> is superseded by <code>sample_size=1.0</code>.",
        FutureWarning,
    )

    if frac != 1.0:
        deprecate_frac = deprecate_variable(
            frac,
            sample_size,
            "<code>frac</code> is superseded by <code>sample_size</code>.",
            DeprecationWarning,
        )
        if sample_size == 1.0:
            sample_size = deprecate_frac

    force_recompute = deprecate_variable(
        overwrite,
        force_recompute,
        "<code>overwrite=None</code> is deprecated. Use <code>force_recompute</code> instead.",
        DeprecationWarning,
    )

    if display_format == "widget":
        recommended = Recommendation(
            self,
            self._get_recommendations_transformer(
                auto_transform=False,
                correlation_threshold=correlation_threshold,
                correlation_methods=correlation_methods,
                frac=sample_size,
                force_recompute=force_recompute,
            ).fit(self.sampled_df),
        )

        if len(recommended.reco_dict) == 0:
            logger.info("No recommendations.")

        return recommended

    elif display_format == "table":
        # Forward the caller's correlation methods (previously dropped, so
        # 'table' always used the default), and hand the fraction over as
        # ``sample_size`` so the already-resolved value does not re-enter
        # the deprecated ``frac`` path of ``suggest_recommendations``.
        df_suggestion = self.suggest_recommendations(
            correlation_methods=correlation_methods,
            correlation_threshold=correlation_threshold,
            sample_size=sample_size,
            force_recompute=force_recompute,
        )

        from IPython.display import HTML, display

        # Turn the inline markers emitted by ``_suggested_code`` into HTML.
        display(
            HTML(
                df_suggestion.to_html()
                .replace(" `", "<code>")
                .replace("` ", "</code>")
                .replace("\\n", "<br>")
            )
        )
760
+
761
def get_transformed_dataset(self):
    """
    Return the transformed dataset with the recommendations applied.

    This method should be called after applying the recommendations using the
    Recommendation#show_in_notebook() API.

    Returns
    -------
    The dataset stored in ``self.new_ds`` when that attribute exists.
    Otherwise a usage hint and a deprecation warning are logged and ``None``
    is returned.
    """
    if hasattr(self, "new_ds"):
        return self.new_ds
    # The hint previously misspelled the method name as `auto_tranform()`.
    logger.info(
        "Use `get_recommendations()` to view or update recommendation or `auto_transform()` first."
    )
    logger.warning(
        "`get_transformed_dataset` is deprecated and will be removed in a future release."
    )
    return None
776
+
777
def type_of_target(self):
    """
    Report the type of the dataset's target column.

    Returns
    -------
    target_type: TypedFeature
        The ``type`` attribute of the dataset's target.

    Examples
    --------
    >>> ds = ds.set_target('target_class')
    >>> assert(ds.type_of_target() == 'categorical')
    """
    target = self.target
    return target.type
792
+
793
def train_test_split(self, test_size=0.1, random_state=utils.random_state):
    """
    Splits dataset to train and test data.

    Parameters
    ----------
    test_size: Union[float, int], optional, default=0.1
        Forwarded to the underlying ``train_test_split`` call; the train
        portion is the complement of the test portion.
    random_state: Union[int, RandomState], optional, default=utils.random_state

        - If int, random_state is the seed used by the random number generator;
        - If RandomState instance, random_state is the random number generator;
        - If None, the random number generator is the RandomState instance used by np.random.

    Returns
    -------
    train_data, test_data: tuple
        tuple of ADSData instances. Only the train instance receives this
        dataset's transformer pipeline.

    Examples
    --------
    >>> ds = DatasetFactory.open("data.csv")
    >>> train, test = ds.train_test_split()
    """
    target_name = self.target.name
    # ``train_size`` is intentionally left unset so that it is derived as the
    # complement of ``test_size``. Passing ``1 - test_size`` produced a
    # negative size whenever ``test_size`` was given as an absolute row count.
    X_train, X_test, y_train, y_test = train_test_split(
        self.df.drop(target_name, axis=1),
        self.df[target_name],
        test_size=test_size,
        random_state=random_state,
    )
    train = ADSData.build(
        X=X_train, y=y_train, name="Train Data", dataset_type=self.__class__
    )
    train.transformer_pipeline = self.transformer_pipeline
    test = ADSData.build(
        X=X_test, y=y_test, name="Test Data", dataset_type=self.__class__
    )
    return train, test
831
+
832
def train_validation_test_split(
    self, test_size=0.1, validation_size=0.1, random_state=utils.random_state
):
    """
    Splits dataset to train, validation and test data.

    The test portion is carved out first via ``self.train_test_split``; the
    validation portion is then taken from the remaining train data.

    Parameters
    ----------
    test_size: Union[float, int], optional, default=0.1
    validation_size: Union[float, int], optional, default=0.1
        Applied to the train data that remains after the test split.
    random_state: Union[int, RandomState], optional, default=utils.random_state

        - If int, random_state is the seed used by the random number generator;
        - If RandomState instance, random_state is the random number generator;
        - If None, the random number generator is the RandomState instance used by np.random.

    Returns
    -------
    train_data, validation_data, test_data: tuple
        tuple of ADSData instances

    Examples
    --------
    >>> ds = DatasetFactory.open("data.csv")
    >>> train, valid, test = ds.train_validation_test_split()
    """
    train, test = self.train_test_split(
        test_size=test_size, random_state=random_state
    )
    second_split = train_test_split(
        train.X, train.y, test_size=validation_size, random_state=random_state
    )
    X_train, X_valid, y_train, y_valid = second_split
    # Shrink the train data in place to what is left after validation.
    train.X, train.y = X_train, y_train
    valid = ADSData.build(
        X=X_valid, y=y_valid, name="Validation Data", dataset_type=self.__class__
    )
    return train, valid, test
870
+
871
+ """
872
+ Internal methods
873
+ """
874
+
875
def __repr__(self):
    """Summarize the dataset as class name, target name and shape."""
    n_rows, n_cols = self.shape
    class_name = self.__class__.__name__
    target_name = self.target.name
    return f"{class_name}(target: {target_name}) {n_rows:,} rows, {n_cols:,} columns"
878
+
879
def _transform(
    self,
    progress=DummyProgressBar(),
    fix_imbalance=True,
    correlation_threshold=0.7,
    frac=None,
    correlation_methods="pearson",
):
    """
    Build the transformer pipeline and apply it to the data.

    When no recommendation transformer has been stored on the dataset, one is
    created from the given options; otherwise the stored one is reused and
    then cleared from the dataset. In both cases a feature engineering step
    follows the recommendation step.

    Returns
    -------
    tuple
        ``(df, sampled_df, transformer_pipeline)``: the transformed full
        data, the transformed sampled data and the pipeline that was applied.
    """
    progress.update("Building the transformer pipeline")
    if self.recommendation_transformer is not None:
        # recommendations are already generated using get_recommendations().show_in_notebook() API
        transformer_pipeline = TransformerPipeline(
            steps=[
                (
                    "feature_engineering",
                    FeatureEngineeringTransformer(
                        feature_metadata=self.feature_types
                    ),
                )
            ]
        )
        stored_step = ("recommendations", self.recommendation_transformer)
        transformer_pipeline.steps = [stored_step] + transformer_pipeline.steps
    else:
        recommendation_step = (
            "recommendations",
            self._get_recommendations_transformer(
                fix_imbalance=fix_imbalance,
                correlation_threshold=correlation_threshold,
                frac=frac,
                correlation_methods=correlation_methods,
            ),
        )
        feature_step = (
            "feature_engineering",
            FeatureEngineeringTransformer(feature_metadata=self.feature_types),
        )
        transformer_pipeline = TransformerPipeline(
            steps=[recommendation_step, feature_step]
        )

    sampled_df = self.sampled_df.copy()
    # The stored transformer is consumed by this call.
    self.recommendation_transformer = None
    df = self.df.copy()
    for step in transformer_pipeline.steps:
        step_name, transformer = step[0], step[1]
        progress.update("Applying transformation for %s" % step_name)
        # Fit on the sample first, then transform the full data.
        sampled_df = transformer.fit_transform(sampled_df)
        df = transformer.transform(df, progress=progress, fit_transform=True)
    return df, sampled_df, transformer_pipeline
931
+
932
def _get_best_features(self, score_func=None, k=12):
    """
    Rank the features of the sampled data against the target.

    Parameters
    ----------
    score_func: optional
        Forwarded unchanged to ``FeatureImportance._get_feature_ranking``.
    k: int, default 12
        Forwarded unchanged to ``FeatureImportance._get_feature_ranking``.

    Returns
    -------
    The result of ``FeatureImportance._get_feature_ranking`` computed on a
    copy of the sampled dataframe.
    """
    # The former DateTimeTypedFeature check selected between two identical
    # calls, so a single call covers every target type.
    return FeatureImportance._get_feature_ranking(
        self.sampled_df.copy(),
        self.target.name,
        self.type_of_target(),
        score_func=score_func,
        k=k,
    )
949
+
950
def _get_best_features_transformer(self, score_func=None, k=12):
    """
    Create a named, fitted transformer that keeps only the best features.

    Returns
    -------
    tuple
        ``(step_name, transformer)`` where the transformer is a
        ``FunctionTransformer`` fitted on the sampled data. It selects the
        ranked features and also keeps the target column when present.
    """
    ranking = self._get_best_features(k=k, score_func=score_func)
    feature_set = ranking["features"].tolist()

    def _select_features(df, feature_names, target):
        # Keep the target alongside the selected features when the frame has it.
        selected = feature_names + [target] if target in df.columns else feature_names
        return df[selected]

    step_name = "select_{0}_best_features".format(k)
    selector = FunctionTransformer(
        func=_select_features,
        validate=False,
        kw_args={"feature_names": feature_set, "target": self.target.name},
    )
    return step_name, selector.fit(self.sampled_df)
968
+
969
def _get_recommendations_transformer(
    self,
    fix_imbalance=True,
    auto_transform=True,
    correlation_threshold=0.7,
    **kwargs,
):
    """
    Assemble a ``RecommendationTransformer`` configured for this dataset.

    ``force_recompute``, ``frac`` and ``correlation_methods`` are taken out of
    ``kwargs`` (defaulting to ``False``, ``1`` and ``"pearson"``); whatever
    remains in ``kwargs`` is forwarded to ``self.corr``.
    """
    force_recompute = kwargs.pop("force_recompute", False)
    frac = kwargs.pop("frac", 1)
    correlation_methods = kwargs.pop("correlation_methods", "pearson")

    # Values are gathered in the same order as the constructor arguments.
    settings = {
        "feature_metadata": self.feature_types,
        "correlation": self.corr(
            force_recompute=force_recompute,
            frac=frac,
            correlation_methods=correlation_methods,
            **kwargs,
        ),
        "target": self.target.name,
        "target_type": self.target.type,
        "is_balanced": self.target.is_balanced(),
        "feature_ranking": self._get_best_features(k=len(self.sampled_df)),
        "fix_imbalance": fix_imbalance,
        "len": self.__len__(),
        "auto_transform": auto_transform,
        "correlation_threshold": correlation_threshold,
    }
    return RecommendationTransformer(**settings)