oracle-ads 2.13.9rc0__py3-none-any.whl → 2.13.10rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (858) hide show
  1. ads/aqua/__init__.py +40 -0
  2. ads/aqua/app.py +507 -0
  3. ads/aqua/cli.py +96 -0
  4. ads/aqua/client/__init__.py +3 -0
  5. ads/aqua/client/client.py +836 -0
  6. ads/aqua/client/openai_client.py +305 -0
  7. ads/aqua/common/__init__.py +5 -0
  8. ads/aqua/common/decorator.py +125 -0
  9. ads/aqua/common/entities.py +274 -0
  10. ads/aqua/common/enums.py +134 -0
  11. ads/aqua/common/errors.py +109 -0
  12. ads/aqua/common/utils.py +1295 -0
  13. ads/aqua/config/__init__.py +4 -0
  14. ads/aqua/config/container_config.py +247 -0
  15. ads/aqua/config/evaluation/__init__.py +4 -0
  16. ads/aqua/config/evaluation/evaluation_service_config.py +147 -0
  17. ads/aqua/config/utils/__init__.py +4 -0
  18. ads/aqua/config/utils/serializer.py +339 -0
  19. ads/aqua/constants.py +116 -0
  20. ads/aqua/data.py +14 -0
  21. ads/aqua/dummy_data/icon.txt +1 -0
  22. ads/aqua/dummy_data/oci_model_deployments.json +56 -0
  23. ads/aqua/dummy_data/oci_models.json +1 -0
  24. ads/aqua/dummy_data/readme.md +26 -0
  25. ads/aqua/evaluation/__init__.py +8 -0
  26. ads/aqua/evaluation/constants.py +53 -0
  27. ads/aqua/evaluation/entities.py +186 -0
  28. ads/aqua/evaluation/errors.py +70 -0
  29. ads/aqua/evaluation/evaluation.py +1814 -0
  30. ads/aqua/extension/__init__.py +42 -0
  31. ads/aqua/extension/aqua_ws_msg_handler.py +76 -0
  32. ads/aqua/extension/base_handler.py +90 -0
  33. ads/aqua/extension/common_handler.py +121 -0
  34. ads/aqua/extension/common_ws_msg_handler.py +36 -0
  35. ads/aqua/extension/deployment_handler.py +381 -0
  36. ads/aqua/extension/deployment_ws_msg_handler.py +54 -0
  37. ads/aqua/extension/errors.py +30 -0
  38. ads/aqua/extension/evaluation_handler.py +129 -0
  39. ads/aqua/extension/evaluation_ws_msg_handler.py +61 -0
  40. ads/aqua/extension/finetune_handler.py +96 -0
  41. ads/aqua/extension/model_handler.py +390 -0
  42. ads/aqua/extension/models/__init__.py +0 -0
  43. ads/aqua/extension/models/ws_models.py +145 -0
  44. ads/aqua/extension/models_ws_msg_handler.py +50 -0
  45. ads/aqua/extension/ui_handler.py +300 -0
  46. ads/aqua/extension/ui_websocket_handler.py +130 -0
  47. ads/aqua/extension/utils.py +133 -0
  48. ads/aqua/finetuning/__init__.py +7 -0
  49. ads/aqua/finetuning/constants.py +23 -0
  50. ads/aqua/finetuning/entities.py +181 -0
  51. ads/aqua/finetuning/finetuning.py +749 -0
  52. ads/aqua/model/__init__.py +8 -0
  53. ads/aqua/model/constants.py +60 -0
  54. ads/aqua/model/entities.py +385 -0
  55. ads/aqua/model/enums.py +32 -0
  56. ads/aqua/model/model.py +2134 -0
  57. ads/aqua/model/utils.py +52 -0
  58. ads/aqua/modeldeployment/__init__.py +6 -0
  59. ads/aqua/modeldeployment/constants.py +10 -0
  60. ads/aqua/modeldeployment/deployment.py +1315 -0
  61. ads/aqua/modeldeployment/entities.py +653 -0
  62. ads/aqua/modeldeployment/utils.py +543 -0
  63. ads/aqua/resources/gpu_shapes_index.json +94 -0
  64. ads/aqua/server/__init__.py +4 -0
  65. ads/aqua/server/__main__.py +24 -0
  66. ads/aqua/server/app.py +47 -0
  67. ads/aqua/server/aqua_spec.yml +1291 -0
  68. ads/aqua/training/__init__.py +4 -0
  69. ads/aqua/training/exceptions.py +476 -0
  70. ads/aqua/ui.py +519 -0
  71. ads/automl/__init__.py +9 -0
  72. ads/automl/driver.py +330 -0
  73. ads/automl/provider.py +975 -0
  74. ads/bds/__init__.py +5 -0
  75. ads/bds/auth.py +127 -0
  76. ads/bds/big_data_service.py +255 -0
  77. ads/catalog/__init__.py +19 -0
  78. ads/catalog/model.py +1576 -0
  79. ads/catalog/notebook.py +461 -0
  80. ads/catalog/project.py +468 -0
  81. ads/catalog/summary.py +178 -0
  82. ads/common/__init__.py +11 -0
  83. ads/common/analyzer.py +65 -0
  84. ads/common/artifact/.model-ignore +63 -0
  85. ads/common/artifact/__init__.py +10 -0
  86. ads/common/auth.py +1122 -0
  87. ads/common/card_identifier.py +83 -0
  88. ads/common/config.py +647 -0
  89. ads/common/data.py +165 -0
  90. ads/common/decorator/__init__.py +9 -0
  91. ads/common/decorator/argument_to_case.py +88 -0
  92. ads/common/decorator/deprecate.py +69 -0
  93. ads/common/decorator/require_nonempty_arg.py +65 -0
  94. ads/common/decorator/runtime_dependency.py +178 -0
  95. ads/common/decorator/threaded.py +97 -0
  96. ads/common/decorator/utils.py +35 -0
  97. ads/common/dsc_file_system.py +303 -0
  98. ads/common/error.py +14 -0
  99. ads/common/extended_enum.py +81 -0
  100. ads/common/function/__init__.py +5 -0
  101. ads/common/function/fn_util.py +142 -0
  102. ads/common/function/func_conf.yaml +25 -0
  103. ads/common/ipython.py +76 -0
  104. ads/common/model.py +679 -0
  105. ads/common/model_artifact.py +1759 -0
  106. ads/common/model_artifact_schema.json +107 -0
  107. ads/common/model_export_util.py +664 -0
  108. ads/common/model_metadata.py +24 -0
  109. ads/common/object_storage_details.py +296 -0
  110. ads/common/oci_client.py +179 -0
  111. ads/common/oci_datascience.py +46 -0
  112. ads/common/oci_logging.py +1144 -0
  113. ads/common/oci_mixin.py +957 -0
  114. ads/common/oci_resource.py +136 -0
  115. ads/common/serializer.py +559 -0
  116. ads/common/utils.py +1852 -0
  117. ads/common/word_lists.py +1491 -0
  118. ads/common/work_request.py +189 -0
  119. ads/config.py +1 -0
  120. ads/data_labeling/__init__.py +13 -0
  121. ads/data_labeling/boundingbox.py +253 -0
  122. ads/data_labeling/constants.py +47 -0
  123. ads/data_labeling/data_labeling_service.py +244 -0
  124. ads/data_labeling/interface/__init__.py +5 -0
  125. ads/data_labeling/interface/loader.py +16 -0
  126. ads/data_labeling/interface/parser.py +16 -0
  127. ads/data_labeling/interface/reader.py +23 -0
  128. ads/data_labeling/loader/__init__.py +5 -0
  129. ads/data_labeling/loader/file_loader.py +241 -0
  130. ads/data_labeling/metadata.py +110 -0
  131. ads/data_labeling/mixin/__init__.py +5 -0
  132. ads/data_labeling/mixin/data_labeling.py +232 -0
  133. ads/data_labeling/ner.py +129 -0
  134. ads/data_labeling/parser/__init__.py +5 -0
  135. ads/data_labeling/parser/dls_record_parser.py +388 -0
  136. ads/data_labeling/parser/export_metadata_parser.py +94 -0
  137. ads/data_labeling/parser/export_record_parser.py +473 -0
  138. ads/data_labeling/reader/__init__.py +5 -0
  139. ads/data_labeling/reader/dataset_reader.py +574 -0
  140. ads/data_labeling/reader/dls_record_reader.py +121 -0
  141. ads/data_labeling/reader/export_record_reader.py +62 -0
  142. ads/data_labeling/reader/jsonl_reader.py +75 -0
  143. ads/data_labeling/reader/metadata_reader.py +203 -0
  144. ads/data_labeling/reader/record_reader.py +263 -0
  145. ads/data_labeling/record.py +52 -0
  146. ads/data_labeling/visualizer/__init__.py +5 -0
  147. ads/data_labeling/visualizer/image_visualizer.py +525 -0
  148. ads/data_labeling/visualizer/text_visualizer.py +357 -0
  149. ads/database/__init__.py +5 -0
  150. ads/database/connection.py +338 -0
  151. ads/dataset/__init__.py +10 -0
  152. ads/dataset/capabilities.md +51 -0
  153. ads/dataset/classification_dataset.py +339 -0
  154. ads/dataset/correlation.py +226 -0
  155. ads/dataset/correlation_plot.py +563 -0
  156. ads/dataset/dask_series.py +173 -0
  157. ads/dataset/dataframe_transformer.py +110 -0
  158. ads/dataset/dataset.py +1979 -0
  159. ads/dataset/dataset_browser.py +360 -0
  160. ads/dataset/dataset_with_target.py +995 -0
  161. ads/dataset/exception.py +25 -0
  162. ads/dataset/factory.py +987 -0
  163. ads/dataset/feature_engineering_transformer.py +35 -0
  164. ads/dataset/feature_selection.py +107 -0
  165. ads/dataset/forecasting_dataset.py +26 -0
  166. ads/dataset/helper.py +1450 -0
  167. ads/dataset/label_encoder.py +99 -0
  168. ads/dataset/mixin/__init__.py +5 -0
  169. ads/dataset/mixin/dataset_accessor.py +134 -0
  170. ads/dataset/pipeline.py +58 -0
  171. ads/dataset/plot.py +710 -0
  172. ads/dataset/progress.py +86 -0
  173. ads/dataset/recommendation.py +297 -0
  174. ads/dataset/recommendation_transformer.py +502 -0
  175. ads/dataset/regression_dataset.py +14 -0
  176. ads/dataset/sampled_dataset.py +1050 -0
  177. ads/dataset/target.py +98 -0
  178. ads/dataset/timeseries.py +18 -0
  179. ads/dbmixin/__init__.py +5 -0
  180. ads/dbmixin/db_pandas_accessor.py +153 -0
  181. ads/environment/__init__.py +9 -0
  182. ads/environment/ml_runtime.py +66 -0
  183. ads/evaluations/README.md +14 -0
  184. ads/evaluations/__init__.py +109 -0
  185. ads/evaluations/evaluation_plot.py +983 -0
  186. ads/evaluations/evaluator.py +1334 -0
  187. ads/evaluations/statistical_metrics.py +543 -0
  188. ads/experiments/__init__.py +9 -0
  189. ads/experiments/capabilities.md +0 -0
  190. ads/explanations/__init__.py +21 -0
  191. ads/explanations/base_explainer.py +142 -0
  192. ads/explanations/capabilities.md +83 -0
  193. ads/explanations/explainer.py +190 -0
  194. ads/explanations/mlx_global_explainer.py +1050 -0
  195. ads/explanations/mlx_interface.py +386 -0
  196. ads/explanations/mlx_local_explainer.py +287 -0
  197. ads/explanations/mlx_whatif_explainer.py +201 -0
  198. ads/feature_engineering/__init__.py +20 -0
  199. ads/feature_engineering/accessor/__init__.py +5 -0
  200. ads/feature_engineering/accessor/dataframe_accessor.py +535 -0
  201. ads/feature_engineering/accessor/mixin/__init__.py +5 -0
  202. ads/feature_engineering/accessor/mixin/correlation.py +166 -0
  203. ads/feature_engineering/accessor/mixin/eda_mixin.py +266 -0
  204. ads/feature_engineering/accessor/mixin/eda_mixin_series.py +85 -0
  205. ads/feature_engineering/accessor/mixin/feature_types_mixin.py +211 -0
  206. ads/feature_engineering/accessor/mixin/utils.py +65 -0
  207. ads/feature_engineering/accessor/series_accessor.py +431 -0
  208. ads/feature_engineering/adsimage/__init__.py +5 -0
  209. ads/feature_engineering/adsimage/image.py +192 -0
  210. ads/feature_engineering/adsimage/image_reader.py +170 -0
  211. ads/feature_engineering/adsimage/interface/__init__.py +5 -0
  212. ads/feature_engineering/adsimage/interface/reader.py +19 -0
  213. ads/feature_engineering/adsstring/__init__.py +7 -0
  214. ads/feature_engineering/adsstring/oci_language/__init__.py +8 -0
  215. ads/feature_engineering/adsstring/string/__init__.py +8 -0
  216. ads/feature_engineering/data_schema.json +57 -0
  217. ads/feature_engineering/dataset/__init__.py +5 -0
  218. ads/feature_engineering/dataset/zip_code_data.py +42062 -0
  219. ads/feature_engineering/exceptions.py +40 -0
  220. ads/feature_engineering/feature_type/__init__.py +133 -0
  221. ads/feature_engineering/feature_type/address.py +184 -0
  222. ads/feature_engineering/feature_type/adsstring/__init__.py +5 -0
  223. ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +164 -0
  224. ads/feature_engineering/feature_type/adsstring/oci_language.py +93 -0
  225. ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +5 -0
  226. ads/feature_engineering/feature_type/adsstring/parsers/base.py +47 -0
  227. ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +96 -0
  228. ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +221 -0
  229. ads/feature_engineering/feature_type/adsstring/string.py +258 -0
  230. ads/feature_engineering/feature_type/base.py +58 -0
  231. ads/feature_engineering/feature_type/boolean.py +183 -0
  232. ads/feature_engineering/feature_type/category.py +146 -0
  233. ads/feature_engineering/feature_type/constant.py +137 -0
  234. ads/feature_engineering/feature_type/continuous.py +151 -0
  235. ads/feature_engineering/feature_type/creditcard.py +314 -0
  236. ads/feature_engineering/feature_type/datetime.py +190 -0
  237. ads/feature_engineering/feature_type/discrete.py +134 -0
  238. ads/feature_engineering/feature_type/document.py +43 -0
  239. ads/feature_engineering/feature_type/gis.py +251 -0
  240. ads/feature_engineering/feature_type/handler/__init__.py +5 -0
  241. ads/feature_engineering/feature_type/handler/feature_validator.py +524 -0
  242. ads/feature_engineering/feature_type/handler/feature_warning.py +319 -0
  243. ads/feature_engineering/feature_type/handler/warnings.py +128 -0
  244. ads/feature_engineering/feature_type/integer.py +142 -0
  245. ads/feature_engineering/feature_type/ip_address.py +144 -0
  246. ads/feature_engineering/feature_type/ip_address_v4.py +138 -0
  247. ads/feature_engineering/feature_type/ip_address_v6.py +138 -0
  248. ads/feature_engineering/feature_type/lat_long.py +256 -0
  249. ads/feature_engineering/feature_type/object.py +43 -0
  250. ads/feature_engineering/feature_type/ordinal.py +132 -0
  251. ads/feature_engineering/feature_type/phone_number.py +135 -0
  252. ads/feature_engineering/feature_type/string.py +171 -0
  253. ads/feature_engineering/feature_type/text.py +93 -0
  254. ads/feature_engineering/feature_type/unknown.py +43 -0
  255. ads/feature_engineering/feature_type/zip_code.py +164 -0
  256. ads/feature_engineering/feature_type_manager.py +406 -0
  257. ads/feature_engineering/schema.py +795 -0
  258. ads/feature_engineering/utils.py +245 -0
  259. ads/feature_store/.readthedocs.yaml +19 -0
  260. ads/feature_store/README.md +65 -0
  261. ads/feature_store/__init__.py +9 -0
  262. ads/feature_store/common/__init__.py +0 -0
  263. ads/feature_store/common/enums.py +339 -0
  264. ads/feature_store/common/exceptions.py +18 -0
  265. ads/feature_store/common/spark_session_singleton.py +125 -0
  266. ads/feature_store/common/utils/__init__.py +0 -0
  267. ads/feature_store/common/utils/base64_encoder_decoder.py +72 -0
  268. ads/feature_store/common/utils/feature_schema_mapper.py +283 -0
  269. ads/feature_store/common/utils/transformation_utils.py +82 -0
  270. ads/feature_store/common/utils/utility.py +403 -0
  271. ads/feature_store/data_validation/__init__.py +0 -0
  272. ads/feature_store/data_validation/great_expectation.py +129 -0
  273. ads/feature_store/dataset.py +1230 -0
  274. ads/feature_store/dataset_job.py +530 -0
  275. ads/feature_store/docs/Dockerfile +7 -0
  276. ads/feature_store/docs/Makefile +44 -0
  277. ads/feature_store/docs/conf.py +28 -0
  278. ads/feature_store/docs/requirements.txt +14 -0
  279. ads/feature_store/docs/source/ads.feature_store.query.rst +20 -0
  280. ads/feature_store/docs/source/cicd.rst +137 -0
  281. ads/feature_store/docs/source/conf.py +86 -0
  282. ads/feature_store/docs/source/data_versioning.rst +33 -0
  283. ads/feature_store/docs/source/dataset.rst +388 -0
  284. ads/feature_store/docs/source/dataset_job.rst +27 -0
  285. ads/feature_store/docs/source/demo.rst +70 -0
  286. ads/feature_store/docs/source/entity.rst +78 -0
  287. ads/feature_store/docs/source/feature_group.rst +624 -0
  288. ads/feature_store/docs/source/feature_group_job.rst +29 -0
  289. ads/feature_store/docs/source/feature_store.rst +122 -0
  290. ads/feature_store/docs/source/feature_store_class.rst +123 -0
  291. ads/feature_store/docs/source/feature_validation.rst +66 -0
  292. ads/feature_store/docs/source/figures/cicd.png +0 -0
  293. ads/feature_store/docs/source/figures/data_validation.png +0 -0
  294. ads/feature_store/docs/source/figures/data_versioning.png +0 -0
  295. ads/feature_store/docs/source/figures/dataset.gif +0 -0
  296. ads/feature_store/docs/source/figures/dataset.png +0 -0
  297. ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
  298. ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
  299. ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
  300. ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
  301. ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
  302. ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
  303. ads/feature_store/docs/source/figures/entity.png +0 -0
  304. ads/feature_store/docs/source/figures/feature_group.png +0 -0
  305. ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
  306. ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
  307. ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
  308. ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
  309. ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
  310. ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
  311. ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
  312. ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
  313. ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
  314. ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
  315. ads/feature_store/docs/source/figures/overview.png +0 -0
  316. ads/feature_store/docs/source/figures/resource_manager.png +0 -0
  317. ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
  318. ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
  319. ads/feature_store/docs/source/figures/stats_1.png +0 -0
  320. ads/feature_store/docs/source/figures/stats_2.png +0 -0
  321. ads/feature_store/docs/source/figures/stats_d.png +0 -0
  322. ads/feature_store/docs/source/figures/stats_fg.png +0 -0
  323. ads/feature_store/docs/source/figures/transformation.png +0 -0
  324. ads/feature_store/docs/source/figures/transformations.gif +0 -0
  325. ads/feature_store/docs/source/figures/validation.png +0 -0
  326. ads/feature_store/docs/source/figures/validation_fg.png +0 -0
  327. ads/feature_store/docs/source/figures/validation_results.png +0 -0
  328. ads/feature_store/docs/source/figures/validation_summary.png +0 -0
  329. ads/feature_store/docs/source/index.rst +81 -0
  330. ads/feature_store/docs/source/module.rst +8 -0
  331. ads/feature_store/docs/source/notebook.rst +94 -0
  332. ads/feature_store/docs/source/overview.rst +47 -0
  333. ads/feature_store/docs/source/quickstart.rst +176 -0
  334. ads/feature_store/docs/source/release_notes.rst +194 -0
  335. ads/feature_store/docs/source/setup_feature_store.rst +81 -0
  336. ads/feature_store/docs/source/statistics.rst +58 -0
  337. ads/feature_store/docs/source/transformation.rst +199 -0
  338. ads/feature_store/docs/source/ui.rst +65 -0
  339. ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +66 -0
  340. ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +192 -0
  341. ads/feature_store/docs/source/user_guides.setup.terraform.rst +338 -0
  342. ads/feature_store/entity.py +718 -0
  343. ads/feature_store/execution_strategy/__init__.py +0 -0
  344. ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
  345. ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +375 -0
  346. ads/feature_store/execution_strategy/engine/__init__.py +0 -0
  347. ads/feature_store/execution_strategy/engine/spark_engine.py +316 -0
  348. ads/feature_store/execution_strategy/execution_strategy.py +113 -0
  349. ads/feature_store/execution_strategy/execution_strategy_provider.py +47 -0
  350. ads/feature_store/execution_strategy/spark/__init__.py +0 -0
  351. ads/feature_store/execution_strategy/spark/spark_execution.py +618 -0
  352. ads/feature_store/feature.py +192 -0
  353. ads/feature_store/feature_group.py +1494 -0
  354. ads/feature_store/feature_group_expectation.py +346 -0
  355. ads/feature_store/feature_group_job.py +602 -0
  356. ads/feature_store/feature_lineage/__init__.py +0 -0
  357. ads/feature_store/feature_lineage/graphviz_service.py +180 -0
  358. ads/feature_store/feature_option_details.py +50 -0
  359. ads/feature_store/feature_statistics/__init__.py +0 -0
  360. ads/feature_store/feature_statistics/statistics_service.py +99 -0
  361. ads/feature_store/feature_store.py +699 -0
  362. ads/feature_store/feature_store_registrar.py +518 -0
  363. ads/feature_store/input_feature_detail.py +149 -0
  364. ads/feature_store/mixin/__init__.py +4 -0
  365. ads/feature_store/mixin/oci_feature_store.py +145 -0
  366. ads/feature_store/model_details.py +73 -0
  367. ads/feature_store/query/__init__.py +0 -0
  368. ads/feature_store/query/filter.py +266 -0
  369. ads/feature_store/query/generator/__init__.py +0 -0
  370. ads/feature_store/query/generator/query_generator.py +298 -0
  371. ads/feature_store/query/join.py +161 -0
  372. ads/feature_store/query/query.py +403 -0
  373. ads/feature_store/query/validator/__init__.py +0 -0
  374. ads/feature_store/query/validator/query_validator.py +57 -0
  375. ads/feature_store/response/__init__.py +0 -0
  376. ads/feature_store/response/response_builder.py +68 -0
  377. ads/feature_store/service/__init__.py +0 -0
  378. ads/feature_store/service/oci_dataset.py +139 -0
  379. ads/feature_store/service/oci_dataset_job.py +199 -0
  380. ads/feature_store/service/oci_entity.py +125 -0
  381. ads/feature_store/service/oci_feature_group.py +164 -0
  382. ads/feature_store/service/oci_feature_group_job.py +214 -0
  383. ads/feature_store/service/oci_feature_store.py +182 -0
  384. ads/feature_store/service/oci_lineage.py +87 -0
  385. ads/feature_store/service/oci_transformation.py +104 -0
  386. ads/feature_store/statistics/__init__.py +0 -0
  387. ads/feature_store/statistics/abs_feature_value.py +49 -0
  388. ads/feature_store/statistics/charts/__init__.py +0 -0
  389. ads/feature_store/statistics/charts/abstract_feature_plot.py +37 -0
  390. ads/feature_store/statistics/charts/box_plot.py +148 -0
  391. ads/feature_store/statistics/charts/frequency_distribution.py +65 -0
  392. ads/feature_store/statistics/charts/probability_distribution.py +68 -0
  393. ads/feature_store/statistics/charts/top_k_frequent_elements.py +98 -0
  394. ads/feature_store/statistics/feature_stat.py +126 -0
  395. ads/feature_store/statistics/generic_feature_value.py +33 -0
  396. ads/feature_store/statistics/statistics.py +41 -0
  397. ads/feature_store/statistics_config.py +101 -0
  398. ads/feature_store/templates/feature_store_template.yaml +45 -0
  399. ads/feature_store/transformation.py +499 -0
  400. ads/feature_store/validation_output.py +57 -0
  401. ads/hpo/__init__.py +9 -0
  402. ads/hpo/_imports.py +91 -0
  403. ads/hpo/ads_search_space.py +439 -0
  404. ads/hpo/distributions.py +325 -0
  405. ads/hpo/objective.py +280 -0
  406. ads/hpo/search_cv.py +1657 -0
  407. ads/hpo/stopping_criterion.py +75 -0
  408. ads/hpo/tuner_artifact.py +413 -0
  409. ads/hpo/utils.py +91 -0
  410. ads/hpo/validation.py +140 -0
  411. ads/hpo/visualization/__init__.py +5 -0
  412. ads/hpo/visualization/_contour.py +23 -0
  413. ads/hpo/visualization/_edf.py +20 -0
  414. ads/hpo/visualization/_intermediate_values.py +21 -0
  415. ads/hpo/visualization/_optimization_history.py +25 -0
  416. ads/hpo/visualization/_parallel_coordinate.py +169 -0
  417. ads/hpo/visualization/_param_importances.py +26 -0
  418. ads/jobs/__init__.py +53 -0
  419. ads/jobs/ads_job.py +663 -0
  420. ads/jobs/builders/__init__.py +5 -0
  421. ads/jobs/builders/base.py +156 -0
  422. ads/jobs/builders/infrastructure/__init__.py +6 -0
  423. ads/jobs/builders/infrastructure/base.py +165 -0
  424. ads/jobs/builders/infrastructure/dataflow.py +1252 -0
  425. ads/jobs/builders/infrastructure/dsc_job.py +1894 -0
  426. ads/jobs/builders/infrastructure/dsc_job_runtime.py +1233 -0
  427. ads/jobs/builders/infrastructure/utils.py +65 -0
  428. ads/jobs/builders/runtimes/__init__.py +5 -0
  429. ads/jobs/builders/runtimes/artifact.py +338 -0
  430. ads/jobs/builders/runtimes/base.py +325 -0
  431. ads/jobs/builders/runtimes/container_runtime.py +242 -0
  432. ads/jobs/builders/runtimes/python_runtime.py +1016 -0
  433. ads/jobs/builders/runtimes/pytorch_runtime.py +204 -0
  434. ads/jobs/cli.py +104 -0
  435. ads/jobs/env_var_parser.py +131 -0
  436. ads/jobs/extension.py +160 -0
  437. ads/jobs/schema/__init__.py +5 -0
  438. ads/jobs/schema/infrastructure_schema.json +116 -0
  439. ads/jobs/schema/job_schema.json +42 -0
  440. ads/jobs/schema/runtime_schema.json +183 -0
  441. ads/jobs/schema/validator.py +141 -0
  442. ads/jobs/serializer.py +296 -0
  443. ads/jobs/templates/__init__.py +5 -0
  444. ads/jobs/templates/container.py +6 -0
  445. ads/jobs/templates/driver_notebook.py +177 -0
  446. ads/jobs/templates/driver_oci.py +500 -0
  447. ads/jobs/templates/driver_python.py +48 -0
  448. ads/jobs/templates/driver_pytorch.py +852 -0
  449. ads/jobs/templates/driver_utils.py +615 -0
  450. ads/jobs/templates/hostname_from_env.c +55 -0
  451. ads/jobs/templates/oci_metrics.py +181 -0
  452. ads/jobs/utils.py +104 -0
  453. ads/llm/__init__.py +28 -0
  454. ads/llm/autogen/__init__.py +2 -0
  455. ads/llm/autogen/constants.py +15 -0
  456. ads/llm/autogen/reports/__init__.py +2 -0
  457. ads/llm/autogen/reports/base.py +67 -0
  458. ads/llm/autogen/reports/data.py +103 -0
  459. ads/llm/autogen/reports/session.py +526 -0
  460. ads/llm/autogen/reports/templates/chat_box.html +13 -0
  461. ads/llm/autogen/reports/templates/chat_box_lt.html +5 -0
  462. ads/llm/autogen/reports/templates/chat_box_rt.html +6 -0
  463. ads/llm/autogen/reports/utils.py +56 -0
  464. ads/llm/autogen/v02/__init__.py +4 -0
  465. ads/llm/autogen/v02/client.py +295 -0
  466. ads/llm/autogen/v02/log_handlers/__init__.py +2 -0
  467. ads/llm/autogen/v02/log_handlers/oci_file_handler.py +83 -0
  468. ads/llm/autogen/v02/loggers/__init__.py +6 -0
  469. ads/llm/autogen/v02/loggers/metric_logger.py +320 -0
  470. ads/llm/autogen/v02/loggers/session_logger.py +580 -0
  471. ads/llm/autogen/v02/loggers/utils.py +86 -0
  472. ads/llm/autogen/v02/runtime_logging.py +163 -0
  473. ads/llm/chain.py +268 -0
  474. ads/llm/chat_template.py +31 -0
  475. ads/llm/deploy.py +63 -0
  476. ads/llm/guardrails/__init__.py +5 -0
  477. ads/llm/guardrails/base.py +442 -0
  478. ads/llm/guardrails/huggingface.py +44 -0
  479. ads/llm/langchain/__init__.py +5 -0
  480. ads/llm/langchain/plugins/__init__.py +5 -0
  481. ads/llm/langchain/plugins/chat_models/__init__.py +5 -0
  482. ads/llm/langchain/plugins/chat_models/oci_data_science.py +1027 -0
  483. ads/llm/langchain/plugins/embeddings/__init__.py +4 -0
  484. ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +184 -0
  485. ads/llm/langchain/plugins/llms/__init__.py +5 -0
  486. ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +979 -0
  487. ads/llm/requirements.txt +3 -0
  488. ads/llm/serialize.py +219 -0
  489. ads/llm/serializers/__init__.py +0 -0
  490. ads/llm/serializers/retrieval_qa.py +153 -0
  491. ads/llm/serializers/runnable_parallel.py +27 -0
  492. ads/llm/templates/score_chain.jinja2 +155 -0
  493. ads/llm/templates/tool_chat_template_hermes.jinja +130 -0
  494. ads/llm/templates/tool_chat_template_mistral_parallel.jinja +94 -0
  495. ads/model/__init__.py +52 -0
  496. ads/model/artifact.py +573 -0
  497. ads/model/artifact_downloader.py +254 -0
  498. ads/model/artifact_uploader.py +267 -0
  499. ads/model/base_properties.py +238 -0
  500. ads/model/common/.model-ignore +66 -0
  501. ads/model/common/__init__.py +5 -0
  502. ads/model/common/utils.py +142 -0
  503. ads/model/datascience_model.py +2635 -0
  504. ads/model/deployment/__init__.py +20 -0
  505. ads/model/deployment/common/__init__.py +5 -0
  506. ads/model/deployment/common/utils.py +308 -0
  507. ads/model/deployment/model_deployer.py +466 -0
  508. ads/model/deployment/model_deployment.py +1846 -0
  509. ads/model/deployment/model_deployment_infrastructure.py +671 -0
  510. ads/model/deployment/model_deployment_properties.py +493 -0
  511. ads/model/deployment/model_deployment_runtime.py +838 -0
  512. ads/model/extractor/__init__.py +5 -0
  513. ads/model/extractor/automl_extractor.py +74 -0
  514. ads/model/extractor/embedding_onnx_extractor.py +80 -0
  515. ads/model/extractor/huggingface_extractor.py +88 -0
  516. ads/model/extractor/keras_extractor.py +84 -0
  517. ads/model/extractor/lightgbm_extractor.py +93 -0
  518. ads/model/extractor/model_info_extractor.py +114 -0
  519. ads/model/extractor/model_info_extractor_factory.py +105 -0
  520. ads/model/extractor/pytorch_extractor.py +87 -0
  521. ads/model/extractor/sklearn_extractor.py +112 -0
  522. ads/model/extractor/spark_extractor.py +89 -0
  523. ads/model/extractor/tensorflow_extractor.py +85 -0
  524. ads/model/extractor/xgboost_extractor.py +94 -0
  525. ads/model/framework/__init__.py +5 -0
  526. ads/model/framework/automl_model.py +178 -0
  527. ads/model/framework/embedding_onnx_model.py +438 -0
  528. ads/model/framework/huggingface_model.py +399 -0
  529. ads/model/framework/lightgbm_model.py +266 -0
  530. ads/model/framework/pytorch_model.py +266 -0
  531. ads/model/framework/sklearn_model.py +250 -0
  532. ads/model/framework/spark_model.py +326 -0
  533. ads/model/framework/tensorflow_model.py +254 -0
  534. ads/model/framework/xgboost_model.py +258 -0
  535. ads/model/generic_model.py +3518 -0
  536. ads/model/model_artifact_boilerplate/README.md +381 -0
  537. ads/model/model_artifact_boilerplate/__init__.py +5 -0
  538. ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +5 -0
  539. ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +427 -0
  540. ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +2 -0
  541. ads/model/model_artifact_boilerplate/runtime.yaml +7 -0
  542. ads/model/model_artifact_boilerplate/score.py +61 -0
  543. ads/model/model_file_description_schema.json +68 -0
  544. ads/model/model_introspect.py +331 -0
  545. ads/model/model_metadata.py +1810 -0
  546. ads/model/model_metadata_mixin.py +460 -0
  547. ads/model/model_properties.py +63 -0
  548. ads/model/model_version_set.py +739 -0
  549. ads/model/runtime/__init__.py +5 -0
  550. ads/model/runtime/env_info.py +306 -0
  551. ads/model/runtime/model_deployment_details.py +37 -0
  552. ads/model/runtime/model_provenance_details.py +58 -0
  553. ads/model/runtime/runtime_info.py +81 -0
  554. ads/model/runtime/schemas/inference_env_info_schema.yaml +16 -0
  555. ads/model/runtime/schemas/model_provenance_schema.yaml +36 -0
  556. ads/model/runtime/schemas/training_env_info_schema.yaml +16 -0
  557. ads/model/runtime/utils.py +201 -0
  558. ads/model/serde/__init__.py +5 -0
  559. ads/model/serde/common.py +40 -0
  560. ads/model/serde/model_input.py +547 -0
  561. ads/model/serde/model_serializer.py +1184 -0
  562. ads/model/service/__init__.py +5 -0
  563. ads/model/service/oci_datascience_model.py +1076 -0
  564. ads/model/service/oci_datascience_model_deployment.py +500 -0
  565. ads/model/service/oci_datascience_model_version_set.py +176 -0
  566. ads/model/transformer/__init__.py +5 -0
  567. ads/model/transformer/onnx_transformer.py +324 -0
  568. ads/mysqldb/__init__.py +5 -0
  569. ads/mysqldb/mysql_db.py +227 -0
  570. ads/opctl/__init__.py +18 -0
  571. ads/opctl/anomaly_detection.py +11 -0
  572. ads/opctl/backend/__init__.py +5 -0
  573. ads/opctl/backend/ads_dataflow.py +353 -0
  574. ads/opctl/backend/ads_ml_job.py +710 -0
  575. ads/opctl/backend/ads_ml_pipeline.py +164 -0
  576. ads/opctl/backend/ads_model_deployment.py +209 -0
  577. ads/opctl/backend/base.py +146 -0
  578. ads/opctl/backend/local.py +1053 -0
  579. ads/opctl/backend/marketplace/__init__.py +9 -0
  580. ads/opctl/backend/marketplace/helm_helper.py +173 -0
  581. ads/opctl/backend/marketplace/local_marketplace.py +271 -0
  582. ads/opctl/backend/marketplace/marketplace_backend_runner.py +71 -0
  583. ads/opctl/backend/marketplace/marketplace_operator_interface.py +44 -0
  584. ads/opctl/backend/marketplace/marketplace_operator_runner.py +24 -0
  585. ads/opctl/backend/marketplace/marketplace_utils.py +212 -0
  586. ads/opctl/backend/marketplace/models/__init__.py +5 -0
  587. ads/opctl/backend/marketplace/models/bearer_token.py +94 -0
  588. ads/opctl/backend/marketplace/models/marketplace_type.py +70 -0
  589. ads/opctl/backend/marketplace/models/ocir_details.py +56 -0
  590. ads/opctl/backend/marketplace/prerequisite_checker.py +238 -0
  591. ads/opctl/cli.py +707 -0
  592. ads/opctl/cmds.py +869 -0
  593. ads/opctl/conda/__init__.py +5 -0
  594. ads/opctl/conda/cli.py +193 -0
  595. ads/opctl/conda/cmds.py +749 -0
  596. ads/opctl/conda/config.yaml +34 -0
  597. ads/opctl/conda/manifest_template.yaml +13 -0
  598. ads/opctl/conda/multipart_uploader.py +188 -0
  599. ads/opctl/conda/pack.py +89 -0
  600. ads/opctl/config/__init__.py +5 -0
  601. ads/opctl/config/base.py +57 -0
  602. ads/opctl/config/diagnostics/__init__.py +5 -0
  603. ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +62 -0
  604. ads/opctl/config/merger.py +255 -0
  605. ads/opctl/config/resolver.py +297 -0
  606. ads/opctl/config/utils.py +79 -0
  607. ads/opctl/config/validator.py +17 -0
  608. ads/opctl/config/versioner.py +68 -0
  609. ads/opctl/config/yaml_parsers/__init__.py +7 -0
  610. ads/opctl/config/yaml_parsers/base.py +58 -0
  611. ads/opctl/config/yaml_parsers/distributed/__init__.py +7 -0
  612. ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +201 -0
  613. ads/opctl/constants.py +66 -0
  614. ads/opctl/decorator/__init__.py +5 -0
  615. ads/opctl/decorator/common.py +129 -0
  616. ads/opctl/diagnostics/__init__.py +5 -0
  617. ads/opctl/diagnostics/__main__.py +25 -0
  618. ads/opctl/diagnostics/check_distributed_job_requirements.py +212 -0
  619. ads/opctl/diagnostics/check_requirements.py +144 -0
  620. ads/opctl/diagnostics/requirement_exception.py +9 -0
  621. ads/opctl/distributed/README.md +109 -0
  622. ads/opctl/distributed/__init__.py +5 -0
  623. ads/opctl/distributed/certificates.py +32 -0
  624. ads/opctl/distributed/cli.py +207 -0
  625. ads/opctl/distributed/cmds.py +731 -0
  626. ads/opctl/distributed/common/__init__.py +5 -0
  627. ads/opctl/distributed/common/abstract_cluster_provider.py +449 -0
  628. ads/opctl/distributed/common/abstract_framework_spec_builder.py +88 -0
  629. ads/opctl/distributed/common/cluster_config_helper.py +103 -0
  630. ads/opctl/distributed/common/cluster_provider_factory.py +21 -0
  631. ads/opctl/distributed/common/cluster_runner.py +54 -0
  632. ads/opctl/distributed/common/framework_factory.py +29 -0
  633. ads/opctl/docker/Dockerfile.job +103 -0
  634. ads/opctl/docker/Dockerfile.job.arm +107 -0
  635. ads/opctl/docker/Dockerfile.job.gpu +175 -0
  636. ads/opctl/docker/base-env.yaml +13 -0
  637. ads/opctl/docker/cuda.repo +6 -0
  638. ads/opctl/docker/operator/.dockerignore +0 -0
  639. ads/opctl/docker/operator/Dockerfile +41 -0
  640. ads/opctl/docker/operator/Dockerfile.gpu +85 -0
  641. ads/opctl/docker/operator/cuda.repo +6 -0
  642. ads/opctl/docker/operator/environment.yaml +8 -0
  643. ads/opctl/forecast.py +11 -0
  644. ads/opctl/index.yaml +3 -0
  645. ads/opctl/model/__init__.py +5 -0
  646. ads/opctl/model/cli.py +65 -0
  647. ads/opctl/model/cmds.py +73 -0
  648. ads/opctl/operator/README.md +4 -0
  649. ads/opctl/operator/__init__.py +31 -0
  650. ads/opctl/operator/cli.py +344 -0
  651. ads/opctl/operator/cmd.py +596 -0
  652. ads/opctl/operator/common/__init__.py +5 -0
  653. ads/opctl/operator/common/backend_factory.py +460 -0
  654. ads/opctl/operator/common/const.py +27 -0
  655. ads/opctl/operator/common/data/synthetic.csv +16001 -0
  656. ads/opctl/operator/common/dictionary_merger.py +148 -0
  657. ads/opctl/operator/common/errors.py +42 -0
  658. ads/opctl/operator/common/operator_config.py +99 -0
  659. ads/opctl/operator/common/operator_loader.py +811 -0
  660. ads/opctl/operator/common/operator_schema.yaml +130 -0
  661. ads/opctl/operator/common/operator_yaml_generator.py +152 -0
  662. ads/opctl/operator/common/utils.py +208 -0
  663. ads/opctl/operator/lowcode/__init__.py +5 -0
  664. ads/opctl/operator/lowcode/anomaly/MLoperator +16 -0
  665. ads/opctl/operator/lowcode/anomaly/README.md +207 -0
  666. ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
  667. ads/opctl/operator/lowcode/anomaly/__main__.py +103 -0
  668. ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
  669. ads/opctl/operator/lowcode/anomaly/const.py +167 -0
  670. ads/opctl/operator/lowcode/anomaly/environment.yaml +10 -0
  671. ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
  672. ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +146 -0
  673. ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +162 -0
  674. ads/opctl/operator/lowcode/anomaly/model/automlx.py +99 -0
  675. ads/opctl/operator/lowcode/anomaly/model/autots.py +115 -0
  676. ads/opctl/operator/lowcode/anomaly/model/base_model.py +404 -0
  677. ads/opctl/operator/lowcode/anomaly/model/factory.py +110 -0
  678. ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +78 -0
  679. ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +78 -0
  680. ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +120 -0
  681. ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
  682. ads/opctl/operator/lowcode/anomaly/operator_config.py +127 -0
  683. ads/opctl/operator/lowcode/anomaly/schema.yaml +401 -0
  684. ads/opctl/operator/lowcode/anomaly/utils.py +88 -0
  685. ads/opctl/operator/lowcode/common/__init__.py +5 -0
  686. ads/opctl/operator/lowcode/common/const.py +10 -0
  687. ads/opctl/operator/lowcode/common/data.py +116 -0
  688. ads/opctl/operator/lowcode/common/errors.py +47 -0
  689. ads/opctl/operator/lowcode/common/transformations.py +296 -0
  690. ads/opctl/operator/lowcode/common/utils.py +384 -0
  691. ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +13 -0
  692. ads/opctl/operator/lowcode/feature_store_marketplace/README.md +30 -0
  693. ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +5 -0
  694. ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +116 -0
  695. ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +85 -0
  696. ads/opctl/operator/lowcode/feature_store_marketplace/const.py +15 -0
  697. ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
  698. ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +4 -0
  699. ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +32 -0
  700. ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +43 -0
  701. ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +120 -0
  702. ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +34 -0
  703. ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +386 -0
  704. ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +160 -0
  705. ads/opctl/operator/lowcode/forecast/MLoperator +25 -0
  706. ads/opctl/operator/lowcode/forecast/README.md +209 -0
  707. ads/opctl/operator/lowcode/forecast/__init__.py +5 -0
  708. ads/opctl/operator/lowcode/forecast/__main__.py +89 -0
  709. ads/opctl/operator/lowcode/forecast/cmd.py +40 -0
  710. ads/opctl/operator/lowcode/forecast/const.py +92 -0
  711. ads/opctl/operator/lowcode/forecast/environment.yaml +20 -0
  712. ads/opctl/operator/lowcode/forecast/errors.py +26 -0
  713. ads/opctl/operator/lowcode/forecast/model/__init__.py +5 -0
  714. ads/opctl/operator/lowcode/forecast/model/arima.py +279 -0
  715. ads/opctl/operator/lowcode/forecast/model/automlx.py +553 -0
  716. ads/opctl/operator/lowcode/forecast/model/autots.py +312 -0
  717. ads/opctl/operator/lowcode/forecast/model/base_model.py +875 -0
  718. ads/opctl/operator/lowcode/forecast/model/factory.py +106 -0
  719. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +492 -0
  720. ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +243 -0
  721. ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +482 -0
  722. ads/opctl/operator/lowcode/forecast/model/prophet.py +450 -0
  723. ads/opctl/operator/lowcode/forecast/model_evaluator.py +244 -0
  724. ads/opctl/operator/lowcode/forecast/operator_config.py +234 -0
  725. ads/opctl/operator/lowcode/forecast/schema.yaml +506 -0
  726. ads/opctl/operator/lowcode/forecast/utils.py +397 -0
  727. ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +7 -0
  728. ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +285 -0
  729. ads/opctl/operator/lowcode/forecast/whatifserve/score.py +246 -0
  730. ads/opctl/operator/lowcode/pii/MLoperator +17 -0
  731. ads/opctl/operator/lowcode/pii/README.md +208 -0
  732. ads/opctl/operator/lowcode/pii/__init__.py +5 -0
  733. ads/opctl/operator/lowcode/pii/__main__.py +78 -0
  734. ads/opctl/operator/lowcode/pii/cmd.py +39 -0
  735. ads/opctl/operator/lowcode/pii/constant.py +84 -0
  736. ads/opctl/operator/lowcode/pii/environment.yaml +17 -0
  737. ads/opctl/operator/lowcode/pii/errors.py +27 -0
  738. ads/opctl/operator/lowcode/pii/model/__init__.py +5 -0
  739. ads/opctl/operator/lowcode/pii/model/factory.py +82 -0
  740. ads/opctl/operator/lowcode/pii/model/guardrails.py +167 -0
  741. ads/opctl/operator/lowcode/pii/model/pii.py +145 -0
  742. ads/opctl/operator/lowcode/pii/model/processor/__init__.py +34 -0
  743. ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +34 -0
  744. ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +35 -0
  745. ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +225 -0
  746. ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +73 -0
  747. ads/opctl/operator/lowcode/pii/model/processor/remover.py +26 -0
  748. ads/opctl/operator/lowcode/pii/model/report.py +487 -0
  749. ads/opctl/operator/lowcode/pii/operator_config.py +95 -0
  750. ads/opctl/operator/lowcode/pii/schema.yaml +108 -0
  751. ads/opctl/operator/lowcode/pii/utils.py +43 -0
  752. ads/opctl/operator/lowcode/recommender/MLoperator +16 -0
  753. ads/opctl/operator/lowcode/recommender/README.md +206 -0
  754. ads/opctl/operator/lowcode/recommender/__init__.py +5 -0
  755. ads/opctl/operator/lowcode/recommender/__main__.py +82 -0
  756. ads/opctl/operator/lowcode/recommender/cmd.py +33 -0
  757. ads/opctl/operator/lowcode/recommender/constant.py +30 -0
  758. ads/opctl/operator/lowcode/recommender/environment.yaml +11 -0
  759. ads/opctl/operator/lowcode/recommender/model/base_model.py +212 -0
  760. ads/opctl/operator/lowcode/recommender/model/factory.py +56 -0
  761. ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +25 -0
  762. ads/opctl/operator/lowcode/recommender/model/svd.py +106 -0
  763. ads/opctl/operator/lowcode/recommender/operator_config.py +81 -0
  764. ads/opctl/operator/lowcode/recommender/schema.yaml +265 -0
  765. ads/opctl/operator/lowcode/recommender/utils.py +13 -0
  766. ads/opctl/operator/runtime/__init__.py +5 -0
  767. ads/opctl/operator/runtime/const.py +17 -0
  768. ads/opctl/operator/runtime/container_runtime_schema.yaml +50 -0
  769. ads/opctl/operator/runtime/marketplace_runtime.py +50 -0
  770. ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +21 -0
  771. ads/opctl/operator/runtime/python_runtime_schema.yaml +21 -0
  772. ads/opctl/operator/runtime/runtime.py +115 -0
  773. ads/opctl/schema.yaml.yml +36 -0
  774. ads/opctl/script.py +40 -0
  775. ads/opctl/spark/__init__.py +5 -0
  776. ads/opctl/spark/cli.py +43 -0
  777. ads/opctl/spark/cmds.py +147 -0
  778. ads/opctl/templates/diagnostic_report_template.jinja2 +102 -0
  779. ads/opctl/utils.py +344 -0
  780. ads/oracledb/__init__.py +5 -0
  781. ads/oracledb/oracle_db.py +346 -0
  782. ads/pipeline/__init__.py +39 -0
  783. ads/pipeline/ads_pipeline.py +2279 -0
  784. ads/pipeline/ads_pipeline_run.py +772 -0
  785. ads/pipeline/ads_pipeline_step.py +605 -0
  786. ads/pipeline/builders/__init__.py +5 -0
  787. ads/pipeline/builders/infrastructure/__init__.py +5 -0
  788. ads/pipeline/builders/infrastructure/custom_script.py +32 -0
  789. ads/pipeline/cli.py +119 -0
  790. ads/pipeline/extension.py +291 -0
  791. ads/pipeline/schema/__init__.py +5 -0
  792. ads/pipeline/schema/cs_step_schema.json +35 -0
  793. ads/pipeline/schema/ml_step_schema.json +31 -0
  794. ads/pipeline/schema/pipeline_schema.json +71 -0
  795. ads/pipeline/visualizer/__init__.py +5 -0
  796. ads/pipeline/visualizer/base.py +570 -0
  797. ads/pipeline/visualizer/graph_renderer.py +272 -0
  798. ads/pipeline/visualizer/text_renderer.py +84 -0
  799. ads/secrets/__init__.py +11 -0
  800. ads/secrets/adb.py +386 -0
  801. ads/secrets/auth_token.py +86 -0
  802. ads/secrets/big_data_service.py +365 -0
  803. ads/secrets/mysqldb.py +149 -0
  804. ads/secrets/oracledb.py +160 -0
  805. ads/secrets/secrets.py +407 -0
  806. ads/telemetry/__init__.py +7 -0
  807. ads/telemetry/base.py +69 -0
  808. ads/telemetry/client.py +122 -0
  809. ads/telemetry/telemetry.py +257 -0
  810. ads/templates/dataflow_pyspark.jinja2 +13 -0
  811. ads/templates/dataflow_sparksql.jinja2 +22 -0
  812. ads/templates/func.jinja2 +20 -0
  813. ads/templates/schemas/openapi.json +1740 -0
  814. ads/templates/score-pkl.jinja2 +173 -0
  815. ads/templates/score.jinja2 +322 -0
  816. ads/templates/score_embedding_onnx.jinja2 +202 -0
  817. ads/templates/score_generic.jinja2 +165 -0
  818. ads/templates/score_huggingface_pipeline.jinja2 +217 -0
  819. ads/templates/score_lightgbm.jinja2 +185 -0
  820. ads/templates/score_onnx.jinja2 +407 -0
  821. ads/templates/score_onnx_new.jinja2 +473 -0
  822. ads/templates/score_oracle_automl.jinja2 +185 -0
  823. ads/templates/score_pyspark.jinja2 +154 -0
  824. ads/templates/score_pytorch.jinja2 +219 -0
  825. ads/templates/score_scikit-learn.jinja2 +184 -0
  826. ads/templates/score_tensorflow.jinja2 +184 -0
  827. ads/templates/score_xgboost.jinja2 +178 -0
  828. ads/text_dataset/__init__.py +5 -0
  829. ads/text_dataset/backends.py +211 -0
  830. ads/text_dataset/dataset.py +445 -0
  831. ads/text_dataset/extractor.py +207 -0
  832. ads/text_dataset/options.py +53 -0
  833. ads/text_dataset/udfs.py +22 -0
  834. ads/text_dataset/utils.py +49 -0
  835. ads/type_discovery/__init__.py +9 -0
  836. ads/type_discovery/abstract_detector.py +21 -0
  837. ads/type_discovery/constant_detector.py +41 -0
  838. ads/type_discovery/continuous_detector.py +54 -0
  839. ads/type_discovery/credit_card_detector.py +99 -0
  840. ads/type_discovery/datetime_detector.py +92 -0
  841. ads/type_discovery/discrete_detector.py +118 -0
  842. ads/type_discovery/document_detector.py +146 -0
  843. ads/type_discovery/ip_detector.py +68 -0
  844. ads/type_discovery/latlon_detector.py +90 -0
  845. ads/type_discovery/phone_number_detector.py +63 -0
  846. ads/type_discovery/type_discovery_driver.py +87 -0
  847. ads/type_discovery/typed_feature.py +594 -0
  848. ads/type_discovery/unknown_detector.py +41 -0
  849. ads/type_discovery/zipcode_detector.py +48 -0
  850. ads/vault/__init__.py +7 -0
  851. ads/vault/vault.py +237 -0
  852. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10rc0.dist-info}/METADATA +150 -149
  853. oracle_ads-2.13.10rc0.dist-info/RECORD +858 -0
  854. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10rc0.dist-info}/WHEEL +1 -2
  855. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10rc0.dist-info}/entry_points.txt +2 -1
  856. oracle_ads-2.13.9rc0.dist-info/RECORD +0 -9
  857. oracle_ads-2.13.9rc0.dist-info/top_level.txt +0 -1
  858. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10rc0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,852 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8; -*-
3
+
4
+ # Copyright (c) 2023, 2024 Oracle and/or its affiliates.
5
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
+ """This module requires oracle-ads>=2.6.8
7
+ """
8
+ import getpass
9
+ import ipaddress
10
+ import logging
11
+ import multiprocessing
12
+ import os
13
+ import time
14
+ import shlex
15
+ import socket
16
+ import sys
17
+ import traceback
18
+
19
+ import oci
20
+ import psutil
21
+ import torch
22
+ from ads import set_auth
23
+ from ads.jobs import DataScienceJobRun
24
+ from ads.jobs.builders.infrastructure.dsc_job_runtime import (
25
+ PythonRuntimeHandler,
26
+ )
27
+ from ads.opctl.distributed.common import cluster_config_helper
28
+
29
+ try:
30
+ # This is used by ADS and testing
31
+ from . import driver_utils
32
+ from .driver_oci import GitSSHKey, GitManager
33
+ from .oci_metrics import collect_metrics, METRIC_NAMESPACE
34
+ except ImportError:
35
+ # This is used when the script is in a job run.
36
+ import driver_utils
37
+ from driver_oci import GitSSHKey, GitManager
38
+ from oci_metrics import collect_metrics, METRIC_NAMESPACE
39
+
40
# Module-level logger; the log level is configured by driver_utils.
logger = logging.getLogger(__name__)
logger = driver_utils.set_log_level(logger)


# Envs provisioned by the service
CONST_ENV_HOST_JOB_RUN_OCID = "MAIN_JOB_RUN_OCID"
CONST_ENV_JOB_RUN_OCID = "JOB_RUN_OCID"
# Envs set by the ADS API
OCI__WORKER_COUNT = "OCI__WORKER_COUNT"
CONST_ENV_NODE_RANK = "NODE_RANK"
CONST_ENV_NODE_COUNT = "NODE_COUNT"
CONST_ENV_LAUNCH_CMD = "OCI__LAUNCH_CMD"
CONST_ENV_DEEPSPEED = "OCI__DEEPSPEED"
# Envs set by this module
CONST_ENV_WORLD_SIZE = "WORLD_SIZE"
CONST_ENV_LD_PRELOAD = "LD_PRELOAD"
# Envs for debugging only
# OCI_ODSC_SERVICE_ENDPOINT is used for all processes in the job run
CONST_ENV_ODSC_SERVICE_ENDPOINT = "OCI_ODSC_SERVICE_ENDPOINT"
# OCI_DS_SERVICE_ENDPOINT is used only by the training process
CONST_ENV_DS_SERVICE_ENDPOINT = "OCI_DS_SERVICE_ENDPOINT"

# Constants used in logs
# These prefixes are printed in front of the IP address / public key so that
# other job runs can find the values by scanning the logs.
LOG_PREFIX_HOST_IP = "Distributed Training HOST IP: "
LOG_PREFIX_NODE_IP = "Node IP: "
LOG_PREFIX_PUBLIC_KEY = "HOST PUBLIC KEY: "
# Other constants used within this script
# USER_HOME falls back to /home/<current user> when HOME is not set.
USER_HOME = os.environ.get("HOME", f"/home/{getpass.getuser()}")
# SSH_DIR can be overridden with the OCI__SSH_DIR environment variable.
SSH_DIR = os.environ.get("OCI__SSH_DIR", os.path.join(USER_HOME, ".ssh"))
DEFAULT_LAUNCHER = "torchrun"

# Set authentication method to resource principal
# This script is expected to be running inside the job run
if "OCI_RESOURCE_PRINCIPAL_VERSION" in os.environ:
    set_auth("resource_principal")
76
+
77
+
78
class LazyEvaluate:
    """Defers a function call until its result is needed as a string.

    This is intended for logging, where the argument is only converted
    to a string if the message is actually emitted.

    Example::
        logger.debug("The value is %s", LazyEvaluate(the_function, *args, **kwargs))

    Python logging only calls ``__str__()`` when the record is formatted.
    In the example above, the_function() is not called when the log level
    is INFO or above. When the log level is DEBUG, the_function() is called;
    if it raises, the error is rendered into the message instead of
    propagating, so the program keeps running.

    """

    def __init__(self, func, *args, **kwargs) -> None:
        # Keep the callable and its arguments; nothing is evaluated here.
        self.func = func
        self.args = args
        self.kwargs = kwargs

    def eval(self):
        """Calls the stored function with the stored arguments."""
        return self.func(*self.args, **self.kwargs)

    def __str__(self) -> str:
        """Returns the string form of the function result, or an error text."""
        try:
            return str(self.eval())
        except Exception as ex:
            # The traceback goes to the debug log; the caller only sees a short text.
            logger.debug(traceback.format_exc())
            return f"ERROR: {str(ex)}"
112
+
113
+
114
class Runner(driver_utils.JobRunner):
    """Base runner class for PyTorch training job"""

    # LAUNCHER stores the main command for launching the training job.
    # e.g. torchrun, deepspeed, accelerate, etc.
    LAUNCHER = ""

    def __init__(self, code_dir: str = driver_utils.DEFAULT_CODE_DIR) -> None:
        """Initializes the runner and determines whether this run is the host.

        The run is treated as a (non-host) node when the MAIN_JOB_RUN_OCID
        environment variable is set; otherwise it is treated as the host.
        The IP address of this run is printed so that other runs can
        obtain it from the logs.

        Parameters
        ----------
        code_dir : str, optional
            Directory of the source code, passed to the base class.
        """
        super().__init__(code_dir)
        self.launch_cmd = os.environ.get(CONST_ENV_LAUNCH_CMD, "")

        self.ds_client = driver_utils.OCIHelper.init_oci_client(
            oci.data_science.DataScienceClient
        )
        self.ip = self.find_self_ip()
        # IP address of other nodes as a list
        self.node_ip_list = []
        # DataScienceJobRun objects of other nodes as a list
        self.node_runs = []

        if CONST_ENV_HOST_JOB_RUN_OCID in os.environ:
            # Print the node IP address to logs so that it can be obtained by the host.
            print(f"{LOG_PREFIX_NODE_IP}{self.ip}")
            self.host_ocid = os.environ[CONST_ENV_HOST_JOB_RUN_OCID]
            logger.debug("Host job run OCID: %s", self.host_ocid)
            # The host IP is resolved later by wait_for_host_ip_address().
            self.host_ip = None
            self.is_host = False
        else:
            # Print the host IP address to logs so that it can be obtained by the nodes.
            print(f"{LOG_PREFIX_HOST_IP}{self.ip}")
            self.host_ocid = os.environ.get(CONST_ENV_JOB_RUN_OCID)
            self.host_ip = self.ip
            self.is_host = True

        self.host_job_run = DataScienceJobRun.from_ocid(self.host_ocid)
        self.entrypoint_env = PythonRuntimeHandler.CONST_CODE_ENTRYPOINT
        # The total number of nodes is OCI__WORKER_COUNT + 1
        if CONST_ENV_NODE_COUNT in os.environ:
            self.node_count = int(os.environ[CONST_ENV_NODE_COUNT])
        else:
            self.node_count = int(os.environ.get(OCI__WORKER_COUNT, 0)) + 1
        logger.debug("Node count: %s", self.node_count)
        self.gpu_count = torch.cuda.device_count()
        logger.debug("GPU count on this node: %s", self.gpu_count)

        logger.debug("Runner initialized.")

    def launch_cmd_contains(self, arg) -> bool:
        """Checks if the cmd for launching the training contains specific keyword argument.

        This is a plain substring check for ``--<arg>``, so a prefix such as
        "nnode" also matches "--nnodes".
        """
        return f"--{arg}" in self.launch_cmd

    def wait_for_host_ip_address(self, timeout=15 * 60) -> "Runner":
        """Waits until the IP address of the host is obtained.

        The IP address is stored in ``self.host_ip``.
        Nothing is done if ``self.host_ip`` is already set.

        Parameters
        ----------
        timeout : int, optional
            Timeout in seconds, by default 15 minutes.

        Returns
        -------
        Runner
            The runner itself, so that calls can be chained.
            Read ``self.host_ip`` for the IP address.
        """
        if not self.host_ip:
            logger.info("Waiting for host's IP address...")
            self.host_ip = self.wait_for_ip_address(self.host_job_run, timeout)
        return self

    def wait_for_ip_address(self, job_run, timeout=15 * 60) -> str:
        """Waits until the IP address of a particular job run is obtained.

        Parameters
        ----------
        job_run : DataScienceJobRun
            A DataScienceJobRun object
        timeout : int, optional
            Timeout in seconds, by default 15 minutes.

        Returns
        -------
        str
            IP address

        Raises
        ------
        TimeoutError
            Failed to obtain the IP address within the specific timeout.
        """
        logger.info("Waiting for IP address of job run %s", job_run.id)
        # The host and the nodes print their IP with different log prefixes.
        if job_run == self.host_job_run:
            log_prefix = LOG_PREFIX_HOST_IP
        else:
            log_prefix = LOG_PREFIX_NODE_IP
        ip_address = self.wait_for_log(job_run, log_prefix, timeout).strip()
        logger.info("IP of %s: %s", job_run.id[-6:], ip_address)
        return ip_address

    def wait_for_log(self, job_run, log_prefix, timeout=15 * 60) -> str:
        """Waits until a log message with specific prefix is found in the logs of a job run.

        The logs are checked once every 60 seconds.

        Parameters
        ----------
        job_run : DataScienceJobRun
            A DataScienceJobRun object
        log_prefix : str
            The prefix of the log message to look for.
        timeout : int, optional
            Timeout in seconds, by default 15 minutes.

        Returns
        -------
        str
            The log message without the prefix.

        Raises
        ------
        TimeoutError
            Failed to obtain the log message within the specific timeout.
        """
        logger.debug(
            "Waiting for logs with prefix '%s' from %s.", log_prefix, job_run.id
        )
        second_started = time.time()
        while True:
            log = self.check_job_run_logs(job_run=job_run, log_prefix=log_prefix)
            if log:
                return log
            if time.time() - second_started > timeout:
                raise TimeoutError(
                    f"Failed to obtain log with prefix {log_prefix} for {job_run.id} in {timeout} seconds."
                )
            time.sleep(60)

    @staticmethod
    def check_job_run_logs(job_run, log_prefix: str) -> "str | None":
        """Checks the logs of a specific job run and find the log message with specific prefix.

        Parameters
        ----------
        job_run : DataScienceJobRun
            The Job run object from which the logs will be obtained.
        log_prefix : str
            The prefix to look for.

        Returns
        -------
        str or None
            The first matching log message without the prefix,
            or None if no log message starts with the prefix.
        """
        logger.debug("Checking logs for job run %s", job_run.id)
        logs = job_run.logs()
        for log in logs:
            if log["message"].startswith(log_prefix):
                return log["message"][len(log_prefix) :]
        return None

    def find_self_ip(self):
        """
        Identify IP address by finding which of the host IP intersects with the CIDR block of the subnet
        associated with the JOB_OCID

        When the JOB_OCID environment variable is not set,
        the IP address is resolved from the hostname instead.

        Returns
        -------
        str
            The IP address of this node.

        Raises
        ------
        EnvironmentError
            If JOB_OCID is set and no address of any network interface
            is within the CIDR block of the subnet.
        """
        hostname = socket.gethostname()
        logger.debug("Hostname: %s", hostname)
        logger.debug(
            "Get Host by Addr: %s", LazyEvaluate(socket.gethostbyaddr, hostname)
        )
        logger.debug("FQDN: %s", LazyEvaluate(socket.getfqdn, hostname))

        job_ocid = os.environ.get("JOB_OCID")
        if not job_ocid:
            ip = socket.gethostbyname(hostname)
            logger.info("Node IP address: %s", ip)
            return ip

        subnet_id = self.ds_client.get_job(
            job_ocid
        ).data.job_infrastructure_configuration_details.subnet_id
        core_client = driver_utils.OCIHelper.init_oci_client(
            oci.core.VirtualNetworkClient
        )
        cidr = core_client.get_subnet(subnet_id).data.cidr_block
        # Parse the CIDR block once instead of once per interface.
        subnet = ipaddress.ip_network(cidr)

        for interface, snics in psutil.net_if_addrs().items():
            # An interface can have several addresses (IPv4, IPv6, MAC).
            # Check each of them instead of only the first one.
            for snic in snics:
                try:
                    address = ipaddress.ip_address(snic.address)
                except ValueError:
                    # Not an IP address, e.g. a MAC address.
                    continue
                if address in subnet:
                    ip = snic.address
                    logger.info("Node IP address: %s", ip)
                    # Specify the network interface for NCCL/GLOO
                    os.environ["GLOO_SOCKET_IFNAME"] = interface
                    os.environ["NCCL_SOCKET_IFNAME"] = interface
                    return ip
        raise EnvironmentError("Unable to determine node IP address.")

    def fetch_code(self):
        """Fetches source code from Git if repo uri is specified.

        Returns
        -------
        Runner
            The runner itself, so that calls can be chained.
        """
        if cluster_config_helper.OCI__RUNTIME_URI in os.environ:
            self._fetch_git(code_dir=self.code_dir)
        return self

    def _fetch_git(self, code_dir):
        """Fetches source code from Git repository.

        The repository URI, branch, commit and the OCID of the secret
        are read from the environment variables.

        Parameters
        ----------
        code_dir : str
            The directory passed to GitManager as the code directory.
        """
        uri = os.environ.get(cluster_config_helper.OCI__RUNTIME_URI)
        branch = os.environ.get(cluster_config_helper.OCI__RUNTIME_GIT_BRANCH)
        commit = os.environ.get(cluster_config_helper.OCI__RUNTIME_GIT_COMMIT)
        secret_ocid = os.environ.get(cluster_config_helper.OCI__RUNTIME_GIT_SECRET_ID)
        # with GitSSHKey does nothing if secret_ocid is None or empty
        with GitSSHKey(secret_ocid):
            GitManager(uri, code_dir=code_dir).fetch_repo().checkout_code(
                branch=branch, commit=commit
            )

    def get_cmd_with_entrypoint_and_args(self, prefix: str = "") -> str:
        """Gets the command based on entrypoint and arguments.

        The entrypoint is read from the environment variable named by
        ``self.entrypoint_env``. The arguments are taken from ``sys.argv[1:]``.

        Parameters
        ----------
        prefix : str, optional
            Command prefix, by default ""
            This can be used to set environment variables for the command.
            e.g. ENV=1 command

        Returns
        -------
        str
            The command including the prefix, entrypoint and arguments.
        """
        cmd = os.environ[self.entrypoint_env]
        if prefix:
            cmd = prefix + " " + cmd
        if sys.argv[1:]:
            # NOTE(review): the arguments are joined without shell quoting,
            # so an argument containing spaces will be split again
            # if the command is run by a shell. Confirm if this is intended.
            cmd += " " + " ".join(sys.argv[1:])
        return cmd

    def prepare_cmd(self, launch_args: list = None, prefix=""):
        """Prepares the command for starting the training.

        If the user specified a launch command (OCI__LAUNCH_CMD),
        the part after the launcher is appended to the launch_args.
        Otherwise, the entrypoint and its arguments are appended.
        The list passed in as launch_args is not modified.

        Parameters
        ----------
        launch_args : list
            The command and arguments for starting the training as a list.
        prefix : str, optional
            The prefix to be added to the launch_args in the command, by default ""
            This can be used to set environment variables for the command.
            e.g. ENV=1 command

        Returns
        -------
        str
            The command for starting the training.

        Raises
        ------
        ValueError
            If the launch command specified by the user
            does not start with the launcher of this runner.
        """
        # Work on a copy so that the list owned by the caller is not changed.
        launch_args = list(launch_args) if launch_args else []
        # Append launch cmd args specified by the user.
        if self.launch_cmd:
            if self.LAUNCHER:
                if not self.launch_cmd.startswith(self.LAUNCHER):
                    raise ValueError(f"Command not supported: '{self.launch_cmd}'. ")

                # Drop the launcher and the character following it (the separator).
                launch_args.append(self.launch_cmd[len(self.LAUNCHER) + 1 :])
            else:
                launch_args.append(self.launch_cmd)
        else:
            launch_args.append(self.get_cmd_with_entrypoint_and_args())

        if prefix:
            launcher = f"{prefix} {self.LAUNCHER}"
        else:
            launcher = self.LAUNCHER

        return f"{launcher} {' '.join(launch_args)}"

    def time_cmd(self, cmd):
        """Run the command and log the time used."""
        # Show current working directory for debugging purpose
        self.run_command("pwd", level=logging.DEBUG)
        # Show all environment variables
        self.run_command("printenv", level=logging.DEBUG)
        training_start_time = time.time()
        self.run_command(cmd, conda_prefix=self.conda_prefix, check=True)
        logger.info("Time: %s seconds.", time.time() - training_start_time)

    def run(self):
        """Starts the training. This must be implemented by the subclass."""
        raise NotImplementedError()
392
+
393
+
394
class TorchRunner(Runner):
    """Runner for launching the training job with torchrun."""

    # Port used in the rendezvous endpoint (host_ip:RDZV_PORT).
    RDZV_PORT = 29400
    LAUNCHER = "torchrun"

    def __init__(self, code_dir: str = driver_utils.DEFAULT_CODE_DIR) -> None:
        super().__init__(code_dir)
        self.build_c_library()

    def build_c_library(self):
        """Compiles hostname_from_env.c into a shared library in the conda env.

        The C source is expected to be in the same directory as this script.
        An error is logged and None is returned if the source is not found.
        Otherwise the runner itself is returned.
        """
        C_SOURCE_CODE = "hostname_from_env.c"
        script_dir = os.path.dirname(os.path.abspath(__file__))
        source_path = os.path.join(script_dir, C_SOURCE_CODE)
        if not os.path.exists(source_path):
            logger.error("Source code %s not found.", source_path)
            return None

        compile_cmd = (
            "gcc -fPIC -shared -Wl,-soname,libhostname.so.1 -ldl "
            f"-o {self.conda_prefix}/lib/libhostname.so.1 {source_path}"
        )
        self.run_command(
            compile_cmd, conda_prefix=self.conda_prefix, level=logging.DEBUG
        )
        # List the library files for debugging purpose.
        list_cmd = f"ls {self.conda_prefix}/lib/libhostname*"
        self.run_command(list_cmd, level=logging.DEBUG)

        return self

    def env_ld_preload(self) -> str:
        """Generate environment variable config for LD_PRELOAD and OCI__HOSTNAME.
        The return value can be used as the prefix of a bash command.
        An empty string is returned if LD_PRELOAD is already defined by the user.
        """
        # For pytorch>=2.0, we can use f"--local_addr={self.ip} " instead of LD_PRELOAD.
        if CONST_ENV_LD_PRELOAD in os.environ:
            # Respect the LD_PRELOAD defined by the user.
            return ""
        return f"LD_PRELOAD={self.conda_prefix}/lib/libhostname.so.1 OCI__HOSTNAME={self.ip}"

    def get_rdzv_conf(self) -> str:
        """Prepare additional rendezvous config for torch run.

        The default read_timeout is 60 seconds.
        The job run will fail if the node cannot reach the host within read_timeout.
        Here the read_timeout is taken from OCI__RDZV_TIMEOUT, or 600 if it is not set.
        """
        return f"read_timeout={os.environ.get('OCI__RDZV_TIMEOUT', '600')}"

    def run(self):
        """Builds the torchrun command and runs it."""
        # One process per GPU, or a single process when there is no GPU.
        nproc_per_node = max(self.gpu_count, 1)

        # Add nnode, nproc_per_node and rdzv args only if they are not specified by the user.
        launch_args = []
        if not self.launch_cmd_contains("nnode"):
            launch_args.append(f"--nnode={self.node_count}")
        if not self.launch_cmd_contains("nproc_per_node"):
            launch_args.append(f"--nproc_per_node={nproc_per_node}")
        if not self.launch_cmd_contains("rdzv_backend"):
            launch_args += [
                "--rdzv_backend=c10d",
                f"--rdzv_endpoint={self.host_ip}:{self.RDZV_PORT}",
                f"--rdzv_conf={self.get_rdzv_conf()}",
            ]

        training_cmd = self.prepare_cmd(launch_args, prefix=self.env_ld_preload())
        self.time_cmd(cmd=training_cmd)
466
+
467
+
468
class DeepSpeedRunner(Runner):
    """Runner for commands launched with ``deepspeed``.

    On the host node this generates an SSH key pair, writes the DeepSpeed
    hostfile and the SSH client config, saves the environment variables for
    the SSH sessions and runs the launch command. Worker nodes install the
    host's public key and then wait until the host touches ``STOP_FILE`` or
    ``ERROR_FILE`` on them.
    """

    # Marker files created on the worker nodes (via SSH) by the host.
    STOP_FILE = "/home/datascience/stop"
    ERROR_FILE = "/home/datascience/error"
    HOST_FILE = "/home/datascience/hostfile"
    ENV_FILE = os.path.expanduser("~/.deepspeed_env")
    LAUNCHER = "deepspeed"

    def __init__(self, code_dir: str = driver_utils.DEFAULT_CODE_DIR) -> None:
        """Initialize the base runner, then prepare the OS for SSH-based training."""
        super().__init__(code_dir)
        self.update_os()

    def update_os(self):
        """Install the OS packages needed by DeepSpeed and start the SSH server."""
        # Generate SSH host keys for SSH server
        self.run_command("sudo ssh-keygen -A", level=logging.DEBUG, check=True)
        # Install SSH server to accept SSH connections
        # DeepSpeed uses "hostname -I" to determine the IP address
        # pdsh is required for default multi node training
        # torch cpp extension uses which command to find compiler
        # DeepSpeed async_io requires libaio-devel
        self.run_command(
            "sudo --preserve-env yum install -y openssh-server hostname pdsh which libaio-devel",
            level=logging.DEBUG,
            check=True,
        )
        # Start SSH service
        self.run_command("sudo /usr/sbin/sshd", level=logging.DEBUG, check=True)

    def generate_key_pair(self):
        """Create the host's SSH key pair and publish the public key in the log.

        The public key is printed with ``LOG_PREFIX_PUBLIC_KEY`` so that it
        can be picked up from the host job run log, and it is also authorized
        locally so the host can SSH to itself.

        Returns
        -------
        DeepSpeedRunner
            The runner itself, to allow method chaining.
        """
        self.run_command(
            "ssh-keygen -q -t rsa -N '' <<< $'\ny'", level=logging.DEBUG, check=True
        )
        with open(os.path.join(SSH_DIR, "id_rsa.pub"), "r", encoding="utf-8") as f:
            public_key = f.read()
        print(f"{LOG_PREFIX_PUBLIC_KEY}{public_key}")
        self.add_authoried_key(public_key)
        self.run_command(
            f"ssh-keyscan -H {self.host_ip} >> {SSH_DIR}/known_hosts",
            level=logging.DEBUG,
            check=True,
        )
        self.test_ssh_connection(self.host_ip)
        # Check DeepSpeed compatibility
        self.run_command(
            "ds_report", conda_prefix=self.conda_prefix, level=logging.DEBUG
        )
        return self

    @staticmethod
    def add_authoried_key(public_key):
        """Append ``public_key`` to the ``authorized_keys`` file under ``SSH_DIR``.

        The method name (including its spelling) is kept unchanged because it
        is part of the class interface.
        """
        auth_keys_file = os.path.join(SSH_DIR, "authorized_keys")
        os.makedirs(SSH_DIR, exist_ok=True)
        with open(auth_keys_file, "a+", encoding="utf-8") as f:
            f.write(public_key)
            f.write("\n")
        logger.debug("Public key saved to %s", auth_keys_file)

    def fetch_host_public_key(self):
        """Wait for the host's public key in the host job run log and authorize it."""
        public_key = self.wait_for_log(self.host_job_run, LOG_PREFIX_PUBLIC_KEY)
        # Echo the key with the same prefix; the host waits for this prefix
        # in each node's log before it starts the training.
        print(f"{LOG_PREFIX_PUBLIC_KEY}{public_key}")
        self.add_authoried_key(public_key)

    @staticmethod
    def _ssh_config_entry(host):
        """Return the newline-terminated SSH config lines for a single host."""
        return [
            "\n",
            f"Host {host}\n",
            "IdentityFile /home/datascience/.ssh/id_rsa\n",
            "User datascience\n",
        ]

    def generate_hostfile(self):
        """Write the DeepSpeed hostfile and the SSH client config.

        The worker job runs are the runs of the same job that are ACCEPTED or
        IN_PROGRESS, excluding the host itself. Both files list the host
        first, followed by the worker nodes.

        Returns
        -------
        DeepSpeedRunner
            The runner itself, to allow method chaining.
        """
        runs = self.host_job_run.job.run_list()
        self.node_runs = [
            run
            for run in runs
            if run.status in ["ACCEPTED", "IN_PROGRESS"] and run.id != self.host_ocid
        ]
        self.node_ip_list = [self.wait_for_ip_address(run) for run in self.node_runs]
        logger.info("Node IPs: %s", self.node_ip_list)
        all_ips = [self.host_ip, *self.node_ip_list]
        # Hostfile
        logger.debug("Writing hostfile to %s", self.HOST_FILE)
        os.makedirs(os.path.dirname(self.HOST_FILE), exist_ok=True)
        with open(self.HOST_FILE, "w", encoding="utf-8") as f:
            # writelines() does not add line separators, so each entry must
            # carry its own newline; otherwise entries run together.
            f.writelines(f"{ip} slots={self.gpu_count}\n" for ip in all_ips)
        self.run_command(f"cat {self.HOST_FILE}", level=logging.DEBUG)
        # SSH config
        ssh_config_path = os.path.join(SSH_DIR, "config")
        logger.debug("Writing SSH config to %s", ssh_config_path)
        with open(ssh_config_path, "w", encoding="utf-8") as f:
            for ip in all_ips:
                f.writelines(self._ssh_config_entry(ip))
        return self

    def test_ssh_connection(self, host):
        """Try a password-less SSH command on ``host`` and log the outcome."""
        ret = self.run_command(
            f"ssh -v -o PasswordAuthentication=no {host} hostname -I",
            level=logging.DEBUG,
        )
        if ret == 0:
            logger.debug("SSH connection to %s - OK", host)
        else:
            logger.debug("SSH connection to %s - FAILED", host)

    def touch_file(self, filename):
        """Creates an empty file with specific name on all the worker nodes."""
        for node_ip in self.node_ip_list:
            # The file may be either the stop file or the error file.
            logger.debug("Sending file %s to %s", filename, node_ip)
            self.run_command(
                f"ssh -v {node_ip} 'touch {filename}'",
                level=logging.DEBUG,
                check=True,
            )

    def save_deepspeed_env(self):
        """Saves the environment variables for multi node training.

        DeepSpeed performs multi-node training via SSH,
        the environment variables configured by the job runs are not propagated to the SSH session.
        DeepSpeed will load the environment variables from file for the SSH sessions.
        """
        with open(self.ENV_FILE, mode="w", encoding="utf-8") as f:
            for k, v in os.environ.items():
                # As of deepspeed==0.9.2, empty value or line break will cause parsing error,
                # as the .deepspeed_env file is parsed line by line.
                if not v or "\n" in v:
                    continue
                # Ignore variables that are node specific
                # The network interface name for each job run is a unique string, e.g. ens300f0v1604
                if k in ["NCCL_SOCKET_IFNAME", "GLOO_SOCKET_IFNAME", "JOB_RUN_OCID"]:
                    continue
                # Quote the value if it contains space
                # Environment variable containing space may not be exported correctly when using pdsh
                # https://github.com/microsoft/DeepSpeed/blob/v0.9.2/deepspeed/launcher/multinode_runner.py#L79
                if " " in v:
                    v = shlex.quote(v)

                f.write(f"{k}={v}\n")
            # The following are required for specifying the network interface to be used by NCCL/GLOO
            # The value should be the prefix of the expected network interface name
            # https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html#nccl-socket-ifname
            f.write("NCCL_SOCKET_IFNAME=ens\n")
            f.write("GLOO_SOCKET_IFNAME=ens\n")
        logger.debug("Environment variables saved to %s", self.ENV_FILE)
        self.run_command(f"cat {self.ENV_FILE}")

    def run_deepspeed_host(self, launch_args=None):
        """Prepares the host and launch the deepspeed training.

        Parameters
        ----------
        launch_args : list, optional
            Additional command line arguments, by default None.
            The deepspeed host file should be specified in the launch args.
            For "deepspeed": --hostfile
            For "accelerate launch": --deepspeed_hostfile

        Raises
        ------
        BaseException
            Whatever the timed command raises is re-raised after the error
            file has been sent to the worker nodes.
        """
        if self.node_count > 1:
            self.generate_key_pair().generate_hostfile()
            self.save_deepspeed_env()
            # Wait for nodes to be ready
            for run in self.node_runs:
                self.wait_for_log(run, LOG_PREFIX_PUBLIC_KEY)

            for node_ip in self.node_ip_list:
                self.run_command(
                    f"ssh-keyscan -H {node_ip} >> {SSH_DIR}/known_hosts",
                    level=logging.DEBUG,
                    check=True,
                )

        cmd = self.prepare_cmd(launch_args)
        # For DeepSpeed, we only need to run the cmd on the host
        try:
            self.time_cmd(cmd)
        except BaseException:
            # Deliberately broad (equivalent to a bare except): the workers
            # must be told about any failure, and the error is re-raised.
            # Caution: file will not be generated if job run is killed from the console.
            self.touch_file(self.ERROR_FILE)
            raise
        # Signal stop
        self.touch_file(self.STOP_FILE)

    def run_deepspeed_worker(self):
        """Keep the worker job run alive until the host signals completion.

        Exits the process with code 1 when the host touched the error file,
        and with code 2 when the host job run is no longer in an
        ACCEPTED / IN_PROGRESS / SUCCEEDED state.
        """
        self.fetch_host_public_key()
        # Keep the job run alive until host job run is finished.
        while not os.path.exists(self.STOP_FILE):
            time.sleep(60)
            # Stop the node if the host touched the error file.
            if os.path.exists(self.ERROR_FILE):
                logger.error("There is an error in the host job run.")
                sys.exit(1)
            # Stop the node if the host job run is CANCELLED or in unexpected state.
            try:
                self.host_job_run.sync()
            except oci.exceptions.TransientServiceError:
                # Ignore the transient error and try again next time.
                continue
            if self.host_job_run.status not in [
                "ACCEPTED",
                "IN_PROGRESS",
                "SUCCEEDED",
            ]:
                logger.info(
                    "Host job run status is %s. Stopping job run...",
                    self.host_job_run.status,
                )
                sys.exit(2)
        logger.info("Job finished successfully. Stopping job run...")

    def run(self):
        """Run as host (launching the training) or as worker (waiting for the host)."""
        if self.is_host:
            if self.node_count > 1:
                launch_args = [f"--hostfile={self.HOST_FILE}"]
            else:
                launch_args = []
            self.run_deepspeed_host(launch_args)
        else:
            self.run_deepspeed_worker()
693
+
694
+
695
class GenericRunner(TorchRunner, DeepSpeedRunner):
    """Runner for running command other than ``torchrun``, ``deepspeed`` or ``accelerate``."""

    LAUNCHER = ""

    def use_deepspeed(self) -> bool:
        """Indicate if DeepSpeed is used."""
        return bool(os.environ.get(CONST_ENV_DEEPSPEED))

    def set_env_var(self):
        """Set default environment variables.

        ``WORLD_SIZE``, ``MASTER_ADDR`` and ``MASTER_PORT`` are set only when
        the user has not defined them already.
        """
        # On nodes without GPU, count one process per node instead of zero,
        # consistent with how the other runners derive the process count.
        procs_per_node = self.gpu_count if self.gpu_count else 1
        defaults = {
            "WORLD_SIZE": self.node_count * procs_per_node,
            "MASTER_ADDR": self.host_ip,
            "MASTER_PORT": self.RDZV_PORT,
        }
        for k, v in defaults.items():
            if k not in os.environ:
                os.environ[k] = str(v)

    def run(self):
        """Runs the user's command.

        Note that for TorchRunner or DeepSpeedRunner,
        we automatically add arguments for some settings,
        like the number of nodes and the host node address.

        This generic runner does not modify the command specified by the user.
        User needs to make sure the command can work on all nodes.
        User may use the environment variables in the command.
        """
        self.set_env_var()
        if self.use_deepspeed():
            if self.is_host:
                self.run_deepspeed_host()
            else:
                self.run_deepspeed_worker()
        else:
            self.time_cmd(cmd=self.prepare_cmd(prefix=self.env_ld_preload()))
735
+
736
+
737
class AccelerateRunner(TorchRunner, DeepSpeedRunner):
    """Runner for HuggingFace Accelerate."""

    # accelerate launch will add main_process_port for deepspeed cmd even if it is not needed.
    # https://github.com/huggingface/accelerate/blob/70920895e80f78d96d8f91e0beeb3ebdb8e5e5d6/src/accelerate/utils/launch.py#L233
    DEFAULT_ARGS = [
        "num_processes",
        "num_machines",
        "machine_rank",
        "main_process_ip",
        "main_process_port",
    ]
    TORCHRUN_ARGS = []
    LAUNCHER = "accelerate launch"

    def __init__(self, code_dir: str = driver_utils.DEFAULT_CODE_DIR) -> None:
        """Initialize the runner and the default values of ``DEFAULT_ARGS``."""
        super().__init__(code_dir)
        # For "accelerate launch", only one of the following options can be used at one time
        # `--cpu`, `--multi_gpu`, `--tpu`, `--use_deepspeed`, `--use_fsdp`.
        # When a config file is not provided,
        # --multi_gpu will be set automatically if there is more than 1 GPU
        self.num_machines = self.node_count
        self.machine_rank = os.environ["NODE_RANK"]
        # Total number of processes across all nodes
        # Here we assume all nodes are having the same shape
        self.num_processes = (self.gpu_count if self.gpu_count else 1) * self.node_count

        self.main_process_port = self.RDZV_PORT
        # Host IP is not ready at initialization
        self.main_process_ip = None

    def use_deepspeed(self):
        """Indicate if DeepSpeed is used."""
        # Accelerate support using DeepSpeed by adding the "--use_deepspeed" argument.
        return bool(
            os.environ.get(CONST_ENV_DEEPSPEED)
            or self.launch_cmd_contains("use_deepspeed")
        )

    def accelerate_args(self):
        """Gets the default arguments for the accelerate command.

        The value of the default arguments are assigned in ``__init__()``.
        An attribute that is exactly ``True`` becomes a bare flag, any other
        truthy value becomes ``--name value``, and falsy values are skipped.
        """
        args = []
        for arg in self.DEFAULT_ARGS:
            arg_val = getattr(self, arg, None)
            logger.debug("%s=%s", arg, arg_val)
            if arg_val is True:
                args.append(f"--{arg}")
            elif arg_val:
                args.extend([f"--{arg}", str(arg_val)])
        return args

    def run_with_torchrun(self):
        """Runs the job with torchrun."""
        launch_args = self.accelerate_args()
        for arg in self.TORCHRUN_ARGS:
            if not self.launch_cmd_contains(arg):
                launch_args.extend([f"--{arg}", f"{getattr(self, arg)}"])
        cmd = self.prepare_cmd(launch_args, prefix=self.env_ld_preload())
        self.time_cmd(cmd=cmd)

    def run_with_deepspeed(self):
        """Runs the job with DeepSpeed."""
        if self.is_host:
            launch_args = self.accelerate_args()
            # Compare node_count rather than num_machines: run() resets
            # num_machines to None when the user passes it in the launch
            # command, and ``None > 1`` would raise TypeError.
            if self.node_count > 1:
                launch_args.append(f"--deepspeed_hostfile={self.HOST_FILE}")
            self.run_deepspeed_host(launch_args)
        else:
            self.run_deepspeed_worker()

    def run(self):
        """Run the user's command with DeepSpeed or torchrun.

        Default arguments that the user already provided in the launch
        command are cleared so that they are not added a second time.
        """
        self.main_process_ip = self.host_ip
        # Check if any default argument is provided by the user
        for arg in self.DEFAULT_ARGS:
            if self.launch_cmd_contains(arg):
                logger.debug("%s found in command.", arg)
                setattr(self, arg, None)
        if self.use_deepspeed():
            self.run_with_deepspeed()
        else:
            self.run_with_torchrun()
822
+
823
+
824
def main():
    """Select the runner matching the launch command and execute the job.

    ``torchrun`` is used when no launch command is provided. Commands that
    do not start with ``torchrun``, ``deepspeed`` or ``accelerate`` are
    handled by the generic runner.
    """
    launch_cmd = os.environ.get(CONST_ENV_LAUNCH_CMD)

    if not launch_cmd:
        # Use torchrun as default if launch cmd is not provided
        runner_class = TorchRunner
    else:
        runner_class = GenericRunner
        known_launchers = (
            ("torchrun ", TorchRunner),
            ("deepspeed ", DeepSpeedRunner),
            ("accelerate ", AccelerateRunner),
        )
        for cmd_prefix, candidate in known_launchers:
            if launch_cmd.startswith(cmd_prefix):
                runner_class = candidate
                break

    runner: Runner = runner_class()
    prepared = runner.fetch_code().set_working_dir().setup_python_path()
    prepared.install_dependencies()

    driver_utils.OCIHelper.copy_inputs()

    ready_runner = runner.wait_for_host_ip_address()
    ready_runner.run()
    driver_utils.OCIHelper.copy_outputs()
844
+
845
+
846
if __name__ == "__main__":
    # GPU metrics are collected in a background daemon process, and only when
    # the user defined METRIC_NAMESPACE and at least one GPU is available.
    if METRIC_NAMESPACE and torch.cuda.device_count():
        metrics_process = multiprocessing.Process(target=collect_metrics, daemon=True)
        metrics_process.start()
    main()