oracle-ads 2.13.9rc0__py3-none-any.whl → 2.13.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (858):
  1. ads/aqua/__init__.py +40 -0
  2. ads/aqua/app.py +507 -0
  3. ads/aqua/cli.py +96 -0
  4. ads/aqua/client/__init__.py +3 -0
  5. ads/aqua/client/client.py +836 -0
  6. ads/aqua/client/openai_client.py +305 -0
  7. ads/aqua/common/__init__.py +5 -0
  8. ads/aqua/common/decorator.py +125 -0
  9. ads/aqua/common/entities.py +274 -0
  10. ads/aqua/common/enums.py +134 -0
  11. ads/aqua/common/errors.py +109 -0
  12. ads/aqua/common/utils.py +1295 -0
  13. ads/aqua/config/__init__.py +4 -0
  14. ads/aqua/config/container_config.py +246 -0
  15. ads/aqua/config/evaluation/__init__.py +4 -0
  16. ads/aqua/config/evaluation/evaluation_service_config.py +147 -0
  17. ads/aqua/config/utils/__init__.py +4 -0
  18. ads/aqua/config/utils/serializer.py +339 -0
  19. ads/aqua/constants.py +116 -0
  20. ads/aqua/data.py +14 -0
  21. ads/aqua/dummy_data/icon.txt +1 -0
  22. ads/aqua/dummy_data/oci_model_deployments.json +56 -0
  23. ads/aqua/dummy_data/oci_models.json +1 -0
  24. ads/aqua/dummy_data/readme.md +26 -0
  25. ads/aqua/evaluation/__init__.py +8 -0
  26. ads/aqua/evaluation/constants.py +53 -0
  27. ads/aqua/evaluation/entities.py +186 -0
  28. ads/aqua/evaluation/errors.py +70 -0
  29. ads/aqua/evaluation/evaluation.py +1814 -0
  30. ads/aqua/extension/__init__.py +42 -0
  31. ads/aqua/extension/aqua_ws_msg_handler.py +76 -0
  32. ads/aqua/extension/base_handler.py +90 -0
  33. ads/aqua/extension/common_handler.py +121 -0
  34. ads/aqua/extension/common_ws_msg_handler.py +36 -0
  35. ads/aqua/extension/deployment_handler.py +381 -0
  36. ads/aqua/extension/deployment_ws_msg_handler.py +54 -0
  37. ads/aqua/extension/errors.py +30 -0
  38. ads/aqua/extension/evaluation_handler.py +129 -0
  39. ads/aqua/extension/evaluation_ws_msg_handler.py +61 -0
  40. ads/aqua/extension/finetune_handler.py +96 -0
  41. ads/aqua/extension/model_handler.py +390 -0
  42. ads/aqua/extension/models/__init__.py +0 -0
  43. ads/aqua/extension/models/ws_models.py +145 -0
  44. ads/aqua/extension/models_ws_msg_handler.py +50 -0
  45. ads/aqua/extension/ui_handler.py +300 -0
  46. ads/aqua/extension/ui_websocket_handler.py +130 -0
  47. ads/aqua/extension/utils.py +133 -0
  48. ads/aqua/finetuning/__init__.py +7 -0
  49. ads/aqua/finetuning/constants.py +23 -0
  50. ads/aqua/finetuning/entities.py +181 -0
  51. ads/aqua/finetuning/finetuning.py +749 -0
  52. ads/aqua/model/__init__.py +8 -0
  53. ads/aqua/model/constants.py +60 -0
  54. ads/aqua/model/entities.py +385 -0
  55. ads/aqua/model/enums.py +32 -0
  56. ads/aqua/model/model.py +2134 -0
  57. ads/aqua/model/utils.py +52 -0
  58. ads/aqua/modeldeployment/__init__.py +6 -0
  59. ads/aqua/modeldeployment/constants.py +10 -0
  60. ads/aqua/modeldeployment/deployment.py +1315 -0
  61. ads/aqua/modeldeployment/entities.py +653 -0
  62. ads/aqua/modeldeployment/utils.py +543 -0
  63. ads/aqua/resources/gpu_shapes_index.json +94 -0
  64. ads/aqua/server/__init__.py +4 -0
  65. ads/aqua/server/__main__.py +24 -0
  66. ads/aqua/server/app.py +47 -0
  67. ads/aqua/server/aqua_spec.yml +1291 -0
  68. ads/aqua/training/__init__.py +4 -0
  69. ads/aqua/training/exceptions.py +476 -0
  70. ads/aqua/ui.py +519 -0
  71. ads/automl/__init__.py +9 -0
  72. ads/automl/driver.py +330 -0
  73. ads/automl/provider.py +975 -0
  74. ads/bds/__init__.py +5 -0
  75. ads/bds/auth.py +127 -0
  76. ads/bds/big_data_service.py +255 -0
  77. ads/catalog/__init__.py +19 -0
  78. ads/catalog/model.py +1576 -0
  79. ads/catalog/notebook.py +461 -0
  80. ads/catalog/project.py +468 -0
  81. ads/catalog/summary.py +178 -0
  82. ads/common/__init__.py +11 -0
  83. ads/common/analyzer.py +65 -0
  84. ads/common/artifact/.model-ignore +63 -0
  85. ads/common/artifact/__init__.py +10 -0
  86. ads/common/auth.py +1122 -0
  87. ads/common/card_identifier.py +83 -0
  88. ads/common/config.py +647 -0
  89. ads/common/data.py +165 -0
  90. ads/common/decorator/__init__.py +9 -0
  91. ads/common/decorator/argument_to_case.py +88 -0
  92. ads/common/decorator/deprecate.py +69 -0
  93. ads/common/decorator/require_nonempty_arg.py +65 -0
  94. ads/common/decorator/runtime_dependency.py +178 -0
  95. ads/common/decorator/threaded.py +97 -0
  96. ads/common/decorator/utils.py +35 -0
  97. ads/common/dsc_file_system.py +303 -0
  98. ads/common/error.py +14 -0
  99. ads/common/extended_enum.py +81 -0
  100. ads/common/function/__init__.py +5 -0
  101. ads/common/function/fn_util.py +142 -0
  102. ads/common/function/func_conf.yaml +25 -0
  103. ads/common/ipython.py +76 -0
  104. ads/common/model.py +679 -0
  105. ads/common/model_artifact.py +1759 -0
  106. ads/common/model_artifact_schema.json +107 -0
  107. ads/common/model_export_util.py +664 -0
  108. ads/common/model_metadata.py +24 -0
  109. ads/common/object_storage_details.py +296 -0
  110. ads/common/oci_client.py +179 -0
  111. ads/common/oci_datascience.py +46 -0
  112. ads/common/oci_logging.py +1144 -0
  113. ads/common/oci_mixin.py +957 -0
  114. ads/common/oci_resource.py +136 -0
  115. ads/common/serializer.py +559 -0
  116. ads/common/utils.py +1852 -0
  117. ads/common/word_lists.py +1491 -0
  118. ads/common/work_request.py +189 -0
  119. ads/config.py +1 -0
  120. ads/data_labeling/__init__.py +13 -0
  121. ads/data_labeling/boundingbox.py +253 -0
  122. ads/data_labeling/constants.py +47 -0
  123. ads/data_labeling/data_labeling_service.py +244 -0
  124. ads/data_labeling/interface/__init__.py +5 -0
  125. ads/data_labeling/interface/loader.py +16 -0
  126. ads/data_labeling/interface/parser.py +16 -0
  127. ads/data_labeling/interface/reader.py +23 -0
  128. ads/data_labeling/loader/__init__.py +5 -0
  129. ads/data_labeling/loader/file_loader.py +241 -0
  130. ads/data_labeling/metadata.py +110 -0
  131. ads/data_labeling/mixin/__init__.py +5 -0
  132. ads/data_labeling/mixin/data_labeling.py +232 -0
  133. ads/data_labeling/ner.py +129 -0
  134. ads/data_labeling/parser/__init__.py +5 -0
  135. ads/data_labeling/parser/dls_record_parser.py +388 -0
  136. ads/data_labeling/parser/export_metadata_parser.py +94 -0
  137. ads/data_labeling/parser/export_record_parser.py +473 -0
  138. ads/data_labeling/reader/__init__.py +5 -0
  139. ads/data_labeling/reader/dataset_reader.py +574 -0
  140. ads/data_labeling/reader/dls_record_reader.py +121 -0
  141. ads/data_labeling/reader/export_record_reader.py +62 -0
  142. ads/data_labeling/reader/jsonl_reader.py +75 -0
  143. ads/data_labeling/reader/metadata_reader.py +203 -0
  144. ads/data_labeling/reader/record_reader.py +263 -0
  145. ads/data_labeling/record.py +52 -0
  146. ads/data_labeling/visualizer/__init__.py +5 -0
  147. ads/data_labeling/visualizer/image_visualizer.py +525 -0
  148. ads/data_labeling/visualizer/text_visualizer.py +357 -0
  149. ads/database/__init__.py +5 -0
  150. ads/database/connection.py +338 -0
  151. ads/dataset/__init__.py +10 -0
  152. ads/dataset/capabilities.md +51 -0
  153. ads/dataset/classification_dataset.py +339 -0
  154. ads/dataset/correlation.py +226 -0
  155. ads/dataset/correlation_plot.py +563 -0
  156. ads/dataset/dask_series.py +173 -0
  157. ads/dataset/dataframe_transformer.py +110 -0
  158. ads/dataset/dataset.py +1979 -0
  159. ads/dataset/dataset_browser.py +360 -0
  160. ads/dataset/dataset_with_target.py +995 -0
  161. ads/dataset/exception.py +25 -0
  162. ads/dataset/factory.py +987 -0
  163. ads/dataset/feature_engineering_transformer.py +35 -0
  164. ads/dataset/feature_selection.py +107 -0
  165. ads/dataset/forecasting_dataset.py +26 -0
  166. ads/dataset/helper.py +1450 -0
  167. ads/dataset/label_encoder.py +99 -0
  168. ads/dataset/mixin/__init__.py +5 -0
  169. ads/dataset/mixin/dataset_accessor.py +134 -0
  170. ads/dataset/pipeline.py +58 -0
  171. ads/dataset/plot.py +710 -0
  172. ads/dataset/progress.py +86 -0
  173. ads/dataset/recommendation.py +297 -0
  174. ads/dataset/recommendation_transformer.py +502 -0
  175. ads/dataset/regression_dataset.py +14 -0
  176. ads/dataset/sampled_dataset.py +1050 -0
  177. ads/dataset/target.py +98 -0
  178. ads/dataset/timeseries.py +18 -0
  179. ads/dbmixin/__init__.py +5 -0
  180. ads/dbmixin/db_pandas_accessor.py +153 -0
  181. ads/environment/__init__.py +9 -0
  182. ads/environment/ml_runtime.py +66 -0
  183. ads/evaluations/README.md +14 -0
  184. ads/evaluations/__init__.py +109 -0
  185. ads/evaluations/evaluation_plot.py +983 -0
  186. ads/evaluations/evaluator.py +1334 -0
  187. ads/evaluations/statistical_metrics.py +543 -0
  188. ads/experiments/__init__.py +9 -0
  189. ads/experiments/capabilities.md +0 -0
  190. ads/explanations/__init__.py +21 -0
  191. ads/explanations/base_explainer.py +142 -0
  192. ads/explanations/capabilities.md +83 -0
  193. ads/explanations/explainer.py +190 -0
  194. ads/explanations/mlx_global_explainer.py +1050 -0
  195. ads/explanations/mlx_interface.py +386 -0
  196. ads/explanations/mlx_local_explainer.py +287 -0
  197. ads/explanations/mlx_whatif_explainer.py +201 -0
  198. ads/feature_engineering/__init__.py +20 -0
  199. ads/feature_engineering/accessor/__init__.py +5 -0
  200. ads/feature_engineering/accessor/dataframe_accessor.py +535 -0
  201. ads/feature_engineering/accessor/mixin/__init__.py +5 -0
  202. ads/feature_engineering/accessor/mixin/correlation.py +166 -0
  203. ads/feature_engineering/accessor/mixin/eda_mixin.py +266 -0
  204. ads/feature_engineering/accessor/mixin/eda_mixin_series.py +85 -0
  205. ads/feature_engineering/accessor/mixin/feature_types_mixin.py +211 -0
  206. ads/feature_engineering/accessor/mixin/utils.py +65 -0
  207. ads/feature_engineering/accessor/series_accessor.py +431 -0
  208. ads/feature_engineering/adsimage/__init__.py +5 -0
  209. ads/feature_engineering/adsimage/image.py +192 -0
  210. ads/feature_engineering/adsimage/image_reader.py +170 -0
  211. ads/feature_engineering/adsimage/interface/__init__.py +5 -0
  212. ads/feature_engineering/adsimage/interface/reader.py +19 -0
  213. ads/feature_engineering/adsstring/__init__.py +7 -0
  214. ads/feature_engineering/adsstring/oci_language/__init__.py +8 -0
  215. ads/feature_engineering/adsstring/string/__init__.py +8 -0
  216. ads/feature_engineering/data_schema.json +57 -0
  217. ads/feature_engineering/dataset/__init__.py +5 -0
  218. ads/feature_engineering/dataset/zip_code_data.py +42062 -0
  219. ads/feature_engineering/exceptions.py +40 -0
  220. ads/feature_engineering/feature_type/__init__.py +133 -0
  221. ads/feature_engineering/feature_type/address.py +184 -0
  222. ads/feature_engineering/feature_type/adsstring/__init__.py +5 -0
  223. ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +164 -0
  224. ads/feature_engineering/feature_type/adsstring/oci_language.py +93 -0
  225. ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +5 -0
  226. ads/feature_engineering/feature_type/adsstring/parsers/base.py +47 -0
  227. ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +96 -0
  228. ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +221 -0
  229. ads/feature_engineering/feature_type/adsstring/string.py +258 -0
  230. ads/feature_engineering/feature_type/base.py +58 -0
  231. ads/feature_engineering/feature_type/boolean.py +183 -0
  232. ads/feature_engineering/feature_type/category.py +146 -0
  233. ads/feature_engineering/feature_type/constant.py +137 -0
  234. ads/feature_engineering/feature_type/continuous.py +151 -0
  235. ads/feature_engineering/feature_type/creditcard.py +314 -0
  236. ads/feature_engineering/feature_type/datetime.py +190 -0
  237. ads/feature_engineering/feature_type/discrete.py +134 -0
  238. ads/feature_engineering/feature_type/document.py +43 -0
  239. ads/feature_engineering/feature_type/gis.py +251 -0
  240. ads/feature_engineering/feature_type/handler/__init__.py +5 -0
  241. ads/feature_engineering/feature_type/handler/feature_validator.py +524 -0
  242. ads/feature_engineering/feature_type/handler/feature_warning.py +319 -0
  243. ads/feature_engineering/feature_type/handler/warnings.py +128 -0
  244. ads/feature_engineering/feature_type/integer.py +142 -0
  245. ads/feature_engineering/feature_type/ip_address.py +144 -0
  246. ads/feature_engineering/feature_type/ip_address_v4.py +138 -0
  247. ads/feature_engineering/feature_type/ip_address_v6.py +138 -0
  248. ads/feature_engineering/feature_type/lat_long.py +256 -0
  249. ads/feature_engineering/feature_type/object.py +43 -0
  250. ads/feature_engineering/feature_type/ordinal.py +132 -0
  251. ads/feature_engineering/feature_type/phone_number.py +135 -0
  252. ads/feature_engineering/feature_type/string.py +171 -0
  253. ads/feature_engineering/feature_type/text.py +93 -0
  254. ads/feature_engineering/feature_type/unknown.py +43 -0
  255. ads/feature_engineering/feature_type/zip_code.py +164 -0
  256. ads/feature_engineering/feature_type_manager.py +406 -0
  257. ads/feature_engineering/schema.py +795 -0
  258. ads/feature_engineering/utils.py +245 -0
  259. ads/feature_store/.readthedocs.yaml +19 -0
  260. ads/feature_store/README.md +65 -0
  261. ads/feature_store/__init__.py +9 -0
  262. ads/feature_store/common/__init__.py +0 -0
  263. ads/feature_store/common/enums.py +339 -0
  264. ads/feature_store/common/exceptions.py +18 -0
  265. ads/feature_store/common/spark_session_singleton.py +125 -0
  266. ads/feature_store/common/utils/__init__.py +0 -0
  267. ads/feature_store/common/utils/base64_encoder_decoder.py +72 -0
  268. ads/feature_store/common/utils/feature_schema_mapper.py +283 -0
  269. ads/feature_store/common/utils/transformation_utils.py +82 -0
  270. ads/feature_store/common/utils/utility.py +403 -0
  271. ads/feature_store/data_validation/__init__.py +0 -0
  272. ads/feature_store/data_validation/great_expectation.py +129 -0
  273. ads/feature_store/dataset.py +1230 -0
  274. ads/feature_store/dataset_job.py +530 -0
  275. ads/feature_store/docs/Dockerfile +7 -0
  276. ads/feature_store/docs/Makefile +44 -0
  277. ads/feature_store/docs/conf.py +28 -0
  278. ads/feature_store/docs/requirements.txt +14 -0
  279. ads/feature_store/docs/source/ads.feature_store.query.rst +20 -0
  280. ads/feature_store/docs/source/cicd.rst +137 -0
  281. ads/feature_store/docs/source/conf.py +86 -0
  282. ads/feature_store/docs/source/data_versioning.rst +33 -0
  283. ads/feature_store/docs/source/dataset.rst +388 -0
  284. ads/feature_store/docs/source/dataset_job.rst +27 -0
  285. ads/feature_store/docs/source/demo.rst +70 -0
  286. ads/feature_store/docs/source/entity.rst +78 -0
  287. ads/feature_store/docs/source/feature_group.rst +624 -0
  288. ads/feature_store/docs/source/feature_group_job.rst +29 -0
  289. ads/feature_store/docs/source/feature_store.rst +122 -0
  290. ads/feature_store/docs/source/feature_store_class.rst +123 -0
  291. ads/feature_store/docs/source/feature_validation.rst +66 -0
  292. ads/feature_store/docs/source/figures/cicd.png +0 -0
  293. ads/feature_store/docs/source/figures/data_validation.png +0 -0
  294. ads/feature_store/docs/source/figures/data_versioning.png +0 -0
  295. ads/feature_store/docs/source/figures/dataset.gif +0 -0
  296. ads/feature_store/docs/source/figures/dataset.png +0 -0
  297. ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
  298. ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
  299. ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
  300. ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
  301. ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
  302. ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
  303. ads/feature_store/docs/source/figures/entity.png +0 -0
  304. ads/feature_store/docs/source/figures/feature_group.png +0 -0
  305. ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
  306. ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
  307. ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
  308. ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
  309. ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
  310. ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
  311. ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
  312. ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
  313. ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
  314. ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
  315. ads/feature_store/docs/source/figures/overview.png +0 -0
  316. ads/feature_store/docs/source/figures/resource_manager.png +0 -0
  317. ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
  318. ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
  319. ads/feature_store/docs/source/figures/stats_1.png +0 -0
  320. ads/feature_store/docs/source/figures/stats_2.png +0 -0
  321. ads/feature_store/docs/source/figures/stats_d.png +0 -0
  322. ads/feature_store/docs/source/figures/stats_fg.png +0 -0
  323. ads/feature_store/docs/source/figures/transformation.png +0 -0
  324. ads/feature_store/docs/source/figures/transformations.gif +0 -0
  325. ads/feature_store/docs/source/figures/validation.png +0 -0
  326. ads/feature_store/docs/source/figures/validation_fg.png +0 -0
  327. ads/feature_store/docs/source/figures/validation_results.png +0 -0
  328. ads/feature_store/docs/source/figures/validation_summary.png +0 -0
  329. ads/feature_store/docs/source/index.rst +81 -0
  330. ads/feature_store/docs/source/module.rst +8 -0
  331. ads/feature_store/docs/source/notebook.rst +94 -0
  332. ads/feature_store/docs/source/overview.rst +47 -0
  333. ads/feature_store/docs/source/quickstart.rst +176 -0
  334. ads/feature_store/docs/source/release_notes.rst +194 -0
  335. ads/feature_store/docs/source/setup_feature_store.rst +81 -0
  336. ads/feature_store/docs/source/statistics.rst +58 -0
  337. ads/feature_store/docs/source/transformation.rst +199 -0
  338. ads/feature_store/docs/source/ui.rst +65 -0
  339. ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +66 -0
  340. ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +192 -0
  341. ads/feature_store/docs/source/user_guides.setup.terraform.rst +338 -0
  342. ads/feature_store/entity.py +718 -0
  343. ads/feature_store/execution_strategy/__init__.py +0 -0
  344. ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
  345. ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +375 -0
  346. ads/feature_store/execution_strategy/engine/__init__.py +0 -0
  347. ads/feature_store/execution_strategy/engine/spark_engine.py +316 -0
  348. ads/feature_store/execution_strategy/execution_strategy.py +113 -0
  349. ads/feature_store/execution_strategy/execution_strategy_provider.py +47 -0
  350. ads/feature_store/execution_strategy/spark/__init__.py +0 -0
  351. ads/feature_store/execution_strategy/spark/spark_execution.py +618 -0
  352. ads/feature_store/feature.py +192 -0
  353. ads/feature_store/feature_group.py +1494 -0
  354. ads/feature_store/feature_group_expectation.py +346 -0
  355. ads/feature_store/feature_group_job.py +602 -0
  356. ads/feature_store/feature_lineage/__init__.py +0 -0
  357. ads/feature_store/feature_lineage/graphviz_service.py +180 -0
  358. ads/feature_store/feature_option_details.py +50 -0
  359. ads/feature_store/feature_statistics/__init__.py +0 -0
  360. ads/feature_store/feature_statistics/statistics_service.py +99 -0
  361. ads/feature_store/feature_store.py +699 -0
  362. ads/feature_store/feature_store_registrar.py +518 -0
  363. ads/feature_store/input_feature_detail.py +149 -0
  364. ads/feature_store/mixin/__init__.py +4 -0
  365. ads/feature_store/mixin/oci_feature_store.py +145 -0
  366. ads/feature_store/model_details.py +73 -0
  367. ads/feature_store/query/__init__.py +0 -0
  368. ads/feature_store/query/filter.py +266 -0
  369. ads/feature_store/query/generator/__init__.py +0 -0
  370. ads/feature_store/query/generator/query_generator.py +298 -0
  371. ads/feature_store/query/join.py +161 -0
  372. ads/feature_store/query/query.py +403 -0
  373. ads/feature_store/query/validator/__init__.py +0 -0
  374. ads/feature_store/query/validator/query_validator.py +57 -0
  375. ads/feature_store/response/__init__.py +0 -0
  376. ads/feature_store/response/response_builder.py +68 -0
  377. ads/feature_store/service/__init__.py +0 -0
  378. ads/feature_store/service/oci_dataset.py +139 -0
  379. ads/feature_store/service/oci_dataset_job.py +199 -0
  380. ads/feature_store/service/oci_entity.py +125 -0
  381. ads/feature_store/service/oci_feature_group.py +164 -0
  382. ads/feature_store/service/oci_feature_group_job.py +214 -0
  383. ads/feature_store/service/oci_feature_store.py +182 -0
  384. ads/feature_store/service/oci_lineage.py +87 -0
  385. ads/feature_store/service/oci_transformation.py +104 -0
  386. ads/feature_store/statistics/__init__.py +0 -0
  387. ads/feature_store/statistics/abs_feature_value.py +49 -0
  388. ads/feature_store/statistics/charts/__init__.py +0 -0
  389. ads/feature_store/statistics/charts/abstract_feature_plot.py +37 -0
  390. ads/feature_store/statistics/charts/box_plot.py +148 -0
  391. ads/feature_store/statistics/charts/frequency_distribution.py +65 -0
  392. ads/feature_store/statistics/charts/probability_distribution.py +68 -0
  393. ads/feature_store/statistics/charts/top_k_frequent_elements.py +98 -0
  394. ads/feature_store/statistics/feature_stat.py +126 -0
  395. ads/feature_store/statistics/generic_feature_value.py +33 -0
  396. ads/feature_store/statistics/statistics.py +41 -0
  397. ads/feature_store/statistics_config.py +101 -0
  398. ads/feature_store/templates/feature_store_template.yaml +45 -0
  399. ads/feature_store/transformation.py +499 -0
  400. ads/feature_store/validation_output.py +57 -0
  401. ads/hpo/__init__.py +9 -0
  402. ads/hpo/_imports.py +91 -0
  403. ads/hpo/ads_search_space.py +439 -0
  404. ads/hpo/distributions.py +325 -0
  405. ads/hpo/objective.py +280 -0
  406. ads/hpo/search_cv.py +1657 -0
  407. ads/hpo/stopping_criterion.py +75 -0
  408. ads/hpo/tuner_artifact.py +413 -0
  409. ads/hpo/utils.py +91 -0
  410. ads/hpo/validation.py +140 -0
  411. ads/hpo/visualization/__init__.py +5 -0
  412. ads/hpo/visualization/_contour.py +23 -0
  413. ads/hpo/visualization/_edf.py +20 -0
  414. ads/hpo/visualization/_intermediate_values.py +21 -0
  415. ads/hpo/visualization/_optimization_history.py +25 -0
  416. ads/hpo/visualization/_parallel_coordinate.py +169 -0
  417. ads/hpo/visualization/_param_importances.py +26 -0
  418. ads/jobs/__init__.py +53 -0
  419. ads/jobs/ads_job.py +663 -0
  420. ads/jobs/builders/__init__.py +5 -0
  421. ads/jobs/builders/base.py +156 -0
  422. ads/jobs/builders/infrastructure/__init__.py +6 -0
  423. ads/jobs/builders/infrastructure/base.py +165 -0
  424. ads/jobs/builders/infrastructure/dataflow.py +1252 -0
  425. ads/jobs/builders/infrastructure/dsc_job.py +1894 -0
  426. ads/jobs/builders/infrastructure/dsc_job_runtime.py +1233 -0
  427. ads/jobs/builders/infrastructure/utils.py +65 -0
  428. ads/jobs/builders/runtimes/__init__.py +5 -0
  429. ads/jobs/builders/runtimes/artifact.py +338 -0
  430. ads/jobs/builders/runtimes/base.py +325 -0
  431. ads/jobs/builders/runtimes/container_runtime.py +242 -0
  432. ads/jobs/builders/runtimes/python_runtime.py +1016 -0
  433. ads/jobs/builders/runtimes/pytorch_runtime.py +204 -0
  434. ads/jobs/cli.py +104 -0
  435. ads/jobs/env_var_parser.py +131 -0
  436. ads/jobs/extension.py +160 -0
  437. ads/jobs/schema/__init__.py +5 -0
  438. ads/jobs/schema/infrastructure_schema.json +116 -0
  439. ads/jobs/schema/job_schema.json +42 -0
  440. ads/jobs/schema/runtime_schema.json +183 -0
  441. ads/jobs/schema/validator.py +141 -0
  442. ads/jobs/serializer.py +296 -0
  443. ads/jobs/templates/__init__.py +5 -0
  444. ads/jobs/templates/container.py +6 -0
  445. ads/jobs/templates/driver_notebook.py +177 -0
  446. ads/jobs/templates/driver_oci.py +500 -0
  447. ads/jobs/templates/driver_python.py +48 -0
  448. ads/jobs/templates/driver_pytorch.py +852 -0
  449. ads/jobs/templates/driver_utils.py +615 -0
  450. ads/jobs/templates/hostname_from_env.c +55 -0
  451. ads/jobs/templates/oci_metrics.py +181 -0
  452. ads/jobs/utils.py +104 -0
  453. ads/llm/__init__.py +28 -0
  454. ads/llm/autogen/__init__.py +2 -0
  455. ads/llm/autogen/constants.py +15 -0
  456. ads/llm/autogen/reports/__init__.py +2 -0
  457. ads/llm/autogen/reports/base.py +67 -0
  458. ads/llm/autogen/reports/data.py +103 -0
  459. ads/llm/autogen/reports/session.py +526 -0
  460. ads/llm/autogen/reports/templates/chat_box.html +13 -0
  461. ads/llm/autogen/reports/templates/chat_box_lt.html +5 -0
  462. ads/llm/autogen/reports/templates/chat_box_rt.html +6 -0
  463. ads/llm/autogen/reports/utils.py +56 -0
  464. ads/llm/autogen/v02/__init__.py +4 -0
  465. ads/llm/autogen/v02/client.py +295 -0
  466. ads/llm/autogen/v02/log_handlers/__init__.py +2 -0
  467. ads/llm/autogen/v02/log_handlers/oci_file_handler.py +83 -0
  468. ads/llm/autogen/v02/loggers/__init__.py +6 -0
  469. ads/llm/autogen/v02/loggers/metric_logger.py +320 -0
  470. ads/llm/autogen/v02/loggers/session_logger.py +580 -0
  471. ads/llm/autogen/v02/loggers/utils.py +86 -0
  472. ads/llm/autogen/v02/runtime_logging.py +163 -0
  473. ads/llm/chain.py +268 -0
  474. ads/llm/chat_template.py +31 -0
  475. ads/llm/deploy.py +63 -0
  476. ads/llm/guardrails/__init__.py +5 -0
  477. ads/llm/guardrails/base.py +442 -0
  478. ads/llm/guardrails/huggingface.py +44 -0
  479. ads/llm/langchain/__init__.py +5 -0
  480. ads/llm/langchain/plugins/__init__.py +5 -0
  481. ads/llm/langchain/plugins/chat_models/__init__.py +5 -0
  482. ads/llm/langchain/plugins/chat_models/oci_data_science.py +1027 -0
  483. ads/llm/langchain/plugins/embeddings/__init__.py +4 -0
  484. ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +184 -0
  485. ads/llm/langchain/plugins/llms/__init__.py +5 -0
  486. ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +979 -0
  487. ads/llm/requirements.txt +3 -0
  488. ads/llm/serialize.py +219 -0
  489. ads/llm/serializers/__init__.py +0 -0
  490. ads/llm/serializers/retrieval_qa.py +153 -0
  491. ads/llm/serializers/runnable_parallel.py +27 -0
  492. ads/llm/templates/score_chain.jinja2 +155 -0
  493. ads/llm/templates/tool_chat_template_hermes.jinja +130 -0
  494. ads/llm/templates/tool_chat_template_mistral_parallel.jinja +94 -0
  495. ads/model/__init__.py +52 -0
  496. ads/model/artifact.py +573 -0
  497. ads/model/artifact_downloader.py +254 -0
  498. ads/model/artifact_uploader.py +267 -0
  499. ads/model/base_properties.py +238 -0
  500. ads/model/common/.model-ignore +66 -0
  501. ads/model/common/__init__.py +5 -0
  502. ads/model/common/utils.py +142 -0
  503. ads/model/datascience_model.py +2635 -0
  504. ads/model/deployment/__init__.py +20 -0
  505. ads/model/deployment/common/__init__.py +5 -0
  506. ads/model/deployment/common/utils.py +308 -0
  507. ads/model/deployment/model_deployer.py +466 -0
  508. ads/model/deployment/model_deployment.py +1846 -0
  509. ads/model/deployment/model_deployment_infrastructure.py +671 -0
  510. ads/model/deployment/model_deployment_properties.py +493 -0
  511. ads/model/deployment/model_deployment_runtime.py +838 -0
  512. ads/model/extractor/__init__.py +5 -0
  513. ads/model/extractor/automl_extractor.py +74 -0
  514. ads/model/extractor/embedding_onnx_extractor.py +80 -0
  515. ads/model/extractor/huggingface_extractor.py +88 -0
  516. ads/model/extractor/keras_extractor.py +84 -0
  517. ads/model/extractor/lightgbm_extractor.py +93 -0
  518. ads/model/extractor/model_info_extractor.py +114 -0
  519. ads/model/extractor/model_info_extractor_factory.py +105 -0
  520. ads/model/extractor/pytorch_extractor.py +87 -0
  521. ads/model/extractor/sklearn_extractor.py +112 -0
  522. ads/model/extractor/spark_extractor.py +89 -0
  523. ads/model/extractor/tensorflow_extractor.py +85 -0
  524. ads/model/extractor/xgboost_extractor.py +94 -0
  525. ads/model/framework/__init__.py +5 -0
  526. ads/model/framework/automl_model.py +178 -0
  527. ads/model/framework/embedding_onnx_model.py +438 -0
  528. ads/model/framework/huggingface_model.py +399 -0
  529. ads/model/framework/lightgbm_model.py +266 -0
  530. ads/model/framework/pytorch_model.py +266 -0
  531. ads/model/framework/sklearn_model.py +250 -0
  532. ads/model/framework/spark_model.py +326 -0
  533. ads/model/framework/tensorflow_model.py +254 -0
  534. ads/model/framework/xgboost_model.py +258 -0
  535. ads/model/generic_model.py +3518 -0
  536. ads/model/model_artifact_boilerplate/README.md +381 -0
  537. ads/model/model_artifact_boilerplate/__init__.py +5 -0
  538. ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +5 -0
  539. ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +427 -0
  540. ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +2 -0
  541. ads/model/model_artifact_boilerplate/runtime.yaml +7 -0
  542. ads/model/model_artifact_boilerplate/score.py +61 -0
  543. ads/model/model_file_description_schema.json +68 -0
  544. ads/model/model_introspect.py +331 -0
  545. ads/model/model_metadata.py +1810 -0
  546. ads/model/model_metadata_mixin.py +460 -0
  547. ads/model/model_properties.py +63 -0
  548. ads/model/model_version_set.py +739 -0
  549. ads/model/runtime/__init__.py +5 -0
  550. ads/model/runtime/env_info.py +306 -0
  551. ads/model/runtime/model_deployment_details.py +37 -0
  552. ads/model/runtime/model_provenance_details.py +58 -0
  553. ads/model/runtime/runtime_info.py +81 -0
  554. ads/model/runtime/schemas/inference_env_info_schema.yaml +16 -0
  555. ads/model/runtime/schemas/model_provenance_schema.yaml +36 -0
  556. ads/model/runtime/schemas/training_env_info_schema.yaml +16 -0
  557. ads/model/runtime/utils.py +201 -0
  558. ads/model/serde/__init__.py +5 -0
  559. ads/model/serde/common.py +40 -0
  560. ads/model/serde/model_input.py +547 -0
  561. ads/model/serde/model_serializer.py +1184 -0
  562. ads/model/service/__init__.py +5 -0
  563. ads/model/service/oci_datascience_model.py +1076 -0
  564. ads/model/service/oci_datascience_model_deployment.py +500 -0
  565. ads/model/service/oci_datascience_model_version_set.py +176 -0
  566. ads/model/transformer/__init__.py +5 -0
  567. ads/model/transformer/onnx_transformer.py +324 -0
  568. ads/mysqldb/__init__.py +5 -0
  569. ads/mysqldb/mysql_db.py +227 -0
  570. ads/opctl/__init__.py +18 -0
  571. ads/opctl/anomaly_detection.py +11 -0
  572. ads/opctl/backend/__init__.py +5 -0
  573. ads/opctl/backend/ads_dataflow.py +353 -0
  574. ads/opctl/backend/ads_ml_job.py +710 -0
  575. ads/opctl/backend/ads_ml_pipeline.py +164 -0
  576. ads/opctl/backend/ads_model_deployment.py +209 -0
  577. ads/opctl/backend/base.py +146 -0
  578. ads/opctl/backend/local.py +1053 -0
  579. ads/opctl/backend/marketplace/__init__.py +9 -0
  580. ads/opctl/backend/marketplace/helm_helper.py +173 -0
  581. ads/opctl/backend/marketplace/local_marketplace.py +271 -0
  582. ads/opctl/backend/marketplace/marketplace_backend_runner.py +71 -0
  583. ads/opctl/backend/marketplace/marketplace_operator_interface.py +44 -0
  584. ads/opctl/backend/marketplace/marketplace_operator_runner.py +24 -0
  585. ads/opctl/backend/marketplace/marketplace_utils.py +212 -0
  586. ads/opctl/backend/marketplace/models/__init__.py +5 -0
  587. ads/opctl/backend/marketplace/models/bearer_token.py +94 -0
  588. ads/opctl/backend/marketplace/models/marketplace_type.py +70 -0
  589. ads/opctl/backend/marketplace/models/ocir_details.py +56 -0
  590. ads/opctl/backend/marketplace/prerequisite_checker.py +238 -0
  591. ads/opctl/cli.py +707 -0
  592. ads/opctl/cmds.py +869 -0
  593. ads/opctl/conda/__init__.py +5 -0
  594. ads/opctl/conda/cli.py +193 -0
  595. ads/opctl/conda/cmds.py +749 -0
  596. ads/opctl/conda/config.yaml +34 -0
  597. ads/opctl/conda/manifest_template.yaml +13 -0
  598. ads/opctl/conda/multipart_uploader.py +188 -0
  599. ads/opctl/conda/pack.py +89 -0
  600. ads/opctl/config/__init__.py +5 -0
  601. ads/opctl/config/base.py +57 -0
  602. ads/opctl/config/diagnostics/__init__.py +5 -0
  603. ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +62 -0
  604. ads/opctl/config/merger.py +255 -0
  605. ads/opctl/config/resolver.py +297 -0
  606. ads/opctl/config/utils.py +79 -0
  607. ads/opctl/config/validator.py +17 -0
  608. ads/opctl/config/versioner.py +68 -0
  609. ads/opctl/config/yaml_parsers/__init__.py +7 -0
  610. ads/opctl/config/yaml_parsers/base.py +58 -0
  611. ads/opctl/config/yaml_parsers/distributed/__init__.py +7 -0
  612. ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +201 -0
  613. ads/opctl/constants.py +66 -0
  614. ads/opctl/decorator/__init__.py +5 -0
  615. ads/opctl/decorator/common.py +129 -0
  616. ads/opctl/diagnostics/__init__.py +5 -0
  617. ads/opctl/diagnostics/__main__.py +25 -0
  618. ads/opctl/diagnostics/check_distributed_job_requirements.py +212 -0
  619. ads/opctl/diagnostics/check_requirements.py +144 -0
  620. ads/opctl/diagnostics/requirement_exception.py +9 -0
  621. ads/opctl/distributed/README.md +109 -0
  622. ads/opctl/distributed/__init__.py +5 -0
  623. ads/opctl/distributed/certificates.py +32 -0
  624. ads/opctl/distributed/cli.py +207 -0
  625. ads/opctl/distributed/cmds.py +731 -0
  626. ads/opctl/distributed/common/__init__.py +5 -0
  627. ads/opctl/distributed/common/abstract_cluster_provider.py +449 -0
  628. ads/opctl/distributed/common/abstract_framework_spec_builder.py +88 -0
  629. ads/opctl/distributed/common/cluster_config_helper.py +103 -0
  630. ads/opctl/distributed/common/cluster_provider_factory.py +21 -0
  631. ads/opctl/distributed/common/cluster_runner.py +54 -0
  632. ads/opctl/distributed/common/framework_factory.py +29 -0
  633. ads/opctl/docker/Dockerfile.job +103 -0
  634. ads/opctl/docker/Dockerfile.job.arm +107 -0
  635. ads/opctl/docker/Dockerfile.job.gpu +175 -0
  636. ads/opctl/docker/base-env.yaml +13 -0
  637. ads/opctl/docker/cuda.repo +6 -0
  638. ads/opctl/docker/operator/.dockerignore +0 -0
  639. ads/opctl/docker/operator/Dockerfile +41 -0
  640. ads/opctl/docker/operator/Dockerfile.gpu +85 -0
  641. ads/opctl/docker/operator/cuda.repo +6 -0
  642. ads/opctl/docker/operator/environment.yaml +8 -0
  643. ads/opctl/forecast.py +11 -0
  644. ads/opctl/index.yaml +3 -0
  645. ads/opctl/model/__init__.py +5 -0
  646. ads/opctl/model/cli.py +65 -0
  647. ads/opctl/model/cmds.py +73 -0
  648. ads/opctl/operator/README.md +4 -0
  649. ads/opctl/operator/__init__.py +31 -0
  650. ads/opctl/operator/cli.py +344 -0
  651. ads/opctl/operator/cmd.py +596 -0
  652. ads/opctl/operator/common/__init__.py +5 -0
  653. ads/opctl/operator/common/backend_factory.py +460 -0
  654. ads/opctl/operator/common/const.py +27 -0
  655. ads/opctl/operator/common/data/synthetic.csv +16001 -0
  656. ads/opctl/operator/common/dictionary_merger.py +148 -0
  657. ads/opctl/operator/common/errors.py +42 -0
  658. ads/opctl/operator/common/operator_config.py +99 -0
  659. ads/opctl/operator/common/operator_loader.py +811 -0
  660. ads/opctl/operator/common/operator_schema.yaml +130 -0
  661. ads/opctl/operator/common/operator_yaml_generator.py +152 -0
  662. ads/opctl/operator/common/utils.py +208 -0
  663. ads/opctl/operator/lowcode/__init__.py +5 -0
  664. ads/opctl/operator/lowcode/anomaly/MLoperator +16 -0
  665. ads/opctl/operator/lowcode/anomaly/README.md +207 -0
  666. ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
  667. ads/opctl/operator/lowcode/anomaly/__main__.py +103 -0
  668. ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
  669. ads/opctl/operator/lowcode/anomaly/const.py +167 -0
  670. ads/opctl/operator/lowcode/anomaly/environment.yaml +10 -0
  671. ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
  672. ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +146 -0
  673. ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +162 -0
  674. ads/opctl/operator/lowcode/anomaly/model/automlx.py +99 -0
  675. ads/opctl/operator/lowcode/anomaly/model/autots.py +115 -0
  676. ads/opctl/operator/lowcode/anomaly/model/base_model.py +404 -0
  677. ads/opctl/operator/lowcode/anomaly/model/factory.py +110 -0
  678. ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +78 -0
  679. ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +78 -0
  680. ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +120 -0
  681. ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
  682. ads/opctl/operator/lowcode/anomaly/operator_config.py +127 -0
  683. ads/opctl/operator/lowcode/anomaly/schema.yaml +401 -0
  684. ads/opctl/operator/lowcode/anomaly/utils.py +88 -0
  685. ads/opctl/operator/lowcode/common/__init__.py +5 -0
  686. ads/opctl/operator/lowcode/common/const.py +10 -0
  687. ads/opctl/operator/lowcode/common/data.py +116 -0
  688. ads/opctl/operator/lowcode/common/errors.py +47 -0
  689. ads/opctl/operator/lowcode/common/transformations.py +296 -0
  690. ads/opctl/operator/lowcode/common/utils.py +384 -0
  691. ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +13 -0
  692. ads/opctl/operator/lowcode/feature_store_marketplace/README.md +30 -0
  693. ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +5 -0
  694. ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +116 -0
  695. ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +85 -0
  696. ads/opctl/operator/lowcode/feature_store_marketplace/const.py +15 -0
  697. ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
  698. ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +4 -0
  699. ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +32 -0
  700. ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +43 -0
  701. ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +120 -0
  702. ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +34 -0
  703. ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +386 -0
  704. ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +160 -0
  705. ads/opctl/operator/lowcode/forecast/MLoperator +25 -0
  706. ads/opctl/operator/lowcode/forecast/README.md +209 -0
  707. ads/opctl/operator/lowcode/forecast/__init__.py +5 -0
  708. ads/opctl/operator/lowcode/forecast/__main__.py +89 -0
  709. ads/opctl/operator/lowcode/forecast/cmd.py +40 -0
  710. ads/opctl/operator/lowcode/forecast/const.py +92 -0
  711. ads/opctl/operator/lowcode/forecast/environment.yaml +20 -0
  712. ads/opctl/operator/lowcode/forecast/errors.py +26 -0
  713. ads/opctl/operator/lowcode/forecast/model/__init__.py +5 -0
  714. ads/opctl/operator/lowcode/forecast/model/arima.py +279 -0
  715. ads/opctl/operator/lowcode/forecast/model/automlx.py +553 -0
  716. ads/opctl/operator/lowcode/forecast/model/autots.py +312 -0
  717. ads/opctl/operator/lowcode/forecast/model/base_model.py +875 -0
  718. ads/opctl/operator/lowcode/forecast/model/factory.py +106 -0
  719. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +492 -0
  720. ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +243 -0
  721. ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +482 -0
  722. ads/opctl/operator/lowcode/forecast/model/prophet.py +450 -0
  723. ads/opctl/operator/lowcode/forecast/model_evaluator.py +244 -0
  724. ads/opctl/operator/lowcode/forecast/operator_config.py +234 -0
  725. ads/opctl/operator/lowcode/forecast/schema.yaml +506 -0
  726. ads/opctl/operator/lowcode/forecast/utils.py +397 -0
  727. ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +7 -0
  728. ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +285 -0
  729. ads/opctl/operator/lowcode/forecast/whatifserve/score.py +246 -0
  730. ads/opctl/operator/lowcode/pii/MLoperator +17 -0
  731. ads/opctl/operator/lowcode/pii/README.md +208 -0
  732. ads/opctl/operator/lowcode/pii/__init__.py +5 -0
  733. ads/opctl/operator/lowcode/pii/__main__.py +78 -0
  734. ads/opctl/operator/lowcode/pii/cmd.py +39 -0
  735. ads/opctl/operator/lowcode/pii/constant.py +84 -0
  736. ads/opctl/operator/lowcode/pii/environment.yaml +17 -0
  737. ads/opctl/operator/lowcode/pii/errors.py +27 -0
  738. ads/opctl/operator/lowcode/pii/model/__init__.py +5 -0
  739. ads/opctl/operator/lowcode/pii/model/factory.py +82 -0
  740. ads/opctl/operator/lowcode/pii/model/guardrails.py +167 -0
  741. ads/opctl/operator/lowcode/pii/model/pii.py +145 -0
  742. ads/opctl/operator/lowcode/pii/model/processor/__init__.py +34 -0
  743. ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +34 -0
  744. ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +35 -0
  745. ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +225 -0
  746. ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +73 -0
  747. ads/opctl/operator/lowcode/pii/model/processor/remover.py +26 -0
  748. ads/opctl/operator/lowcode/pii/model/report.py +487 -0
  749. ads/opctl/operator/lowcode/pii/operator_config.py +95 -0
  750. ads/opctl/operator/lowcode/pii/schema.yaml +108 -0
  751. ads/opctl/operator/lowcode/pii/utils.py +43 -0
  752. ads/opctl/operator/lowcode/recommender/MLoperator +16 -0
  753. ads/opctl/operator/lowcode/recommender/README.md +206 -0
  754. ads/opctl/operator/lowcode/recommender/__init__.py +5 -0
  755. ads/opctl/operator/lowcode/recommender/__main__.py +82 -0
  756. ads/opctl/operator/lowcode/recommender/cmd.py +33 -0
  757. ads/opctl/operator/lowcode/recommender/constant.py +30 -0
  758. ads/opctl/operator/lowcode/recommender/environment.yaml +11 -0
  759. ads/opctl/operator/lowcode/recommender/model/base_model.py +212 -0
  760. ads/opctl/operator/lowcode/recommender/model/factory.py +56 -0
  761. ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +25 -0
  762. ads/opctl/operator/lowcode/recommender/model/svd.py +106 -0
  763. ads/opctl/operator/lowcode/recommender/operator_config.py +81 -0
  764. ads/opctl/operator/lowcode/recommender/schema.yaml +265 -0
  765. ads/opctl/operator/lowcode/recommender/utils.py +13 -0
  766. ads/opctl/operator/runtime/__init__.py +5 -0
  767. ads/opctl/operator/runtime/const.py +17 -0
  768. ads/opctl/operator/runtime/container_runtime_schema.yaml +50 -0
  769. ads/opctl/operator/runtime/marketplace_runtime.py +50 -0
  770. ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +21 -0
  771. ads/opctl/operator/runtime/python_runtime_schema.yaml +21 -0
  772. ads/opctl/operator/runtime/runtime.py +115 -0
  773. ads/opctl/schema.yaml.yml +36 -0
  774. ads/opctl/script.py +40 -0
  775. ads/opctl/spark/__init__.py +5 -0
  776. ads/opctl/spark/cli.py +43 -0
  777. ads/opctl/spark/cmds.py +147 -0
  778. ads/opctl/templates/diagnostic_report_template.jinja2 +102 -0
  779. ads/opctl/utils.py +344 -0
  780. ads/oracledb/__init__.py +5 -0
  781. ads/oracledb/oracle_db.py +346 -0
  782. ads/pipeline/__init__.py +39 -0
  783. ads/pipeline/ads_pipeline.py +2279 -0
  784. ads/pipeline/ads_pipeline_run.py +772 -0
  785. ads/pipeline/ads_pipeline_step.py +605 -0
  786. ads/pipeline/builders/__init__.py +5 -0
  787. ads/pipeline/builders/infrastructure/__init__.py +5 -0
  788. ads/pipeline/builders/infrastructure/custom_script.py +32 -0
  789. ads/pipeline/cli.py +119 -0
  790. ads/pipeline/extension.py +291 -0
  791. ads/pipeline/schema/__init__.py +5 -0
  792. ads/pipeline/schema/cs_step_schema.json +35 -0
  793. ads/pipeline/schema/ml_step_schema.json +31 -0
  794. ads/pipeline/schema/pipeline_schema.json +71 -0
  795. ads/pipeline/visualizer/__init__.py +5 -0
  796. ads/pipeline/visualizer/base.py +570 -0
  797. ads/pipeline/visualizer/graph_renderer.py +272 -0
  798. ads/pipeline/visualizer/text_renderer.py +84 -0
  799. ads/secrets/__init__.py +11 -0
  800. ads/secrets/adb.py +386 -0
  801. ads/secrets/auth_token.py +86 -0
  802. ads/secrets/big_data_service.py +365 -0
  803. ads/secrets/mysqldb.py +149 -0
  804. ads/secrets/oracledb.py +160 -0
  805. ads/secrets/secrets.py +407 -0
  806. ads/telemetry/__init__.py +7 -0
  807. ads/telemetry/base.py +69 -0
  808. ads/telemetry/client.py +122 -0
  809. ads/telemetry/telemetry.py +257 -0
  810. ads/templates/dataflow_pyspark.jinja2 +13 -0
  811. ads/templates/dataflow_sparksql.jinja2 +22 -0
  812. ads/templates/func.jinja2 +20 -0
  813. ads/templates/schemas/openapi.json +1740 -0
  814. ads/templates/score-pkl.jinja2 +173 -0
  815. ads/templates/score.jinja2 +322 -0
  816. ads/templates/score_embedding_onnx.jinja2 +202 -0
  817. ads/templates/score_generic.jinja2 +165 -0
  818. ads/templates/score_huggingface_pipeline.jinja2 +217 -0
  819. ads/templates/score_lightgbm.jinja2 +185 -0
  820. ads/templates/score_onnx.jinja2 +407 -0
  821. ads/templates/score_onnx_new.jinja2 +473 -0
  822. ads/templates/score_oracle_automl.jinja2 +185 -0
  823. ads/templates/score_pyspark.jinja2 +154 -0
  824. ads/templates/score_pytorch.jinja2 +219 -0
  825. ads/templates/score_scikit-learn.jinja2 +184 -0
  826. ads/templates/score_tensorflow.jinja2 +184 -0
  827. ads/templates/score_xgboost.jinja2 +178 -0
  828. ads/text_dataset/__init__.py +5 -0
  829. ads/text_dataset/backends.py +211 -0
  830. ads/text_dataset/dataset.py +445 -0
  831. ads/text_dataset/extractor.py +207 -0
  832. ads/text_dataset/options.py +53 -0
  833. ads/text_dataset/udfs.py +22 -0
  834. ads/text_dataset/utils.py +49 -0
  835. ads/type_discovery/__init__.py +9 -0
  836. ads/type_discovery/abstract_detector.py +21 -0
  837. ads/type_discovery/constant_detector.py +41 -0
  838. ads/type_discovery/continuous_detector.py +54 -0
  839. ads/type_discovery/credit_card_detector.py +99 -0
  840. ads/type_discovery/datetime_detector.py +92 -0
  841. ads/type_discovery/discrete_detector.py +118 -0
  842. ads/type_discovery/document_detector.py +146 -0
  843. ads/type_discovery/ip_detector.py +68 -0
  844. ads/type_discovery/latlon_detector.py +90 -0
  845. ads/type_discovery/phone_number_detector.py +63 -0
  846. ads/type_discovery/type_discovery_driver.py +87 -0
  847. ads/type_discovery/typed_feature.py +594 -0
  848. ads/type_discovery/unknown_detector.py +41 -0
  849. ads/type_discovery/zipcode_detector.py +48 -0
  850. ads/vault/__init__.py +7 -0
  851. ads/vault/vault.py +237 -0
  852. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/METADATA +150 -149
  853. oracle_ads-2.13.10.dist-info/RECORD +858 -0
  854. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/WHEEL +1 -2
  855. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/entry_points.txt +2 -1
  856. oracle_ads-2.13.9rc0.dist-info/RECORD +0 -9
  857. oracle_ads-2.13.9rc0.dist-info/top_level.txt +0 -1
  858. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/licenses/LICENSE.txt +0 -0
ads/dataset/factory.py ADDED
@@ -0,0 +1,987 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8; -*-
3
+
4
+ # Copyright (c) 2020, 2024 Oracle and/or its affiliates.
5
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
+
7
+ from __future__ import print_function, absolute_import
8
+
9
+ import os
10
+ import re
11
+ import warnings
12
+ import oci
13
+ import datetime
14
+ import pandas as pd
15
+ from fsspec.utils import infer_storage_options
16
+ import inspect
17
+ import fsspec
18
+
19
+ from ads.common import utils
20
+ from ads.common.utils import is_same_class
21
+ from ads.dataset import logger
22
+ from ads.dataset.classification_dataset import (
23
+ BinaryClassificationDataset,
24
+ MultiClassClassificationDataset,
25
+ BinaryTextClassificationDataset,
26
+ MultiClassTextClassificationDataset,
27
+ )
28
+ from ads.dataset.dataset import ADSDataset
29
+ from ads.dataset.forecasting_dataset import ForecastingDataset
30
+ from ads.dataset.helper import (
31
+ get_feature_type,
32
+ is_text_data,
33
+ generate_sample,
34
+ DatasetDefaults,
35
+ ElaboratedPath,
36
+ DatasetLoadException,
37
+ )
38
+ from ads.dataset.regression_dataset import RegressionDataset
39
+ from ads.type_discovery.type_discovery_driver import TypeDiscoveryDriver
40
+ from ads.type_discovery.typed_feature import (
41
+ ContinuousTypedFeature,
42
+ DateTimeTypedFeature,
43
+ CategoricalTypedFeature,
44
+ OrdinalTypedFeature,
45
+ GISTypedFeature,
46
+ DocumentTypedFeature,
47
+ )
48
+ from ads.type_discovery.typed_feature import TypedFeature
49
+ from typing import Callable, Tuple
50
+ from ocifs import OCIFileSystem
51
+ from ads.common.decorator.runtime_dependency import (
52
+ runtime_dependency,
53
+ OptionalDependency,
54
+ )
55
+ from ads.common.decorator.deprecate import deprecated
56
+
57
# Module-wide fallbacks used by DatasetFactory methods when a call does not
# pass its own snapshot directory / storage options.
default_snapshots_dir = None
default_storage_options = None
# Earliest representable date; used as the fallback creation time for listed
# objects that carry no "timeCreated" entry.
mindate = datetime.date(datetime.MINYEAR, 1, 1)


# Emitted when the module is imported. The sentences are adjacent string
# literals, so each one must end with a space to stay separated in the
# concatenated message.
warnings.warn(
    (
        "The `ads.dataset.factory` is deprecated in `oracle-ads 2.8.8` and will be removed in `oracle-ads 3.0`. "
        "Use Pandas to read from local files or object storage directly. "
        "Check https://accelerated-data-science.readthedocs.io/en/latest/user_guide/loading_data/connect.html."
    ),
    DeprecationWarning,
    stacklevel=2,
)
71
+
72
+
73
+ class DatasetFactory:
74
@staticmethod
@deprecated(
    "2.6.6",
    details="Deprecated in favor of using Pandas. Pandas supports reading from object storage directly. Check https://accelerated-data-science.readthedocs.io/en/latest/user_guide/loading_data/connect.html",
)
def open(
    source,
    target=None,
    format="infer",
    reader_fn: Callable = None,
    name: str = None,
    description="",
    npartitions: int = None,
    type_discovery=True,
    html_table_index=None,
    column_names="infer",
    sample_max_rows=10000,
    positive_class=None,
    transformer_pipeline=None,
    types=None,
    **kwargs,
):
    """
    Returns an object of ADSDataset or ADSDatasetWithTarget read from the given path

    .. deprecated:: 2.6.6
        Deprecated in favor of using Pandas. Pandas supports reading from object storage directly.
        Check https://accelerated-data-science.readthedocs.io/en/latest/user_guide/loading_data/connect.html

    Parameters
    ----------
    source: Union[str, list, pandas.DataFrame]
        If str (or list of str), URI for the dataset. The dataset could be read from local or network file
        system, hdfs, s3, gcs and optionally pyspark in pyspark conda env.
        If a pandas.DataFrame, it is used as the data directly.
    target: str, optional
        Name of the target in dataset.
        If set an ADSDatasetWithTarget object is returned, otherwise an ADSDataset object is returned which can be
        used to understand the dataset through visualizations
    format: str, default: infer
        Format of the dataset.
        Supported formats: CSV, TSV, Parquet, libsvm, JSON, XLS/XLSX (Excel), HDF5, SQL, XML,
        Apache server log files (clf, log), ARFF.
        By default, the format would be inferred from the ending of the dataset file path.
    reader_fn: Callable, default: None
        The user may pass in their own custom reader function.
        It must accept `(path, **kwarg)` and return a pandas DataFrame
    name: str, optional
        Name of the dataset. When omitted, the name derived from the source path is used for path
        sources and "User Provided DataFrame" for DataFrame sources.
    description: str, optional default: ""
        Text describing the dataset
    npartitions: int, deprecated
        Ignored; passing a truthy value only triggers a DeprecationWarning.
    type_discovery: bool, default: True
        If false, the data types of the dataframe are used as such.
        By default, the dataframe columns are associated with the best suited data types. Associating the features
        with the discovered datatypes would impact visualizations and model prediction.
    html_table_index: int, optional
        The index of the dataframe table in html content. This is used when the format of dataset is html
    column_names: 'infer', list of str or None, default: 'infer'
        Supported only for CSV and TSV.
        List of column names to use.
        By default, column names are inferred from the first line of the file.
        If set to None, column names would be auto-generated instead of inferring from file.
        If the file already contains a column header, specify header=0 to ignore the existing column names.
    sample_max_rows: int, default: 10000, use -1 auto calculate sample size, use 0 (zero) for no sampling
        Sample size of the dataframe to use for visualization and optimization.
    positive_class: Any, optional
        Label in target for binary classification problems which should be identified as positive for modeling.
        By default, the first unique value is considered as the positive label.
    types: dict, optional
        Dictionary of <feature_name> : <data_type> to override the data type of features.
        Defaults to an empty dictionary.
    transformer_pipeline: datasets.pipeline.TransformerPipeline, optional
        A pipeline of transformations done outside the sdk and need to be applied at the time of scoring
    storage_options: dict, default: varies by source type
        Parameters passed on to the backend filesystem class.
    sep: str
        Delimiting character for parsing the input file.
    kwargs: additional keyword arguments that would be passed to underlying dataframe read API
        based on the format of the dataset

    Returns
    -------
    dataset : An instance of ADSDataset
    (or)
    dataset_with_target : An instance of ADSDatasetWithTarget

    Raises
    ------
    TypeError
        If `source` is not a str, a list or a pandas.DataFrame.

    Examples
    --------
    >>> ds = DatasetFactory.open("/path/to/data.data", format='csv', delimiter=" ",
    ...          na_values="n/a", skipinitialspace=True)

    >>> ds = DatasetFactory.open("/path/to/data.csv", target="col_1", prefix="col_",
    ...           skiprows=1, encoding="ISO-8859-1")

    >>> ds = DatasetFactory.open("oci://bucket@namespace/path/to/data.tsv",
    ...          column_names=["col1", "col2", "col3"], header=0)

    >>> ds = DatasetFactory.open("oci://bucket@namespace/path/to/data.csv",
    ...          storage_options={"config": "~/.oci/config",
    ...          "profile": "USER_2"}, delimiter = ';')

    >>> ds = DatasetFactory.open("/path/to/data.parquet", engine='pyarrow',
    ...          types={"col1": "ordinal",
    ...                 "col2": "categorical",
    ...                 "col3" : "continuous",
    ...                 "col4" : "float64"})

    >>> ds = DatasetFactory.open(df, target="class", sample_max_rows=5000,
    ...           positive_class="yes")

    >>> ds = DatasetFactory.open("s3://path/to/data.json.gz", format="json",
    ...           compression="gzip", orient="records")
    """
    # Build a fresh dict per call; a `{}` default in the signature would be a
    # single object shared by every call that omits `types`.
    if types is None:
        types = {}

    if npartitions:
        warnings.warn(
            "Variable `npartitions` is deprecated and will not be used",
            DeprecationWarning,
            stacklevel=2,
        )

    # For oci:// URIs with something after the scheme, supply an empty config
    # unless the caller already chose storage options.
    if (
        "storage_options" not in kwargs
        and isinstance(source, str)
        and len(source) > 6
        and source.startswith("oci://")
    ):
        kwargs["storage_options"] = {"config": {}}

    if isinstance(source, (str, list)):
        progress = utils.get_progress_bar(4)
        progress.update("Opening data")
        path = ElaboratedPath(source, format=format, **kwargs)
        if reader_fn is None:
            reader_fn = get_format_reader(path=path, **kwargs)
        df = load_dataset(path=path, reader_fn=reader_fn, **kwargs)
        # Honour an explicit name, as the DataFrame branch does; only fall
        # back to the path-derived name when none was given.
        name = path.name if name is None else name
    elif isinstance(source, pd.DataFrame):
        progress = utils.get_progress_bar(4)
        progress.update("Partitioning data")
        df = source
        name = "User Provided DataFrame" if name is None else name
    else:
        raise TypeError(
            f"The Source type: {type(source)} is not supported for DatasetFactory."
        )

    shape = df.shape
    return DatasetFactory._build_dataset(
        df=df,
        shape=shape,
        target=target,
        sample_max_rows=sample_max_rows,
        type_discovery=type_discovery,
        types=types,
        positive_class=positive_class,
        name=name,
        transformer_pipeline=transformer_pipeline,
        description=description,
        progress=progress,
        **utils.inject_and_copy_kwargs(
            kwargs,
            **{"html_table_index": html_table_index, "column_names": column_names},
        ),
    )
239
+
240
@staticmethod
def open_to_pandas(
    source: str, format: str = None, reader_fn: Callable = None, **kwargs
) -> pd.DataFrame:
    """
    Load the data at `source` and return it as a pandas DataFrame.

    Parameters
    ----------
    source: str
        Location of the data; handed to ElaboratedPath together with `format` and `kwargs`.
    format: str, optional
        Format of the data, forwarded to ElaboratedPath.
    reader_fn: Callable, optional
        Custom reader. When omitted, a reader is looked up with get_format_reader().
    kwargs:
        Forwarded to ElaboratedPath, get_format_reader() and load_dataset().

    Returns
    -------
    pandas.DataFrame
        The result of load_dataset().
    """
    elaborated = ElaboratedPath(source, format=format, **kwargs)
    # Only look up a reader when the caller did not bring one.
    if reader_fn is None:
        reader_fn = get_format_reader(path=elaborated, **kwargs)
    return load_dataset(path=elaborated, reader_fn=reader_fn, **kwargs)
250
+
251
@staticmethod
def from_dataframe(df, target: str = None, **kwargs):
    """
    Build a dataset object from a pandas.DataFrame by delegating to DatasetFactory.open().

    Parameters
    ----------
    df: pandas.DataFrame
        The data to wrap.
    target: str, optional
        Name of the target column.
    kwargs: dict
        See DatasetFactory.open() for supported kwargs

    Returns
    -------
    dataset: an object of ADSDataset if target is not specified, otherwise an object of
        ADSDatasetWithTarget tagged according to the type of target

    Examples
    --------
    >>> df = pd.DataFrame(data)
    >>> ds = DatasetFactory.from_dataframe(df)
    """
    dataset = DatasetFactory.open(df, target=target, **kwargs)
    return dataset
274
+
275
@staticmethod
@runtime_dependency(module="IPython", install_from=OptionalDependency.NOTEBOOK)
@runtime_dependency(
    module="ipywidgets",
    object="HTML",
    is_for_notebook_only=True,
    install_from=OptionalDependency.NOTEBOOK,
)
def list_snapshots(snapshot_dir=None, name="", storage_options=None, **kwargs):
    """
    Displays the URIs for dataset snapshots under the given directory path.

    Parameters
    ----------
    snapshot_dir: str, optional
        Return all dataset snapshots created using ADSDataset.snapshot() within this directory.
        The path can contain protocols such as oci, s3.
        Falls back to the module-level default snapshots directory when omitted.
    name: str, optional
        The list of snapshots in the directory gets filtered by the name. Accepts glob expressions.
        default = `""` (no filtering)
    storage_options: dict, optional
        Parameters passed on to the backend filesystem class.
        Falls back to the module-level default storage options, then to an empty dict.
    kwargs:
        Extra keyword arguments forwarded to the filesystem `glob` call; `refresh=True` is always set.

    Returns
    -------
    pandas.DataFrame
        One row per snapshot with the columns "Name" and "Created Time" (plus the column added by
        `reset_index()`), newest first.

    Raises
    ------
    ValueError
        If no snapshot directory is given and no default has been set.
    AssertionError
        If the resolved storage options are not a dictionary.

    Example
    --------
    >>> DatasetFactory.list_snapshots(snapshot_dir="oci://my_bucket/snapshots_dir",
    ...             name="ads_iris_")

    Returns a list of all snapshots (recursively) saved to obj storage bucket `"my_bucket"` with prefix
    `"/snapshots_dir/ads_iris_**"` sorted by time created.
    """
    if snapshot_dir is None:
        snapshot_dir = default_snapshots_dir
        if snapshot_dir is None:
            # Adjacent literals instead of a backslash continuation, which
            # would embed the next line's indentation in the message.
            raise ValueError(
                "Specify snapshot_dir or use DatasetFactory.set_default_storage() "
                "to set default storage options"
            )
        else:
            logger.info("Using default snapshots dir %s", snapshot_dir)
    if storage_options is None:
        if default_storage_options is not None:
            storage_options = default_storage_options
            logger.info("Using default storage options")
        else:
            storage_options = dict()
    # Kept as an assertion so the exception type seen by callers is unchanged.
    assert isinstance(storage_options, dict), (
        "The storage options parameter must be a dictionary. You can set "
        "this globally by calling DatasetFactory.set_default_storage("
        "storage_options={'config': 'location'}). "
    )
    url_options = infer_storage_options(snapshot_dir)
    protocol = url_options.pop("protocol", None)

    fs = OCIFileSystem(config=storage_options.get("config", None))
    kwargs.update({"refresh": True})
    obj_list = [
        (k, v.get("timeCreated", mindate).strftime("%Y-%m-%d %H:%M:%S"))
        for k, v in fs.glob(
            os.path.join(snapshot_dir, name + "**"), detail=True, **kwargs
        ).items()
        if v["type"] == "file"
    ]

    # Loop invariants: the part-file pattern and the URI prefix.
    part_pattern = re.compile(r"/part\.[0-9]{1,6}\.parquet$")
    parent_path = "%s://" % protocol

    files = []
    for file, file_time in obj_list:
        parquet_filename = file
        if protocol in ["oci"]:
            # Collapse the files that make up one parquet directory
            # (part files and metadata files) to the directory name.
            parquet_part = part_pattern.search(file)
            if parquet_part is not None:
                parquet_filename = file[: parquet_part.start()]
            elif file.endswith("/_common_metadata"):
                parquet_filename = file[: -len("/_common_metadata")]
            elif file.endswith("/_metadata"):
                parquet_filename = file[: -len("/_metadata")]
        files.append((parent_path + parquet_filename, file_time))
    files.sort(key=lambda x: x[1] or mindate, reverse=True)
    list_df = pd.DataFrame(files, columns=["Name", "Created Time"])
    # Several files can map to the same snapshot name; keep the first (newest).
    list_df = list_df.drop_duplicates(subset=["Name"]).reset_index()
    if len(list_df) == 0:
        print(f"No snapshots found at: {os.path.join(snapshot_dir, name)}")

    # display in HTML format if sdk is run in notebook mode
    if utils.is_notebook():
        # `IPython.display` is the supported import location for `display`.
        from IPython.display import display

        # NOTE(review): `HTML` is not imported in this module; presumably it is
        # injected by the ipywidgets `runtime_dependency` decorator - confirm.
        display(
            HTML(
                list_df.style.set_table_attributes("class=table")
                .hide()
                .to_html()
            )
        )
    return list_df
374
+
375
@staticmethod
def download(remote_path, local_path, storage=None, overwrite=False):
    """
    Download a remote file or directory to local storage.

    Parameters
    ---------
    remote_path: str
        Remote location opened with `fsspec.open_files`, e.g. an oci or s3 URI; may be a glob expression.
    local_path: str
        Local destination handed to the per-file download helper.
    storage: dict
        Parameters passed on to the backend remote filesystem class.
        Falls back to the module-level default storage options, then to an empty dict.
    overwrite: bool, default False
        If True, the method will overwrite any existing files in the local_path

    Raises
    ------
    FileNotFoundError
        If `remote_path` matches no files.

    Examples
    ---------
    >>> DatasetFactory.download("oci://Bucket/prefix/to/data/*.csv",
    ...         "/home/datascience/data/")
    """
    if storage is None:
        fallback = default_storage_options
        if fallback is None:
            storage = dict()
        else:
            storage = fallback
            logger.info("Using default storage options")

    opened = fsspec.open_files(
        remote_path, mode="rb", name_function=lambda i: "", **storage
    )
    if len(opened) < 1:
        raise FileNotFoundError(remote_path)

    failed, message = DatasetFactory._download_files(
        remote_files=opened, local_path=local_path, overwrite=overwrite
    )
    if not failed:
        logger.info(f"Download {remote_path} to {local_path}.")
        return
    logger.error(message)
415
+
416
@staticmethod
def _download_files(remote_files, local_path, overwrite=False):
    """
    Copy every opened remote file below `local_path`.

    Parameters
    ----------
    remote_files: iterable
        Objects exposing a `.path` attribute and usable as context managers that yield
        a readable binary file object (e.g. the result of `fsspec.open_files`).
    local_path: str
        Local destination. The part of each remote path after its first "/" is appended to it.
    overwrite: bool, default False
        Whether an existing local file may be replaced.

    Returns
    -------
    Tuple[bool, str]
        `(display_error, error_msg)`; currently always `(False, "")` because every
        failure is raised instead of being reported through the tuple.

    Raises
    ------
    FileExistsError
        If the destination already exists and `overwrite` is False.
    FileNotFoundError
        If the remote file cannot be opened or read (wraps `oci.exceptions.ServiceError`).
    """
    # Local import: keeps this block self-contained without touching the
    # module's import section.
    import shutil

    display_error, error_msg = False, ""
    for remote_file in remote_files:
        # Drop everything up to and including the first "/"; if there is no
        # "/", find() returns -1 and the whole path is kept.
        bucket_idx = remote_file.path.find("/")
        suffix = remote_file.path[bucket_idx + 1 :]
        try:
            with remote_file as f1:
                local_filepath = (
                    os.path.join(local_path, suffix) if suffix else local_path
                )
                if os.path.exists(local_filepath) and not overwrite:
                    raise FileExistsError(
                        f"Trying to overwrite files in {local_filepath}. If you'd like to "
                        f"overwrite these files, set overwrite to True."
                    )
                # dirname is "" for a bare file name, and os.makedirs("")
                # raises, so only create a parent when there is one.
                parent_dir = os.path.dirname(local_filepath)
                if parent_dir:
                    os.makedirs(parent_dir, exist_ok=True)
                with open(local_filepath, "wb") as f2:
                    # Stream in chunks rather than loading the whole file.
                    shutil.copyfileobj(f1, f2)
        except oci.exceptions.ServiceError as e:
            raise FileNotFoundError(
                f"Unable to open file: {remote_file.path}"
            ) from e
    return display_error, error_msg
438
+
439
@staticmethod
def upload(local_file_or_dir, remote_file_or_dir, storage_options=None):
    """
    Upload local file or directory to remote storage

    When a directory is given it is walked recursively and every file is
    uploaded to the same relative location underneath ``remote_file_or_dir``.

    Parameters
    ----------
    local_file_or_dir: str
        Path to an existing local file or directory.
    remote_file_or_dir: str
        Remote destination. Supports protocols like oci, s3
    storage_options: dict
        Parameters passed on to the backend remote filesystem class. When
        omitted, the module-level default storage options are used if set.

    Raises
    ------
    ValueError
        If ``local_file_or_dir`` does not exist.
    """
    if not os.path.exists(local_file_or_dir):
        raise ValueError("File/Directory does not exist: %s" % local_file_or_dir)
    if storage_options is None and default_storage_options is not None:
        storage_options = default_storage_options
        logger.info("Using default storage options")

    if not os.path.isdir(local_file_or_dir):
        DatasetFactory._upload_file(
            local_file_or_dir, remote_file_or_dir, storage_options=storage_options
        )
        return

    for subdir, _dirs, files in os.walk(local_file_or_dir):
        # Location of this sub-directory relative to the upload root, so the
        # remote layout mirrors the local one wherever the root lives on disk.
        rel_dir = os.path.relpath(subdir, local_file_or_dir)
        for file in files:
            path = file if rel_dir == os.curdir else os.path.join(rel_dir, file)
            DatasetFactory._upload_file(
                os.path.join(subdir, file),
                os.path.join(remote_file_or_dir, path),
                storage_options=storage_options,
            )
477
+
478
+ @staticmethod
479
+ def set_default_storage(snapshots_dir=None, storage_options=None):
480
+ """
481
+ Set default storage directory and options.
482
+
483
+ Both snapshots_dir and storage_options can be overridden at the API scope.
484
+
485
+ Parameters
486
+ ----------
487
+ snapshots_dir: str
488
+ Path for the snapshots directory. Can contain protocols such as oci, s3
489
+ storage_options: dict, optional
490
+ Parameters passed on to the backend filesystem class.
491
+ """
492
+ global default_snapshots_dir
493
+ default_snapshots_dir = snapshots_dir
494
+ global default_storage_options
495
+ if storage_options is not None:
496
+ assert isinstance(storage_options, dict), (
497
+ f"The storage options parameter must be a dictionary. Instead "
498
+ f"we got the type: {type(storage_options)} "
499
+ )
500
+ default_storage_options = storage_options
501
+
502
@classmethod
def _upload_file(cls, local_file, remote_file, storage_options=None):
    """
    Copy a single local file to ``remote_file``.

    Parameters
    ----------
    local_file: str
        Path of the local file to read.
    remote_file: str
        Remote destination path.
    storage_options: dict, optional
        Parameters passed on to the backend remote filesystem class.
    """
    import shutil

    # Spread the options into fsspec (as ``download`` does) so that they reach
    # the backend filesystem instead of being nested under a single key.
    fs_kwargs = storage_options if storage_options is not None else {}
    # In write mode fsspec substitutes "*" with name_function(i); returning ""
    # makes the single generated target equal to ``remote_file``.
    remote_file_handler = fsspec.open_files(
        remote_file + "*", mode="wb", name_function=lambda i: "", **fs_kwargs
    )[0]
    with remote_file_handler as dst, open(local_file, "rb") as src:
        # Chunked binary copy; iterating "lines" of a binary file can pull
        # arbitrarily large pieces into memory.
        shutil.copyfileobj(src, dst)
    print("Uploaded %s to %s" % (local_file, remote_file))
515
+
516
@classmethod
def _build_dataset(
    cls,
    df: pd.DataFrame,
    shape: Tuple[int, int],
    target: str = None,
    progress=None,
    **kwargs,
):
    """
    Sample ``df`` and wrap it in the appropriate dataset object.

    Parameters
    ----------
    df: pd.DataFrame
        The full data.
    shape: Tuple[int, int]
        Shape of the data; ``shape[0]`` is passed to the sampler.
    target: str, optional
        Name of the target column. When omitted a plain ``ADSDataset`` is built.
    progress: optional
        Object with an ``update(message)`` method used to report progress.
    kwargs:
        Forwarded to the sampler and to the dataset constructor. When a
        target is given, ``type_discovery`` and ``types`` must be present.

    Returns
    -------
    The dataset built for the given target (or an ``ADSDataset`` without one).
    """

    def _report(message):
        # Progress reporting is optional.
        if progress:
            progress.update(message)

    row_count = shape[0]
    _report("Generating data sample")

    sampled_df = generate_sample(
        df,
        row_count,
        DatasetDefaults.sampling_confidence_level,
        DatasetDefaults.sampling_confidence_interval,
        **kwargs,
    )

    if target is None:
        _report("Building the dataset with no target.")
        untargeted = ADSDataset(df=df, sampled_df=sampled_df, shape=shape, **kwargs)
        _report("Done")
        logger.info(
            "Use `set_target()` to type the dataset for a particular learning task."
        )
        return untargeted

    _report("Building dataset")

    run_discovery = kwargs["type_discovery"]
    declared_types = kwargs["types"]
    if target in declared_types:
        # An explicitly declared type wins over discovery.
        sampled_df[target] = sampled_df[target].astype(declared_types[target])
        run_discovery = False

    # if type discovery is turned off, infer type from pandas dtype
    target_type = DatasetFactory.infer_target_type(
        target, sampled_df[target], run_discovery
    )

    typed_dataset = DatasetFactory._get_dataset(
        df=df,
        sampled_df=sampled_df,
        target=target,
        target_type=target_type,
        shape=shape,
        **kwargs,
    )
    _report("Done")
    logger.info(
        "Use `suggest_recommendations()` to view and apply recommendations for dataset optimization."
    )
    return typed_dataset
575
+
576
@classmethod
def infer_target_type(cls, target, target_series, discover_target_type=True):
    """
    Determine the feature type of the target column.

    Parameters
    ----------
    target: str
        Name of the target column.
    target_series:
        The target column's values.
    discover_target_type: bool, default True
        If True, run ``TypeDiscoveryDriver``; otherwise fall back to
        ``get_feature_type`` (inferring the type from the pandas dtype).

    Returns
    -------
    The feature type produced by the selected mechanism.
    """
    if not discover_target_type:
        # Type discovery is turned off: infer the type from the pandas dtype.
        return get_feature_type(target, target_series)
    return TypeDiscoveryDriver().discover(target, target_series, is_target=True)
586
+
587
@classmethod
def _get_dataset(
    cls,
    df: pd.DataFrame,
    sampled_df: pd.DataFrame,
    target: str,
    target_type: TypedFeature,
    shape: Tuple[int, int],
    positive_class=None,
    **init_kwargs,
):
    """
    Pick and construct the dataset class matching the target's feature type.

    Parameters
    ----------
    df: pd.DataFrame
        The full data.
    sampled_df: pd.DataFrame
        The sample of ``df``.
    target: str
        Name of the target column.
    target_type: TypedFeature
        Feature type of the target column.
    shape: Tuple[int, int]
        Shape of the data.
    positive_class: optional
        Forwarded to the binary classification datasets only.
    init_kwargs:
        Extra keyword arguments forwarded to the dataset constructor.

    Returns
    -------
    A regression, forecasting, binary or multi-class (text) classification
    dataset, depending on ``target_type``.

    Raises
    ------
    ValueError
        If the target column is empty, is a text/document or coordinate/GIS
        column, or its problem type cannot be identified.
    """
    if len(df[target].dropna()) == 0:
        logger.warning(
            "It is not recommended to use an empty column as the target variable."
        )
        raise ValueError(
            "We do not support using empty columns as the chosen target"
        )

    # Arguments shared by every dataset constructor below.
    common = dict(
        df=df,
        sampled_df=sampled_df,
        target=target,
        target_type=target_type,
        shape=shape,
        **init_kwargs,
    )

    if is_same_class(target_type, ContinuousTypedFeature):
        return RegressionDataset(**common)

    if is_same_class(
        target_type, DateTimeTypedFeature
    ) or df.index.dtype.name.startswith("datetime"):
        return ForecastingDataset(**common)

    # Adding ordinal typed feature, but ultimately we should rethink how we want to model this type
    if is_same_class(target_type, CategoricalTypedFeature) or is_same_class(
        target_type, OrdinalTypedFeature
    ):
        if target_type.meta_data["internal"]["unique"] == 2:
            if is_text_data(sampled_df, target):
                return BinaryTextClassificationDataset(
                    positive_class=positive_class, **common
                )
            return BinaryClassificationDataset(
                positive_class=positive_class, **common
            )
        if is_text_data(sampled_df, target):
            return MultiClassTextClassificationDataset(**common)
        return MultiClassClassificationDataset(**common)

    # Compare the *type* of the target (not its column name) against the
    # document feature class, like every other branch does.
    if (
        is_same_class(target_type, DocumentTypedFeature)
        or "text" in target_type["type"]
        or "text" in target
    ):
        raise ValueError(
            f"The column {target} cannot be used as the target column."
        )

    if (
        is_same_class(target_type, GISTypedFeature)
        or "coord" in target_type["type"]
        or "coord" in target
    ):
        raise ValueError(
            f"The column {target} cannot be used as the target column."
        )

    # This is to catch constant columns that are boolean. Added as a fix for pd.isnull(), and datasets with a
    # binary target, but only data on one instance
    if target_type["low_level_type"] == "bool":
        return BinaryClassificationDataset(positive_class=positive_class, **common)

    raise ValueError(
        f"Unable to identify problem type. Specify the data type of {target} using 'types'. "
        f"For example, types = {{{target}: 'category'}}"
    )
701
+
702
+
703
class CustomFormatReaders:
    """Collection of reader functions that each load one data format into a pandas DataFrame."""

    @staticmethod
    def read_tsv(path: str, **kwargs) -> pd.DataFrame:
        """Read a tab-separated file through ``pd.read_csv`` with a tab ``sep`` injected."""
        return pd.read_csv(
            path, **utils.inject_and_copy_kwargs(kwargs, **{"sep": "\t"})
        )

    @staticmethod
    def read_json(path: str, **kwargs) -> pd.DataFrame:
        """
        Read a JSON file, retrying with ``lines=True`` injected when the first
        attempt raises ``ValueError`` (e.g. for line-delimited JSON).
        """
        try:
            return pd.read_json(path, **kwargs)
        except ValueError:
            return pd.read_json(
                path, **utils.inject_and_copy_kwargs(kwargs, **{"lines": True})
            )

    @staticmethod
    def read_libsvm(path: str, **kwargs) -> pd.DataFrame:
        """
        Read a libsvm/svmlight file into a dense DataFrame.

        The labels are stored in a ``target`` column. Loading goes through a
        joblib ``Memory`` cache located at ``./mycache``.
        """
        from sklearn.datasets import load_svmlight_file
        from joblib import Memory

        mem = Memory("./mycache")

        @mem.cache
        def get_data(path):
            X, y = load_svmlight_file(path)
            df = pd.DataFrame(X.todense())
            df["target"] = y
            return df

        return get_data(path)

    @staticmethod
    @runtime_dependency(
        module="pandavro", object="read_avro", install_from=OptionalDependency.DATA
    )
    def read_avro(path: str, **kwargs) -> pd.DataFrame:
        """Read an Avro file."""
        # NOTE(review): the ``read_avro`` called here is presumably the pandavro
        # function made available by ``runtime_dependency`` -- confirm.
        return read_avro(path, **kwargs)

    DEFAULT_SQL_CHUNKSIZE = 12007
    DEFAULT_SQL_ARRAYSIZE = 50000
    DEFAULT_SQL_MIL = 128
    DEFAULT_SQL_CTU = False

    @classmethod
    def read_sql(cls, path: str, table: str = None, **kwargs) -> pd.DataFrame:
        """
        Read a table or the result of a query into a DataFrame.

        :param path: str
            This is the connection URL that gets passed to sqlalchemy's create_engine method
        :param table: str
            This is either the name of a table to select * from or a sql query to be run
        :param kwargs:
            Forwarded to the engine factory; the subset accepted by
            ``pd.read_sql_query`` is forwarded to it as well.
        :return: pd.DataFrame
        :raises ValueError: if ``table`` is not given
        :raises Exception: if the query returns no rows
        """
        if table is None:
            raise ValueError(
                "In order to read from a database you need to specify the table using the `table` "
                "argument."
            )
        # check if it's oracle dialect
        if str(path).lower().startswith("oracle"):
            kwargs = utils.inject_and_copy_kwargs(
                kwargs,
                **{
                    "arraysize": cls.DEFAULT_SQL_ARRAYSIZE,
                    "max_identifier_length": cls.DEFAULT_SQL_MIL,
                    "coerce_to_unicode": cls.DEFAULT_SQL_CTU,
                },
            )
        engine = utils.get_sqlalchemy_engine(path, **kwargs)

        table_name = table.strip()
        with engine.connect() as connection:
            # if it's a query expression:
            if table_name.lower().startswith("select"):
                sql_query = table_name
            else:
                # NOTE: the table name is interpolated verbatim; only pass
                # trusted identifiers here.
                sql_query = f"select * from {table_name}"

            chunks = pd.read_sql_query(
                sql_query,
                con=connection,
                **_validate_kwargs(
                    pd.read_sql_query,
                    utils.inject_and_copy_kwargs(
                        kwargs, **{"chunksize": cls.DEFAULT_SQL_CHUNKSIZE}
                    ),
                ),
            )
            from tqdm import tqdm

            # Collect the chunks and concatenate once; growing a DataFrame
            # chunk by chunk copies all previous rows on every iteration.
            frames = []
            with tqdm(chunks, unit=" rows") as t:
                for chunk in chunks:
                    frames.append(chunk)
                    t.update(len(chunk))

            df = pd.concat(frames) if frames else pd.DataFrame()
            df = df.reset_index(drop=True)
            if df.shape[0] == 0:
                logger.warning(
                    "The SQL expression returned zero rows. Therefore, no `ADSdataset` object was created."
                )
                raise Exception("The SQL expression returned no rows")
        return df

    @staticmethod
    def read_log(path, **kwargs):
        """Read an Apache-style access log into a DataFrame with fixed column names."""
        from ads.dataset.helper import parse_apache_log_str, parse_apache_log_datetime

        # NOTE(review): the "status" and "size" converter keys do not match
        # any entry in ``names`` ("http_code", "response_bytes") -- confirm
        # whether they are intended to apply.
        df = pd.read_csv(
            path,
            # assume_missing=True,
            sep=r'\s(?=(?:[^"]*"[^"]*")*[^"]*$)(?![^\[]*\])',
            engine="python",
            na_values="-",
            header=None,
            names=[
                "host",
                "identity",
                "user",
                "time",
                "request",
                "http_code",
                "response_bytes",
                "referer",
                "user_agent",
                "unknown",
            ],
            converters={
                "time": parse_apache_log_datetime,
                "request": parse_apache_log_str,
                "status": int,
                "size": int,
                "referer": parse_apache_log_str,
                "user_agent": parse_apache_log_str,
            },
            **kwargs,
        )
        return df

    @staticmethod
    def read_html(path, html_table_index: int = None, **kwargs):
        """
        Read the tables of an HTML document.

        Returns the concatenation of all tables, or only the table at
        ``html_table_index`` when that index is given.
        """
        tables = pd.read_html(path, **kwargs)
        if html_table_index is None:
            return pd.concat(tables)
        return tables[html_table_index]

    @staticmethod
    @runtime_dependency(module="scipy", install_from=OptionalDependency.VIZ)
    def read_arff(path, **kwargs):
        """
        Read an ARFF file from a local path, or download it when ``path`` is
        not a local file.

        An empty DataFrame is returned when the download does not succeed.
        """
        from scipy.io import arff
        import requests
        from io import BytesIO, TextIOWrapper

        data = None
        if os.path.isfile(path):
            data, _ = arff.loadarff(path)
        else:
            with requests.get(path) as r:
                if r.status_code == requests.codes.ok:
                    f = TextIOWrapper(BytesIO(r.content))
                    data, _ = arff.loadarff(f)
        return pd.DataFrame(data)

    @staticmethod
    def read_xml(path: str, **kwargs) -> pd.DataFrame:
        """
        Load data from xml file.

        Every child of a document's root element becomes one row; attributes
        and nested elements become columns named by their "/"-joined path.

        Parameters
        ----------
        path: str
            Path to XML file
        storage_options: dict, optional
            Storage options passed to fsspec to open the file.

        Returns
        -------
        dataframe : pandas.DataFrame
        """
        # ``xml.etree.cElementTree`` was removed in Python 3.9; ElementTree
        # provides the same API.
        import xml.etree.ElementTree as et

        def get_children(df, node, parent, i):
            for name in node.attrib.keys():
                df.at[i, parent + name] = node.attrib[name]
            for child in list(node):
                if len(list(child)) > 0:
                    get_children(df, child, parent + child.tag + "/", i)
                else:
                    df.at[i, parent + child.tag] = child.text

        storage_options = kwargs.get("storage_options") or {}

        file_handles = fsspec.open_files(path, mode="rb", **storage_options)
        ret_df = pd.DataFrame()
        # Next free row index, carried across files so that rows of a later
        # file never overwrite rows of an earlier one.
        next_row = 0
        for file_handle in file_handles:
            with file_handle as f:
                # Parse the opened handle so remote paths and globs work.
                parsed_xml = et.parse(f)
                for node in parsed_xml.getroot():
                    get_children(ret_df, node, node.tag + "/", next_row)
                    next_row += 1
        return ret_df
907
+
908
+
909
# Lookup table from a format key to the callable that loads that format.
# Several keys are aliases that share one reader (e.g. "json"/"jsonl",
# "xls"/"xlsx"/"excel", "sql"/"db", "log"/"clf").
reader_fns = {
    "csv": pd.read_csv,
    "tsv": CustomFormatReaders.read_tsv,
    "json": CustomFormatReaders.read_json,
    "jsonl": CustomFormatReaders.read_json,
    "excel": pd.read_excel,
    "xls": pd.read_excel,
    "xlsx": pd.read_excel,
    "parquet": pd.read_parquet,
    "libsvm": CustomFormatReaders.read_libsvm,
    "hdf": pd.read_hdf,  # Todo: re.match(format, "hdf\d*") or format == "h5"
    "hdf3": pd.read_hdf,
    "hdf4": pd.read_hdf,
    "h5": pd.read_hdf,
    "avro": CustomFormatReaders.read_avro,
    "avsc": CustomFormatReaders.read_avro,
    "sql": CustomFormatReaders.read_sql,
    "db": CustomFormatReaders.read_sql,
    "log": CustomFormatReaders.read_log,
    "clf": CustomFormatReaders.read_log,
    "html": CustomFormatReaders.read_html,
    "arff": CustomFormatReaders.read_arff,
    "xml": CustomFormatReaders.read_xml,
}
933
+
934
+
935
def _validate_kwargs(func: Callable, kwargs):
    """
    Restrict ``kwargs`` to the keyword arguments ``func`` can accept.

    Parameters
    ----------
    func: Callable
        The function the keyword arguments are intended for.
    kwargs: dict
        Candidate keyword arguments.

    Returns
    -------
    dict
        ``kwargs`` itself when ``func`` takes arbitrary keyword arguments
        (any ``**name`` parameter), otherwise a new dict holding only the
        entries whose key is a parameter of ``func``.
    """
    valid_params = inspect.signature(func).parameters
    # Detect a ``**`` parameter by kind, not by the conventional name
    # "kwargs", so ``**options`` / ``**kwds`` are recognised as well.
    accepts_var_keyword = any(
        param.kind is inspect.Parameter.VAR_KEYWORD
        for param in valid_params.values()
    )
    if accepts_var_keyword:
        return kwargs
    return {k: v for k, v in kwargs.items() if k in valid_params}
941
+
942
+
943
def get_format_reader(path: ElaboratedPath, **kwargs) -> Callable:
    """
    Look up the reader function registered for the format of ``path``.

    Parameters
    ----------
    path: ElaboratedPath
        Path object whose ``format`` attribute selects the reader.
    kwargs:
        Accepted for call compatibility; not used.

    Returns
    -------
    Callable
        The entry of ``reader_fns`` for ``path.format``.

    Raises
    ------
    ValueError
        If no reader is registered for the format.
    """
    format_key = path.format
    try:
        return reader_fns[format_key]
    except (KeyError, NameError):
        unsupported_msg = (
            f"We were unable to load the specified dataset. We have interpreted the format "
            f"as {format_key}, if this is not correct, call again and set the `format` parameter = "
            f"to the desired format. Read more here: https://docs.cloud.oracle.com/en-us/iaas/tools/ads"
            f"-sdk/latest/user_guide/loading_data/loading_data.html#specify-data-types-in-load-dataset"
        )
        raise ValueError(unsupported_msg)
956
+
957
+
958
def load_dataset(path: ElaboratedPath, reader_fn: Callable, **kwargs) -> pd.DataFrame:
    """
    Read every file of ``path`` with ``reader_fn`` and concatenate the results.

    Parameters
    ----------
    path: ElaboratedPath
        Path object whose ``paths`` attribute lists the files to read.
    reader_fn: Callable
        Function called as ``reader_fn(filename, **kwargs)`` for each file.
    kwargs:
        Keyword arguments for ``reader_fn``; entries it cannot accept are
        dropped before the call.

    Returns
    -------
    pd.DataFrame
        The concatenation of the DataFrames read from all files.

    Raises
    ------
    ValueError
        If ``reader_fn`` returns something other than a DataFrame, or if
        ``path`` contains no files.
    DatasetLoadException
        If the concatenated DataFrame is empty.
    """
    # The accepted keyword arguments depend only on reader_fn, so resolve
    # them once instead of once per file.
    reader_kwargs = _validate_kwargs(reader_fn, kwargs)

    dfs = []
    for filename in path.paths:
        data = reader_fn(filename, **reader_kwargs)
        if not isinstance(data, pd.DataFrame):
            fn_name = f"{reader_fn.__module__}.{reader_fn.__qualname__}"
            raise ValueError(
                f"{fn_name} is used to load the data. "
                f"However, {fn_name} returned {type(data)} instead of pandas DataFrame. "
                f"Refer to the usage of {fn_name} to set the correct arguments."
            )
        dfs.append(data)

    if not dfs:
        raise ValueError(
            f"We were unable to load the specified dataset. Read more here: "
            f"https://docs.cloud.oracle.com/en-us/iaas/tools/ads"
            f"-sdk/latest/user_guide/loading_data/loading_data.html#specify-data-types-in-load-dataset"
        )

    # pd.concat always returns a DataFrame here, so only emptiness is checked.
    df = pd.concat(dfs)
    if df.empty:
        raise DatasetLoadException("Empty DataFrame, not producing a ADSDataset")
    return df