oracle-ads 2.13.9rc0__py3-none-any.whl → 2.13.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (858)
  1. ads/aqua/__init__.py +40 -0
  2. ads/aqua/app.py +507 -0
  3. ads/aqua/cli.py +96 -0
  4. ads/aqua/client/__init__.py +3 -0
  5. ads/aqua/client/client.py +836 -0
  6. ads/aqua/client/openai_client.py +305 -0
  7. ads/aqua/common/__init__.py +5 -0
  8. ads/aqua/common/decorator.py +125 -0
  9. ads/aqua/common/entities.py +274 -0
  10. ads/aqua/common/enums.py +134 -0
  11. ads/aqua/common/errors.py +109 -0
  12. ads/aqua/common/utils.py +1295 -0
  13. ads/aqua/config/__init__.py +4 -0
  14. ads/aqua/config/container_config.py +246 -0
  15. ads/aqua/config/evaluation/__init__.py +4 -0
  16. ads/aqua/config/evaluation/evaluation_service_config.py +147 -0
  17. ads/aqua/config/utils/__init__.py +4 -0
  18. ads/aqua/config/utils/serializer.py +339 -0
  19. ads/aqua/constants.py +116 -0
  20. ads/aqua/data.py +14 -0
  21. ads/aqua/dummy_data/icon.txt +1 -0
  22. ads/aqua/dummy_data/oci_model_deployments.json +56 -0
  23. ads/aqua/dummy_data/oci_models.json +1 -0
  24. ads/aqua/dummy_data/readme.md +26 -0
  25. ads/aqua/evaluation/__init__.py +8 -0
  26. ads/aqua/evaluation/constants.py +53 -0
  27. ads/aqua/evaluation/entities.py +186 -0
  28. ads/aqua/evaluation/errors.py +70 -0
  29. ads/aqua/evaluation/evaluation.py +1814 -0
  30. ads/aqua/extension/__init__.py +42 -0
  31. ads/aqua/extension/aqua_ws_msg_handler.py +76 -0
  32. ads/aqua/extension/base_handler.py +90 -0
  33. ads/aqua/extension/common_handler.py +121 -0
  34. ads/aqua/extension/common_ws_msg_handler.py +36 -0
  35. ads/aqua/extension/deployment_handler.py +381 -0
  36. ads/aqua/extension/deployment_ws_msg_handler.py +54 -0
  37. ads/aqua/extension/errors.py +30 -0
  38. ads/aqua/extension/evaluation_handler.py +129 -0
  39. ads/aqua/extension/evaluation_ws_msg_handler.py +61 -0
  40. ads/aqua/extension/finetune_handler.py +96 -0
  41. ads/aqua/extension/model_handler.py +390 -0
  42. ads/aqua/extension/models/__init__.py +0 -0
  43. ads/aqua/extension/models/ws_models.py +145 -0
  44. ads/aqua/extension/models_ws_msg_handler.py +50 -0
  45. ads/aqua/extension/ui_handler.py +300 -0
  46. ads/aqua/extension/ui_websocket_handler.py +130 -0
  47. ads/aqua/extension/utils.py +133 -0
  48. ads/aqua/finetuning/__init__.py +7 -0
  49. ads/aqua/finetuning/constants.py +23 -0
  50. ads/aqua/finetuning/entities.py +181 -0
  51. ads/aqua/finetuning/finetuning.py +749 -0
  52. ads/aqua/model/__init__.py +8 -0
  53. ads/aqua/model/constants.py +60 -0
  54. ads/aqua/model/entities.py +385 -0
  55. ads/aqua/model/enums.py +32 -0
  56. ads/aqua/model/model.py +2134 -0
  57. ads/aqua/model/utils.py +52 -0
  58. ads/aqua/modeldeployment/__init__.py +6 -0
  59. ads/aqua/modeldeployment/constants.py +10 -0
  60. ads/aqua/modeldeployment/deployment.py +1315 -0
  61. ads/aqua/modeldeployment/entities.py +653 -0
  62. ads/aqua/modeldeployment/utils.py +543 -0
  63. ads/aqua/resources/gpu_shapes_index.json +94 -0
  64. ads/aqua/server/__init__.py +4 -0
  65. ads/aqua/server/__main__.py +24 -0
  66. ads/aqua/server/app.py +47 -0
  67. ads/aqua/server/aqua_spec.yml +1291 -0
  68. ads/aqua/training/__init__.py +4 -0
  69. ads/aqua/training/exceptions.py +476 -0
  70. ads/aqua/ui.py +519 -0
  71. ads/automl/__init__.py +9 -0
  72. ads/automl/driver.py +330 -0
  73. ads/automl/provider.py +975 -0
  74. ads/bds/__init__.py +5 -0
  75. ads/bds/auth.py +127 -0
  76. ads/bds/big_data_service.py +255 -0
  77. ads/catalog/__init__.py +19 -0
  78. ads/catalog/model.py +1576 -0
  79. ads/catalog/notebook.py +461 -0
  80. ads/catalog/project.py +468 -0
  81. ads/catalog/summary.py +178 -0
  82. ads/common/__init__.py +11 -0
  83. ads/common/analyzer.py +65 -0
  84. ads/common/artifact/.model-ignore +63 -0
  85. ads/common/artifact/__init__.py +10 -0
  86. ads/common/auth.py +1122 -0
  87. ads/common/card_identifier.py +83 -0
  88. ads/common/config.py +647 -0
  89. ads/common/data.py +165 -0
  90. ads/common/decorator/__init__.py +9 -0
  91. ads/common/decorator/argument_to_case.py +88 -0
  92. ads/common/decorator/deprecate.py +69 -0
  93. ads/common/decorator/require_nonempty_arg.py +65 -0
  94. ads/common/decorator/runtime_dependency.py +178 -0
  95. ads/common/decorator/threaded.py +97 -0
  96. ads/common/decorator/utils.py +35 -0
  97. ads/common/dsc_file_system.py +303 -0
  98. ads/common/error.py +14 -0
  99. ads/common/extended_enum.py +81 -0
  100. ads/common/function/__init__.py +5 -0
  101. ads/common/function/fn_util.py +142 -0
  102. ads/common/function/func_conf.yaml +25 -0
  103. ads/common/ipython.py +76 -0
  104. ads/common/model.py +679 -0
  105. ads/common/model_artifact.py +1759 -0
  106. ads/common/model_artifact_schema.json +107 -0
  107. ads/common/model_export_util.py +664 -0
  108. ads/common/model_metadata.py +24 -0
  109. ads/common/object_storage_details.py +296 -0
  110. ads/common/oci_client.py +179 -0
  111. ads/common/oci_datascience.py +46 -0
  112. ads/common/oci_logging.py +1144 -0
  113. ads/common/oci_mixin.py +957 -0
  114. ads/common/oci_resource.py +136 -0
  115. ads/common/serializer.py +559 -0
  116. ads/common/utils.py +1852 -0
  117. ads/common/word_lists.py +1491 -0
  118. ads/common/work_request.py +189 -0
  119. ads/config.py +1 -0
  120. ads/data_labeling/__init__.py +13 -0
  121. ads/data_labeling/boundingbox.py +253 -0
  122. ads/data_labeling/constants.py +47 -0
  123. ads/data_labeling/data_labeling_service.py +244 -0
  124. ads/data_labeling/interface/__init__.py +5 -0
  125. ads/data_labeling/interface/loader.py +16 -0
  126. ads/data_labeling/interface/parser.py +16 -0
  127. ads/data_labeling/interface/reader.py +23 -0
  128. ads/data_labeling/loader/__init__.py +5 -0
  129. ads/data_labeling/loader/file_loader.py +241 -0
  130. ads/data_labeling/metadata.py +110 -0
  131. ads/data_labeling/mixin/__init__.py +5 -0
  132. ads/data_labeling/mixin/data_labeling.py +232 -0
  133. ads/data_labeling/ner.py +129 -0
  134. ads/data_labeling/parser/__init__.py +5 -0
  135. ads/data_labeling/parser/dls_record_parser.py +388 -0
  136. ads/data_labeling/parser/export_metadata_parser.py +94 -0
  137. ads/data_labeling/parser/export_record_parser.py +473 -0
  138. ads/data_labeling/reader/__init__.py +5 -0
  139. ads/data_labeling/reader/dataset_reader.py +574 -0
  140. ads/data_labeling/reader/dls_record_reader.py +121 -0
  141. ads/data_labeling/reader/export_record_reader.py +62 -0
  142. ads/data_labeling/reader/jsonl_reader.py +75 -0
  143. ads/data_labeling/reader/metadata_reader.py +203 -0
  144. ads/data_labeling/reader/record_reader.py +263 -0
  145. ads/data_labeling/record.py +52 -0
  146. ads/data_labeling/visualizer/__init__.py +5 -0
  147. ads/data_labeling/visualizer/image_visualizer.py +525 -0
  148. ads/data_labeling/visualizer/text_visualizer.py +357 -0
  149. ads/database/__init__.py +5 -0
  150. ads/database/connection.py +338 -0
  151. ads/dataset/__init__.py +10 -0
  152. ads/dataset/capabilities.md +51 -0
  153. ads/dataset/classification_dataset.py +339 -0
  154. ads/dataset/correlation.py +226 -0
  155. ads/dataset/correlation_plot.py +563 -0
  156. ads/dataset/dask_series.py +173 -0
  157. ads/dataset/dataframe_transformer.py +110 -0
  158. ads/dataset/dataset.py +1979 -0
  159. ads/dataset/dataset_browser.py +360 -0
  160. ads/dataset/dataset_with_target.py +995 -0
  161. ads/dataset/exception.py +25 -0
  162. ads/dataset/factory.py +987 -0
  163. ads/dataset/feature_engineering_transformer.py +35 -0
  164. ads/dataset/feature_selection.py +107 -0
  165. ads/dataset/forecasting_dataset.py +26 -0
  166. ads/dataset/helper.py +1450 -0
  167. ads/dataset/label_encoder.py +99 -0
  168. ads/dataset/mixin/__init__.py +5 -0
  169. ads/dataset/mixin/dataset_accessor.py +134 -0
  170. ads/dataset/pipeline.py +58 -0
  171. ads/dataset/plot.py +710 -0
  172. ads/dataset/progress.py +86 -0
  173. ads/dataset/recommendation.py +297 -0
  174. ads/dataset/recommendation_transformer.py +502 -0
  175. ads/dataset/regression_dataset.py +14 -0
  176. ads/dataset/sampled_dataset.py +1050 -0
  177. ads/dataset/target.py +98 -0
  178. ads/dataset/timeseries.py +18 -0
  179. ads/dbmixin/__init__.py +5 -0
  180. ads/dbmixin/db_pandas_accessor.py +153 -0
  181. ads/environment/__init__.py +9 -0
  182. ads/environment/ml_runtime.py +66 -0
  183. ads/evaluations/README.md +14 -0
  184. ads/evaluations/__init__.py +109 -0
  185. ads/evaluations/evaluation_plot.py +983 -0
  186. ads/evaluations/evaluator.py +1334 -0
  187. ads/evaluations/statistical_metrics.py +543 -0
  188. ads/experiments/__init__.py +9 -0
  189. ads/experiments/capabilities.md +0 -0
  190. ads/explanations/__init__.py +21 -0
  191. ads/explanations/base_explainer.py +142 -0
  192. ads/explanations/capabilities.md +83 -0
  193. ads/explanations/explainer.py +190 -0
  194. ads/explanations/mlx_global_explainer.py +1050 -0
  195. ads/explanations/mlx_interface.py +386 -0
  196. ads/explanations/mlx_local_explainer.py +287 -0
  197. ads/explanations/mlx_whatif_explainer.py +201 -0
  198. ads/feature_engineering/__init__.py +20 -0
  199. ads/feature_engineering/accessor/__init__.py +5 -0
  200. ads/feature_engineering/accessor/dataframe_accessor.py +535 -0
  201. ads/feature_engineering/accessor/mixin/__init__.py +5 -0
  202. ads/feature_engineering/accessor/mixin/correlation.py +166 -0
  203. ads/feature_engineering/accessor/mixin/eda_mixin.py +266 -0
  204. ads/feature_engineering/accessor/mixin/eda_mixin_series.py +85 -0
  205. ads/feature_engineering/accessor/mixin/feature_types_mixin.py +211 -0
  206. ads/feature_engineering/accessor/mixin/utils.py +65 -0
  207. ads/feature_engineering/accessor/series_accessor.py +431 -0
  208. ads/feature_engineering/adsimage/__init__.py +5 -0
  209. ads/feature_engineering/adsimage/image.py +192 -0
  210. ads/feature_engineering/adsimage/image_reader.py +170 -0
  211. ads/feature_engineering/adsimage/interface/__init__.py +5 -0
  212. ads/feature_engineering/adsimage/interface/reader.py +19 -0
  213. ads/feature_engineering/adsstring/__init__.py +7 -0
  214. ads/feature_engineering/adsstring/oci_language/__init__.py +8 -0
  215. ads/feature_engineering/adsstring/string/__init__.py +8 -0
  216. ads/feature_engineering/data_schema.json +57 -0
  217. ads/feature_engineering/dataset/__init__.py +5 -0
  218. ads/feature_engineering/dataset/zip_code_data.py +42062 -0
  219. ads/feature_engineering/exceptions.py +40 -0
  220. ads/feature_engineering/feature_type/__init__.py +133 -0
  221. ads/feature_engineering/feature_type/address.py +184 -0
  222. ads/feature_engineering/feature_type/adsstring/__init__.py +5 -0
  223. ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +164 -0
  224. ads/feature_engineering/feature_type/adsstring/oci_language.py +93 -0
  225. ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +5 -0
  226. ads/feature_engineering/feature_type/adsstring/parsers/base.py +47 -0
  227. ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +96 -0
  228. ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +221 -0
  229. ads/feature_engineering/feature_type/adsstring/string.py +258 -0
  230. ads/feature_engineering/feature_type/base.py +58 -0
  231. ads/feature_engineering/feature_type/boolean.py +183 -0
  232. ads/feature_engineering/feature_type/category.py +146 -0
  233. ads/feature_engineering/feature_type/constant.py +137 -0
  234. ads/feature_engineering/feature_type/continuous.py +151 -0
  235. ads/feature_engineering/feature_type/creditcard.py +314 -0
  236. ads/feature_engineering/feature_type/datetime.py +190 -0
  237. ads/feature_engineering/feature_type/discrete.py +134 -0
  238. ads/feature_engineering/feature_type/document.py +43 -0
  239. ads/feature_engineering/feature_type/gis.py +251 -0
  240. ads/feature_engineering/feature_type/handler/__init__.py +5 -0
  241. ads/feature_engineering/feature_type/handler/feature_validator.py +524 -0
  242. ads/feature_engineering/feature_type/handler/feature_warning.py +319 -0
  243. ads/feature_engineering/feature_type/handler/warnings.py +128 -0
  244. ads/feature_engineering/feature_type/integer.py +142 -0
  245. ads/feature_engineering/feature_type/ip_address.py +144 -0
  246. ads/feature_engineering/feature_type/ip_address_v4.py +138 -0
  247. ads/feature_engineering/feature_type/ip_address_v6.py +138 -0
  248. ads/feature_engineering/feature_type/lat_long.py +256 -0
  249. ads/feature_engineering/feature_type/object.py +43 -0
  250. ads/feature_engineering/feature_type/ordinal.py +132 -0
  251. ads/feature_engineering/feature_type/phone_number.py +135 -0
  252. ads/feature_engineering/feature_type/string.py +171 -0
  253. ads/feature_engineering/feature_type/text.py +93 -0
  254. ads/feature_engineering/feature_type/unknown.py +43 -0
  255. ads/feature_engineering/feature_type/zip_code.py +164 -0
  256. ads/feature_engineering/feature_type_manager.py +406 -0
  257. ads/feature_engineering/schema.py +795 -0
  258. ads/feature_engineering/utils.py +245 -0
  259. ads/feature_store/.readthedocs.yaml +19 -0
  260. ads/feature_store/README.md +65 -0
  261. ads/feature_store/__init__.py +9 -0
  262. ads/feature_store/common/__init__.py +0 -0
  263. ads/feature_store/common/enums.py +339 -0
  264. ads/feature_store/common/exceptions.py +18 -0
  265. ads/feature_store/common/spark_session_singleton.py +125 -0
  266. ads/feature_store/common/utils/__init__.py +0 -0
  267. ads/feature_store/common/utils/base64_encoder_decoder.py +72 -0
  268. ads/feature_store/common/utils/feature_schema_mapper.py +283 -0
  269. ads/feature_store/common/utils/transformation_utils.py +82 -0
  270. ads/feature_store/common/utils/utility.py +403 -0
  271. ads/feature_store/data_validation/__init__.py +0 -0
  272. ads/feature_store/data_validation/great_expectation.py +129 -0
  273. ads/feature_store/dataset.py +1230 -0
  274. ads/feature_store/dataset_job.py +530 -0
  275. ads/feature_store/docs/Dockerfile +7 -0
  276. ads/feature_store/docs/Makefile +44 -0
  277. ads/feature_store/docs/conf.py +28 -0
  278. ads/feature_store/docs/requirements.txt +14 -0
  279. ads/feature_store/docs/source/ads.feature_store.query.rst +20 -0
  280. ads/feature_store/docs/source/cicd.rst +137 -0
  281. ads/feature_store/docs/source/conf.py +86 -0
  282. ads/feature_store/docs/source/data_versioning.rst +33 -0
  283. ads/feature_store/docs/source/dataset.rst +388 -0
  284. ads/feature_store/docs/source/dataset_job.rst +27 -0
  285. ads/feature_store/docs/source/demo.rst +70 -0
  286. ads/feature_store/docs/source/entity.rst +78 -0
  287. ads/feature_store/docs/source/feature_group.rst +624 -0
  288. ads/feature_store/docs/source/feature_group_job.rst +29 -0
  289. ads/feature_store/docs/source/feature_store.rst +122 -0
  290. ads/feature_store/docs/source/feature_store_class.rst +123 -0
  291. ads/feature_store/docs/source/feature_validation.rst +66 -0
  292. ads/feature_store/docs/source/figures/cicd.png +0 -0
  293. ads/feature_store/docs/source/figures/data_validation.png +0 -0
  294. ads/feature_store/docs/source/figures/data_versioning.png +0 -0
  295. ads/feature_store/docs/source/figures/dataset.gif +0 -0
  296. ads/feature_store/docs/source/figures/dataset.png +0 -0
  297. ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
  298. ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
  299. ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
  300. ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
  301. ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
  302. ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
  303. ads/feature_store/docs/source/figures/entity.png +0 -0
  304. ads/feature_store/docs/source/figures/feature_group.png +0 -0
  305. ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
  306. ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
  307. ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
  308. ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
  309. ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
  310. ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
  311. ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
  312. ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
  313. ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
  314. ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
  315. ads/feature_store/docs/source/figures/overview.png +0 -0
  316. ads/feature_store/docs/source/figures/resource_manager.png +0 -0
  317. ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
  318. ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
  319. ads/feature_store/docs/source/figures/stats_1.png +0 -0
  320. ads/feature_store/docs/source/figures/stats_2.png +0 -0
  321. ads/feature_store/docs/source/figures/stats_d.png +0 -0
  322. ads/feature_store/docs/source/figures/stats_fg.png +0 -0
  323. ads/feature_store/docs/source/figures/transformation.png +0 -0
  324. ads/feature_store/docs/source/figures/transformations.gif +0 -0
  325. ads/feature_store/docs/source/figures/validation.png +0 -0
  326. ads/feature_store/docs/source/figures/validation_fg.png +0 -0
  327. ads/feature_store/docs/source/figures/validation_results.png +0 -0
  328. ads/feature_store/docs/source/figures/validation_summary.png +0 -0
  329. ads/feature_store/docs/source/index.rst +81 -0
  330. ads/feature_store/docs/source/module.rst +8 -0
  331. ads/feature_store/docs/source/notebook.rst +94 -0
  332. ads/feature_store/docs/source/overview.rst +47 -0
  333. ads/feature_store/docs/source/quickstart.rst +176 -0
  334. ads/feature_store/docs/source/release_notes.rst +194 -0
  335. ads/feature_store/docs/source/setup_feature_store.rst +81 -0
  336. ads/feature_store/docs/source/statistics.rst +58 -0
  337. ads/feature_store/docs/source/transformation.rst +199 -0
  338. ads/feature_store/docs/source/ui.rst +65 -0
  339. ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +66 -0
  340. ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +192 -0
  341. ads/feature_store/docs/source/user_guides.setup.terraform.rst +338 -0
  342. ads/feature_store/entity.py +718 -0
  343. ads/feature_store/execution_strategy/__init__.py +0 -0
  344. ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
  345. ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +375 -0
  346. ads/feature_store/execution_strategy/engine/__init__.py +0 -0
  347. ads/feature_store/execution_strategy/engine/spark_engine.py +316 -0
  348. ads/feature_store/execution_strategy/execution_strategy.py +113 -0
  349. ads/feature_store/execution_strategy/execution_strategy_provider.py +47 -0
  350. ads/feature_store/execution_strategy/spark/__init__.py +0 -0
  351. ads/feature_store/execution_strategy/spark/spark_execution.py +618 -0
  352. ads/feature_store/feature.py +192 -0
  353. ads/feature_store/feature_group.py +1494 -0
  354. ads/feature_store/feature_group_expectation.py +346 -0
  355. ads/feature_store/feature_group_job.py +602 -0
  356. ads/feature_store/feature_lineage/__init__.py +0 -0
  357. ads/feature_store/feature_lineage/graphviz_service.py +180 -0
  358. ads/feature_store/feature_option_details.py +50 -0
  359. ads/feature_store/feature_statistics/__init__.py +0 -0
  360. ads/feature_store/feature_statistics/statistics_service.py +99 -0
  361. ads/feature_store/feature_store.py +699 -0
  362. ads/feature_store/feature_store_registrar.py +518 -0
  363. ads/feature_store/input_feature_detail.py +149 -0
  364. ads/feature_store/mixin/__init__.py +4 -0
  365. ads/feature_store/mixin/oci_feature_store.py +145 -0
  366. ads/feature_store/model_details.py +73 -0
  367. ads/feature_store/query/__init__.py +0 -0
  368. ads/feature_store/query/filter.py +266 -0
  369. ads/feature_store/query/generator/__init__.py +0 -0
  370. ads/feature_store/query/generator/query_generator.py +298 -0
  371. ads/feature_store/query/join.py +161 -0
  372. ads/feature_store/query/query.py +403 -0
  373. ads/feature_store/query/validator/__init__.py +0 -0
  374. ads/feature_store/query/validator/query_validator.py +57 -0
  375. ads/feature_store/response/__init__.py +0 -0
  376. ads/feature_store/response/response_builder.py +68 -0
  377. ads/feature_store/service/__init__.py +0 -0
  378. ads/feature_store/service/oci_dataset.py +139 -0
  379. ads/feature_store/service/oci_dataset_job.py +199 -0
  380. ads/feature_store/service/oci_entity.py +125 -0
  381. ads/feature_store/service/oci_feature_group.py +164 -0
  382. ads/feature_store/service/oci_feature_group_job.py +214 -0
  383. ads/feature_store/service/oci_feature_store.py +182 -0
  384. ads/feature_store/service/oci_lineage.py +87 -0
  385. ads/feature_store/service/oci_transformation.py +104 -0
  386. ads/feature_store/statistics/__init__.py +0 -0
  387. ads/feature_store/statistics/abs_feature_value.py +49 -0
  388. ads/feature_store/statistics/charts/__init__.py +0 -0
  389. ads/feature_store/statistics/charts/abstract_feature_plot.py +37 -0
  390. ads/feature_store/statistics/charts/box_plot.py +148 -0
  391. ads/feature_store/statistics/charts/frequency_distribution.py +65 -0
  392. ads/feature_store/statistics/charts/probability_distribution.py +68 -0
  393. ads/feature_store/statistics/charts/top_k_frequent_elements.py +98 -0
  394. ads/feature_store/statistics/feature_stat.py +126 -0
  395. ads/feature_store/statistics/generic_feature_value.py +33 -0
  396. ads/feature_store/statistics/statistics.py +41 -0
  397. ads/feature_store/statistics_config.py +101 -0
  398. ads/feature_store/templates/feature_store_template.yaml +45 -0
  399. ads/feature_store/transformation.py +499 -0
  400. ads/feature_store/validation_output.py +57 -0
  401. ads/hpo/__init__.py +9 -0
  402. ads/hpo/_imports.py +91 -0
  403. ads/hpo/ads_search_space.py +439 -0
  404. ads/hpo/distributions.py +325 -0
  405. ads/hpo/objective.py +280 -0
  406. ads/hpo/search_cv.py +1657 -0
  407. ads/hpo/stopping_criterion.py +75 -0
  408. ads/hpo/tuner_artifact.py +413 -0
  409. ads/hpo/utils.py +91 -0
  410. ads/hpo/validation.py +140 -0
  411. ads/hpo/visualization/__init__.py +5 -0
  412. ads/hpo/visualization/_contour.py +23 -0
  413. ads/hpo/visualization/_edf.py +20 -0
  414. ads/hpo/visualization/_intermediate_values.py +21 -0
  415. ads/hpo/visualization/_optimization_history.py +25 -0
  416. ads/hpo/visualization/_parallel_coordinate.py +169 -0
  417. ads/hpo/visualization/_param_importances.py +26 -0
  418. ads/jobs/__init__.py +53 -0
  419. ads/jobs/ads_job.py +663 -0
  420. ads/jobs/builders/__init__.py +5 -0
  421. ads/jobs/builders/base.py +156 -0
  422. ads/jobs/builders/infrastructure/__init__.py +6 -0
  423. ads/jobs/builders/infrastructure/base.py +165 -0
  424. ads/jobs/builders/infrastructure/dataflow.py +1252 -0
  425. ads/jobs/builders/infrastructure/dsc_job.py +1894 -0
  426. ads/jobs/builders/infrastructure/dsc_job_runtime.py +1233 -0
  427. ads/jobs/builders/infrastructure/utils.py +65 -0
  428. ads/jobs/builders/runtimes/__init__.py +5 -0
  429. ads/jobs/builders/runtimes/artifact.py +338 -0
  430. ads/jobs/builders/runtimes/base.py +325 -0
  431. ads/jobs/builders/runtimes/container_runtime.py +242 -0
  432. ads/jobs/builders/runtimes/python_runtime.py +1016 -0
  433. ads/jobs/builders/runtimes/pytorch_runtime.py +204 -0
  434. ads/jobs/cli.py +104 -0
  435. ads/jobs/env_var_parser.py +131 -0
  436. ads/jobs/extension.py +160 -0
  437. ads/jobs/schema/__init__.py +5 -0
  438. ads/jobs/schema/infrastructure_schema.json +116 -0
  439. ads/jobs/schema/job_schema.json +42 -0
  440. ads/jobs/schema/runtime_schema.json +183 -0
  441. ads/jobs/schema/validator.py +141 -0
  442. ads/jobs/serializer.py +296 -0
  443. ads/jobs/templates/__init__.py +5 -0
  444. ads/jobs/templates/container.py +6 -0
  445. ads/jobs/templates/driver_notebook.py +177 -0
  446. ads/jobs/templates/driver_oci.py +500 -0
  447. ads/jobs/templates/driver_python.py +48 -0
  448. ads/jobs/templates/driver_pytorch.py +852 -0
  449. ads/jobs/templates/driver_utils.py +615 -0
  450. ads/jobs/templates/hostname_from_env.c +55 -0
  451. ads/jobs/templates/oci_metrics.py +181 -0
  452. ads/jobs/utils.py +104 -0
  453. ads/llm/__init__.py +28 -0
  454. ads/llm/autogen/__init__.py +2 -0
  455. ads/llm/autogen/constants.py +15 -0
  456. ads/llm/autogen/reports/__init__.py +2 -0
  457. ads/llm/autogen/reports/base.py +67 -0
  458. ads/llm/autogen/reports/data.py +103 -0
  459. ads/llm/autogen/reports/session.py +526 -0
  460. ads/llm/autogen/reports/templates/chat_box.html +13 -0
  461. ads/llm/autogen/reports/templates/chat_box_lt.html +5 -0
  462. ads/llm/autogen/reports/templates/chat_box_rt.html +6 -0
  463. ads/llm/autogen/reports/utils.py +56 -0
  464. ads/llm/autogen/v02/__init__.py +4 -0
  465. ads/llm/autogen/v02/client.py +295 -0
  466. ads/llm/autogen/v02/log_handlers/__init__.py +2 -0
  467. ads/llm/autogen/v02/log_handlers/oci_file_handler.py +83 -0
  468. ads/llm/autogen/v02/loggers/__init__.py +6 -0
  469. ads/llm/autogen/v02/loggers/metric_logger.py +320 -0
  470. ads/llm/autogen/v02/loggers/session_logger.py +580 -0
  471. ads/llm/autogen/v02/loggers/utils.py +86 -0
  472. ads/llm/autogen/v02/runtime_logging.py +163 -0
  473. ads/llm/chain.py +268 -0
  474. ads/llm/chat_template.py +31 -0
  475. ads/llm/deploy.py +63 -0
  476. ads/llm/guardrails/__init__.py +5 -0
  477. ads/llm/guardrails/base.py +442 -0
  478. ads/llm/guardrails/huggingface.py +44 -0
  479. ads/llm/langchain/__init__.py +5 -0
  480. ads/llm/langchain/plugins/__init__.py +5 -0
  481. ads/llm/langchain/plugins/chat_models/__init__.py +5 -0
  482. ads/llm/langchain/plugins/chat_models/oci_data_science.py +1027 -0
  483. ads/llm/langchain/plugins/embeddings/__init__.py +4 -0
  484. ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +184 -0
  485. ads/llm/langchain/plugins/llms/__init__.py +5 -0
  486. ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +979 -0
  487. ads/llm/requirements.txt +3 -0
  488. ads/llm/serialize.py +219 -0
  489. ads/llm/serializers/__init__.py +0 -0
  490. ads/llm/serializers/retrieval_qa.py +153 -0
  491. ads/llm/serializers/runnable_parallel.py +27 -0
  492. ads/llm/templates/score_chain.jinja2 +155 -0
  493. ads/llm/templates/tool_chat_template_hermes.jinja +130 -0
  494. ads/llm/templates/tool_chat_template_mistral_parallel.jinja +94 -0
  495. ads/model/__init__.py +52 -0
  496. ads/model/artifact.py +573 -0
  497. ads/model/artifact_downloader.py +254 -0
  498. ads/model/artifact_uploader.py +267 -0
  499. ads/model/base_properties.py +238 -0
  500. ads/model/common/.model-ignore +66 -0
  501. ads/model/common/__init__.py +5 -0
  502. ads/model/common/utils.py +142 -0
  503. ads/model/datascience_model.py +2635 -0
  504. ads/model/deployment/__init__.py +20 -0
  505. ads/model/deployment/common/__init__.py +5 -0
  506. ads/model/deployment/common/utils.py +308 -0
  507. ads/model/deployment/model_deployer.py +466 -0
  508. ads/model/deployment/model_deployment.py +1846 -0
  509. ads/model/deployment/model_deployment_infrastructure.py +671 -0
  510. ads/model/deployment/model_deployment_properties.py +493 -0
  511. ads/model/deployment/model_deployment_runtime.py +838 -0
  512. ads/model/extractor/__init__.py +5 -0
  513. ads/model/extractor/automl_extractor.py +74 -0
  514. ads/model/extractor/embedding_onnx_extractor.py +80 -0
  515. ads/model/extractor/huggingface_extractor.py +88 -0
  516. ads/model/extractor/keras_extractor.py +84 -0
  517. ads/model/extractor/lightgbm_extractor.py +93 -0
  518. ads/model/extractor/model_info_extractor.py +114 -0
  519. ads/model/extractor/model_info_extractor_factory.py +105 -0
  520. ads/model/extractor/pytorch_extractor.py +87 -0
  521. ads/model/extractor/sklearn_extractor.py +112 -0
  522. ads/model/extractor/spark_extractor.py +89 -0
  523. ads/model/extractor/tensorflow_extractor.py +85 -0
  524. ads/model/extractor/xgboost_extractor.py +94 -0
  525. ads/model/framework/__init__.py +5 -0
  526. ads/model/framework/automl_model.py +178 -0
  527. ads/model/framework/embedding_onnx_model.py +438 -0
  528. ads/model/framework/huggingface_model.py +399 -0
  529. ads/model/framework/lightgbm_model.py +266 -0
  530. ads/model/framework/pytorch_model.py +266 -0
  531. ads/model/framework/sklearn_model.py +250 -0
  532. ads/model/framework/spark_model.py +326 -0
  533. ads/model/framework/tensorflow_model.py +254 -0
  534. ads/model/framework/xgboost_model.py +258 -0
  535. ads/model/generic_model.py +3518 -0
  536. ads/model/model_artifact_boilerplate/README.md +381 -0
  537. ads/model/model_artifact_boilerplate/__init__.py +5 -0
  538. ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +5 -0
  539. ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +427 -0
  540. ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +2 -0
  541. ads/model/model_artifact_boilerplate/runtime.yaml +7 -0
  542. ads/model/model_artifact_boilerplate/score.py +61 -0
  543. ads/model/model_file_description_schema.json +68 -0
  544. ads/model/model_introspect.py +331 -0
  545. ads/model/model_metadata.py +1810 -0
  546. ads/model/model_metadata_mixin.py +460 -0
  547. ads/model/model_properties.py +63 -0
  548. ads/model/model_version_set.py +739 -0
  549. ads/model/runtime/__init__.py +5 -0
  550. ads/model/runtime/env_info.py +306 -0
  551. ads/model/runtime/model_deployment_details.py +37 -0
  552. ads/model/runtime/model_provenance_details.py +58 -0
  553. ads/model/runtime/runtime_info.py +81 -0
  554. ads/model/runtime/schemas/inference_env_info_schema.yaml +16 -0
  555. ads/model/runtime/schemas/model_provenance_schema.yaml +36 -0
  556. ads/model/runtime/schemas/training_env_info_schema.yaml +16 -0
  557. ads/model/runtime/utils.py +201 -0
  558. ads/model/serde/__init__.py +5 -0
  559. ads/model/serde/common.py +40 -0
  560. ads/model/serde/model_input.py +547 -0
  561. ads/model/serde/model_serializer.py +1184 -0
  562. ads/model/service/__init__.py +5 -0
  563. ads/model/service/oci_datascience_model.py +1076 -0
  564. ads/model/service/oci_datascience_model_deployment.py +500 -0
  565. ads/model/service/oci_datascience_model_version_set.py +176 -0
  566. ads/model/transformer/__init__.py +5 -0
  567. ads/model/transformer/onnx_transformer.py +324 -0
  568. ads/mysqldb/__init__.py +5 -0
  569. ads/mysqldb/mysql_db.py +227 -0
  570. ads/opctl/__init__.py +18 -0
  571. ads/opctl/anomaly_detection.py +11 -0
  572. ads/opctl/backend/__init__.py +5 -0
  573. ads/opctl/backend/ads_dataflow.py +353 -0
  574. ads/opctl/backend/ads_ml_job.py +710 -0
  575. ads/opctl/backend/ads_ml_pipeline.py +164 -0
  576. ads/opctl/backend/ads_model_deployment.py +209 -0
  577. ads/opctl/backend/base.py +146 -0
  578. ads/opctl/backend/local.py +1053 -0
  579. ads/opctl/backend/marketplace/__init__.py +9 -0
  580. ads/opctl/backend/marketplace/helm_helper.py +173 -0
  581. ads/opctl/backend/marketplace/local_marketplace.py +271 -0
  582. ads/opctl/backend/marketplace/marketplace_backend_runner.py +71 -0
  583. ads/opctl/backend/marketplace/marketplace_operator_interface.py +44 -0
  584. ads/opctl/backend/marketplace/marketplace_operator_runner.py +24 -0
  585. ads/opctl/backend/marketplace/marketplace_utils.py +212 -0
  586. ads/opctl/backend/marketplace/models/__init__.py +5 -0
  587. ads/opctl/backend/marketplace/models/bearer_token.py +94 -0
  588. ads/opctl/backend/marketplace/models/marketplace_type.py +70 -0
  589. ads/opctl/backend/marketplace/models/ocir_details.py +56 -0
  590. ads/opctl/backend/marketplace/prerequisite_checker.py +238 -0
  591. ads/opctl/cli.py +707 -0
  592. ads/opctl/cmds.py +869 -0
  593. ads/opctl/conda/__init__.py +5 -0
  594. ads/opctl/conda/cli.py +193 -0
  595. ads/opctl/conda/cmds.py +749 -0
  596. ads/opctl/conda/config.yaml +34 -0
  597. ads/opctl/conda/manifest_template.yaml +13 -0
  598. ads/opctl/conda/multipart_uploader.py +188 -0
  599. ads/opctl/conda/pack.py +89 -0
  600. ads/opctl/config/__init__.py +5 -0
  601. ads/opctl/config/base.py +57 -0
  602. ads/opctl/config/diagnostics/__init__.py +5 -0
  603. ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +62 -0
  604. ads/opctl/config/merger.py +255 -0
  605. ads/opctl/config/resolver.py +297 -0
  606. ads/opctl/config/utils.py +79 -0
  607. ads/opctl/config/validator.py +17 -0
  608. ads/opctl/config/versioner.py +68 -0
  609. ads/opctl/config/yaml_parsers/__init__.py +7 -0
  610. ads/opctl/config/yaml_parsers/base.py +58 -0
  611. ads/opctl/config/yaml_parsers/distributed/__init__.py +7 -0
  612. ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +201 -0
  613. ads/opctl/constants.py +66 -0
  614. ads/opctl/decorator/__init__.py +5 -0
  615. ads/opctl/decorator/common.py +129 -0
  616. ads/opctl/diagnostics/__init__.py +5 -0
  617. ads/opctl/diagnostics/__main__.py +25 -0
  618. ads/opctl/diagnostics/check_distributed_job_requirements.py +212 -0
  619. ads/opctl/diagnostics/check_requirements.py +144 -0
  620. ads/opctl/diagnostics/requirement_exception.py +9 -0
  621. ads/opctl/distributed/README.md +109 -0
  622. ads/opctl/distributed/__init__.py +5 -0
  623. ads/opctl/distributed/certificates.py +32 -0
  624. ads/opctl/distributed/cli.py +207 -0
  625. ads/opctl/distributed/cmds.py +731 -0
  626. ads/opctl/distributed/common/__init__.py +5 -0
  627. ads/opctl/distributed/common/abstract_cluster_provider.py +449 -0
  628. ads/opctl/distributed/common/abstract_framework_spec_builder.py +88 -0
  629. ads/opctl/distributed/common/cluster_config_helper.py +103 -0
  630. ads/opctl/distributed/common/cluster_provider_factory.py +21 -0
  631. ads/opctl/distributed/common/cluster_runner.py +54 -0
  632. ads/opctl/distributed/common/framework_factory.py +29 -0
  633. ads/opctl/docker/Dockerfile.job +103 -0
  634. ads/opctl/docker/Dockerfile.job.arm +107 -0
  635. ads/opctl/docker/Dockerfile.job.gpu +175 -0
  636. ads/opctl/docker/base-env.yaml +13 -0
  637. ads/opctl/docker/cuda.repo +6 -0
  638. ads/opctl/docker/operator/.dockerignore +0 -0
  639. ads/opctl/docker/operator/Dockerfile +41 -0
  640. ads/opctl/docker/operator/Dockerfile.gpu +85 -0
  641. ads/opctl/docker/operator/cuda.repo +6 -0
  642. ads/opctl/docker/operator/environment.yaml +8 -0
  643. ads/opctl/forecast.py +11 -0
  644. ads/opctl/index.yaml +3 -0
  645. ads/opctl/model/__init__.py +5 -0
  646. ads/opctl/model/cli.py +65 -0
  647. ads/opctl/model/cmds.py +73 -0
  648. ads/opctl/operator/README.md +4 -0
  649. ads/opctl/operator/__init__.py +31 -0
  650. ads/opctl/operator/cli.py +344 -0
  651. ads/opctl/operator/cmd.py +596 -0
  652. ads/opctl/operator/common/__init__.py +5 -0
  653. ads/opctl/operator/common/backend_factory.py +460 -0
  654. ads/opctl/operator/common/const.py +27 -0
  655. ads/opctl/operator/common/data/synthetic.csv +16001 -0
  656. ads/opctl/operator/common/dictionary_merger.py +148 -0
  657. ads/opctl/operator/common/errors.py +42 -0
  658. ads/opctl/operator/common/operator_config.py +99 -0
  659. ads/opctl/operator/common/operator_loader.py +811 -0
  660. ads/opctl/operator/common/operator_schema.yaml +130 -0
  661. ads/opctl/operator/common/operator_yaml_generator.py +152 -0
  662. ads/opctl/operator/common/utils.py +208 -0
  663. ads/opctl/operator/lowcode/__init__.py +5 -0
  664. ads/opctl/operator/lowcode/anomaly/MLoperator +16 -0
  665. ads/opctl/operator/lowcode/anomaly/README.md +207 -0
  666. ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
  667. ads/opctl/operator/lowcode/anomaly/__main__.py +103 -0
  668. ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
  669. ads/opctl/operator/lowcode/anomaly/const.py +167 -0
  670. ads/opctl/operator/lowcode/anomaly/environment.yaml +10 -0
  671. ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
  672. ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +146 -0
  673. ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +162 -0
  674. ads/opctl/operator/lowcode/anomaly/model/automlx.py +99 -0
  675. ads/opctl/operator/lowcode/anomaly/model/autots.py +115 -0
  676. ads/opctl/operator/lowcode/anomaly/model/base_model.py +404 -0
  677. ads/opctl/operator/lowcode/anomaly/model/factory.py +110 -0
  678. ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +78 -0
  679. ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +78 -0
  680. ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +120 -0
  681. ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
  682. ads/opctl/operator/lowcode/anomaly/operator_config.py +127 -0
  683. ads/opctl/operator/lowcode/anomaly/schema.yaml +401 -0
  684. ads/opctl/operator/lowcode/anomaly/utils.py +88 -0
  685. ads/opctl/operator/lowcode/common/__init__.py +5 -0
  686. ads/opctl/operator/lowcode/common/const.py +10 -0
  687. ads/opctl/operator/lowcode/common/data.py +116 -0
  688. ads/opctl/operator/lowcode/common/errors.py +47 -0
  689. ads/opctl/operator/lowcode/common/transformations.py +296 -0
  690. ads/opctl/operator/lowcode/common/utils.py +384 -0
  691. ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +13 -0
  692. ads/opctl/operator/lowcode/feature_store_marketplace/README.md +30 -0
  693. ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +5 -0
  694. ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +116 -0
  695. ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +85 -0
  696. ads/opctl/operator/lowcode/feature_store_marketplace/const.py +15 -0
  697. ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
  698. ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +4 -0
  699. ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +32 -0
  700. ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +43 -0
  701. ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +120 -0
  702. ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +34 -0
  703. ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +386 -0
  704. ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +160 -0
  705. ads/opctl/operator/lowcode/forecast/MLoperator +25 -0
  706. ads/opctl/operator/lowcode/forecast/README.md +209 -0
  707. ads/opctl/operator/lowcode/forecast/__init__.py +5 -0
  708. ads/opctl/operator/lowcode/forecast/__main__.py +89 -0
  709. ads/opctl/operator/lowcode/forecast/cmd.py +40 -0
  710. ads/opctl/operator/lowcode/forecast/const.py +92 -0
  711. ads/opctl/operator/lowcode/forecast/environment.yaml +20 -0
  712. ads/opctl/operator/lowcode/forecast/errors.py +26 -0
  713. ads/opctl/operator/lowcode/forecast/model/__init__.py +5 -0
  714. ads/opctl/operator/lowcode/forecast/model/arima.py +279 -0
  715. ads/opctl/operator/lowcode/forecast/model/automlx.py +553 -0
  716. ads/opctl/operator/lowcode/forecast/model/autots.py +312 -0
  717. ads/opctl/operator/lowcode/forecast/model/base_model.py +875 -0
  718. ads/opctl/operator/lowcode/forecast/model/factory.py +106 -0
  719. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +492 -0
  720. ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +243 -0
  721. ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +482 -0
  722. ads/opctl/operator/lowcode/forecast/model/prophet.py +450 -0
  723. ads/opctl/operator/lowcode/forecast/model_evaluator.py +244 -0
  724. ads/opctl/operator/lowcode/forecast/operator_config.py +234 -0
  725. ads/opctl/operator/lowcode/forecast/schema.yaml +506 -0
  726. ads/opctl/operator/lowcode/forecast/utils.py +397 -0
  727. ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +7 -0
  728. ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +285 -0
  729. ads/opctl/operator/lowcode/forecast/whatifserve/score.py +246 -0
  730. ads/opctl/operator/lowcode/pii/MLoperator +17 -0
  731. ads/opctl/operator/lowcode/pii/README.md +208 -0
  732. ads/opctl/operator/lowcode/pii/__init__.py +5 -0
  733. ads/opctl/operator/lowcode/pii/__main__.py +78 -0
  734. ads/opctl/operator/lowcode/pii/cmd.py +39 -0
  735. ads/opctl/operator/lowcode/pii/constant.py +84 -0
  736. ads/opctl/operator/lowcode/pii/environment.yaml +17 -0
  737. ads/opctl/operator/lowcode/pii/errors.py +27 -0
  738. ads/opctl/operator/lowcode/pii/model/__init__.py +5 -0
  739. ads/opctl/operator/lowcode/pii/model/factory.py +82 -0
  740. ads/opctl/operator/lowcode/pii/model/guardrails.py +167 -0
  741. ads/opctl/operator/lowcode/pii/model/pii.py +145 -0
  742. ads/opctl/operator/lowcode/pii/model/processor/__init__.py +34 -0
  743. ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +34 -0
  744. ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +35 -0
  745. ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +225 -0
  746. ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +73 -0
  747. ads/opctl/operator/lowcode/pii/model/processor/remover.py +26 -0
  748. ads/opctl/operator/lowcode/pii/model/report.py +487 -0
  749. ads/opctl/operator/lowcode/pii/operator_config.py +95 -0
  750. ads/opctl/operator/lowcode/pii/schema.yaml +108 -0
  751. ads/opctl/operator/lowcode/pii/utils.py +43 -0
  752. ads/opctl/operator/lowcode/recommender/MLoperator +16 -0
  753. ads/opctl/operator/lowcode/recommender/README.md +206 -0
  754. ads/opctl/operator/lowcode/recommender/__init__.py +5 -0
  755. ads/opctl/operator/lowcode/recommender/__main__.py +82 -0
  756. ads/opctl/operator/lowcode/recommender/cmd.py +33 -0
  757. ads/opctl/operator/lowcode/recommender/constant.py +30 -0
  758. ads/opctl/operator/lowcode/recommender/environment.yaml +11 -0
  759. ads/opctl/operator/lowcode/recommender/model/base_model.py +212 -0
  760. ads/opctl/operator/lowcode/recommender/model/factory.py +56 -0
  761. ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +25 -0
  762. ads/opctl/operator/lowcode/recommender/model/svd.py +106 -0
  763. ads/opctl/operator/lowcode/recommender/operator_config.py +81 -0
  764. ads/opctl/operator/lowcode/recommender/schema.yaml +265 -0
  765. ads/opctl/operator/lowcode/recommender/utils.py +13 -0
  766. ads/opctl/operator/runtime/__init__.py +5 -0
  767. ads/opctl/operator/runtime/const.py +17 -0
  768. ads/opctl/operator/runtime/container_runtime_schema.yaml +50 -0
  769. ads/opctl/operator/runtime/marketplace_runtime.py +50 -0
  770. ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +21 -0
  771. ads/opctl/operator/runtime/python_runtime_schema.yaml +21 -0
  772. ads/opctl/operator/runtime/runtime.py +115 -0
  773. ads/opctl/schema.yaml.yml +36 -0
  774. ads/opctl/script.py +40 -0
  775. ads/opctl/spark/__init__.py +5 -0
  776. ads/opctl/spark/cli.py +43 -0
  777. ads/opctl/spark/cmds.py +147 -0
  778. ads/opctl/templates/diagnostic_report_template.jinja2 +102 -0
  779. ads/opctl/utils.py +344 -0
  780. ads/oracledb/__init__.py +5 -0
  781. ads/oracledb/oracle_db.py +346 -0
  782. ads/pipeline/__init__.py +39 -0
  783. ads/pipeline/ads_pipeline.py +2279 -0
  784. ads/pipeline/ads_pipeline_run.py +772 -0
  785. ads/pipeline/ads_pipeline_step.py +605 -0
  786. ads/pipeline/builders/__init__.py +5 -0
  787. ads/pipeline/builders/infrastructure/__init__.py +5 -0
  788. ads/pipeline/builders/infrastructure/custom_script.py +32 -0
  789. ads/pipeline/cli.py +119 -0
  790. ads/pipeline/extension.py +291 -0
  791. ads/pipeline/schema/__init__.py +5 -0
  792. ads/pipeline/schema/cs_step_schema.json +35 -0
  793. ads/pipeline/schema/ml_step_schema.json +31 -0
  794. ads/pipeline/schema/pipeline_schema.json +71 -0
  795. ads/pipeline/visualizer/__init__.py +5 -0
  796. ads/pipeline/visualizer/base.py +570 -0
  797. ads/pipeline/visualizer/graph_renderer.py +272 -0
  798. ads/pipeline/visualizer/text_renderer.py +84 -0
  799. ads/secrets/__init__.py +11 -0
  800. ads/secrets/adb.py +386 -0
  801. ads/secrets/auth_token.py +86 -0
  802. ads/secrets/big_data_service.py +365 -0
  803. ads/secrets/mysqldb.py +149 -0
  804. ads/secrets/oracledb.py +160 -0
  805. ads/secrets/secrets.py +407 -0
  806. ads/telemetry/__init__.py +7 -0
  807. ads/telemetry/base.py +69 -0
  808. ads/telemetry/client.py +122 -0
  809. ads/telemetry/telemetry.py +257 -0
  810. ads/templates/dataflow_pyspark.jinja2 +13 -0
  811. ads/templates/dataflow_sparksql.jinja2 +22 -0
  812. ads/templates/func.jinja2 +20 -0
  813. ads/templates/schemas/openapi.json +1740 -0
  814. ads/templates/score-pkl.jinja2 +173 -0
  815. ads/templates/score.jinja2 +322 -0
  816. ads/templates/score_embedding_onnx.jinja2 +202 -0
  817. ads/templates/score_generic.jinja2 +165 -0
  818. ads/templates/score_huggingface_pipeline.jinja2 +217 -0
  819. ads/templates/score_lightgbm.jinja2 +185 -0
  820. ads/templates/score_onnx.jinja2 +407 -0
  821. ads/templates/score_onnx_new.jinja2 +473 -0
  822. ads/templates/score_oracle_automl.jinja2 +185 -0
  823. ads/templates/score_pyspark.jinja2 +154 -0
  824. ads/templates/score_pytorch.jinja2 +219 -0
  825. ads/templates/score_scikit-learn.jinja2 +184 -0
  826. ads/templates/score_tensorflow.jinja2 +184 -0
  827. ads/templates/score_xgboost.jinja2 +178 -0
  828. ads/text_dataset/__init__.py +5 -0
  829. ads/text_dataset/backends.py +211 -0
  830. ads/text_dataset/dataset.py +445 -0
  831. ads/text_dataset/extractor.py +207 -0
  832. ads/text_dataset/options.py +53 -0
  833. ads/text_dataset/udfs.py +22 -0
  834. ads/text_dataset/utils.py +49 -0
  835. ads/type_discovery/__init__.py +9 -0
  836. ads/type_discovery/abstract_detector.py +21 -0
  837. ads/type_discovery/constant_detector.py +41 -0
  838. ads/type_discovery/continuous_detector.py +54 -0
  839. ads/type_discovery/credit_card_detector.py +99 -0
  840. ads/type_discovery/datetime_detector.py +92 -0
  841. ads/type_discovery/discrete_detector.py +118 -0
  842. ads/type_discovery/document_detector.py +146 -0
  843. ads/type_discovery/ip_detector.py +68 -0
  844. ads/type_discovery/latlon_detector.py +90 -0
  845. ads/type_discovery/phone_number_detector.py +63 -0
  846. ads/type_discovery/type_discovery_driver.py +87 -0
  847. ads/type_discovery/typed_feature.py +594 -0
  848. ads/type_discovery/unknown_detector.py +41 -0
  849. ads/type_discovery/zipcode_detector.py +48 -0
  850. ads/vault/__init__.py +7 -0
  851. ads/vault/vault.py +237 -0
  852. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/METADATA +150 -149
  853. oracle_ads-2.13.10.dist-info/RECORD +858 -0
  854. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/WHEEL +1 -2
  855. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/entry_points.txt +2 -1
  856. oracle_ads-2.13.9rc0.dist-info/RECORD +0 -9
  857. oracle_ads-2.13.9rc0.dist-info/top_level.txt +0 -1
  858. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,1494 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8; -*-
3
+
4
+ # Copyright (c) 2023, 2024 Oracle and/or its affiliates.
5
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
+
7
+ import json
8
+ import logging
9
+ from copy import deepcopy
10
+ from datetime import datetime
11
+ from typing import Dict, List, Optional, Union
12
+
13
+ import pandas as pd
14
+ from great_expectations.core import ExpectationSuite
15
+
16
+ from ads import deprecated
17
+ from ads.common import utils
18
+ from ads.common.decorator.runtime_dependency import OptionalDependency
19
+ from ads.common.oci_mixin import OCIModelMixin
20
+ from ads.feature_store.common.enums import (
21
+ ExpectationType,
22
+ EntityType,
23
+ StreamingIngestionMode,
24
+ BatchIngestionMode,
25
+ )
26
+ from ads.feature_store.common.exceptions import (
27
+ NotMaterializedError,
28
+ )
29
+ from ads.feature_store.common.utils.base64_encoder_decoder import Base64EncoderDecoder
30
+ from ads.feature_store.common.utils.utility import (
31
+ get_metastore_id,
32
+ get_execution_engine_type,
33
+ validate_delta_format_parameters,
34
+ get_schema_from_df,
35
+ convert_expectation_suite_to_expectation,
36
+ )
37
+ from ads.feature_store.execution_strategy.engine.spark_engine import SparkEngine
38
+ from ads.feature_store.execution_strategy.execution_strategy_provider import (
39
+ OciExecutionStrategyProvider,
40
+ ExecutionEngine,
41
+ )
42
+ from ads.feature_store.feature import Feature
43
+ from ads.feature_store.feature_group_expectation import Expectation
44
+ from ads.feature_store.feature_group_job import FeatureGroupJob
45
+ from ads.feature_store.feature_option_details import FeatureOptionDetails
46
+ from ads.feature_store.input_feature_detail import FeatureDetail, FeatureType
47
+ from ads.feature_store.query.filter import Filter, Logic
48
+ from ads.feature_store.query.query import Query
49
+ from ads.feature_store.service.oci_feature_group import OCIFeatureGroup
50
+ from ads.feature_store.service.oci_feature_group_job import OCIFeatureGroupJob
51
+ from ads.feature_store.service.oci_lineage import OCILineage
52
+ from ads.feature_store.statistics.statistics import Statistics
53
+ from ads.feature_store.statistics_config import StatisticsConfig
54
+ from ads.feature_store.validation_output import ValidationOutput
55
+
56
+ from ads.jobs.builders.base import Builder
57
+ from ads.feature_store.feature_lineage.graphviz_service import (
58
+ GraphService,
59
+ GraphOrientation,
60
+ )
61
+
62
+ try:
63
+ from pyspark.sql import DataFrame
64
+ except ModuleNotFoundError:
65
+ raise ModuleNotFoundError(
66
+ f"The `pyspark` module was not found. Please run `pip install "
67
+ f"{OptionalDependency.SPARK}`."
68
+ )
69
+ except Exception as e:
70
+ raise
71
+
72
+ logger = logging.getLogger(__name__)
73
+
74
+
75
+ class FeatureGroup(Builder):
76
+ """ Represents a FeatureGroup Resource.
77
+
78
+ Methods
79
+ -------
80
+ create(self, **kwargs) -> "FeatureGroup"
81
+ Creates feature group resource.
82
+ delete(self) -> "FeatureGroup":
83
+ Removes feature group resource.
84
+ to_dict(self) -> dict
85
+ Serializes feature group to a dictionary.
86
+ from_id(cls, id: str) -> "FeatureGroup"
87
+ Gets an existing feature group resource by id.
88
+ list(cls, compartment_id: str = None, **kwargs) -> List["FeatureGroup"]
89
+ Lists feature groups resources in a given compartment.
90
+ list_df(cls, compartment_id: str = None, **kwargs) -> "pandas.DataFrame"
91
+ Lists feature groups resources as a pandas dataframe.
92
+ with_description(self, description: str) -> "FeatureGroup"
93
+ Sets the description.
94
+ with_compartment_id(self, compartment_id: str) -> "FeatureGroup"
95
+ Sets the compartment ID.
96
+ with_feature_store_id(self, feature_store_id: str) -> "FeatureGroup"
97
+ Sets the feature store ID.
98
+ with_name(self, name: str) -> "FeatureGroup"
99
+ Sets the name.
100
+ with_entity_id(self, entity_id: str) -> "FeatureGroup"
101
+ Sets the entity id.
102
+ with_input_feature_details(self, **schema_details: Dict[str, str]) -> "FeatureGroup"
103
+ Sets the raw input feature details for the feature group.
104
+ with_statistics_config(self, statistics_config: Union[StatisticsConfig, bool]) -> "FeatureGroup"
105
+ Sets the statistics config details
106
+
107
+ Examples
108
+ --------
109
+ >>> from ads.feature_store import feature_group
110
+ >>> import oci
111
+ >>> import os
112
+ >>> input_feature_detail = [FeatureDetail("cc_num").with_feature_type(FeatureType.STRING).with_order_number(1)]
113
+ >>> feature_group = feature_group.FeatureGroup() \
114
+ >>> .with_description("feature group description") \
115
+ >>> .with_compartment_id(os.environ["PROJECT_COMPARTMENT_OCID"]) \
116
+ >>> .with_name("FeatureGroup") \
117
+ >>> .with_entity_id("<entity_id>") \
118
+ >>> .with_feature_store_id("<feature_store_id>") \
119
+ >>> .with_primary_keys(["key1", "key2"]) \
120
+ >>> .with_input_feature_details(input_feature_detail) \
121
+ >>> .with_statistics_config(StatisticsConfig(True,columns=["column1","column2"]))
122
+ >>> feature_group.create()
123
+ """
124
+
125
+ _PREFIX = "featuregroup_resource"
126
+
127
+ CONST_ID = "id"
128
+ CONST_COMPARTMENT_ID = "compartmentId"
129
+ CONST_NAME = "name"
130
+ CONST_DESCRIPTION = "description"
131
+ CONST_FEATURE_STORE_ID = "featureStoreId"
132
+ CONST_ENTITY_ID = "entityId"
133
+ CONST_ITEMS = "items"
134
+ CONST_PRIMARY_KEYS = "primaryKeys"
135
+ CONST_PARTITION_KEYS = "partitionKeys"
136
+ CONST_EXPECTATION_DETAILS = "expectationDetails"
137
+ CONST_INPUT_FEATURE_DETAILS = "inputFeatureDetails"
138
+ CONST_OUTPUT_FEATURE_DETAILS = "outputFeatureDetails"
139
+ CONST_FREEFORM_TAG = "freeformTags"
140
+ CONST_DEFINED_TAG = "definedTags"
141
+ CONST_TRANSFORMATION_ID = "transformationId"
142
+ CONST_STATISTICS_CONFIG = "statisticsConfig"
143
+ CONST_LIFECYCLE_STATE = "lifecycleState"
144
+ CONST_LAST_JOB_ID = "jobId"
145
+ CONST_INFER_SCHEMA = "isInferSchema"
146
+ CONST_TRANSFORMATION_KWARGS = "transformationParameters"
147
+
148
+ attribute_map = {
149
+ CONST_ID: "id",
150
+ CONST_COMPARTMENT_ID: "compartment_id",
151
+ CONST_NAME: "name",
152
+ CONST_DESCRIPTION: "description",
153
+ CONST_FEATURE_STORE_ID: "feature_store_id",
154
+ CONST_ENTITY_ID: "entity_id",
155
+ CONST_PRIMARY_KEYS: "primary_keys",
156
+ CONST_EXPECTATION_DETAILS: "expectation_details",
157
+ CONST_ITEMS: "items",
158
+ CONST_INPUT_FEATURE_DETAILS: "input_feature_details",
159
+ CONST_FREEFORM_TAG: "freeform_tags",
160
+ CONST_DEFINED_TAG: "defined_tags",
161
+ CONST_TRANSFORMATION_ID: "transformation_id",
162
+ CONST_LIFECYCLE_STATE: "lifecycle_state",
163
+ CONST_OUTPUT_FEATURE_DETAILS: "output_feature_details",
164
+ CONST_STATISTICS_CONFIG: "statistics_config",
165
+ CONST_INFER_SCHEMA: "is_infer_schema",
166
+ CONST_PARTITION_KEYS: "partition_keys",
167
+ CONST_TRANSFORMATION_KWARGS: "transformation_parameters",
168
+ }
169
+
170
def __init__(self, spec: Dict = None, **kwargs) -> None:
    """Initializes FeatureGroup Resource.

    Parameters
    ----------
    spec: (Dict, optional). Defaults to None.
        Object specification.

    kwargs: Dict
        Specification as keyword arguments.
        If 'spec' contains the same key as the one in kwargs,
        the value from kwargs will be used.
        The same kwargs are also forwarded to `_to_oci_feature_group`
        and to `OCILineage`.
    """
    # The parent receives a deep copy of kwargs; the original kwargs are
    # reused below for the OCI feature group and the lineage client.
    super().__init__(spec=spec, **deepcopy(kwargs))
    # Both start unset; `_spark_engine` is filled in lazily by the
    # `spark_engine` property.
    self.feature_group_job = None
    self._spark_engine = None
    # Specify oci FeatureGroup instance
    self.oci_feature_group: OCIFeatureGroup = self._to_oci_feature_group(**kwargs)
    self.dsc_job = OCIFeatureGroupJob()
    self.lineage = OCILineage(**kwargs)
190
+
191
def _to_oci_feature_group(self, **kwargs):
    """Builds the `OCIFeatureGroup` counterpart of this `FeatureGroup`.

    Every spec key listed in `attribute_map` is read with `get_spec` and
    stored under its mapped attribute name. `kwargs` are applied afterwards,
    so they override values taken from the spec.

    Parameters
    ----------
    kwargs
        Additional keyword arguments passed through to `OCIFeatureGroup`.

    Returns
    -------
    OCIFeatureGroup
        The instance of the OCIFeatureGroup.
    """
    oci_kwargs = {
        oci_attr: self.get_spec(spec_key)
        for spec_key, oci_attr in self.attribute_map.items()
    }
    oci_kwargs.update(**kwargs)
    return OCIFeatureGroup(**oci_kwargs)
211
+
212
@property
def spark_engine(self):
    """The `SparkEngine` for this feature group, created on first access.

    The engine is built from `get_metastore_id(self.feature_store_id)` and
    cached in `_spark_engine`; later accesses return the cached object.
    """
    if self._spark_engine:
        return self._spark_engine
    self._spark_engine = SparkEngine(get_metastore_id(self.feature_store_id))
    return self._spark_engine
217
+
218
@property
def kind(self) -> str:
    """The kind of the object as shown in a YAML."""
    resource_kind = "FeatureGroup"
    return resource_kind
222
+
223
@property
def compartment_id(self) -> str:
    """The compartment id stored in the specification."""
    spec_key = self.CONST_COMPARTMENT_ID
    return self.get_spec(spec_key)

@compartment_id.setter
def compartment_id(self, value: str):
    """Stores the compartment id by delegating to `with_compartment_id`."""
    self.with_compartment_id(value)
230
+
231
def with_compartment_id(self, compartment_id: str) -> "FeatureGroup":
    """Stores the compartment id in the specification.

    Parameters
    ----------
    compartment_id: str
        The compartment id to record under `CONST_COMPARTMENT_ID`.

    Returns
    -------
    FeatureGroup
        Whatever `set_spec` returns (the FeatureGroup instance, per the
        builder convention used throughout this class).
    """
    spec_key = self.CONST_COMPARTMENT_ID
    return self.set_spec(spec_key, compartment_id)
245
+
246
@property
def name(self) -> str:
    """The feature group name stored in the specification."""
    spec_key = self.CONST_NAME
    return self.get_spec(spec_key)

@name.setter
def name(self, name: str):
    """Stores the name by delegating to `with_name`."""
    self.with_name(name)
253
+
254
def with_name(self, name: str) -> "FeatureGroup":
    """Stores the feature group name in the specification.

    Parameters
    ----------
    name: str
        The name of feature group.

    Returns
    -------
    FeatureGroup
        Whatever `set_spec` returns (the FeatureGroup instance, per the
        builder convention used throughout this class).
    """
    spec_key = self.CONST_NAME
    return self.set_spec(spec_key, name)
268
+
269
@property
def id(self) -> str:
    """The id of the feature group.

    Returns
    -------
    str
        The value stored under `CONST_ID` in the specification.
    """
    spec_key = self.CONST_ID
    return self.get_spec(spec_key)
279
+
280
def with_id(self, id: str) -> "FeatureGroup":
    """Stores the feature group id in the specification.

    Parameters
    ----------
    id: str
        The id to record under `CONST_ID`.

    Returns
    -------
    FeatureGroup
        Whatever `set_spec` returns (the FeatureGroup instance, per the
        builder convention used throughout this class).
    """
    spec_key = self.CONST_ID
    return self.set_spec(spec_key, id)
282
+
283
@property
def description(self) -> str:
    """The feature group description stored in the specification."""
    spec_key = self.CONST_DESCRIPTION
    return self.get_spec(spec_key)

@description.setter
def description(self, value: str):
    """Stores the description by delegating to `with_description`."""
    self.with_description(value)
290
+
291
def with_description(self, description: str) -> "FeatureGroup":
    """Stores the description in the specification.

    Parameters
    ----------
    description: str
        The description of the feature group.

    Returns
    -------
    FeatureGroup
        Whatever `set_spec` returns (the FeatureGroup instance, per the
        builder convention used throughout this class).
    """
    spec_key = self.CONST_DESCRIPTION
    return self.set_spec(spec_key, description)
305
+
306
@property
def primary_keys(self) -> List[str]:
    """The primary keys entry as stored in the specification.

    Note that `with_primary_keys` stores a mapping with an items list
    rather than a plain list of names.
    """
    spec_key = self.CONST_PRIMARY_KEYS
    return self.get_spec(spec_key)

@primary_keys.setter
def primary_keys(self, value: List[str]):
    """Stores the primary keys by delegating to `with_primary_keys`."""
    self.with_primary_keys(value)
313
+
314
def with_primary_keys(self, primary_keys: List[str]) -> "FeatureGroup":
    """Sets the primary keys of the feature group.

    Each key name is wrapped as ``{CONST_NAME: <key>}`` and the resulting
    list is stored under ``CONST_ITEMS``. A falsy argument (``None`` or an
    empty list) stores an empty items list.

    Parameters
    ----------
    primary_keys: List[str]
        The names of the primary key columns.

    Returns
    -------
    FeatureGroup
        Whatever `set_spec` returns (the FeatureGroup instance, per the
        builder convention used throughout this class).
    """
    key_items = []
    for key_name in primary_keys or []:
        key_items.append({self.CONST_NAME: key_name})
    return self.set_spec(self.CONST_PRIMARY_KEYS, {self.CONST_ITEMS: key_items})
335
+
336
@property
def transformation_kwargs(self) -> str:
    """The transformation parameters as stored in the specification.

    `with_transformation_kwargs` stores the encoded form of the arguments,
    so this is the encoded value rather than the original dictionary.
    """
    spec_key = self.CONST_TRANSFORMATION_KWARGS
    return self.get_spec(spec_key)

@transformation_kwargs.setter
def transformation_kwargs(self, value: Dict):
    """Stores the arguments by delegating to `with_transformation_kwargs`."""
    self.with_transformation_kwargs(value)
343
+
344
def with_transformation_kwargs(
    self, transformation_kwargs: Optional[Dict] = None
) -> "FeatureGroup":
    """Sets the transformation arguments of the feature group.

    The arguments are serialized with `json.dumps` and then encoded with
    `Base64EncoderDecoder.encode` before being stored in the specification.
    A falsy argument (``None`` or an empty dict) is stored as an encoded
    empty JSON object.

    Parameters
    ----------
    transformation_kwargs: (Dict, optional). Defaults to None.
        Dictionary containing the transformation arguments.

    Returns
    -------
    FeatureGroup
        Whatever `set_spec` returns (the FeatureGroup instance, per the
        builder convention used throughout this class).
    """
    # The default used to be an empty tuple, which contradicted the Dict
    # annotation. None is normalized the same way by the `or {}` below.
    serialized_kwargs = json.dumps(transformation_kwargs or {})
    return self.set_spec(
        self.CONST_TRANSFORMATION_KWARGS,
        Base64EncoderDecoder.encode(serialized_kwargs),
    )
363
+
364
@property
def partition_keys(self) -> List[str]:
    """The partition keys entry as stored in the specification.

    Note that `with_partition_keys` stores a mapping with an items list
    rather than a plain list of names.
    """
    spec_key = self.CONST_PARTITION_KEYS
    return self.get_spec(spec_key)

@partition_keys.setter
def partition_keys(self, value: List[str]):
    """Stores the partition keys by delegating to `with_partition_keys`."""
    self.with_partition_keys(value)
371
+
372
def with_partition_keys(self, partition_keys: List[str]) -> "FeatureGroup":
    """Sets the partition keys of the feature group.

    Each key name is wrapped as ``{CONST_NAME: <key>}`` and the resulting
    list is stored under ``CONST_ITEMS``. A falsy argument (``None`` or an
    empty list) stores an empty items list.

    Parameters
    ----------
    partition_keys: List[str]
        The List of partition keys for the feature group.

    Returns
    -------
    FeatureGroup
        Whatever `set_spec` returns (the FeatureGroup instance, per the
        builder convention used throughout this class).
    """
    key_items = []
    for key_name in partition_keys or []:
        key_items.append({self.CONST_NAME: key_name})
    return self.set_spec(self.CONST_PARTITION_KEYS, {self.CONST_ITEMS: key_items})
394
+
395
@property
def feature_store_id(self) -> str:
    """The feature store id stored in the specification."""
    spec_key = self.CONST_FEATURE_STORE_ID
    return self.get_spec(spec_key)

@feature_store_id.setter
def feature_store_id(self, value: str):
    """Stores the feature store id by delegating to `with_feature_store_id`."""
    self.with_feature_store_id(value)
402
+
403
def with_feature_store_id(self, feature_store_id: str) -> "FeatureGroup":
    """Stores the feature store id in the specification.

    Parameters
    ----------
    feature_store_id: str
        The feature store id to record under `CONST_FEATURE_STORE_ID`.

    Returns
    -------
    FeatureGroup
        Whatever `set_spec` returns (the FeatureGroup instance, per the
        builder convention used throughout this class).
    """
    spec_key = self.CONST_FEATURE_STORE_ID
    return self.set_spec(spec_key, feature_store_id)
417
+
418
@property
def transformation_id(self) -> str:
    """The transformation id stored in the specification."""
    return self.get_spec(self.CONST_TRANSFORMATION_ID)

@transformation_id.setter
def transformation_id(self, value: str):
    """Stores the transformation id by delegating to `with_transformation_id`.

    Parameters
    ----------
    value: str
        The transformation id.
    """
    # The setter previously called `with_feature_store_id`, so assigning a
    # transformation id overwrote the feature store id and left the
    # transformation id unset.
    self.with_transformation_id(value)
425
+
426
def with_transformation_id(self, transformation_id: str) -> "FeatureGroup":
    """Stores the transformation id in the specification.

    If no transformation arguments have been set yet, they are first
    initialized by calling `with_transformation_kwargs()` with no arguments.

    Parameters
    ----------
    transformation_id: str
        The transformation_id.

    Returns
    -------
    FeatureGroup
        Whatever `set_spec` returns (the FeatureGroup instance, per the
        builder convention used throughout this class).
    """
    # Initialize empty transformation arguments when none were specified.
    kwargs_missing = not self.transformation_kwargs
    if kwargs_missing:
        self.with_transformation_kwargs()

    spec_key = self.CONST_TRANSFORMATION_ID
    return self.set_spec(spec_key, transformation_id)
445
+
446
+ def _with_lifecycle_state(self, lifecycle_state: str) -> "FeatureGroup":
447
+ """Sets the lifecycle_state.
448
+
449
+ Parameters
450
+ ----------
451
+ lifecycle_state: str
452
+ The lifecycle_state.
453
+
454
+ Returns
455
+ -------
456
+ FeatureGroup
457
+ The FeatureGroup instance (self)
458
+ """
459
+ return self.set_spec(self.CONST_LIFECYCLE_STATE, lifecycle_state)
460
+
461
@property
def entity_id(self) -> str:
    """The entity id stored in the specification."""
    spec_key = self.CONST_ENTITY_ID
    return self.get_spec(spec_key)

@entity_id.setter
def entity_id(self, value: str):
    """Stores the entity id by delegating to `with_entity_id`."""
    self.with_entity_id(value)
468
+
469
def with_entity_id(self, entity_id: str) -> "FeatureGroup":
    """Stores the entity id in the specification.

    Parameters
    ----------
    entity_id: str
        The entity_id.

    Returns
    -------
    FeatureGroup
        Whatever `set_spec` returns (the FeatureGroup instance, per the
        builder convention used throughout this class).
    """
    spec_key = self.CONST_ENTITY_ID
    return self.set_spec(spec_key, entity_id)
483
+
484
@property
def expectation_details(self) -> "Expectation":
    """The expectation details of the feature group.

    Returns
    -------
    Expectation
        The value stored under `CONST_EXPECTATION_DETAILS`.
        NOTE(review): `with_expectation_suite` stores the result of
        `to_dict()`, so this is presumably a dict rather than an
        `Expectation` object; confirm against callers.
    """
    spec_key = self.CONST_EXPECTATION_DETAILS
    return self.get_spec(spec_key)
494
+
495
def with_expectation_suite(
    self, expectation_suite: ExpectationSuite, expectation_type: ExpectationType
) -> "FeatureGroup":
    """Sets the expectation details for the feature group.

    The suite and type are converted with
    `convert_expectation_suite_to_expectation`, and the `to_dict()` form of
    the result is stored in the specification.

    Parameters
    ----------
    expectation_suite: ExpectationSuite
        The expectation suite to attach to the feature group.
    expectation_type: ExpectationType
        Type of the expectation.

    Returns
    -------
    FeatureGroup
        Whatever `set_spec` returns (the FeatureGroup instance, per the
        builder convention used throughout this class).
    """
    expectation = convert_expectation_suite_to_expectation(
        expectation_suite, expectation_type
    )
    return self.set_spec(self.CONST_EXPECTATION_DETAILS, expectation.to_dict())
518
+
519
@property
def input_feature_details(self) -> list:
    """The input feature details stored in the feature group spec."""
    stored_details = self.get_spec(self.CONST_INPUT_FEATURE_DETAILS)
    return stored_details

@input_feature_details.setter
def input_feature_details(self, input_feature_details: List[FeatureDetail]):
    # Attribute assignment goes through the builder-style setter, which
    # serializes each FeatureDetail before storing it.
    self.with_input_feature_details(input_feature_details)
526
+
527
def with_input_feature_details(
    self, input_feature_details: List[FeatureDetail]
) -> "FeatureGroup":
    """Store the input feature details in the feature group spec.

    Parameters
    ----------
    input_feature_details: List[FeatureDetail]
        The input feature details for the feature group. Each item is
        stored in its ``to_dict()`` form.

    Returns
    -------
    FeatureGroup
        The result of ``set_spec`` (the FeatureGroup instance, for chaining).
    """
    # When schema inference is unset or off, record it explicitly as False.
    if not self.is_infer_schema:
        self.with_is_infer_schema(False)

    serialized_details = [detail.to_dict() for detail in input_feature_details]
    return self.set_spec(self.CONST_INPUT_FEATURE_DETAILS, serialized_details)
547
+
548
def with_schema_details_from_dataframe(
    self, data_frame: Union[DataFrame, pd.DataFrame]
) -> "FeatureGroup":
    """Derive the input feature details from a dataframe.

    The schema is obtained with ``get_schema_from_df``, schema inference is
    switched on, and the resulting details are stored via
    ``with_input_feature_details``.

    Parameters
    ----------
    data_frame: Union[DataFrame, pd.DataFrame]
        The dataframe to read the schema from.

    Returns
    -------
    FeatureGroup
        The result of ``with_input_feature_details``.

    Raises
    ------
    ValueError
        If the feature store id has not been set yet.
    """
    if not self.feature_store_id:
        raise ValueError(
            "FeatureStore id must be set before calling `with_schema_details_from_dataframe`"
        )

    inferred_details = [
        FeatureDetail(**schema_detail)
        for schema_detail in get_schema_from_df(data_frame, self.feature_store_id)
    ]
    self.with_is_infer_schema(True)
    return self.with_input_feature_details(inferred_details)
563
+
564
def _with_features(self, features: List[Feature]):
    """Store the output features in the feature group spec.

    Parameters
    ----------
    features: List[Feature]
        The features for the feature group. Each one is stored in its
        ``to_dict()`` form inside a mapping keyed by ``CONST_ITEMS``.

    Returns
    -------
    FeatureGroup
        The result of ``set_spec`` (the FeatureGroup instance, for chaining).
    """
    serialized_features = [feature.to_dict() for feature in features]
    output_details = {self.CONST_ITEMS: serialized_features}
    return self.set_spec(self.CONST_OUTPUT_FEATURE_DETAILS, output_details)
580
+
581
@property
def statistics_config(self) -> "StatisticsConfig":
    """The statistics configuration of the feature group.

    Returns
    -------
    StatisticsConfig
        The value stored in the spec under ``CONST_STATISTICS_CONFIG``.
        ``with_statistics_config`` stores the configuration there in its
        ``to_dict()`` form.
    """
    stored_config = self.get_spec(self.CONST_STATISTICS_CONFIG)
    return stored_config

@statistics_config.setter
def statistics_config(self, statistics_config: Union[StatisticsConfig, bool]):
    # Attribute assignment goes through the builder-style setter, which
    # validates the type and serializes the configuration.
    self.with_statistics_config(statistics_config)
595
+
596
def with_statistics_config(
    self, statistics_config: Union[StatisticsConfig, bool]
) -> "FeatureGroup":
    """Store the statistics configuration in the feature group spec.

    Parameters
    ----------
    statistics_config: Union[StatisticsConfig, bool]
        Either a ready ``StatisticsConfig`` or a boolean, which is passed to
        the ``StatisticsConfig`` constructor.

    Returns
    -------
    FeatureGroup
        The result of ``set_spec`` (the FeatureGroup instance, for chaining).

    Raises
    ------
    TypeError
        If ``statistics_config`` is neither a ``StatisticsConfig`` nor a ``bool``.
    """
    if not isinstance(statistics_config, (StatisticsConfig, bool)):
        raise TypeError(
            "The argument `statistics_config` has to be of type `StatisticsConfig` or `bool`, "
            "but is of type: `{}`".format(type(statistics_config))
        )

    if isinstance(statistics_config, StatisticsConfig):
        resolved_config = statistics_config
    else:
        # A plain boolean is wrapped into a StatisticsConfig.
        resolved_config = StatisticsConfig(statistics_config)

    return self.set_spec(self.CONST_STATISTICS_CONFIG, resolved_config.to_dict())
623
+
624
@property
def features(self) -> "List[Feature]":
    """The output features of the feature group.

    Returns
    -------
    List[Feature]
        One ``Feature`` per item stored under ``CONST_OUTPUT_FEATURE_DETAILS``.
        An empty list is returned when no output features have been stored
        yet, or when the stored item list is missing or ``None``.
    """
    # The spec entry does not exist until output features are set. Indexing
    # the missing entry used to raise TypeError, which also leaked through
    # `__getitem__` / `__getattr__` and broke `hasattr()` on new instances.
    output_details = self.get_spec(self.CONST_OUTPUT_FEATURE_DETAILS) or {}
    feature_dicts = output_details.get(self.CONST_ITEMS) or []
    return [Feature(**feature_dict) for feature_dict in feature_dicts]
633
+
634
def with_job_id(self, feature_group_job_id: str) -> "FeatureGroup":
    """Store the id of the most recent feature group job in the spec.

    Parameters
    ----------
    feature_group_job_id: str
        The feature group job id.

    Returns
    -------
    FeatureGroup
        The result of ``set_spec`` (the FeatureGroup instance, for chaining).
    """
    updated = self.set_spec(self.CONST_LAST_JOB_ID, feature_group_job_id)
    return updated
647
+
648
@property
def is_infer_schema(self) -> bool:
    """Whether the schema is inferred, as stored in the feature group spec."""
    stored_flag = self.get_spec(self.CONST_INFER_SCHEMA)
    return stored_flag

@is_infer_schema.setter
def is_infer_schema(self, value: bool):
    # Attribute assignment goes through the builder-style setter.
    self.with_is_infer_schema(value)
655
+
656
def with_is_infer_schema(self, is_infer_schema: bool) -> "FeatureGroup":
    """Store the schema-inference flag in the feature group spec.

    Parameters
    ----------
    is_infer_schema: bool
        Whether the schema is inferred.

    Returns
    -------
    FeatureGroup
        The result of ``set_spec`` (the FeatureGroup instance, for chaining).
    """
    updated = self.set_spec(self.CONST_INFER_SCHEMA, is_infer_schema)
    return updated
669
+
670
def target_delta_table(self):
    """Return the fully-qualified name of the target delta table.

    The name is the entity id and the feature group name joined by a dot,
    i.e. ``entity_id.table_name``.

    Returns:
        str: The fully-qualified name of the target delta table.
    """
    return "{}.{}".format(self.entity_id, self.name)
683
+
684
@property
def job_id(self) -> str:
    """The id of the last feature group job, as stored in the spec."""
    last_job_id = self.get_spec(self.CONST_LAST_JOB_ID)
    return last_job_id
687
+
688
def create(self, **kwargs) -> "FeatureGroup":
    """Create the feature group resource.

    Validates the required ids, fills in a random display name and a default
    statistics configuration when they are missing, then builds the OCI
    feature group model and calls its ``create()``. The id of the created
    resource is stored on this instance.

    Feature data is not ingested by this method; use ``materialise()`` with
    a dataframe for that.

    Parameters
    ----------
    kwargs
        Additional keyword arguments forwarded to ``_to_oci_feature_group``.
        Can be any attribute that `feature_store.models.FeatureGroup` accepts.

    Returns
    -------
    FeatureGroup
        The FeatureGroup instance (self)

    Raises
    ------
    ValueError
        If the feature store id or the entity id is not set. The compartment
        id is checked by ``OCIModelMixin.check_compartment_id``, which
        presumably raises when it cannot be resolved.
    """
    self.compartment_id = OCIModelMixin.check_compartment_id(self.compartment_id)

    # Mandatory references.
    if not self.feature_store_id:
        raise ValueError("FeatureStore id must be provided.")
    if not self.entity_id:
        raise ValueError("Entity id must be provided.")

    # Optional values that get defaults.
    if not self.name:
        self.name = self._random_display_name()
    if self.statistics_config is None:
        self.statistics_config = StatisticsConfig()

    # The copy is only used for the debug log; the id is dropped from it.
    payload = deepcopy(self._spec)
    payload.pop("id", None)
    logger.debug(f"Creating a feature group resource with payload {payload}")

    logger.info("Saving feature group.")
    oci_model = self._to_oci_feature_group(**kwargs)
    self.oci_feature_group = oci_model.create()
    self.with_id(self.oci_feature_group.id)
    return self
736
+
737
def get_features(self) -> List[Feature]:
    """
    Return all the features in the feature group.

    Returns:
        List[Feature]: The value of the ``features`` property.
    """
    all_features = self.features
    return all_features
746
+
747
def get_features_df(self) -> "pd.DataFrame":
    """
    Return all the features as a pandas dataframe.

    Returns:
        pandas.DataFrame: One row per feature with the columns ``name``
        and ``type``.
    """
    rows = [
        {"name": feature.feature_name, "type": feature.feature_type}
        for feature in self.features
    ]
    return pd.DataFrame.from_records(rows)
763
+
764
def get_input_features_df(self) -> "pd.DataFrame":
    """
    Return all the input feature details as a pandas dataframe.

    Returns:
        pandas.DataFrame: One row per input feature with the columns
        ``name``, ``type``, ``order_number``, ``is_event_timestamp`` and
        ``event_timestamp_format``. The dataframe is empty when no input
        feature details have been set.
    """
    # `input_feature_details` is None until details are set; iterating None
    # used to raise TypeError instead of yielding an empty frame.
    records = [
        {
            "name": input_feature.get("name"),
            "type": input_feature.get("featureType"),
            "order_number": input_feature.get("orderNumber"),
            "is_event_timestamp": input_feature.get("isEventTimestamp"),
            "event_timestamp_format": input_feature.get("eventTimestampFormat"),
        }
        for input_feature in self.input_feature_details or []
    ]
    return pd.DataFrame.from_records(records)
783
+
784
def update(self, **kwargs) -> "FeatureGroup":
    """Update the feature group in the feature store.

    Parameters
    ----------
    kwargs
        Additional keyword arguments forwarded to ``_to_oci_feature_group``.
        Can be any attribute that `feature_store.models.FeatureGroup` accepts.

    Returns
    -------
    FeatureGroup
        The FeatureGroup instance (self).

    Raises
    ------
    ValueError
        If the feature group has no id yet, i.e. it was never saved.
    """
    if not self.id:
        raise ValueError(
            "FeatureGroup needs to be saved to the feature store before it can be updated."
        )

    oci_model = self._to_oci_feature_group(**kwargs)
    self.oci_feature_group = oci_model.update()
    return self
806
+
807
def _update_from_oci_feature_group_model(
    self, oci_feature_group: OCIFeatureGroup
) -> "FeatureGroup":
    """Copy the properties of an OCIFeatureGroup object into this instance.

    Every key of ``attribute_map`` that is present in the model's
    ``to_dict()`` output is written to the spec. Output features are
    additionally tagged with the id of the feature group.

    Parameters
    ----------
    oci_feature_group: OCIFeatureGroup
        An instance of OCIFeatureGroup.

    Returns
    -------
    FeatureGroup
        The FeatureGroup instance (self).
    """
    self.oci_feature_group = oci_feature_group
    details = oci_feature_group.to_dict()

    for infra_attr, _ in self.attribute_map.items():
        if infra_attr not in details:
            continue

        value = details[infra_attr]
        if infra_attr == self.CONST_OUTPUT_FEATURE_DETAILS:
            # May not need if we fix the backend and add feature_group_id to the output_feature
            tagged_features = []
            for output_feature in value["items"]:
                output_feature["featureGroupId"] = details[self.CONST_ID]
                tagged_features.append(output_feature)
            value = {self.CONST_ITEMS: tagged_features}

        self.set_spec(infra_attr, value)

    return self
845
+
846
def _build_feature_group_job(
    self,
    ingestion_mode,
    from_timestamp: str = None,
    to_timestamp: str = None,
    feature_option_details=None,
):
    """Build (without creating) a FeatureGroupJob for this feature group.

    Parameters
    ----------
    ingestion_mode
        The ingestion mode to set on the job.
    from_timestamp: str
        Value passed to the job's ``with_time_from``.
    to_timestamp: str
        Value passed to the job's ``with_time_to``.
    feature_option_details
        Optional feature options; set on the job only when truthy.

    Returns
    -------
    FeatureGroupJob
        The configured job builder.
    """
    job = FeatureGroupJob()
    job = job.with_feature_group_id(self.id)
    job = job.with_compartment_id(self.compartment_id)
    job = job.with_ingestion_mode(ingestion_mode)
    job = job.with_time_from(from_timestamp)
    job = job.with_time_to(to_timestamp)

    if not feature_option_details:
        return job
    return job.with_feature_option_details(feature_option_details)
868
+
869
def materialise(
    self,
    input_dataframe: Union[DataFrame, pd.DataFrame],
    ingestion_mode: BatchIngestionMode = BatchIngestionMode.OVERWRITE,
    from_timestamp: str = None,
    to_timestamp: str = None,
    feature_option_details: FeatureOptionDetails = None,
):
    """
    Run a feature group job that materializes feature data into the feature store.

    Args:
        input_dataframe: A pandas/spark DataFrame containing the input data for the feature group job.
        ingestion_mode: Optional. How the data is ingested into the feature store. Defaults to overwrite.
        from_timestamp: Optional. A string representing the lower bound of the time range of data to include in the job.
        to_timestamp: Optional. A string representing the upper bound of the time range of data to include in the job.
        feature_option_details: Optional. An instance of the FeatureOptionDetails class containing feature options.

    Returns:
        None. This method does not return anything.

    Raises:
        Any exceptions thrown by the underlying execution strategy or feature store.
    """
    # Build and persist the job describing this ingestion.
    job = self._build_feature_group_job(
        ingestion_mode=ingestion_mode,
        from_timestamp=from_timestamp,
        to_timestamp=to_timestamp,
        feature_option_details=feature_option_details,
    )
    job.create()

    # Remember the job on the feature group so its details can be looked up later.
    self.with_job_id(job.id)

    # The execution strategy is chosen from the dataframe's engine type.
    strategy = OciExecutionStrategyProvider.provide_execution_strategy(
        execution_engine=get_execution_engine_type(input_dataframe),
        metastore_id=get_metastore_id(self.feature_store_id),
    )
    strategy.ingest_feature_definition(self, job, input_dataframe)
918
+
919
def materialise_stream(
    self,
    input_dataframe: Union[DataFrame],
    checkpoint_dir: str,
    query_name: Optional[str] = None,
    ingestion_mode: StreamingIngestionMode = StreamingIngestionMode.APPEND,
    await_termination: Optional[bool] = False,
    timeout: Optional[int] = None,
    feature_option_details: FeatureOptionDetails = None,
):
    """Ingest a Spark Structured Streaming Dataframe into the feature store.

    This starts a long running Spark Streaming Query whose termination is
    controlled through the arguments. The returned query can be stopped
    with `.stop()` and its status checked with `.isActive`.

    !!! warning "Engine Support"
        **Spark only**

        Stream ingestion using Pandas/Python as engine is currently not supported.
        Python/Pandas has no notion of streaming.

    !!! warning "Data Validation Support"
        `materialise_stream` does not perform any data validation using Great Expectations
        even when a expectation suite is attached.

    # Arguments
        input_dataframe: Features in Streaming Dataframe to be saved.
        checkpoint_dir: Checkpoint directory location, used as the reference
            point from which to resume the streaming job.
        query_name: Optional name for the query, making it easier to
            recognise in the Spark UI. Defaults to `None`.
        ingestion_mode: Specifies how data of a streaming DataFrame/Dataset is
            written to a streaming sink. (1) `"append"`: only the new rows are
            written to the sink. (2) `"complete"`: all the rows are written to
            the sink every time there is some update. (3) `"update"`: only the
            rows that were updated are written to the sink every time there are
            some updates; without aggregations this is equivalent to append
            mode. Defaults to `"append"`.
        await_termination: Waits for the termination of this query, either by
            query.stop() or by an exception. If the query has terminated with an
            exception, then the exception will be thrown. If timeout is set, it
            returns whether the query has terminated or not within the timeout
            seconds. Defaults to `False`.
        timeout: Only relevant in combination with `await_termination=True`.
            Defaults to `None`.
        feature_option_details: Optional feature options set on the job.

    # Returns
        `StreamingQuery`: Spark Structured Streaming Query object.
    """
    # Build and persist the job describing this ingestion.
    job = self._build_feature_group_job(
        ingestion_mode=ingestion_mode,
        feature_option_details=feature_option_details,
    )
    job.create()

    # Remember the job on the feature group so its details can be looked up later.
    self.with_job_id(job.id)

    strategy = OciExecutionStrategyProvider.provide_execution_strategy(
        execution_engine=get_execution_engine_type(input_dataframe),
        metastore_id=get_metastore_id(self.feature_store_id),
    )

    # Arguments are positional; keep this order in sync with the strategy API.
    streaming_query = strategy.ingest_feature_definition_stream(
        self,
        job,
        input_dataframe,
        query_name,
        await_termination,
        timeout,
        checkpoint_dir,
    )
    return streaming_query
1002
+
1003
def get_last_job(self) -> "FeatureGroupJob":
    """Get the job details for the last job of this feature group.

    When a job id is already stored on the feature group, that job is
    loaded by id. Otherwise the jobs of the feature group are listed
    (sorted by ``timeCreated``, limited to one), the first result is
    stored as the last job and returned.

    Returns:
        FeatureGroupJob

    Raises:
        ValueError: If the feature group has not been saved yet, or no job
            could be found for it.
    """
    if not self.id:
        raise ValueError(
            "FeatureGroup needs to be saved to the feature store before getting associated jobs."
        )

    if self.job_id:
        return FeatureGroupJob.from_id(self.job_id)

    found_jobs = FeatureGroupJob.list(
        feature_group_id=self.id,
        compartment_id=self.compartment_id,
        sort_by="timeCreated",
        limit="1",
    )
    if not found_jobs:
        raise ValueError(
            "Unable to retrieve the associated last job. Please make sure you materialized the data."
        )

    latest_job = found_jobs[0]
    self.with_job_id(latest_job.id)
    return latest_job
1029
+
1030
def select(self, features: Optional[List[str]] = ()) -> Query:
    """
    Select a subset of features and return a Query over this feature group.

    Args:
        features (Optional[List[str]], optional): Names of the features to
            select. Defaults to an empty selection; names are validated
            against the output features only when some are given.

    Returns:
        Query: A Query object that includes the selected features from the feature group.

    Raises:
        NotMaterializedError: Raised by ``check_resource_materialization``
            when the target delta table does not exist.
        ValueError: If a requested feature is not defined in the feature group.
    """
    self.check_resource_materialization()

    if features:
        self.__validate_features_exist(features)

    query = Query(
        left_feature_group=self,
        left_features=features,
        feature_store_id=self.feature_store_id,
        entity_id=self.entity_id,
    )
    return query
1052
+
1053
def delete(self):
    """Remove the FeatureGroup resource.

    A job with the default batch ingestion mode is created for the deletion
    and handed to the Spark execution strategy, which deletes the feature
    definition.

    Returns
    -------
    None
    """
    # Build and persist the job that records this deletion.
    job = self._build_feature_group_job(BatchIngestionMode.DEFAULT)
    job.create()

    strategy = OciExecutionStrategyProvider.provide_execution_strategy(
        execution_engine=ExecutionEngine.SPARK,
        metastore_id=get_metastore_id(self.feature_store_id),
    )
    strategy.delete_feature_definition(self, job)
1074
+
1075
def filter(self, f: Union[Filter, Logic]):
    """Apply a filter to the feature group.

    Selects all features and returns the resulting `Query` with the filter applied.

    ```python
    fg.filter((fg.feature1 == 1) | (fg.feature2 >= 2))
    ```

    # Arguments
        f: Filter object.

    # Returns
        `Query`. The query object with the applied filter.
    """
    all_features_query = self.select()
    return all_features_query.filter(f)
1091
+
1092
@deprecated(details="preview functionality is deprecated. Please use as_of.")
def preview(
    self,
    row_count: int = 10,
    version_number: int = None,
    timestamp: datetime = None,
):
    """Preview the feature definition and return the rows as a dataframe.

    The query always reads the current state of the target table. Neither
    ``version_number`` nor ``timestamp`` is applied; a warning is logged
    when either is supplied. Use ``as_of`` for time travel.

    Parameters
    ----------
    row_count: int
        Maximum number of rows to return.
    version_number: int
        Commit version number. Validated but not applied.
    timestamp: datetime
        Commit date time in format yyyy-MM-dd or yyyy-MM-dd HH:mm:ss.
        Validated but not applied.

    Returns
    -------
    spark dataframe
        The preview result in spark dataframe
    """
    self.check_resource_materialization()

    validate_delta_format_parameters(timestamp, version_number)
    target_table = self.target_delta_table()

    # Warn for either time-travel argument: previously a timestamp was
    # dropped silently while a version number produced this warning.
    if version_number is not None or timestamp is not None:
        logger.warning("Time travel queries are not supported in current version")

    sql_query = f"select * from {target_table} LIMIT {row_count}"

    return self.spark_engine.sql(sql_query)
1128
+
1129
def as_of(
    self,
    version_number: int = None,
    commit_timestamp: datetime = None,
):
    """Read the feature group data as of a given version or commit time.

    Parameters
    ----------
    version_number: int
        Commit version number to read. Version numbers are automatically
        assigned for every ingestion commit using delta lake.
    commit_timestamp: datetime
        Commit date time to read, in format yyyy-MM-dd or yyyy-MM-dd HH:mm:ss.
        A commit date time is maintained for every ingestion commit using delta lake.

    Returns
    -------
    spark dataframe
        The data returned by the spark engine's ``get_time_version_data``.
    """
    self.check_resource_materialization()
    validate_delta_format_parameters(commit_timestamp, version_number)

    table_name = self.target_delta_table()
    versioned_data = self.spark_engine.get_time_version_data(
        table_name, version_number, commit_timestamp
    )
    return versioned_data
1158
+
1159
def profile(self):
    """Get the profile information of the feature definition as a dataframe.

    Runs ``DESCRIBE DETAIL`` on the target delta table.

    Returns
    -------
    spark dataframe
        The profile result in spark dataframe
    """
    self.check_resource_materialization()

    describe_query = "DESCRIBE DETAIL {}".format(self.target_delta_table())
    profile_output = self.spark_engine.sql(describe_query)
    return profile_output
1172
+
1173
def restore(self, version_number: int = None, timestamp: datetime = None):
    """Restore the feature definition to an earlier version or commit time.

    After the table is restored, the execution strategy is asked to update
    the features of the feature definition from the restored table.

    Parameters
    ----------
    version_number: int
        Commit version number to restore. Version numbers are automatically
        assigned for every ingestion commit using delta lake. Takes
        precedence over ``timestamp`` when not None.
    timestamp: datetime
        Commit date time to restore, in format yyyy-MM-dd or
        yyyy-MM-dd HH:mm:ss. ``date``/``datetime`` values are rendered as a
        quoted SQL string literal; any other value is interpolated into the
        statement unchanged.

    Returns
    -------
    spark dataframe
        The restore output as spark dataframe
    """
    # Imported locally so the block does not depend on how the module
    # imports `datetime`.
    import datetime as _dt

    self.check_resource_materialization()

    validate_delta_format_parameters(timestamp, version_number, True)
    target_table = self.target_delta_table()
    if version_number is not None:
        sql_query = (
            f"RESTORE TABLE {target_table} TO VERSION AS OF {version_number}"
        )
    else:
        # `TIMESTAMP AS OF` expects a timestamp expression; a bare
        # `2023-01-01 00:00:00` is not valid SQL, so date/datetime objects
        # are emitted as a quoted literal. Other values (e.g. a caller-built
        # SQL expression) are left untouched for backward compatibility.
        if isinstance(timestamp, _dt.date):
            timestamp_expression = f"'{timestamp}'"
        else:
            timestamp_expression = timestamp
        sql_query = (
            f"RESTORE TABLE {target_table} TO TIMESTAMP AS OF {timestamp_expression}"
        )

    restore_output = self.spark_engine.sql(sql_query)

    feature_group_execution_strategy = (
        OciExecutionStrategyProvider.provide_execution_strategy(
            execution_engine=ExecutionEngine.SPARK,
            metastore_id=get_metastore_id(self.feature_store_id),
        )
    )

    feature_group_execution_strategy.update_feature_definition_features(
        self, target_table
    )

    return restore_output
1214
+
1215
def check_resource_materialization(self):
    """Ensure the target Delta table of this resource exists in Spark.

    Raises a NotMaterializedError carrying the type and name of this
    resource when the spark engine reports that the table does not exist.
    """
    table_exists = self.spark_engine.is_delta_table_exists(
        self.target_delta_table()
    )
    if table_exists:
        return
    raise NotMaterializedError(self.type, self.name)
1221
+
1222
def history(self):
    """Get the commit history of the feature definition.

    Runs ``DESCRIBE HISTORY`` on the target delta table.

    Returns
    -------
    spark dataframe
        The history output as spark dataframe
    """
    history_query = "DESCRIBE HISTORY {}".format(self.target_delta_table())
    return self.spark_engine.sql(history_query)
1233
+
1234
@classmethod
def list_df(cls, compartment_id: str = None, **kwargs) -> "pd.DataFrame":
    """List FeatureGroup resources in a given compartment as a dataframe.

    Parameters
    ----------
    compartment_id: (str, optional). Defaults to `None`.
        The compartment OCID.
    kwargs
        Additional keyword arguments forwarded to
        ``OCIFeatureGroup.list_resource`` for filtering.

    Returns
    -------
    pandas.DataFrame
        One row per FeatureGroup resource, built from ``to_df_record()``.
    """
    rows = [
        oci_feature_group.to_df_record()
        for oci_feature_group in OCIFeatureGroup.list_resource(
            compartment_id, **kwargs
        )
    ]
    return pd.DataFrame.from_records(rows)
1258
+
1259
@classmethod
def list(cls, compartment_id: str = None, **kwargs) -> List["FeatureGroup"]:
    """List FeatureGroup resources in a given compartment.

    Parameters
    ----------
    compartment_id: (str, optional). Defaults to `None`.
        The compartment OCID.
    kwargs
        Additional keyword arguments forwarded to
        ``OCIFeatureGroup.list_resource`` for filtering.

    Returns
    -------
    List[FeatureGroup]
        One FeatureGroup per listed resource, populated from the OCI model.
    """
    feature_groups = []
    for oci_feature_group in OCIFeatureGroup.list_resource(compartment_id, **kwargs):
        feature_group = cls()._update_from_oci_feature_group_model(oci_feature_group)
        feature_groups.append(feature_group)
    return feature_groups
1281
+
1282
@classmethod
def from_id(cls, id: str) -> "FeatureGroup":
    """Load an existing feature group resource by id.

    Parameters
    ----------
    id: str
        The feature group id.

    Returns
    -------
    FeatureGroup
        A new FeatureGroup populated from ``OCIFeatureGroup.from_id(id)``.
    """
    feature_group = cls()
    oci_feature_group = OCIFeatureGroup.from_id(id)
    return feature_group._update_from_oci_feature_group_model(oci_feature_group)
1297
+
1298
def _random_display_name(self):
    """Generate a random display name of the form ``<_PREFIX>-<random name>``."""
    return "{}-{}".format(self._PREFIX, utils.get_random_name_for_resource())
1301
+
1302
def to_dict(self) -> Dict:
    """Serialize the feature group to a dictionary.

    Returns
    -------
    dict
        A mapping with the keys ``kind``, ``type`` and ``spec``. The spec is
        a deep copy in which values exposing ``to_dict`` are replaced by
        their dictionary form, then passed through
        ``utils.batch_convert_case(..., "camel")``.
    """
    spec_copy = deepcopy(self._spec)

    # Only existing keys are reassigned, so iterating over a key snapshot
    # keeps the container's size and type intact.
    for key in [*spec_copy]:
        entry = spec_copy[key]
        if hasattr(entry, "to_dict"):
            spec_copy[key] = entry.to_dict()

    serialized = {
        "kind": self.kind,
        "type": self.type,
        "spec": utils.batch_convert_case(spec_copy, "camel"),
    }
    return serialized
1322
+
1323
+ def __repr__(self) -> str:
1324
+ """Displays the object as YAML."""
1325
+ return self.to_yaml()
1326
+
1327
def show(self, rankdir: str = GraphOrientation.LEFT_RIGHT) -> None:
    """
    Show the lineage tree for the feature_group instance.

    Args:
        rankdir (str): Graph orientation passed to ``GraphService.view_lineage``.

    Raises:
        ValueError: If no lineage information is available for this feature group id.
    """
    lineage = self.lineage.from_id(self.id)
    if not lineage:
        raise ValueError(
            f"Can't get lineage information for Feature group id {self.id}"
        )
    GraphService.view_lineage(lineage.data, EntityType.FEATURE_GROUP, rankdir)
1341
+
1342
+ def __validate_features_exist(self, features: List[str]) -> None:
1343
+ """
1344
+ Validates whether each feature in the input list is present in the output features list.
1345
+
1346
+ Args:
1347
+ features (List[str]): A list of feature names to validate.
1348
+
1349
+ Raises:
1350
+ ValueError: If any feature in the input list is not present in the output features list.
1351
+ """
1352
+ # Get a list of output feature names
1353
+ output_feature_names = [
1354
+ output_feature.feature_name for output_feature in self.features
1355
+ ]
1356
+
1357
+ # Initialize an empty list to store non-existing features
1358
+ non_existing_features = []
1359
+
1360
+ # Check if each feature in the input list is present in the output features list
1361
+ for feature in features:
1362
+ if feature not in output_feature_names:
1363
+ non_existing_features.append(feature)
1364
+
1365
+ # If there are any non-existing features, raise a ValueError
1366
+ if len(non_existing_features) != 0:
1367
+ raise ValueError(
1368
+ f"Features {non_existing_features} are not defined in the feature group."
1369
+ )
1370
+
1371
def get_feature(self, name: str):
    """Retrieve a `Feature` object from the schema of the feature group.

    There are several ways to access features of a feature group:

    ```python
    fg.feature1
    fg.get_feature("feature1")
    ```

    Args:
        name (str): The name of the feature to look up.

    Returns:
        The feature returned by ``__getitem__`` for ``name``.

    Raises:
        ValueError: If the lookup raises ``KeyError``, i.e. no single
            feature with that name exists.
    """
    try:
        found = self.__getitem__(name)
    except KeyError:
        raise ValueError(f"'FeatureGroup' object has no feature called '{name}'.")
    return found
1391
+
1392
+ def _get_job_id(self, job_id: str = None) -> str:
1393
+ """
1394
+ Helper function to determine the job ID based on the given input or the last run job.
1395
+
1396
+ Args:
1397
+ job_id (str): Job ID provided by the user.
1398
+
1399
+ Returns:
1400
+ str: Job ID to be used.
1401
+ """
1402
+ if job_id is not None:
1403
+ return job_id
1404
+
1405
+ if self.job_id is None:
1406
+ raise ValueError(
1407
+ "Unable to retrieve the last job. Please provide the job ID and make sure you materialized the data."
1408
+ )
1409
+
1410
+ return self.job_id
1411
+
1412
def get_statistics(self, job_id: str = None) -> "Statistics":
    """Retrieve the Statistics object for a job of this feature group.

    If ``job_id`` is not specified, the last job is used.

    Args:
        job_id (str): Id of the job whose statistics should be returned.

    Returns:
        Statistics: Built from the ``featureStatistics`` entry of the job
        output details and its ``version`` (1 when no version is present).

    Raises:
        ValueError: If the feature group has not been saved yet, or the job
            does not belong to this feature group.
    """
    if not self.id:
        raise ValueError(
            "FeatureGroup needs to be saved to the feature store before retrieving the statistics"
        )

    stat_job_id = self.get_last_job().id if job_id is None else job_id

    # TODO: take the one in memory or will list down job ids and find the latest
    fg_job = FeatureGroupJob.from_id(stat_job_id)
    if self.id != fg_job.feature_group_id:
        raise ValueError(
            "The specified job id does not belong to this feature group"
        )

    output_details = fg_job.job_output_details
    if output_details:
        feature_statistics = output_details.get("featureStatistics")
        stat_version = output_details.get("version")
    else:
        feature_statistics = None
        stat_version = None

    version = 1 if stat_version is None else stat_version
    return Statistics(feature_statistics, version)
1443
+
1444
def get_validation_output(self, job_id: str = None) -> "ValidationOutput":
    """Retrieve the validation report for a job.

    If ``job_id`` is not specified, the last job is used.

    Args:
        job_id (str): Id of the job whose validation report should be returned.

    Returns:
        ValidationOutput: Built from the ``validationOutput`` entry of the
        job output details (None when there are no output details).

    Raises:
        ValueError: If the feature group has not been saved yet.
    """
    if not self.id:
        raise ValueError(
            "FeatureGroup needs to be saved to the feature store before retrieving the validation report"
        )

    validation_job_id = self.get_last_job().id if job_id is None else job_id

    # The validation output JSON is part of the job's output details.
    fg_job = FeatureGroupJob.from_id(validation_job_id)
    output_details = fg_job.job_output_details
    if output_details:
        validation_output = output_details.get("validationOutput")
    else:
        validation_output = None

    return ValidationOutput(validation_output)
1469
+
1470
+ def __getattr__(self, name):
1471
+ try:
1472
+ return self.__getitem__(name)
1473
+ except KeyError:
1474
+ raise AttributeError(
1475
+ f"'FeatureGroup' object has no attribute '{name}'. "
1476
+ "If you are trying to access a feature, fall back on "
1477
+ "using the `get_feature` method."
1478
+ )
1479
+
1480
+ def __getitem__(self, name):
1481
+ if not isinstance(name, str):
1482
+ raise TypeError(
1483
+ f"Expected type `str`, got `{type(name)}`. "
1484
+ "Features are accessible by name."
1485
+ )
1486
+ feature = [
1487
+ feature
1488
+ for feature in self.__getattribute__("features")
1489
+ if feature.feature_name == name
1490
+ ]
1491
+ if len(feature) == 1:
1492
+ return feature[0]
1493
+ else:
1494
+ raise KeyError(f"'FeatureGroup' object has no feature called '{name}'.")