oracle-ads 2.13.9rc0__py3-none-any.whl → 2.13.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (858) hide show
  1. ads/aqua/__init__.py +40 -0
  2. ads/aqua/app.py +507 -0
  3. ads/aqua/cli.py +96 -0
  4. ads/aqua/client/__init__.py +3 -0
  5. ads/aqua/client/client.py +836 -0
  6. ads/aqua/client/openai_client.py +305 -0
  7. ads/aqua/common/__init__.py +5 -0
  8. ads/aqua/common/decorator.py +125 -0
  9. ads/aqua/common/entities.py +274 -0
  10. ads/aqua/common/enums.py +134 -0
  11. ads/aqua/common/errors.py +109 -0
  12. ads/aqua/common/utils.py +1295 -0
  13. ads/aqua/config/__init__.py +4 -0
  14. ads/aqua/config/container_config.py +246 -0
  15. ads/aqua/config/evaluation/__init__.py +4 -0
  16. ads/aqua/config/evaluation/evaluation_service_config.py +147 -0
  17. ads/aqua/config/utils/__init__.py +4 -0
  18. ads/aqua/config/utils/serializer.py +339 -0
  19. ads/aqua/constants.py +116 -0
  20. ads/aqua/data.py +14 -0
  21. ads/aqua/dummy_data/icon.txt +1 -0
  22. ads/aqua/dummy_data/oci_model_deployments.json +56 -0
  23. ads/aqua/dummy_data/oci_models.json +1 -0
  24. ads/aqua/dummy_data/readme.md +26 -0
  25. ads/aqua/evaluation/__init__.py +8 -0
  26. ads/aqua/evaluation/constants.py +53 -0
  27. ads/aqua/evaluation/entities.py +186 -0
  28. ads/aqua/evaluation/errors.py +70 -0
  29. ads/aqua/evaluation/evaluation.py +1814 -0
  30. ads/aqua/extension/__init__.py +42 -0
  31. ads/aqua/extension/aqua_ws_msg_handler.py +76 -0
  32. ads/aqua/extension/base_handler.py +90 -0
  33. ads/aqua/extension/common_handler.py +121 -0
  34. ads/aqua/extension/common_ws_msg_handler.py +36 -0
  35. ads/aqua/extension/deployment_handler.py +381 -0
  36. ads/aqua/extension/deployment_ws_msg_handler.py +54 -0
  37. ads/aqua/extension/errors.py +30 -0
  38. ads/aqua/extension/evaluation_handler.py +129 -0
  39. ads/aqua/extension/evaluation_ws_msg_handler.py +61 -0
  40. ads/aqua/extension/finetune_handler.py +96 -0
  41. ads/aqua/extension/model_handler.py +390 -0
  42. ads/aqua/extension/models/__init__.py +0 -0
  43. ads/aqua/extension/models/ws_models.py +145 -0
  44. ads/aqua/extension/models_ws_msg_handler.py +50 -0
  45. ads/aqua/extension/ui_handler.py +300 -0
  46. ads/aqua/extension/ui_websocket_handler.py +130 -0
  47. ads/aqua/extension/utils.py +133 -0
  48. ads/aqua/finetuning/__init__.py +7 -0
  49. ads/aqua/finetuning/constants.py +23 -0
  50. ads/aqua/finetuning/entities.py +181 -0
  51. ads/aqua/finetuning/finetuning.py +749 -0
  52. ads/aqua/model/__init__.py +8 -0
  53. ads/aqua/model/constants.py +60 -0
  54. ads/aqua/model/entities.py +385 -0
  55. ads/aqua/model/enums.py +32 -0
  56. ads/aqua/model/model.py +2134 -0
  57. ads/aqua/model/utils.py +52 -0
  58. ads/aqua/modeldeployment/__init__.py +6 -0
  59. ads/aqua/modeldeployment/constants.py +10 -0
  60. ads/aqua/modeldeployment/deployment.py +1315 -0
  61. ads/aqua/modeldeployment/entities.py +653 -0
  62. ads/aqua/modeldeployment/utils.py +543 -0
  63. ads/aqua/resources/gpu_shapes_index.json +94 -0
  64. ads/aqua/server/__init__.py +4 -0
  65. ads/aqua/server/__main__.py +24 -0
  66. ads/aqua/server/app.py +47 -0
  67. ads/aqua/server/aqua_spec.yml +1291 -0
  68. ads/aqua/training/__init__.py +4 -0
  69. ads/aqua/training/exceptions.py +476 -0
  70. ads/aqua/ui.py +519 -0
  71. ads/automl/__init__.py +9 -0
  72. ads/automl/driver.py +330 -0
  73. ads/automl/provider.py +975 -0
  74. ads/bds/__init__.py +5 -0
  75. ads/bds/auth.py +127 -0
  76. ads/bds/big_data_service.py +255 -0
  77. ads/catalog/__init__.py +19 -0
  78. ads/catalog/model.py +1576 -0
  79. ads/catalog/notebook.py +461 -0
  80. ads/catalog/project.py +468 -0
  81. ads/catalog/summary.py +178 -0
  82. ads/common/__init__.py +11 -0
  83. ads/common/analyzer.py +65 -0
  84. ads/common/artifact/.model-ignore +63 -0
  85. ads/common/artifact/__init__.py +10 -0
  86. ads/common/auth.py +1122 -0
  87. ads/common/card_identifier.py +83 -0
  88. ads/common/config.py +647 -0
  89. ads/common/data.py +165 -0
  90. ads/common/decorator/__init__.py +9 -0
  91. ads/common/decorator/argument_to_case.py +88 -0
  92. ads/common/decorator/deprecate.py +69 -0
  93. ads/common/decorator/require_nonempty_arg.py +65 -0
  94. ads/common/decorator/runtime_dependency.py +178 -0
  95. ads/common/decorator/threaded.py +97 -0
  96. ads/common/decorator/utils.py +35 -0
  97. ads/common/dsc_file_system.py +303 -0
  98. ads/common/error.py +14 -0
  99. ads/common/extended_enum.py +81 -0
  100. ads/common/function/__init__.py +5 -0
  101. ads/common/function/fn_util.py +142 -0
  102. ads/common/function/func_conf.yaml +25 -0
  103. ads/common/ipython.py +76 -0
  104. ads/common/model.py +679 -0
  105. ads/common/model_artifact.py +1759 -0
  106. ads/common/model_artifact_schema.json +107 -0
  107. ads/common/model_export_util.py +664 -0
  108. ads/common/model_metadata.py +24 -0
  109. ads/common/object_storage_details.py +296 -0
  110. ads/common/oci_client.py +179 -0
  111. ads/common/oci_datascience.py +46 -0
  112. ads/common/oci_logging.py +1144 -0
  113. ads/common/oci_mixin.py +957 -0
  114. ads/common/oci_resource.py +136 -0
  115. ads/common/serializer.py +559 -0
  116. ads/common/utils.py +1852 -0
  117. ads/common/word_lists.py +1491 -0
  118. ads/common/work_request.py +189 -0
  119. ads/config.py +1 -0
  120. ads/data_labeling/__init__.py +13 -0
  121. ads/data_labeling/boundingbox.py +253 -0
  122. ads/data_labeling/constants.py +47 -0
  123. ads/data_labeling/data_labeling_service.py +244 -0
  124. ads/data_labeling/interface/__init__.py +5 -0
  125. ads/data_labeling/interface/loader.py +16 -0
  126. ads/data_labeling/interface/parser.py +16 -0
  127. ads/data_labeling/interface/reader.py +23 -0
  128. ads/data_labeling/loader/__init__.py +5 -0
  129. ads/data_labeling/loader/file_loader.py +241 -0
  130. ads/data_labeling/metadata.py +110 -0
  131. ads/data_labeling/mixin/__init__.py +5 -0
  132. ads/data_labeling/mixin/data_labeling.py +232 -0
  133. ads/data_labeling/ner.py +129 -0
  134. ads/data_labeling/parser/__init__.py +5 -0
  135. ads/data_labeling/parser/dls_record_parser.py +388 -0
  136. ads/data_labeling/parser/export_metadata_parser.py +94 -0
  137. ads/data_labeling/parser/export_record_parser.py +473 -0
  138. ads/data_labeling/reader/__init__.py +5 -0
  139. ads/data_labeling/reader/dataset_reader.py +574 -0
  140. ads/data_labeling/reader/dls_record_reader.py +121 -0
  141. ads/data_labeling/reader/export_record_reader.py +62 -0
  142. ads/data_labeling/reader/jsonl_reader.py +75 -0
  143. ads/data_labeling/reader/metadata_reader.py +203 -0
  144. ads/data_labeling/reader/record_reader.py +263 -0
  145. ads/data_labeling/record.py +52 -0
  146. ads/data_labeling/visualizer/__init__.py +5 -0
  147. ads/data_labeling/visualizer/image_visualizer.py +525 -0
  148. ads/data_labeling/visualizer/text_visualizer.py +357 -0
  149. ads/database/__init__.py +5 -0
  150. ads/database/connection.py +338 -0
  151. ads/dataset/__init__.py +10 -0
  152. ads/dataset/capabilities.md +51 -0
  153. ads/dataset/classification_dataset.py +339 -0
  154. ads/dataset/correlation.py +226 -0
  155. ads/dataset/correlation_plot.py +563 -0
  156. ads/dataset/dask_series.py +173 -0
  157. ads/dataset/dataframe_transformer.py +110 -0
  158. ads/dataset/dataset.py +1979 -0
  159. ads/dataset/dataset_browser.py +360 -0
  160. ads/dataset/dataset_with_target.py +995 -0
  161. ads/dataset/exception.py +25 -0
  162. ads/dataset/factory.py +987 -0
  163. ads/dataset/feature_engineering_transformer.py +35 -0
  164. ads/dataset/feature_selection.py +107 -0
  165. ads/dataset/forecasting_dataset.py +26 -0
  166. ads/dataset/helper.py +1450 -0
  167. ads/dataset/label_encoder.py +99 -0
  168. ads/dataset/mixin/__init__.py +5 -0
  169. ads/dataset/mixin/dataset_accessor.py +134 -0
  170. ads/dataset/pipeline.py +58 -0
  171. ads/dataset/plot.py +710 -0
  172. ads/dataset/progress.py +86 -0
  173. ads/dataset/recommendation.py +297 -0
  174. ads/dataset/recommendation_transformer.py +502 -0
  175. ads/dataset/regression_dataset.py +14 -0
  176. ads/dataset/sampled_dataset.py +1050 -0
  177. ads/dataset/target.py +98 -0
  178. ads/dataset/timeseries.py +18 -0
  179. ads/dbmixin/__init__.py +5 -0
  180. ads/dbmixin/db_pandas_accessor.py +153 -0
  181. ads/environment/__init__.py +9 -0
  182. ads/environment/ml_runtime.py +66 -0
  183. ads/evaluations/README.md +14 -0
  184. ads/evaluations/__init__.py +109 -0
  185. ads/evaluations/evaluation_plot.py +983 -0
  186. ads/evaluations/evaluator.py +1334 -0
  187. ads/evaluations/statistical_metrics.py +543 -0
  188. ads/experiments/__init__.py +9 -0
  189. ads/experiments/capabilities.md +0 -0
  190. ads/explanations/__init__.py +21 -0
  191. ads/explanations/base_explainer.py +142 -0
  192. ads/explanations/capabilities.md +83 -0
  193. ads/explanations/explainer.py +190 -0
  194. ads/explanations/mlx_global_explainer.py +1050 -0
  195. ads/explanations/mlx_interface.py +386 -0
  196. ads/explanations/mlx_local_explainer.py +287 -0
  197. ads/explanations/mlx_whatif_explainer.py +201 -0
  198. ads/feature_engineering/__init__.py +20 -0
  199. ads/feature_engineering/accessor/__init__.py +5 -0
  200. ads/feature_engineering/accessor/dataframe_accessor.py +535 -0
  201. ads/feature_engineering/accessor/mixin/__init__.py +5 -0
  202. ads/feature_engineering/accessor/mixin/correlation.py +166 -0
  203. ads/feature_engineering/accessor/mixin/eda_mixin.py +266 -0
  204. ads/feature_engineering/accessor/mixin/eda_mixin_series.py +85 -0
  205. ads/feature_engineering/accessor/mixin/feature_types_mixin.py +211 -0
  206. ads/feature_engineering/accessor/mixin/utils.py +65 -0
  207. ads/feature_engineering/accessor/series_accessor.py +431 -0
  208. ads/feature_engineering/adsimage/__init__.py +5 -0
  209. ads/feature_engineering/adsimage/image.py +192 -0
  210. ads/feature_engineering/adsimage/image_reader.py +170 -0
  211. ads/feature_engineering/adsimage/interface/__init__.py +5 -0
  212. ads/feature_engineering/adsimage/interface/reader.py +19 -0
  213. ads/feature_engineering/adsstring/__init__.py +7 -0
  214. ads/feature_engineering/adsstring/oci_language/__init__.py +8 -0
  215. ads/feature_engineering/adsstring/string/__init__.py +8 -0
  216. ads/feature_engineering/data_schema.json +57 -0
  217. ads/feature_engineering/dataset/__init__.py +5 -0
  218. ads/feature_engineering/dataset/zip_code_data.py +42062 -0
  219. ads/feature_engineering/exceptions.py +40 -0
  220. ads/feature_engineering/feature_type/__init__.py +133 -0
  221. ads/feature_engineering/feature_type/address.py +184 -0
  222. ads/feature_engineering/feature_type/adsstring/__init__.py +5 -0
  223. ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +164 -0
  224. ads/feature_engineering/feature_type/adsstring/oci_language.py +93 -0
  225. ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +5 -0
  226. ads/feature_engineering/feature_type/adsstring/parsers/base.py +47 -0
  227. ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +96 -0
  228. ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +221 -0
  229. ads/feature_engineering/feature_type/adsstring/string.py +258 -0
  230. ads/feature_engineering/feature_type/base.py +58 -0
  231. ads/feature_engineering/feature_type/boolean.py +183 -0
  232. ads/feature_engineering/feature_type/category.py +146 -0
  233. ads/feature_engineering/feature_type/constant.py +137 -0
  234. ads/feature_engineering/feature_type/continuous.py +151 -0
  235. ads/feature_engineering/feature_type/creditcard.py +314 -0
  236. ads/feature_engineering/feature_type/datetime.py +190 -0
  237. ads/feature_engineering/feature_type/discrete.py +134 -0
  238. ads/feature_engineering/feature_type/document.py +43 -0
  239. ads/feature_engineering/feature_type/gis.py +251 -0
  240. ads/feature_engineering/feature_type/handler/__init__.py +5 -0
  241. ads/feature_engineering/feature_type/handler/feature_validator.py +524 -0
  242. ads/feature_engineering/feature_type/handler/feature_warning.py +319 -0
  243. ads/feature_engineering/feature_type/handler/warnings.py +128 -0
  244. ads/feature_engineering/feature_type/integer.py +142 -0
  245. ads/feature_engineering/feature_type/ip_address.py +144 -0
  246. ads/feature_engineering/feature_type/ip_address_v4.py +138 -0
  247. ads/feature_engineering/feature_type/ip_address_v6.py +138 -0
  248. ads/feature_engineering/feature_type/lat_long.py +256 -0
  249. ads/feature_engineering/feature_type/object.py +43 -0
  250. ads/feature_engineering/feature_type/ordinal.py +132 -0
  251. ads/feature_engineering/feature_type/phone_number.py +135 -0
  252. ads/feature_engineering/feature_type/string.py +171 -0
  253. ads/feature_engineering/feature_type/text.py +93 -0
  254. ads/feature_engineering/feature_type/unknown.py +43 -0
  255. ads/feature_engineering/feature_type/zip_code.py +164 -0
  256. ads/feature_engineering/feature_type_manager.py +406 -0
  257. ads/feature_engineering/schema.py +795 -0
  258. ads/feature_engineering/utils.py +245 -0
  259. ads/feature_store/.readthedocs.yaml +19 -0
  260. ads/feature_store/README.md +65 -0
  261. ads/feature_store/__init__.py +9 -0
  262. ads/feature_store/common/__init__.py +0 -0
  263. ads/feature_store/common/enums.py +339 -0
  264. ads/feature_store/common/exceptions.py +18 -0
  265. ads/feature_store/common/spark_session_singleton.py +125 -0
  266. ads/feature_store/common/utils/__init__.py +0 -0
  267. ads/feature_store/common/utils/base64_encoder_decoder.py +72 -0
  268. ads/feature_store/common/utils/feature_schema_mapper.py +283 -0
  269. ads/feature_store/common/utils/transformation_utils.py +82 -0
  270. ads/feature_store/common/utils/utility.py +403 -0
  271. ads/feature_store/data_validation/__init__.py +0 -0
  272. ads/feature_store/data_validation/great_expectation.py +129 -0
  273. ads/feature_store/dataset.py +1230 -0
  274. ads/feature_store/dataset_job.py +530 -0
  275. ads/feature_store/docs/Dockerfile +7 -0
  276. ads/feature_store/docs/Makefile +44 -0
  277. ads/feature_store/docs/conf.py +28 -0
  278. ads/feature_store/docs/requirements.txt +14 -0
  279. ads/feature_store/docs/source/ads.feature_store.query.rst +20 -0
  280. ads/feature_store/docs/source/cicd.rst +137 -0
  281. ads/feature_store/docs/source/conf.py +86 -0
  282. ads/feature_store/docs/source/data_versioning.rst +33 -0
  283. ads/feature_store/docs/source/dataset.rst +388 -0
  284. ads/feature_store/docs/source/dataset_job.rst +27 -0
  285. ads/feature_store/docs/source/demo.rst +70 -0
  286. ads/feature_store/docs/source/entity.rst +78 -0
  287. ads/feature_store/docs/source/feature_group.rst +624 -0
  288. ads/feature_store/docs/source/feature_group_job.rst +29 -0
  289. ads/feature_store/docs/source/feature_store.rst +122 -0
  290. ads/feature_store/docs/source/feature_store_class.rst +123 -0
  291. ads/feature_store/docs/source/feature_validation.rst +66 -0
  292. ads/feature_store/docs/source/figures/cicd.png +0 -0
  293. ads/feature_store/docs/source/figures/data_validation.png +0 -0
  294. ads/feature_store/docs/source/figures/data_versioning.png +0 -0
  295. ads/feature_store/docs/source/figures/dataset.gif +0 -0
  296. ads/feature_store/docs/source/figures/dataset.png +0 -0
  297. ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
  298. ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
  299. ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
  300. ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
  301. ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
  302. ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
  303. ads/feature_store/docs/source/figures/entity.png +0 -0
  304. ads/feature_store/docs/source/figures/feature_group.png +0 -0
  305. ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
  306. ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
  307. ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
  308. ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
  309. ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
  310. ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
  311. ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
  312. ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
  313. ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
  314. ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
  315. ads/feature_store/docs/source/figures/overview.png +0 -0
  316. ads/feature_store/docs/source/figures/resource_manager.png +0 -0
  317. ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
  318. ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
  319. ads/feature_store/docs/source/figures/stats_1.png +0 -0
  320. ads/feature_store/docs/source/figures/stats_2.png +0 -0
  321. ads/feature_store/docs/source/figures/stats_d.png +0 -0
  322. ads/feature_store/docs/source/figures/stats_fg.png +0 -0
  323. ads/feature_store/docs/source/figures/transformation.png +0 -0
  324. ads/feature_store/docs/source/figures/transformations.gif +0 -0
  325. ads/feature_store/docs/source/figures/validation.png +0 -0
  326. ads/feature_store/docs/source/figures/validation_fg.png +0 -0
  327. ads/feature_store/docs/source/figures/validation_results.png +0 -0
  328. ads/feature_store/docs/source/figures/validation_summary.png +0 -0
  329. ads/feature_store/docs/source/index.rst +81 -0
  330. ads/feature_store/docs/source/module.rst +8 -0
  331. ads/feature_store/docs/source/notebook.rst +94 -0
  332. ads/feature_store/docs/source/overview.rst +47 -0
  333. ads/feature_store/docs/source/quickstart.rst +176 -0
  334. ads/feature_store/docs/source/release_notes.rst +194 -0
  335. ads/feature_store/docs/source/setup_feature_store.rst +81 -0
  336. ads/feature_store/docs/source/statistics.rst +58 -0
  337. ads/feature_store/docs/source/transformation.rst +199 -0
  338. ads/feature_store/docs/source/ui.rst +65 -0
  339. ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +66 -0
  340. ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +192 -0
  341. ads/feature_store/docs/source/user_guides.setup.terraform.rst +338 -0
  342. ads/feature_store/entity.py +718 -0
  343. ads/feature_store/execution_strategy/__init__.py +0 -0
  344. ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
  345. ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +375 -0
  346. ads/feature_store/execution_strategy/engine/__init__.py +0 -0
  347. ads/feature_store/execution_strategy/engine/spark_engine.py +316 -0
  348. ads/feature_store/execution_strategy/execution_strategy.py +113 -0
  349. ads/feature_store/execution_strategy/execution_strategy_provider.py +47 -0
  350. ads/feature_store/execution_strategy/spark/__init__.py +0 -0
  351. ads/feature_store/execution_strategy/spark/spark_execution.py +618 -0
  352. ads/feature_store/feature.py +192 -0
  353. ads/feature_store/feature_group.py +1494 -0
  354. ads/feature_store/feature_group_expectation.py +346 -0
  355. ads/feature_store/feature_group_job.py +602 -0
  356. ads/feature_store/feature_lineage/__init__.py +0 -0
  357. ads/feature_store/feature_lineage/graphviz_service.py +180 -0
  358. ads/feature_store/feature_option_details.py +50 -0
  359. ads/feature_store/feature_statistics/__init__.py +0 -0
  360. ads/feature_store/feature_statistics/statistics_service.py +99 -0
  361. ads/feature_store/feature_store.py +699 -0
  362. ads/feature_store/feature_store_registrar.py +518 -0
  363. ads/feature_store/input_feature_detail.py +149 -0
  364. ads/feature_store/mixin/__init__.py +4 -0
  365. ads/feature_store/mixin/oci_feature_store.py +145 -0
  366. ads/feature_store/model_details.py +73 -0
  367. ads/feature_store/query/__init__.py +0 -0
  368. ads/feature_store/query/filter.py +266 -0
  369. ads/feature_store/query/generator/__init__.py +0 -0
  370. ads/feature_store/query/generator/query_generator.py +298 -0
  371. ads/feature_store/query/join.py +161 -0
  372. ads/feature_store/query/query.py +403 -0
  373. ads/feature_store/query/validator/__init__.py +0 -0
  374. ads/feature_store/query/validator/query_validator.py +57 -0
  375. ads/feature_store/response/__init__.py +0 -0
  376. ads/feature_store/response/response_builder.py +68 -0
  377. ads/feature_store/service/__init__.py +0 -0
  378. ads/feature_store/service/oci_dataset.py +139 -0
  379. ads/feature_store/service/oci_dataset_job.py +199 -0
  380. ads/feature_store/service/oci_entity.py +125 -0
  381. ads/feature_store/service/oci_feature_group.py +164 -0
  382. ads/feature_store/service/oci_feature_group_job.py +214 -0
  383. ads/feature_store/service/oci_feature_store.py +182 -0
  384. ads/feature_store/service/oci_lineage.py +87 -0
  385. ads/feature_store/service/oci_transformation.py +104 -0
  386. ads/feature_store/statistics/__init__.py +0 -0
  387. ads/feature_store/statistics/abs_feature_value.py +49 -0
  388. ads/feature_store/statistics/charts/__init__.py +0 -0
  389. ads/feature_store/statistics/charts/abstract_feature_plot.py +37 -0
  390. ads/feature_store/statistics/charts/box_plot.py +148 -0
  391. ads/feature_store/statistics/charts/frequency_distribution.py +65 -0
  392. ads/feature_store/statistics/charts/probability_distribution.py +68 -0
  393. ads/feature_store/statistics/charts/top_k_frequent_elements.py +98 -0
  394. ads/feature_store/statistics/feature_stat.py +126 -0
  395. ads/feature_store/statistics/generic_feature_value.py +33 -0
  396. ads/feature_store/statistics/statistics.py +41 -0
  397. ads/feature_store/statistics_config.py +101 -0
  398. ads/feature_store/templates/feature_store_template.yaml +45 -0
  399. ads/feature_store/transformation.py +499 -0
  400. ads/feature_store/validation_output.py +57 -0
  401. ads/hpo/__init__.py +9 -0
  402. ads/hpo/_imports.py +91 -0
  403. ads/hpo/ads_search_space.py +439 -0
  404. ads/hpo/distributions.py +325 -0
  405. ads/hpo/objective.py +280 -0
  406. ads/hpo/search_cv.py +1657 -0
  407. ads/hpo/stopping_criterion.py +75 -0
  408. ads/hpo/tuner_artifact.py +413 -0
  409. ads/hpo/utils.py +91 -0
  410. ads/hpo/validation.py +140 -0
  411. ads/hpo/visualization/__init__.py +5 -0
  412. ads/hpo/visualization/_contour.py +23 -0
  413. ads/hpo/visualization/_edf.py +20 -0
  414. ads/hpo/visualization/_intermediate_values.py +21 -0
  415. ads/hpo/visualization/_optimization_history.py +25 -0
  416. ads/hpo/visualization/_parallel_coordinate.py +169 -0
  417. ads/hpo/visualization/_param_importances.py +26 -0
  418. ads/jobs/__init__.py +53 -0
  419. ads/jobs/ads_job.py +663 -0
  420. ads/jobs/builders/__init__.py +5 -0
  421. ads/jobs/builders/base.py +156 -0
  422. ads/jobs/builders/infrastructure/__init__.py +6 -0
  423. ads/jobs/builders/infrastructure/base.py +165 -0
  424. ads/jobs/builders/infrastructure/dataflow.py +1252 -0
  425. ads/jobs/builders/infrastructure/dsc_job.py +1894 -0
  426. ads/jobs/builders/infrastructure/dsc_job_runtime.py +1233 -0
  427. ads/jobs/builders/infrastructure/utils.py +65 -0
  428. ads/jobs/builders/runtimes/__init__.py +5 -0
  429. ads/jobs/builders/runtimes/artifact.py +338 -0
  430. ads/jobs/builders/runtimes/base.py +325 -0
  431. ads/jobs/builders/runtimes/container_runtime.py +242 -0
  432. ads/jobs/builders/runtimes/python_runtime.py +1016 -0
  433. ads/jobs/builders/runtimes/pytorch_runtime.py +204 -0
  434. ads/jobs/cli.py +104 -0
  435. ads/jobs/env_var_parser.py +131 -0
  436. ads/jobs/extension.py +160 -0
  437. ads/jobs/schema/__init__.py +5 -0
  438. ads/jobs/schema/infrastructure_schema.json +116 -0
  439. ads/jobs/schema/job_schema.json +42 -0
  440. ads/jobs/schema/runtime_schema.json +183 -0
  441. ads/jobs/schema/validator.py +141 -0
  442. ads/jobs/serializer.py +296 -0
  443. ads/jobs/templates/__init__.py +5 -0
  444. ads/jobs/templates/container.py +6 -0
  445. ads/jobs/templates/driver_notebook.py +177 -0
  446. ads/jobs/templates/driver_oci.py +500 -0
  447. ads/jobs/templates/driver_python.py +48 -0
  448. ads/jobs/templates/driver_pytorch.py +852 -0
  449. ads/jobs/templates/driver_utils.py +615 -0
  450. ads/jobs/templates/hostname_from_env.c +55 -0
  451. ads/jobs/templates/oci_metrics.py +181 -0
  452. ads/jobs/utils.py +104 -0
  453. ads/llm/__init__.py +28 -0
  454. ads/llm/autogen/__init__.py +2 -0
  455. ads/llm/autogen/constants.py +15 -0
  456. ads/llm/autogen/reports/__init__.py +2 -0
  457. ads/llm/autogen/reports/base.py +67 -0
  458. ads/llm/autogen/reports/data.py +103 -0
  459. ads/llm/autogen/reports/session.py +526 -0
  460. ads/llm/autogen/reports/templates/chat_box.html +13 -0
  461. ads/llm/autogen/reports/templates/chat_box_lt.html +5 -0
  462. ads/llm/autogen/reports/templates/chat_box_rt.html +6 -0
  463. ads/llm/autogen/reports/utils.py +56 -0
  464. ads/llm/autogen/v02/__init__.py +4 -0
  465. ads/llm/autogen/v02/client.py +295 -0
  466. ads/llm/autogen/v02/log_handlers/__init__.py +2 -0
  467. ads/llm/autogen/v02/log_handlers/oci_file_handler.py +83 -0
  468. ads/llm/autogen/v02/loggers/__init__.py +6 -0
  469. ads/llm/autogen/v02/loggers/metric_logger.py +320 -0
  470. ads/llm/autogen/v02/loggers/session_logger.py +580 -0
  471. ads/llm/autogen/v02/loggers/utils.py +86 -0
  472. ads/llm/autogen/v02/runtime_logging.py +163 -0
  473. ads/llm/chain.py +268 -0
  474. ads/llm/chat_template.py +31 -0
  475. ads/llm/deploy.py +63 -0
  476. ads/llm/guardrails/__init__.py +5 -0
  477. ads/llm/guardrails/base.py +442 -0
  478. ads/llm/guardrails/huggingface.py +44 -0
  479. ads/llm/langchain/__init__.py +5 -0
  480. ads/llm/langchain/plugins/__init__.py +5 -0
  481. ads/llm/langchain/plugins/chat_models/__init__.py +5 -0
  482. ads/llm/langchain/plugins/chat_models/oci_data_science.py +1027 -0
  483. ads/llm/langchain/plugins/embeddings/__init__.py +4 -0
  484. ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +184 -0
  485. ads/llm/langchain/plugins/llms/__init__.py +5 -0
  486. ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +979 -0
  487. ads/llm/requirements.txt +3 -0
  488. ads/llm/serialize.py +219 -0
  489. ads/llm/serializers/__init__.py +0 -0
  490. ads/llm/serializers/retrieval_qa.py +153 -0
  491. ads/llm/serializers/runnable_parallel.py +27 -0
  492. ads/llm/templates/score_chain.jinja2 +155 -0
  493. ads/llm/templates/tool_chat_template_hermes.jinja +130 -0
  494. ads/llm/templates/tool_chat_template_mistral_parallel.jinja +94 -0
  495. ads/model/__init__.py +52 -0
  496. ads/model/artifact.py +573 -0
  497. ads/model/artifact_downloader.py +254 -0
  498. ads/model/artifact_uploader.py +267 -0
  499. ads/model/base_properties.py +238 -0
  500. ads/model/common/.model-ignore +66 -0
  501. ads/model/common/__init__.py +5 -0
  502. ads/model/common/utils.py +142 -0
  503. ads/model/datascience_model.py +2635 -0
  504. ads/model/deployment/__init__.py +20 -0
  505. ads/model/deployment/common/__init__.py +5 -0
  506. ads/model/deployment/common/utils.py +308 -0
  507. ads/model/deployment/model_deployer.py +466 -0
  508. ads/model/deployment/model_deployment.py +1846 -0
  509. ads/model/deployment/model_deployment_infrastructure.py +671 -0
  510. ads/model/deployment/model_deployment_properties.py +493 -0
  511. ads/model/deployment/model_deployment_runtime.py +838 -0
  512. ads/model/extractor/__init__.py +5 -0
  513. ads/model/extractor/automl_extractor.py +74 -0
  514. ads/model/extractor/embedding_onnx_extractor.py +80 -0
  515. ads/model/extractor/huggingface_extractor.py +88 -0
  516. ads/model/extractor/keras_extractor.py +84 -0
  517. ads/model/extractor/lightgbm_extractor.py +93 -0
  518. ads/model/extractor/model_info_extractor.py +114 -0
  519. ads/model/extractor/model_info_extractor_factory.py +105 -0
  520. ads/model/extractor/pytorch_extractor.py +87 -0
  521. ads/model/extractor/sklearn_extractor.py +112 -0
  522. ads/model/extractor/spark_extractor.py +89 -0
  523. ads/model/extractor/tensorflow_extractor.py +85 -0
  524. ads/model/extractor/xgboost_extractor.py +94 -0
  525. ads/model/framework/__init__.py +5 -0
  526. ads/model/framework/automl_model.py +178 -0
  527. ads/model/framework/embedding_onnx_model.py +438 -0
  528. ads/model/framework/huggingface_model.py +399 -0
  529. ads/model/framework/lightgbm_model.py +266 -0
  530. ads/model/framework/pytorch_model.py +266 -0
  531. ads/model/framework/sklearn_model.py +250 -0
  532. ads/model/framework/spark_model.py +326 -0
  533. ads/model/framework/tensorflow_model.py +254 -0
  534. ads/model/framework/xgboost_model.py +258 -0
  535. ads/model/generic_model.py +3518 -0
  536. ads/model/model_artifact_boilerplate/README.md +381 -0
  537. ads/model/model_artifact_boilerplate/__init__.py +5 -0
  538. ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +5 -0
  539. ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +427 -0
  540. ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +2 -0
  541. ads/model/model_artifact_boilerplate/runtime.yaml +7 -0
  542. ads/model/model_artifact_boilerplate/score.py +61 -0
  543. ads/model/model_file_description_schema.json +68 -0
  544. ads/model/model_introspect.py +331 -0
  545. ads/model/model_metadata.py +1810 -0
  546. ads/model/model_metadata_mixin.py +460 -0
  547. ads/model/model_properties.py +63 -0
  548. ads/model/model_version_set.py +739 -0
  549. ads/model/runtime/__init__.py +5 -0
  550. ads/model/runtime/env_info.py +306 -0
  551. ads/model/runtime/model_deployment_details.py +37 -0
  552. ads/model/runtime/model_provenance_details.py +58 -0
  553. ads/model/runtime/runtime_info.py +81 -0
  554. ads/model/runtime/schemas/inference_env_info_schema.yaml +16 -0
  555. ads/model/runtime/schemas/model_provenance_schema.yaml +36 -0
  556. ads/model/runtime/schemas/training_env_info_schema.yaml +16 -0
  557. ads/model/runtime/utils.py +201 -0
  558. ads/model/serde/__init__.py +5 -0
  559. ads/model/serde/common.py +40 -0
  560. ads/model/serde/model_input.py +547 -0
  561. ads/model/serde/model_serializer.py +1184 -0
  562. ads/model/service/__init__.py +5 -0
  563. ads/model/service/oci_datascience_model.py +1076 -0
  564. ads/model/service/oci_datascience_model_deployment.py +500 -0
  565. ads/model/service/oci_datascience_model_version_set.py +176 -0
  566. ads/model/transformer/__init__.py +5 -0
  567. ads/model/transformer/onnx_transformer.py +324 -0
  568. ads/mysqldb/__init__.py +5 -0
  569. ads/mysqldb/mysql_db.py +227 -0
  570. ads/opctl/__init__.py +18 -0
  571. ads/opctl/anomaly_detection.py +11 -0
  572. ads/opctl/backend/__init__.py +5 -0
  573. ads/opctl/backend/ads_dataflow.py +353 -0
  574. ads/opctl/backend/ads_ml_job.py +710 -0
  575. ads/opctl/backend/ads_ml_pipeline.py +164 -0
  576. ads/opctl/backend/ads_model_deployment.py +209 -0
  577. ads/opctl/backend/base.py +146 -0
  578. ads/opctl/backend/local.py +1053 -0
  579. ads/opctl/backend/marketplace/__init__.py +9 -0
  580. ads/opctl/backend/marketplace/helm_helper.py +173 -0
  581. ads/opctl/backend/marketplace/local_marketplace.py +271 -0
  582. ads/opctl/backend/marketplace/marketplace_backend_runner.py +71 -0
  583. ads/opctl/backend/marketplace/marketplace_operator_interface.py +44 -0
  584. ads/opctl/backend/marketplace/marketplace_operator_runner.py +24 -0
  585. ads/opctl/backend/marketplace/marketplace_utils.py +212 -0
  586. ads/opctl/backend/marketplace/models/__init__.py +5 -0
  587. ads/opctl/backend/marketplace/models/bearer_token.py +94 -0
  588. ads/opctl/backend/marketplace/models/marketplace_type.py +70 -0
  589. ads/opctl/backend/marketplace/models/ocir_details.py +56 -0
  590. ads/opctl/backend/marketplace/prerequisite_checker.py +238 -0
  591. ads/opctl/cli.py +707 -0
  592. ads/opctl/cmds.py +869 -0
  593. ads/opctl/conda/__init__.py +5 -0
  594. ads/opctl/conda/cli.py +193 -0
  595. ads/opctl/conda/cmds.py +749 -0
  596. ads/opctl/conda/config.yaml +34 -0
  597. ads/opctl/conda/manifest_template.yaml +13 -0
  598. ads/opctl/conda/multipart_uploader.py +188 -0
  599. ads/opctl/conda/pack.py +89 -0
  600. ads/opctl/config/__init__.py +5 -0
  601. ads/opctl/config/base.py +57 -0
  602. ads/opctl/config/diagnostics/__init__.py +5 -0
  603. ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +62 -0
  604. ads/opctl/config/merger.py +255 -0
  605. ads/opctl/config/resolver.py +297 -0
  606. ads/opctl/config/utils.py +79 -0
  607. ads/opctl/config/validator.py +17 -0
  608. ads/opctl/config/versioner.py +68 -0
  609. ads/opctl/config/yaml_parsers/__init__.py +7 -0
  610. ads/opctl/config/yaml_parsers/base.py +58 -0
  611. ads/opctl/config/yaml_parsers/distributed/__init__.py +7 -0
  612. ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +201 -0
  613. ads/opctl/constants.py +66 -0
  614. ads/opctl/decorator/__init__.py +5 -0
  615. ads/opctl/decorator/common.py +129 -0
  616. ads/opctl/diagnostics/__init__.py +5 -0
  617. ads/opctl/diagnostics/__main__.py +25 -0
  618. ads/opctl/diagnostics/check_distributed_job_requirements.py +212 -0
  619. ads/opctl/diagnostics/check_requirements.py +144 -0
  620. ads/opctl/diagnostics/requirement_exception.py +9 -0
  621. ads/opctl/distributed/README.md +109 -0
  622. ads/opctl/distributed/__init__.py +5 -0
  623. ads/opctl/distributed/certificates.py +32 -0
  624. ads/opctl/distributed/cli.py +207 -0
  625. ads/opctl/distributed/cmds.py +731 -0
  626. ads/opctl/distributed/common/__init__.py +5 -0
  627. ads/opctl/distributed/common/abstract_cluster_provider.py +449 -0
  628. ads/opctl/distributed/common/abstract_framework_spec_builder.py +88 -0
  629. ads/opctl/distributed/common/cluster_config_helper.py +103 -0
  630. ads/opctl/distributed/common/cluster_provider_factory.py +21 -0
  631. ads/opctl/distributed/common/cluster_runner.py +54 -0
  632. ads/opctl/distributed/common/framework_factory.py +29 -0
  633. ads/opctl/docker/Dockerfile.job +103 -0
  634. ads/opctl/docker/Dockerfile.job.arm +107 -0
  635. ads/opctl/docker/Dockerfile.job.gpu +175 -0
  636. ads/opctl/docker/base-env.yaml +13 -0
  637. ads/opctl/docker/cuda.repo +6 -0
  638. ads/opctl/docker/operator/.dockerignore +0 -0
  639. ads/opctl/docker/operator/Dockerfile +41 -0
  640. ads/opctl/docker/operator/Dockerfile.gpu +85 -0
  641. ads/opctl/docker/operator/cuda.repo +6 -0
  642. ads/opctl/docker/operator/environment.yaml +8 -0
  643. ads/opctl/forecast.py +11 -0
  644. ads/opctl/index.yaml +3 -0
  645. ads/opctl/model/__init__.py +5 -0
  646. ads/opctl/model/cli.py +65 -0
  647. ads/opctl/model/cmds.py +73 -0
  648. ads/opctl/operator/README.md +4 -0
  649. ads/opctl/operator/__init__.py +31 -0
  650. ads/opctl/operator/cli.py +344 -0
  651. ads/opctl/operator/cmd.py +596 -0
  652. ads/opctl/operator/common/__init__.py +5 -0
  653. ads/opctl/operator/common/backend_factory.py +460 -0
  654. ads/opctl/operator/common/const.py +27 -0
  655. ads/opctl/operator/common/data/synthetic.csv +16001 -0
  656. ads/opctl/operator/common/dictionary_merger.py +148 -0
  657. ads/opctl/operator/common/errors.py +42 -0
  658. ads/opctl/operator/common/operator_config.py +99 -0
  659. ads/opctl/operator/common/operator_loader.py +811 -0
  660. ads/opctl/operator/common/operator_schema.yaml +130 -0
  661. ads/opctl/operator/common/operator_yaml_generator.py +152 -0
  662. ads/opctl/operator/common/utils.py +208 -0
  663. ads/opctl/operator/lowcode/__init__.py +5 -0
  664. ads/opctl/operator/lowcode/anomaly/MLoperator +16 -0
  665. ads/opctl/operator/lowcode/anomaly/README.md +207 -0
  666. ads/opctl/operator/lowcode/anomaly/__init__.py +5 -0
  667. ads/opctl/operator/lowcode/anomaly/__main__.py +103 -0
  668. ads/opctl/operator/lowcode/anomaly/cmd.py +35 -0
  669. ads/opctl/operator/lowcode/anomaly/const.py +167 -0
  670. ads/opctl/operator/lowcode/anomaly/environment.yaml +10 -0
  671. ads/opctl/operator/lowcode/anomaly/model/__init__.py +5 -0
  672. ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +146 -0
  673. ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +162 -0
  674. ads/opctl/operator/lowcode/anomaly/model/automlx.py +99 -0
  675. ads/opctl/operator/lowcode/anomaly/model/autots.py +115 -0
  676. ads/opctl/operator/lowcode/anomaly/model/base_model.py +404 -0
  677. ads/opctl/operator/lowcode/anomaly/model/factory.py +110 -0
  678. ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +78 -0
  679. ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +78 -0
  680. ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +120 -0
  681. ads/opctl/operator/lowcode/anomaly/model/tods.py +119 -0
  682. ads/opctl/operator/lowcode/anomaly/operator_config.py +127 -0
  683. ads/opctl/operator/lowcode/anomaly/schema.yaml +401 -0
  684. ads/opctl/operator/lowcode/anomaly/utils.py +88 -0
  685. ads/opctl/operator/lowcode/common/__init__.py +5 -0
  686. ads/opctl/operator/lowcode/common/const.py +10 -0
  687. ads/opctl/operator/lowcode/common/data.py +116 -0
  688. ads/opctl/operator/lowcode/common/errors.py +47 -0
  689. ads/opctl/operator/lowcode/common/transformations.py +296 -0
  690. ads/opctl/operator/lowcode/common/utils.py +384 -0
  691. ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +13 -0
  692. ads/opctl/operator/lowcode/feature_store_marketplace/README.md +30 -0
  693. ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +5 -0
  694. ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +116 -0
  695. ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +85 -0
  696. ads/opctl/operator/lowcode/feature_store_marketplace/const.py +15 -0
  697. ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
  698. ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +4 -0
  699. ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +32 -0
  700. ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +43 -0
  701. ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +120 -0
  702. ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +34 -0
  703. ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +386 -0
  704. ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +160 -0
  705. ads/opctl/operator/lowcode/forecast/MLoperator +25 -0
  706. ads/opctl/operator/lowcode/forecast/README.md +209 -0
  707. ads/opctl/operator/lowcode/forecast/__init__.py +5 -0
  708. ads/opctl/operator/lowcode/forecast/__main__.py +89 -0
  709. ads/opctl/operator/lowcode/forecast/cmd.py +40 -0
  710. ads/opctl/operator/lowcode/forecast/const.py +92 -0
  711. ads/opctl/operator/lowcode/forecast/environment.yaml +20 -0
  712. ads/opctl/operator/lowcode/forecast/errors.py +26 -0
  713. ads/opctl/operator/lowcode/forecast/model/__init__.py +5 -0
  714. ads/opctl/operator/lowcode/forecast/model/arima.py +279 -0
  715. ads/opctl/operator/lowcode/forecast/model/automlx.py +553 -0
  716. ads/opctl/operator/lowcode/forecast/model/autots.py +312 -0
  717. ads/opctl/operator/lowcode/forecast/model/base_model.py +875 -0
  718. ads/opctl/operator/lowcode/forecast/model/factory.py +106 -0
  719. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +492 -0
  720. ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +243 -0
  721. ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +482 -0
  722. ads/opctl/operator/lowcode/forecast/model/prophet.py +450 -0
  723. ads/opctl/operator/lowcode/forecast/model_evaluator.py +244 -0
  724. ads/opctl/operator/lowcode/forecast/operator_config.py +234 -0
  725. ads/opctl/operator/lowcode/forecast/schema.yaml +506 -0
  726. ads/opctl/operator/lowcode/forecast/utils.py +397 -0
  727. ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +7 -0
  728. ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +285 -0
  729. ads/opctl/operator/lowcode/forecast/whatifserve/score.py +246 -0
  730. ads/opctl/operator/lowcode/pii/MLoperator +17 -0
  731. ads/opctl/operator/lowcode/pii/README.md +208 -0
  732. ads/opctl/operator/lowcode/pii/__init__.py +5 -0
  733. ads/opctl/operator/lowcode/pii/__main__.py +78 -0
  734. ads/opctl/operator/lowcode/pii/cmd.py +39 -0
  735. ads/opctl/operator/lowcode/pii/constant.py +84 -0
  736. ads/opctl/operator/lowcode/pii/environment.yaml +17 -0
  737. ads/opctl/operator/lowcode/pii/errors.py +27 -0
  738. ads/opctl/operator/lowcode/pii/model/__init__.py +5 -0
  739. ads/opctl/operator/lowcode/pii/model/factory.py +82 -0
  740. ads/opctl/operator/lowcode/pii/model/guardrails.py +167 -0
  741. ads/opctl/operator/lowcode/pii/model/pii.py +145 -0
  742. ads/opctl/operator/lowcode/pii/model/processor/__init__.py +34 -0
  743. ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +34 -0
  744. ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +35 -0
  745. ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +225 -0
  746. ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +73 -0
  747. ads/opctl/operator/lowcode/pii/model/processor/remover.py +26 -0
  748. ads/opctl/operator/lowcode/pii/model/report.py +487 -0
  749. ads/opctl/operator/lowcode/pii/operator_config.py +95 -0
  750. ads/opctl/operator/lowcode/pii/schema.yaml +108 -0
  751. ads/opctl/operator/lowcode/pii/utils.py +43 -0
  752. ads/opctl/operator/lowcode/recommender/MLoperator +16 -0
  753. ads/opctl/operator/lowcode/recommender/README.md +206 -0
  754. ads/opctl/operator/lowcode/recommender/__init__.py +5 -0
  755. ads/opctl/operator/lowcode/recommender/__main__.py +82 -0
  756. ads/opctl/operator/lowcode/recommender/cmd.py +33 -0
  757. ads/opctl/operator/lowcode/recommender/constant.py +30 -0
  758. ads/opctl/operator/lowcode/recommender/environment.yaml +11 -0
  759. ads/opctl/operator/lowcode/recommender/model/base_model.py +212 -0
  760. ads/opctl/operator/lowcode/recommender/model/factory.py +56 -0
  761. ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +25 -0
  762. ads/opctl/operator/lowcode/recommender/model/svd.py +106 -0
  763. ads/opctl/operator/lowcode/recommender/operator_config.py +81 -0
  764. ads/opctl/operator/lowcode/recommender/schema.yaml +265 -0
  765. ads/opctl/operator/lowcode/recommender/utils.py +13 -0
  766. ads/opctl/operator/runtime/__init__.py +5 -0
  767. ads/opctl/operator/runtime/const.py +17 -0
  768. ads/opctl/operator/runtime/container_runtime_schema.yaml +50 -0
  769. ads/opctl/operator/runtime/marketplace_runtime.py +50 -0
  770. ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +21 -0
  771. ads/opctl/operator/runtime/python_runtime_schema.yaml +21 -0
  772. ads/opctl/operator/runtime/runtime.py +115 -0
  773. ads/opctl/schema.yaml.yml +36 -0
  774. ads/opctl/script.py +40 -0
  775. ads/opctl/spark/__init__.py +5 -0
  776. ads/opctl/spark/cli.py +43 -0
  777. ads/opctl/spark/cmds.py +147 -0
  778. ads/opctl/templates/diagnostic_report_template.jinja2 +102 -0
  779. ads/opctl/utils.py +344 -0
  780. ads/oracledb/__init__.py +5 -0
  781. ads/oracledb/oracle_db.py +346 -0
  782. ads/pipeline/__init__.py +39 -0
  783. ads/pipeline/ads_pipeline.py +2279 -0
  784. ads/pipeline/ads_pipeline_run.py +772 -0
  785. ads/pipeline/ads_pipeline_step.py +605 -0
  786. ads/pipeline/builders/__init__.py +5 -0
  787. ads/pipeline/builders/infrastructure/__init__.py +5 -0
  788. ads/pipeline/builders/infrastructure/custom_script.py +32 -0
  789. ads/pipeline/cli.py +119 -0
  790. ads/pipeline/extension.py +291 -0
  791. ads/pipeline/schema/__init__.py +5 -0
  792. ads/pipeline/schema/cs_step_schema.json +35 -0
  793. ads/pipeline/schema/ml_step_schema.json +31 -0
  794. ads/pipeline/schema/pipeline_schema.json +71 -0
  795. ads/pipeline/visualizer/__init__.py +5 -0
  796. ads/pipeline/visualizer/base.py +570 -0
  797. ads/pipeline/visualizer/graph_renderer.py +272 -0
  798. ads/pipeline/visualizer/text_renderer.py +84 -0
  799. ads/secrets/__init__.py +11 -0
  800. ads/secrets/adb.py +386 -0
  801. ads/secrets/auth_token.py +86 -0
  802. ads/secrets/big_data_service.py +365 -0
  803. ads/secrets/mysqldb.py +149 -0
  804. ads/secrets/oracledb.py +160 -0
  805. ads/secrets/secrets.py +407 -0
  806. ads/telemetry/__init__.py +7 -0
  807. ads/telemetry/base.py +69 -0
  808. ads/telemetry/client.py +122 -0
  809. ads/telemetry/telemetry.py +257 -0
  810. ads/templates/dataflow_pyspark.jinja2 +13 -0
  811. ads/templates/dataflow_sparksql.jinja2 +22 -0
  812. ads/templates/func.jinja2 +20 -0
  813. ads/templates/schemas/openapi.json +1740 -0
  814. ads/templates/score-pkl.jinja2 +173 -0
  815. ads/templates/score.jinja2 +322 -0
  816. ads/templates/score_embedding_onnx.jinja2 +202 -0
  817. ads/templates/score_generic.jinja2 +165 -0
  818. ads/templates/score_huggingface_pipeline.jinja2 +217 -0
  819. ads/templates/score_lightgbm.jinja2 +185 -0
  820. ads/templates/score_onnx.jinja2 +407 -0
  821. ads/templates/score_onnx_new.jinja2 +473 -0
  822. ads/templates/score_oracle_automl.jinja2 +185 -0
  823. ads/templates/score_pyspark.jinja2 +154 -0
  824. ads/templates/score_pytorch.jinja2 +219 -0
  825. ads/templates/score_scikit-learn.jinja2 +184 -0
  826. ads/templates/score_tensorflow.jinja2 +184 -0
  827. ads/templates/score_xgboost.jinja2 +178 -0
  828. ads/text_dataset/__init__.py +5 -0
  829. ads/text_dataset/backends.py +211 -0
  830. ads/text_dataset/dataset.py +445 -0
  831. ads/text_dataset/extractor.py +207 -0
  832. ads/text_dataset/options.py +53 -0
  833. ads/text_dataset/udfs.py +22 -0
  834. ads/text_dataset/utils.py +49 -0
  835. ads/type_discovery/__init__.py +9 -0
  836. ads/type_discovery/abstract_detector.py +21 -0
  837. ads/type_discovery/constant_detector.py +41 -0
  838. ads/type_discovery/continuous_detector.py +54 -0
  839. ads/type_discovery/credit_card_detector.py +99 -0
  840. ads/type_discovery/datetime_detector.py +92 -0
  841. ads/type_discovery/discrete_detector.py +118 -0
  842. ads/type_discovery/document_detector.py +146 -0
  843. ads/type_discovery/ip_detector.py +68 -0
  844. ads/type_discovery/latlon_detector.py +90 -0
  845. ads/type_discovery/phone_number_detector.py +63 -0
  846. ads/type_discovery/type_discovery_driver.py +87 -0
  847. ads/type_discovery/typed_feature.py +594 -0
  848. ads/type_discovery/unknown_detector.py +41 -0
  849. ads/type_discovery/zipcode_detector.py +48 -0
  850. ads/vault/__init__.py +7 -0
  851. ads/vault/vault.py +237 -0
  852. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/METADATA +150 -149
  853. oracle_ads-2.13.10.dist-info/RECORD +858 -0
  854. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/WHEEL +1 -2
  855. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/entry_points.txt +2 -1
  856. oracle_ads-2.13.9rc0.dist-info/RECORD +0 -9
  857. oracle_ads-2.13.9rc0.dist-info/top_level.txt +0 -1
  858. {oracle_ads-2.13.9rc0.dist-info → oracle_ads-2.13.10.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8; -*-
3
+
4
+ # Copyright (c) 2021, 2022 Oracle and/or its affiliates.
5
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
+
7
+ import re
8
+ from typing import Callable
9
+
10
+
11
+ class UDF:
12
+ @staticmethod
13
+ def from_regex(regex: str) -> Callable:
14
+ def function(content):
15
+ match = re.match(regex, content)
16
+ if match:
17
+ if len(match.groups()) == 0:
18
+ return [match.group(0)]
19
+ else:
20
+ return match.groups()
21
+
22
+ return function
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8; -*-
3
+
4
+ # Copyright (c) 2021, 2022 Oracle and/or its affiliates.
5
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
+
7
+ import functools
8
+ import os
9
+
10
+
11
+ class NotSupportedError(Exception): # pragma: no cover
12
+ pass
13
+
14
+
15
+ def experimental(cls):
16
+ @functools.wraps(cls)
17
+ def wrapper(*args, **kwargs):
18
+ instance = cls(*args, **kwargs)
19
+ print(f"{cls.__name__} is experimental and may be removed in the future.")
20
+ return instance
21
+
22
+ return wrapper
23
+
24
+
25
+ class PY4JGateway:
26
+ def __init__(self) -> None:
27
+ try:
28
+ from py4j.java_gateway import GatewayParameters, JavaGateway, launch_gateway
29
+ except ModuleNotFoundError:
30
+ raise ModuleNotFoundError("py4j is not installed.")
31
+ if "CONDA_PREFIX" not in os.environ or not os.path.exists(
32
+ os.path.join(os.environ.get("CONDA_PREFIX"), "text-extraction-tools.jar")
33
+ ):
34
+ raise NotSupportedError(
35
+ "Tika is not supported in this distribution. Use alternatives such as pdfplumber."
36
+ )
37
+ port = launch_gateway(
38
+ java_path="/usr/bin/java",
39
+ classpath=os.path.join(
40
+ os.environ.get("CONDA_PREFIX"), "text-extraction-tools.jar"
41
+ ),
42
+ )
43
+ self.gateway = JavaGateway(gateway_parameters=GatewayParameters(port=port))
44
+
45
+ def __enter__(self) -> None:
46
+ return self.gateway
47
+
48
+ def __exit__(self, exc_type, exc_val, exc_tb) -> None:
49
+ self.gateway.shutdown()
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*--
3
+
4
+ # Copyright (c) 2020, 2022 Oracle and/or its affiliates.
5
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
+
7
+ import logging
8
+
9
+ logger = logging.getLogger(__name__)
@@ -0,0 +1,21 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8; -*-
3
+
4
+ # Copyright (c) 2020, 2022 Oracle and/or its affiliates.
5
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
+
7
+ from __future__ import print_function, absolute_import
8
+
9
+ import abc
10
+
11
+
12
+ class AbstractTypeDiscoveryDetector(metaclass=abc.ABCMeta):
13
+ @abc.abstractmethod
14
+ def discover(self, name, series):
15
+ return
16
+
17
+
18
+ class DiscreteDiscoveryDetector(AbstractTypeDiscoveryDetector, metaclass=abc.ABCMeta):
19
+ @abc.abstractmethod
20
+ def discover(self, name, series):
21
+ return
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8; -*-
3
+
4
+ # Copyright (c) 2020, 2022 Oracle and/or its affiliates.
5
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
+
7
+ from __future__ import print_function, absolute_import, division
8
+
9
+ import pandas as pd
10
+
11
+ from ads.type_discovery import logger
12
+ from ads.type_discovery.abstract_detector import AbstractTypeDiscoveryDetector
13
+ from ads.type_discovery.typed_feature import ConstantTypedFeature
14
+
15
+
16
+ class ConstantDetector(AbstractTypeDiscoveryDetector):
17
+ def is_constant(self, name, values):
18
+ #
19
+ # if all the values are null we treat this as a const feature
20
+ #
21
+
22
+ return values.size == 0 or values.nunique() == 1
23
+
24
+ def discover(self, name, series):
25
+ candidates = series.loc[~series.isnull()]
26
+
27
+ if self.is_constant(name, candidates):
28
+ logger.debug("column [{}]/[{}] Constant".format(name, series.dtype))
29
+ return ConstantTypedFeature.build(name, series)
30
+
31
+ return False
32
+
33
+
34
+ if __name__ == "__main__":
35
+ dd = ConstantDetector()
36
+ print(dd.discover("zipcodes", pd.Series([None, "94065", "94065", "94065", None])))
37
+ print(dd.discover("years", pd.Series([2008, 2008, 2008, 2008, 2008])))
38
+ df = pd.read_csv(
39
+ "https://artifacthub.oraclecorp.com/dsc-generic/advanced-ds/datasets/flights.csv"
40
+ )
41
+ print(dd.discover("flights_years", df["Year"]))
@@ -0,0 +1,54 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8; -*-
3
+
4
+ # Copyright (c) 2020, 2022 Oracle and/or its affiliates.
5
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
+
7
+
8
+ from __future__ import print_function, absolute_import, division
9
+
10
+ import pandas as pd
11
+
12
+ from ads.type_discovery import logger
13
+ from ads.type_discovery.abstract_detector import AbstractTypeDiscoveryDetector
14
+ from ads.type_discovery.typed_feature import ContinuousTypedFeature
15
+ from ads.common import utils
16
+
17
+
18
+ class ContinuousDetector(AbstractTypeDiscoveryDetector):
19
+ @staticmethod
20
+ def _target_is_continuous(series):
21
+ if str(series.dtype) in ["float16", "float32", "float64"]:
22
+ return True # treat target variable as continuous
23
+ elif str(series.dtype) in ["int16", "int32", "int64"]:
24
+ if series.nunique() >= 20:
25
+ return True # treat target variable as continuous
26
+
27
+ return False
28
+
29
+ def _is_continuous(self, series):
30
+ if series.dtype.name in ["object"]:
31
+ try:
32
+ series.astype("float")
33
+ return True
34
+ except:
35
+ pass
36
+
37
+ if series.dtype.name in utils.numeric_pandas_dtypes():
38
+ #
39
+ # if the type is float we simply beleive pandas and go with continuous
40
+ #
41
+ return True
42
+
43
+ def discover(self, name, series):
44
+
45
+ if self._is_continuous(series):
46
+ logger.debug("column [{}]/[{}] continuous".format(name, series.dtype))
47
+ return ContinuousTypedFeature.build(name, series)
48
+
49
+ return False
50
+
51
+
52
+ if __name__ == "__main__":
53
+ dd = ContinuousDetector()
54
+ print(dd.discover("continuous", pd.Series([None, 3.14, 12.0, 1, 2, 3, None])))
@@ -0,0 +1,99 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8; -*-
3
+
4
+ # Copyright (c) 2020, 2022 Oracle and/or its affiliates.
5
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
+
7
+ """
8
+ NOTE:
9
+
10
+ There's an opportunity here to generate a new feature, credict card numbers are not preditive because they
11
+ don't generalize, however, if the feature is replaced by the type of card that might be predictive.
12
+
13
+ - Visa: ^4[0-9]{12}(?:[0-9]{3})?$ All Visa card numbers start with a 4. New cards have 16 digits. Old cards have 13.
14
+ - MasterCard: ^(?:5[1-5][0-9]{2}|222[1-9]|22[3-9][0-9]|2[3-6][0-9]{2}|27[01][0-9]|2720)[0-9]{12}$ MasterCard numbers
15
+ either start with the numbers 51 through 55 or with the numbers 2221 through 2720. All have 16 digits.
16
+ - American Express: ^3[47][0-9]{13}$ American Express card numbers start with 34 or 37 and have 15 digits.
17
+ - Diners Club: ^3(?:0[0-5]|[68][0-9])[0-9]{11}$ Diners Club card numbers begin with 300 through 305, 36 or 38.
18
+ All have 14 digits. There are Diners Club cards that begin with 5 and have 16 digits. These are a joint
19
+ venture between Diners Club and MasterCard, and should be processed like a MasterCard.
20
+ - Discover: ^6(?:011|5[0-9]{2})[0-9]{12}$ Discover card numbers begin with 6011 or 65. All have 16 digits.
21
+ - JCB: ^(?:2131|1800|35\d{3})\d{11}$ JCB cards beginning with 2131 or 1800 have 15 digits.
22
+ JCB cards beginning with 35 have 16 digits.
23
+
24
+ """
25
+
26
+ from __future__ import print_function, absolute_import, division
27
+
28
+ import re
29
+
30
+ import pandas as pd
31
+
32
+ from ads.type_discovery import logger
33
+ from ads.type_discovery.abstract_detector import AbstractTypeDiscoveryDetector
34
+ from ads.type_discovery.typed_feature import CreditCardTypedFeature
35
+
36
+
37
+ class CreditCardDetector(AbstractTypeDiscoveryDetector):
38
+
39
+ _max_sample_size_to_luhn_check = 1000
40
+ _pattern_string = r"""^(?:4[0-9]{12}(?:[0-9]{3})? # Visa
41
+ | (?:5[1-5][0-9]{2} # MasterCard
42
+ | 222[1-9]|22[3-9][0-9]|2[3-6][0-9]{2}|27[01][0-9]|2720)[0-9]{12}
43
+ | 3[47][0-9]{13} # American Express
44
+ | 3(?:0[0-5]|[68][0-9])[0-9]{11} # Diners Club
45
+ | 6(?:011|5[0-9]{2})[0-9]{12} # Discover
46
+ | (?:2131|1800|35\d{3})\d{11} # JCB
47
+ | (5018|5020|5038|5612|5893|6304|6759|6761|6762|6763|0604|6390)\d+$ # Maestro
48
+ | ^(5[06789]|6)[0-9]{0,}$ # Maestro
49
+ | ^4[0-9]{12}(?:[0-9]{6})?$ #Visa 19 digit
50
+ )$"""
51
+
52
+ def luhn_checksum(self, card_number):
53
+ def digits_of(n):
54
+ return [int(d) for d in str(n)]
55
+
56
+ digits = digits_of(card_number)
57
+ odd_digits = digits[-1::-2]
58
+ even_digits = digits[-2::-2]
59
+ checksum = 0
60
+ checksum += sum(odd_digits)
61
+ for d in even_digits:
62
+ checksum += sum(digits_of(d * 2))
63
+ return checksum % 10
64
+
65
+ def is_luhn_valid(self, card_number):
66
+ return self.luhn_checksum(card_number) == 0
67
+
68
+ def is_credit_card(self, name, values):
69
+ cc = re.compile(CreditCardDetector._pattern_string, re.VERBOSE)
70
+ # since the nulls have been previously filtered we can safely do "all"
71
+ samp = (
72
+ values
73
+ if values.size <= CreditCardDetector._max_sample_size_to_luhn_check
74
+ else values.sample(n=CreditCardDetector._max_sample_size_to_luhn_check)
75
+ )
76
+
77
+ if samp.dtype.name in ["float16", "float32", "float64"]:
78
+ if samp.apply(float.is_integer).all():
79
+ samp = samp.fillna(0.0).astype(int)
80
+
81
+ if samp.dtype.name in ["int16", "int32", "int64"]:
82
+ samp = samp.astype(str)
83
+
84
+ if all([cc.match(str(x)) for x in samp]):
85
+ #
86
+ # iff the pattern matching succeeds do we try the luhn algorithm on a sample
87
+ #
88
+ return all([self.is_luhn_valid(x) for x in samp])
89
+
90
+ return False
91
+
92
+ def discover(self, name, series):
93
+ candidates = series.loc[~series.isnull()]
94
+
95
+ if self.is_credit_card(name, candidates.head(1000)):
96
+ logger.debug("column [{}]/[{}] credit card".format(name, series.dtype))
97
+ return CreditCardTypedFeature.build(name, series)
98
+
99
+ return False
@@ -0,0 +1,92 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8; -*-
3
+
4
+ # Copyright (c) 2020, 2022 Oracle and/or its affiliates.
5
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
+
7
+
8
+ from __future__ import print_function, absolute_import, division
9
+
10
+ import pandas as pd
11
+
12
+ from ads.common.decorator.runtime_dependency import (
13
+ runtime_dependency,
14
+ OptionalDependency,
15
+ )
16
+ from ads.type_discovery import logger
17
+ from ads.type_discovery.abstract_detector import AbstractTypeDiscoveryDetector
18
+ from ads.type_discovery.typed_feature import DateTimeTypedFeature
19
+
20
+
21
+ class DateTimeDetector(AbstractTypeDiscoveryDetector):
22
+ @runtime_dependency(module="datefinder", install_from=OptionalDependency.DATA)
23
+ def _is_date_time(self, name, values, low_level_type_name):
24
+ if low_level_type_name.startswith("datetime64"):
25
+ return lambda x: x
26
+ else:
27
+ #
28
+ # if the column/feature contains the word "timestamp" then
29
+ #
30
+ if low_level_type_name.startswith("int") and "timestamp" in name.lower():
31
+ # either s (max len 10) on ns (max len) 19
32
+ unit = "s" if values.astype("str").str.len().max() <= 10 else "ns"
33
+ try:
34
+ pd.to_datetime(values, unit=unit)
35
+ return lambda x: pd.to_datetime(x, unit=unit)
36
+ except:
37
+ pass
38
+ if values.dtype == "object":
39
+ try:
40
+ pd.to_datetime(values, infer_datetime_format=True)
41
+ datefinder_result = all(
42
+ [bool(list(datefinder.find_dates(str(x)))) for x in values]
43
+ )
44
+ if datefinder_result:
45
+ return lambda x: pd.to_datetime(x, infer_datetime_format=True)
46
+ except:
47
+ pass
48
+
49
+ return None
50
+
51
+ def discover(self, name, series):
52
+ candidates = series.loc[~series.isnull()]
53
+ fn = self._is_date_time(name, candidates.head(500), series.dtype.name)
54
+ if fn:
55
+ logger.debug("column [{}]/[{}] datetime".format(name, series.dtype))
56
+ return DateTimeTypedFeature.build(name, fn(series))
57
+
58
+ return False
59
+
60
+
61
+ if __name__ == "__main__":
62
+ dd = DateTimeDetector()
63
+ print(
64
+ dd.discover(
65
+ "date-range",
66
+ pd.Series(pd.date_range(start="1/1/2018", end="1/08/2018", freq="H")),
67
+ )
68
+ )
69
+ print(dd.discover("dates", pd.Series(["12/12/12", "12/12/13", None, "12/12/14"])))
70
+ print(
71
+ dd.discover(
72
+ "dates-with-other-values",
73
+ pd.Series(["12/12/12", "Monday", None, "12/12/14"]),
74
+ )
75
+ )
76
+ print(
77
+ dd.discover(
78
+ "timestamp s",
79
+ pd.Series(
80
+ [
81
+ 978300760,
82
+ 978302109,
83
+ 978301968,
84
+ 978300275,
85
+ 978824291,
86
+ 978302268,
87
+ 978302039,
88
+ ]
89
+ ),
90
+ )
91
+ )
92
+ print(dd.discover("timestamp ns", pd.Series([1490195805433502912])))
@@ -0,0 +1,118 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8; -*-
3
+
4
+ # Copyright (c) 2020, 2022 Oracle and/or its affiliates.
5
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
+
7
+ from __future__ import print_function, absolute_import, division
8
+
9
+ import pandas as pd
10
+ from sklearn.utils.multiclass import type_of_target
11
+
12
+ from ads.type_discovery import logger
13
+ from ads.type_discovery.abstract_detector import DiscreteDiscoveryDetector
14
+ from ads.type_discovery.typed_feature import (
15
+ OrdinalTypedFeature,
16
+ CategoricalTypedFeature,
17
+ )
18
+ from ads.common import utils
19
+
20
+
21
+ class DiscreteDetector(DiscreteDiscoveryDetector):
22
+
23
+ _max_categorical_values = 100
24
+
25
+ def _get_categorical_or_ordinal(self, name, series):
26
+ #
27
+ # categoricals are unordered discreet types
28
+ # ordinals are ordered discreet (int) types
29
+ #
30
+
31
+ low_level_type_name = series.dtype.name
32
+
33
+ if low_level_type_name == "category" or low_level_type_name == "bool":
34
+ return "categorical"
35
+
36
+ else:
37
+ #
38
+ # after removing nulls the new Series might already be categorical
39
+ #
40
+ nulls_removed = pd.Series(list(series.loc[~series.isna()]))
41
+ if (
42
+ nulls_removed.dtype.name == "category"
43
+ or nulls_removed.dtype.name == "bool"
44
+ ):
45
+ return "categorical"
46
+
47
+ count_distinct = series.nunique()
48
+ observations = series.size
49
+
50
+ tot = type_of_target(
51
+ list(nulls_removed.head(min(nulls_removed.size, 2000)))
52
+ )
53
+
54
+ if tot == "binary":
55
+ return "categorical"
56
+
57
+ elif tot == "multiclass":
58
+ if count_distinct <= DiscreteDetector._max_categorical_values:
59
+ if low_level_type_name in utils.numeric_pandas_dtypes():
60
+ return "ordinal"
61
+ else:
62
+ return "categorical"
63
+
64
+ if low_level_type_name.startswith(
65
+ "int"
66
+ ) or low_level_type_name.startswith("float"):
67
+ if nulls_removed.min() >= 0:
68
+ if (
69
+ low_level_type_name.startswith("int")
70
+ or nulls_removed.sum()
71
+ == nulls_removed.astype("int64").sum()
72
+ ):
73
+ return "ordinal"
74
+ # by summing all the values and summing all the int values we can know all the values are integers
75
+
76
+ return False
77
+
78
+ def discover(self, name, series):
79
+
80
+ guessed_type = self._get_categorical_or_ordinal(
81
+ name, series.loc[~series.isnull()]
82
+ )
83
+
84
+ if guessed_type == "categorical":
85
+ logger.debug("column [{}]/[{}] categorical".format(name, series.dtype))
86
+ return CategoricalTypedFeature.build(name, series)
87
+ elif guessed_type == "ordinal":
88
+ logger.debug("column [{}]/[{}] ordinal".format(name, series.dtype))
89
+ return OrdinalTypedFeature.build(name, series)
90
+ else:
91
+ return False
92
+
93
+
94
+ if __name__ == "__main__":
95
+ dd = DiscreteDetector()
96
+
97
+ print(
98
+ dd.discover(
99
+ "str-categorical",
100
+ pd.Series(["a", "a", "a", "b", "c", "a"], dtype="category"),
101
+ )
102
+ )
103
+ print(
104
+ dd.discover(
105
+ "bool-categorical", pd.Series([True, False, True, True, True, None, True])
106
+ )
107
+ )
108
+ print(dd.discover("continuous", pd.Series([None, 3.14, 12.0, 1, 2, 3, None])))
109
+ print(
110
+ dd.discover("int-1-categorical", pd.Series([1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 9]))
111
+ )
112
+ print(dd.discover("int-2-categorical", pd.Series([1, 1, 1, 5, 9])))
113
+ print(dd.discover("real-3-categorical", pd.Series([1.0, 2.0, 3.0, 1.0, 4.0, 5.0])))
114
+ print(
115
+ dd.discover(
116
+ "bool-categorical", pd.Series([True, False, True, True, True, None, True])
117
+ )
118
+ )
@@ -0,0 +1,146 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8; -*-
3
+
4
+ # Copyright (c) 2020, 2022 Oracle and/or its affiliates.
5
+ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
+
7
+
8
+ from __future__ import print_function, absolute_import, division
9
+
10
+ import re
11
+
12
+ import pandas as pd
13
+
14
+ from ads.type_discovery import logger
15
+ from ads.type_discovery.abstract_detector import AbstractTypeDiscoveryDetector
16
+ from ads.type_discovery.typed_feature import DocumentTypedFeature, AddressTypedFeature
17
+
18
+
19
+ class DocumentDetector(AbstractTypeDiscoveryDetector):
20
+
21
+ _min_cjk_chars_for_document = 100
22
+ _min_words = 10
23
+ _min_html_tags = 5
24
+
25
+ _html_pattern = re.compile("<.*?>")
26
+
27
+ _unicode_ranges = [
28
+ {"from": ord(u"\u3300"), "to": ord(u"\u33ff")}, # compatibility ideographs
29
+ {"from": ord(u"\ufe30"), "to": ord(u"\ufe4f")}, # compatibility ideographs
30
+ {"from": ord(u"\uf900"), "to": ord(u"\ufaff")}, # compatibility ideographs
31
+ {
32
+ "from": ord(u"\U0002F800"),
33
+ "to": ord(u"\U0002fa1f"),
34
+ }, # compatibility ideographs
35
+ {"from": ord(u"\u3040"), "to": ord(u"\u309f")}, # Japanese Hiragana
36
+ {"from": ord(u"\u30a0"), "to": ord(u"\u30ff")}, # Japanese Katakana
37
+ {"from": ord(u"\u2e80"), "to": ord(u"\u2eff")}, # cjk radicals supplement
38
+ {"from": ord(u"\u4e00"), "to": ord(u"\u9fff")},
39
+ {"from": ord(u"\u3400"), "to": ord(u"\u4dbf")},
40
+ {"from": ord(u"\U00020000"), "to": ord(u"\U0002a6df")},
41
+ {"from": ord(u"\U0002a700"), "to": ord(u"\U0002b73f")},
42
+ {"from": ord(u"\U0002b740"), "to": ord(u"\U0002b81f")},
43
+ {
44
+ "from": ord(u"\U0002b820"),
45
+ "to": ord(u"\U0002ceaf"),
46
+ }, # included as of Unicode 8.0
47
+ ]
48
+
49
+ def _is_cjk_char(self, char):
50
+ return any(
51
+ [
52
+ range["from"] <= ord(char) <= range["to"]
53
+ for range in DocumentDetector._unicode_ranges
54
+ ]
55
+ )
56
+
57
+ def cjk_string(self, document):
58
+ cjk_char_count = sum([1 if self._is_cjk_char(c) else 0 for c in document])
59
+ return cjk_char_count / len(document) >= 0.2
60
+
61
+ def html_document(self, document):
62
+ return (
63
+ len(list(re.finditer(DocumentDetector._html_pattern, document)))
64
+ > DocumentDetector._min_html_tags
65
+ )
66
+
67
+ def discover(self, name, series):
68
+ #
69
+ # very basic detection of a document. If the document is CJK then we use only the document length
70
+ # otherwise we split on whitespace and confirm that there are word-like strings
71
+ #
72
+ if series.dtype == "object":
73
+ null_series = series.loc[~series.isnull()]
74
+ first_non_null_document = null_series.iloc[0]
75
+
76
+ if isinstance(first_non_null_document, str):
77
+
78
+ is_html = self.html_document(first_non_null_document)
79
+
80
+ if self.cjk_string(first_non_null_document):
81
+ if (
82
+ len(first_non_null_document)
83
+ >= DocumentDetector._min_cjk_chars_for_document
84
+ ):
85
+ tf = DocumentTypedFeature.build(
86
+ name, series, is_cjk=True, is_html=is_html
87
+ )
88
+ logger.debug(
89
+ "type discovery on CJK column [{}]/[{}] found to be a document".format(
90
+ name, series.dtype
91
+ )
92
+ )
93
+ return tf
94
+ else:
95
+ # find rows with above average length
96
+ above_avg_series = null_series.loc[
97
+ null_series.str.len() >= null_series.str.len().mean()
98
+ ]
99
+
100
+ # take a sample. max 500 docs
101
+ above_avg_series_sample = above_avg_series.sample(
102
+ n=min(500, len(above_avg_series))
103
+ )
104
+
105
+ # if all of the samples have more than min_words tokens..
106
+ mean_number_of_words = (
107
+ above_avg_series_sample.str.split().str.len().mean()
108
+ )
109
+ if mean_number_of_words > DocumentDetector._min_words:
110
+ if (
111
+ mean_number_of_words < 15
112
+ and above_avg_series_sample.str.count(",").mean()
113
+ / mean_number_of_words
114
+ > 0.1
115
+ ):
116
+ # many commas probably means address type
117
+ logger.debug(
118
+ "type discovery on column [{}]/[{}] looks like an address type".format(
119
+ name, series.dtype
120
+ )
121
+ )
122
+ return AddressTypedFeature.build(name, series)
123
+ else:
124
+ logger.debug(
125
+ "type discovery on non-CJK column [{}]/[{}] found to be a document".format(
126
+ name, series.dtype
127
+ )
128
+ )
129
+
130
+ # previous check of first document for HTML is now refined using longer documents
131
+ is_html = all(
132
+ [
133
+ self.html_document(doc)
134
+ for doc in above_avg_series_sample
135
+ ]
136
+ )
137
+
138
+ return DocumentTypedFeature.build(
139
+ name, series, is_cjk=False, is_html=is_html
140
+ )
141
+
142
+ return False
143
+
144
+
145
+ if __name__ == "__main__":
146
+ dd = DocumentDetector()