oracle-ads 2.13.8__py3-none-any.whl → 2.13.9rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (857) hide show
  1. {oracle_ads-2.13.8.dist-info → oracle_ads-2.13.9rc0.dist-info}/METADATA +151 -151
  2. oracle_ads-2.13.9rc0.dist-info/RECORD +9 -0
  3. {oracle_ads-2.13.8.dist-info → oracle_ads-2.13.9rc0.dist-info}/WHEEL +2 -1
  4. {oracle_ads-2.13.8.dist-info → oracle_ads-2.13.9rc0.dist-info}/entry_points.txt +1 -2
  5. oracle_ads-2.13.9rc0.dist-info/top_level.txt +1 -0
  6. ads/aqua/__init__.py +0 -40
  7. ads/aqua/app.py +0 -506
  8. ads/aqua/cli.py +0 -96
  9. ads/aqua/client/__init__.py +0 -3
  10. ads/aqua/client/client.py +0 -836
  11. ads/aqua/client/openai_client.py +0 -305
  12. ads/aqua/common/__init__.py +0 -5
  13. ads/aqua/common/decorator.py +0 -125
  14. ads/aqua/common/entities.py +0 -266
  15. ads/aqua/common/enums.py +0 -122
  16. ads/aqua/common/errors.py +0 -109
  17. ads/aqua/common/utils.py +0 -1285
  18. ads/aqua/config/__init__.py +0 -4
  19. ads/aqua/config/container_config.py +0 -248
  20. ads/aqua/config/evaluation/__init__.py +0 -4
  21. ads/aqua/config/evaluation/evaluation_service_config.py +0 -147
  22. ads/aqua/config/utils/__init__.py +0 -4
  23. ads/aqua/config/utils/serializer.py +0 -339
  24. ads/aqua/constants.py +0 -114
  25. ads/aqua/data.py +0 -14
  26. ads/aqua/dummy_data/icon.txt +0 -1
  27. ads/aqua/dummy_data/oci_model_deployments.json +0 -56
  28. ads/aqua/dummy_data/oci_models.json +0 -1
  29. ads/aqua/dummy_data/readme.md +0 -26
  30. ads/aqua/evaluation/__init__.py +0 -8
  31. ads/aqua/evaluation/constants.py +0 -53
  32. ads/aqua/evaluation/entities.py +0 -186
  33. ads/aqua/evaluation/errors.py +0 -70
  34. ads/aqua/evaluation/evaluation.py +0 -1814
  35. ads/aqua/extension/__init__.py +0 -42
  36. ads/aqua/extension/aqua_ws_msg_handler.py +0 -76
  37. ads/aqua/extension/base_handler.py +0 -90
  38. ads/aqua/extension/common_handler.py +0 -121
  39. ads/aqua/extension/common_ws_msg_handler.py +0 -36
  40. ads/aqua/extension/deployment_handler.py +0 -298
  41. ads/aqua/extension/deployment_ws_msg_handler.py +0 -54
  42. ads/aqua/extension/errors.py +0 -30
  43. ads/aqua/extension/evaluation_handler.py +0 -129
  44. ads/aqua/extension/evaluation_ws_msg_handler.py +0 -61
  45. ads/aqua/extension/finetune_handler.py +0 -96
  46. ads/aqua/extension/model_handler.py +0 -390
  47. ads/aqua/extension/models/__init__.py +0 -0
  48. ads/aqua/extension/models/ws_models.py +0 -145
  49. ads/aqua/extension/models_ws_msg_handler.py +0 -50
  50. ads/aqua/extension/ui_handler.py +0 -282
  51. ads/aqua/extension/ui_websocket_handler.py +0 -130
  52. ads/aqua/extension/utils.py +0 -133
  53. ads/aqua/finetuning/__init__.py +0 -7
  54. ads/aqua/finetuning/constants.py +0 -23
  55. ads/aqua/finetuning/entities.py +0 -181
  56. ads/aqua/finetuning/finetuning.py +0 -731
  57. ads/aqua/model/__init__.py +0 -8
  58. ads/aqua/model/constants.py +0 -60
  59. ads/aqua/model/entities.py +0 -306
  60. ads/aqua/model/enums.py +0 -30
  61. ads/aqua/model/model.py +0 -2080
  62. ads/aqua/modeldeployment/__init__.py +0 -8
  63. ads/aqua/modeldeployment/constants.py +0 -10
  64. ads/aqua/modeldeployment/deployment.py +0 -1324
  65. ads/aqua/modeldeployment/entities.py +0 -653
  66. ads/aqua/modeldeployment/inference.py +0 -74
  67. ads/aqua/modeldeployment/utils.py +0 -543
  68. ads/aqua/resources/gpu_shapes_index.json +0 -94
  69. ads/aqua/server/__init__.py +0 -4
  70. ads/aqua/server/__main__.py +0 -24
  71. ads/aqua/server/app.py +0 -47
  72. ads/aqua/server/aqua_spec.yml +0 -1291
  73. ads/aqua/training/__init__.py +0 -4
  74. ads/aqua/training/exceptions.py +0 -476
  75. ads/aqua/ui.py +0 -499
  76. ads/automl/__init__.py +0 -9
  77. ads/automl/driver.py +0 -330
  78. ads/automl/provider.py +0 -975
  79. ads/bds/__init__.py +0 -5
  80. ads/bds/auth.py +0 -127
  81. ads/bds/big_data_service.py +0 -255
  82. ads/catalog/__init__.py +0 -19
  83. ads/catalog/model.py +0 -1576
  84. ads/catalog/notebook.py +0 -461
  85. ads/catalog/project.py +0 -468
  86. ads/catalog/summary.py +0 -178
  87. ads/common/__init__.py +0 -11
  88. ads/common/analyzer.py +0 -65
  89. ads/common/artifact/.model-ignore +0 -63
  90. ads/common/artifact/__init__.py +0 -10
  91. ads/common/auth.py +0 -1122
  92. ads/common/card_identifier.py +0 -83
  93. ads/common/config.py +0 -647
  94. ads/common/data.py +0 -165
  95. ads/common/decorator/__init__.py +0 -9
  96. ads/common/decorator/argument_to_case.py +0 -88
  97. ads/common/decorator/deprecate.py +0 -69
  98. ads/common/decorator/require_nonempty_arg.py +0 -65
  99. ads/common/decorator/runtime_dependency.py +0 -178
  100. ads/common/decorator/threaded.py +0 -97
  101. ads/common/decorator/utils.py +0 -35
  102. ads/common/dsc_file_system.py +0 -303
  103. ads/common/error.py +0 -14
  104. ads/common/extended_enum.py +0 -81
  105. ads/common/function/__init__.py +0 -5
  106. ads/common/function/fn_util.py +0 -142
  107. ads/common/function/func_conf.yaml +0 -25
  108. ads/common/ipython.py +0 -76
  109. ads/common/model.py +0 -679
  110. ads/common/model_artifact.py +0 -1759
  111. ads/common/model_artifact_schema.json +0 -107
  112. ads/common/model_export_util.py +0 -664
  113. ads/common/model_metadata.py +0 -24
  114. ads/common/object_storage_details.py +0 -296
  115. ads/common/oci_client.py +0 -175
  116. ads/common/oci_datascience.py +0 -46
  117. ads/common/oci_logging.py +0 -1144
  118. ads/common/oci_mixin.py +0 -957
  119. ads/common/oci_resource.py +0 -136
  120. ads/common/serializer.py +0 -559
  121. ads/common/utils.py +0 -1852
  122. ads/common/word_lists.py +0 -1491
  123. ads/common/work_request.py +0 -189
  124. ads/data_labeling/__init__.py +0 -13
  125. ads/data_labeling/boundingbox.py +0 -253
  126. ads/data_labeling/constants.py +0 -47
  127. ads/data_labeling/data_labeling_service.py +0 -244
  128. ads/data_labeling/interface/__init__.py +0 -5
  129. ads/data_labeling/interface/loader.py +0 -16
  130. ads/data_labeling/interface/parser.py +0 -16
  131. ads/data_labeling/interface/reader.py +0 -23
  132. ads/data_labeling/loader/__init__.py +0 -5
  133. ads/data_labeling/loader/file_loader.py +0 -241
  134. ads/data_labeling/metadata.py +0 -110
  135. ads/data_labeling/mixin/__init__.py +0 -5
  136. ads/data_labeling/mixin/data_labeling.py +0 -232
  137. ads/data_labeling/ner.py +0 -129
  138. ads/data_labeling/parser/__init__.py +0 -5
  139. ads/data_labeling/parser/dls_record_parser.py +0 -388
  140. ads/data_labeling/parser/export_metadata_parser.py +0 -94
  141. ads/data_labeling/parser/export_record_parser.py +0 -473
  142. ads/data_labeling/reader/__init__.py +0 -5
  143. ads/data_labeling/reader/dataset_reader.py +0 -574
  144. ads/data_labeling/reader/dls_record_reader.py +0 -121
  145. ads/data_labeling/reader/export_record_reader.py +0 -62
  146. ads/data_labeling/reader/jsonl_reader.py +0 -75
  147. ads/data_labeling/reader/metadata_reader.py +0 -203
  148. ads/data_labeling/reader/record_reader.py +0 -263
  149. ads/data_labeling/record.py +0 -52
  150. ads/data_labeling/visualizer/__init__.py +0 -5
  151. ads/data_labeling/visualizer/image_visualizer.py +0 -525
  152. ads/data_labeling/visualizer/text_visualizer.py +0 -357
  153. ads/database/__init__.py +0 -5
  154. ads/database/connection.py +0 -338
  155. ads/dataset/__init__.py +0 -10
  156. ads/dataset/capabilities.md +0 -51
  157. ads/dataset/classification_dataset.py +0 -339
  158. ads/dataset/correlation.py +0 -226
  159. ads/dataset/correlation_plot.py +0 -563
  160. ads/dataset/dask_series.py +0 -173
  161. ads/dataset/dataframe_transformer.py +0 -110
  162. ads/dataset/dataset.py +0 -1979
  163. ads/dataset/dataset_browser.py +0 -360
  164. ads/dataset/dataset_with_target.py +0 -995
  165. ads/dataset/exception.py +0 -25
  166. ads/dataset/factory.py +0 -987
  167. ads/dataset/feature_engineering_transformer.py +0 -35
  168. ads/dataset/feature_selection.py +0 -107
  169. ads/dataset/forecasting_dataset.py +0 -26
  170. ads/dataset/helper.py +0 -1450
  171. ads/dataset/label_encoder.py +0 -99
  172. ads/dataset/mixin/__init__.py +0 -5
  173. ads/dataset/mixin/dataset_accessor.py +0 -134
  174. ads/dataset/pipeline.py +0 -58
  175. ads/dataset/plot.py +0 -710
  176. ads/dataset/progress.py +0 -86
  177. ads/dataset/recommendation.py +0 -297
  178. ads/dataset/recommendation_transformer.py +0 -502
  179. ads/dataset/regression_dataset.py +0 -14
  180. ads/dataset/sampled_dataset.py +0 -1050
  181. ads/dataset/target.py +0 -98
  182. ads/dataset/timeseries.py +0 -18
  183. ads/dbmixin/__init__.py +0 -5
  184. ads/dbmixin/db_pandas_accessor.py +0 -153
  185. ads/environment/__init__.py +0 -9
  186. ads/environment/ml_runtime.py +0 -66
  187. ads/evaluations/README.md +0 -14
  188. ads/evaluations/__init__.py +0 -109
  189. ads/evaluations/evaluation_plot.py +0 -983
  190. ads/evaluations/evaluator.py +0 -1334
  191. ads/evaluations/statistical_metrics.py +0 -543
  192. ads/experiments/__init__.py +0 -9
  193. ads/experiments/capabilities.md +0 -0
  194. ads/explanations/__init__.py +0 -21
  195. ads/explanations/base_explainer.py +0 -142
  196. ads/explanations/capabilities.md +0 -83
  197. ads/explanations/explainer.py +0 -190
  198. ads/explanations/mlx_global_explainer.py +0 -1050
  199. ads/explanations/mlx_interface.py +0 -386
  200. ads/explanations/mlx_local_explainer.py +0 -287
  201. ads/explanations/mlx_whatif_explainer.py +0 -201
  202. ads/feature_engineering/__init__.py +0 -20
  203. ads/feature_engineering/accessor/__init__.py +0 -5
  204. ads/feature_engineering/accessor/dataframe_accessor.py +0 -535
  205. ads/feature_engineering/accessor/mixin/__init__.py +0 -5
  206. ads/feature_engineering/accessor/mixin/correlation.py +0 -166
  207. ads/feature_engineering/accessor/mixin/eda_mixin.py +0 -266
  208. ads/feature_engineering/accessor/mixin/eda_mixin_series.py +0 -85
  209. ads/feature_engineering/accessor/mixin/feature_types_mixin.py +0 -211
  210. ads/feature_engineering/accessor/mixin/utils.py +0 -65
  211. ads/feature_engineering/accessor/series_accessor.py +0 -431
  212. ads/feature_engineering/adsimage/__init__.py +0 -5
  213. ads/feature_engineering/adsimage/image.py +0 -192
  214. ads/feature_engineering/adsimage/image_reader.py +0 -170
  215. ads/feature_engineering/adsimage/interface/__init__.py +0 -5
  216. ads/feature_engineering/adsimage/interface/reader.py +0 -19
  217. ads/feature_engineering/adsstring/__init__.py +0 -7
  218. ads/feature_engineering/adsstring/oci_language/__init__.py +0 -8
  219. ads/feature_engineering/adsstring/string/__init__.py +0 -8
  220. ads/feature_engineering/data_schema.json +0 -57
  221. ads/feature_engineering/dataset/__init__.py +0 -5
  222. ads/feature_engineering/dataset/zip_code_data.py +0 -42062
  223. ads/feature_engineering/exceptions.py +0 -40
  224. ads/feature_engineering/feature_type/__init__.py +0 -133
  225. ads/feature_engineering/feature_type/address.py +0 -184
  226. ads/feature_engineering/feature_type/adsstring/__init__.py +0 -5
  227. ads/feature_engineering/feature_type/adsstring/common_regex_mixin.py +0 -164
  228. ads/feature_engineering/feature_type/adsstring/oci_language.py +0 -93
  229. ads/feature_engineering/feature_type/adsstring/parsers/__init__.py +0 -5
  230. ads/feature_engineering/feature_type/adsstring/parsers/base.py +0 -47
  231. ads/feature_engineering/feature_type/adsstring/parsers/nltk_parser.py +0 -96
  232. ads/feature_engineering/feature_type/adsstring/parsers/spacy_parser.py +0 -221
  233. ads/feature_engineering/feature_type/adsstring/string.py +0 -258
  234. ads/feature_engineering/feature_type/base.py +0 -58
  235. ads/feature_engineering/feature_type/boolean.py +0 -183
  236. ads/feature_engineering/feature_type/category.py +0 -146
  237. ads/feature_engineering/feature_type/constant.py +0 -137
  238. ads/feature_engineering/feature_type/continuous.py +0 -151
  239. ads/feature_engineering/feature_type/creditcard.py +0 -314
  240. ads/feature_engineering/feature_type/datetime.py +0 -190
  241. ads/feature_engineering/feature_type/discrete.py +0 -134
  242. ads/feature_engineering/feature_type/document.py +0 -43
  243. ads/feature_engineering/feature_type/gis.py +0 -251
  244. ads/feature_engineering/feature_type/handler/__init__.py +0 -5
  245. ads/feature_engineering/feature_type/handler/feature_validator.py +0 -524
  246. ads/feature_engineering/feature_type/handler/feature_warning.py +0 -319
  247. ads/feature_engineering/feature_type/handler/warnings.py +0 -128
  248. ads/feature_engineering/feature_type/integer.py +0 -142
  249. ads/feature_engineering/feature_type/ip_address.py +0 -144
  250. ads/feature_engineering/feature_type/ip_address_v4.py +0 -138
  251. ads/feature_engineering/feature_type/ip_address_v6.py +0 -138
  252. ads/feature_engineering/feature_type/lat_long.py +0 -256
  253. ads/feature_engineering/feature_type/object.py +0 -43
  254. ads/feature_engineering/feature_type/ordinal.py +0 -132
  255. ads/feature_engineering/feature_type/phone_number.py +0 -135
  256. ads/feature_engineering/feature_type/string.py +0 -171
  257. ads/feature_engineering/feature_type/text.py +0 -93
  258. ads/feature_engineering/feature_type/unknown.py +0 -43
  259. ads/feature_engineering/feature_type/zip_code.py +0 -164
  260. ads/feature_engineering/feature_type_manager.py +0 -406
  261. ads/feature_engineering/schema.py +0 -795
  262. ads/feature_engineering/utils.py +0 -245
  263. ads/feature_store/.readthedocs.yaml +0 -19
  264. ads/feature_store/README.md +0 -65
  265. ads/feature_store/__init__.py +0 -9
  266. ads/feature_store/common/__init__.py +0 -0
  267. ads/feature_store/common/enums.py +0 -339
  268. ads/feature_store/common/exceptions.py +0 -18
  269. ads/feature_store/common/spark_session_singleton.py +0 -125
  270. ads/feature_store/common/utils/__init__.py +0 -0
  271. ads/feature_store/common/utils/base64_encoder_decoder.py +0 -72
  272. ads/feature_store/common/utils/feature_schema_mapper.py +0 -283
  273. ads/feature_store/common/utils/transformation_utils.py +0 -82
  274. ads/feature_store/common/utils/utility.py +0 -403
  275. ads/feature_store/data_validation/__init__.py +0 -0
  276. ads/feature_store/data_validation/great_expectation.py +0 -129
  277. ads/feature_store/dataset.py +0 -1230
  278. ads/feature_store/dataset_job.py +0 -530
  279. ads/feature_store/docs/Dockerfile +0 -7
  280. ads/feature_store/docs/Makefile +0 -44
  281. ads/feature_store/docs/conf.py +0 -28
  282. ads/feature_store/docs/requirements.txt +0 -14
  283. ads/feature_store/docs/source/ads.feature_store.query.rst +0 -20
  284. ads/feature_store/docs/source/cicd.rst +0 -137
  285. ads/feature_store/docs/source/conf.py +0 -86
  286. ads/feature_store/docs/source/data_versioning.rst +0 -33
  287. ads/feature_store/docs/source/dataset.rst +0 -388
  288. ads/feature_store/docs/source/dataset_job.rst +0 -27
  289. ads/feature_store/docs/source/demo.rst +0 -70
  290. ads/feature_store/docs/source/entity.rst +0 -78
  291. ads/feature_store/docs/source/feature_group.rst +0 -624
  292. ads/feature_store/docs/source/feature_group_job.rst +0 -29
  293. ads/feature_store/docs/source/feature_store.rst +0 -122
  294. ads/feature_store/docs/source/feature_store_class.rst +0 -123
  295. ads/feature_store/docs/source/feature_validation.rst +0 -66
  296. ads/feature_store/docs/source/figures/cicd.png +0 -0
  297. ads/feature_store/docs/source/figures/data_validation.png +0 -0
  298. ads/feature_store/docs/source/figures/data_versioning.png +0 -0
  299. ads/feature_store/docs/source/figures/dataset.gif +0 -0
  300. ads/feature_store/docs/source/figures/dataset.png +0 -0
  301. ads/feature_store/docs/source/figures/dataset_lineage.png +0 -0
  302. ads/feature_store/docs/source/figures/dataset_statistics.png +0 -0
  303. ads/feature_store/docs/source/figures/dataset_statistics_viz.png +0 -0
  304. ads/feature_store/docs/source/figures/dataset_validation_results.png +0 -0
  305. ads/feature_store/docs/source/figures/dataset_validation_summary.png +0 -0
  306. ads/feature_store/docs/source/figures/drift_monitoring.png +0 -0
  307. ads/feature_store/docs/source/figures/entity.png +0 -0
  308. ads/feature_store/docs/source/figures/feature_group.png +0 -0
  309. ads/feature_store/docs/source/figures/feature_group_lineage.png +0 -0
  310. ads/feature_store/docs/source/figures/feature_group_statistics_viz.png +0 -0
  311. ads/feature_store/docs/source/figures/feature_store_deployment.png +0 -0
  312. ads/feature_store/docs/source/figures/feature_store_overview.png +0 -0
  313. ads/feature_store/docs/source/figures/featuregroup.gif +0 -0
  314. ads/feature_store/docs/source/figures/lineage_d1.png +0 -0
  315. ads/feature_store/docs/source/figures/lineage_d2.png +0 -0
  316. ads/feature_store/docs/source/figures/lineage_fg.png +0 -0
  317. ads/feature_store/docs/source/figures/logo-dark-mode.png +0 -0
  318. ads/feature_store/docs/source/figures/logo-light-mode.png +0 -0
  319. ads/feature_store/docs/source/figures/overview.png +0 -0
  320. ads/feature_store/docs/source/figures/resource_manager.png +0 -0
  321. ads/feature_store/docs/source/figures/resource_manager_feature_store_stack.png +0 -0
  322. ads/feature_store/docs/source/figures/resource_manager_home.png +0 -0
  323. ads/feature_store/docs/source/figures/stats_1.png +0 -0
  324. ads/feature_store/docs/source/figures/stats_2.png +0 -0
  325. ads/feature_store/docs/source/figures/stats_d.png +0 -0
  326. ads/feature_store/docs/source/figures/stats_fg.png +0 -0
  327. ads/feature_store/docs/source/figures/transformation.png +0 -0
  328. ads/feature_store/docs/source/figures/transformations.gif +0 -0
  329. ads/feature_store/docs/source/figures/validation.png +0 -0
  330. ads/feature_store/docs/source/figures/validation_fg.png +0 -0
  331. ads/feature_store/docs/source/figures/validation_results.png +0 -0
  332. ads/feature_store/docs/source/figures/validation_summary.png +0 -0
  333. ads/feature_store/docs/source/index.rst +0 -81
  334. ads/feature_store/docs/source/module.rst +0 -8
  335. ads/feature_store/docs/source/notebook.rst +0 -94
  336. ads/feature_store/docs/source/overview.rst +0 -47
  337. ads/feature_store/docs/source/quickstart.rst +0 -176
  338. ads/feature_store/docs/source/release_notes.rst +0 -194
  339. ads/feature_store/docs/source/setup_feature_store.rst +0 -81
  340. ads/feature_store/docs/source/statistics.rst +0 -58
  341. ads/feature_store/docs/source/transformation.rst +0 -199
  342. ads/feature_store/docs/source/ui.rst +0 -65
  343. ads/feature_store/docs/source/user_guides.setup.feature_store_operator.rst +0 -66
  344. ads/feature_store/docs/source/user_guides.setup.helm_chart.rst +0 -192
  345. ads/feature_store/docs/source/user_guides.setup.terraform.rst +0 -338
  346. ads/feature_store/entity.py +0 -718
  347. ads/feature_store/execution_strategy/__init__.py +0 -0
  348. ads/feature_store/execution_strategy/delta_lake/__init__.py +0 -0
  349. ads/feature_store/execution_strategy/delta_lake/delta_lake_service.py +0 -375
  350. ads/feature_store/execution_strategy/engine/__init__.py +0 -0
  351. ads/feature_store/execution_strategy/engine/spark_engine.py +0 -316
  352. ads/feature_store/execution_strategy/execution_strategy.py +0 -113
  353. ads/feature_store/execution_strategy/execution_strategy_provider.py +0 -47
  354. ads/feature_store/execution_strategy/spark/__init__.py +0 -0
  355. ads/feature_store/execution_strategy/spark/spark_execution.py +0 -618
  356. ads/feature_store/feature.py +0 -192
  357. ads/feature_store/feature_group.py +0 -1494
  358. ads/feature_store/feature_group_expectation.py +0 -346
  359. ads/feature_store/feature_group_job.py +0 -602
  360. ads/feature_store/feature_lineage/__init__.py +0 -0
  361. ads/feature_store/feature_lineage/graphviz_service.py +0 -180
  362. ads/feature_store/feature_option_details.py +0 -50
  363. ads/feature_store/feature_statistics/__init__.py +0 -0
  364. ads/feature_store/feature_statistics/statistics_service.py +0 -99
  365. ads/feature_store/feature_store.py +0 -699
  366. ads/feature_store/feature_store_registrar.py +0 -518
  367. ads/feature_store/input_feature_detail.py +0 -149
  368. ads/feature_store/mixin/__init__.py +0 -4
  369. ads/feature_store/mixin/oci_feature_store.py +0 -145
  370. ads/feature_store/model_details.py +0 -73
  371. ads/feature_store/query/__init__.py +0 -0
  372. ads/feature_store/query/filter.py +0 -266
  373. ads/feature_store/query/generator/__init__.py +0 -0
  374. ads/feature_store/query/generator/query_generator.py +0 -298
  375. ads/feature_store/query/join.py +0 -161
  376. ads/feature_store/query/query.py +0 -403
  377. ads/feature_store/query/validator/__init__.py +0 -0
  378. ads/feature_store/query/validator/query_validator.py +0 -57
  379. ads/feature_store/response/__init__.py +0 -0
  380. ads/feature_store/response/response_builder.py +0 -68
  381. ads/feature_store/service/__init__.py +0 -0
  382. ads/feature_store/service/oci_dataset.py +0 -139
  383. ads/feature_store/service/oci_dataset_job.py +0 -199
  384. ads/feature_store/service/oci_entity.py +0 -125
  385. ads/feature_store/service/oci_feature_group.py +0 -164
  386. ads/feature_store/service/oci_feature_group_job.py +0 -214
  387. ads/feature_store/service/oci_feature_store.py +0 -182
  388. ads/feature_store/service/oci_lineage.py +0 -87
  389. ads/feature_store/service/oci_transformation.py +0 -104
  390. ads/feature_store/statistics/__init__.py +0 -0
  391. ads/feature_store/statistics/abs_feature_value.py +0 -49
  392. ads/feature_store/statistics/charts/__init__.py +0 -0
  393. ads/feature_store/statistics/charts/abstract_feature_plot.py +0 -37
  394. ads/feature_store/statistics/charts/box_plot.py +0 -148
  395. ads/feature_store/statistics/charts/frequency_distribution.py +0 -65
  396. ads/feature_store/statistics/charts/probability_distribution.py +0 -68
  397. ads/feature_store/statistics/charts/top_k_frequent_elements.py +0 -98
  398. ads/feature_store/statistics/feature_stat.py +0 -126
  399. ads/feature_store/statistics/generic_feature_value.py +0 -33
  400. ads/feature_store/statistics/statistics.py +0 -41
  401. ads/feature_store/statistics_config.py +0 -101
  402. ads/feature_store/templates/feature_store_template.yaml +0 -45
  403. ads/feature_store/transformation.py +0 -499
  404. ads/feature_store/validation_output.py +0 -57
  405. ads/hpo/__init__.py +0 -9
  406. ads/hpo/_imports.py +0 -91
  407. ads/hpo/ads_search_space.py +0 -439
  408. ads/hpo/distributions.py +0 -325
  409. ads/hpo/objective.py +0 -280
  410. ads/hpo/search_cv.py +0 -1657
  411. ads/hpo/stopping_criterion.py +0 -75
  412. ads/hpo/tuner_artifact.py +0 -413
  413. ads/hpo/utils.py +0 -91
  414. ads/hpo/validation.py +0 -140
  415. ads/hpo/visualization/__init__.py +0 -5
  416. ads/hpo/visualization/_contour.py +0 -23
  417. ads/hpo/visualization/_edf.py +0 -20
  418. ads/hpo/visualization/_intermediate_values.py +0 -21
  419. ads/hpo/visualization/_optimization_history.py +0 -25
  420. ads/hpo/visualization/_parallel_coordinate.py +0 -169
  421. ads/hpo/visualization/_param_importances.py +0 -26
  422. ads/jobs/__init__.py +0 -53
  423. ads/jobs/ads_job.py +0 -663
  424. ads/jobs/builders/__init__.py +0 -5
  425. ads/jobs/builders/base.py +0 -156
  426. ads/jobs/builders/infrastructure/__init__.py +0 -6
  427. ads/jobs/builders/infrastructure/base.py +0 -165
  428. ads/jobs/builders/infrastructure/dataflow.py +0 -1252
  429. ads/jobs/builders/infrastructure/dsc_job.py +0 -1894
  430. ads/jobs/builders/infrastructure/dsc_job_runtime.py +0 -1233
  431. ads/jobs/builders/infrastructure/utils.py +0 -65
  432. ads/jobs/builders/runtimes/__init__.py +0 -5
  433. ads/jobs/builders/runtimes/artifact.py +0 -338
  434. ads/jobs/builders/runtimes/base.py +0 -325
  435. ads/jobs/builders/runtimes/container_runtime.py +0 -242
  436. ads/jobs/builders/runtimes/python_runtime.py +0 -1016
  437. ads/jobs/builders/runtimes/pytorch_runtime.py +0 -204
  438. ads/jobs/cli.py +0 -104
  439. ads/jobs/env_var_parser.py +0 -131
  440. ads/jobs/extension.py +0 -160
  441. ads/jobs/schema/__init__.py +0 -5
  442. ads/jobs/schema/infrastructure_schema.json +0 -116
  443. ads/jobs/schema/job_schema.json +0 -42
  444. ads/jobs/schema/runtime_schema.json +0 -183
  445. ads/jobs/schema/validator.py +0 -141
  446. ads/jobs/serializer.py +0 -296
  447. ads/jobs/templates/__init__.py +0 -5
  448. ads/jobs/templates/container.py +0 -6
  449. ads/jobs/templates/driver_notebook.py +0 -177
  450. ads/jobs/templates/driver_oci.py +0 -500
  451. ads/jobs/templates/driver_python.py +0 -48
  452. ads/jobs/templates/driver_pytorch.py +0 -852
  453. ads/jobs/templates/driver_utils.py +0 -615
  454. ads/jobs/templates/hostname_from_env.c +0 -55
  455. ads/jobs/templates/oci_metrics.py +0 -181
  456. ads/jobs/utils.py +0 -104
  457. ads/llm/__init__.py +0 -28
  458. ads/llm/autogen/__init__.py +0 -2
  459. ads/llm/autogen/constants.py +0 -15
  460. ads/llm/autogen/reports/__init__.py +0 -2
  461. ads/llm/autogen/reports/base.py +0 -67
  462. ads/llm/autogen/reports/data.py +0 -103
  463. ads/llm/autogen/reports/session.py +0 -526
  464. ads/llm/autogen/reports/templates/chat_box.html +0 -13
  465. ads/llm/autogen/reports/templates/chat_box_lt.html +0 -5
  466. ads/llm/autogen/reports/templates/chat_box_rt.html +0 -6
  467. ads/llm/autogen/reports/utils.py +0 -56
  468. ads/llm/autogen/v02/__init__.py +0 -4
  469. ads/llm/autogen/v02/client.py +0 -295
  470. ads/llm/autogen/v02/log_handlers/__init__.py +0 -2
  471. ads/llm/autogen/v02/log_handlers/oci_file_handler.py +0 -83
  472. ads/llm/autogen/v02/loggers/__init__.py +0 -6
  473. ads/llm/autogen/v02/loggers/metric_logger.py +0 -320
  474. ads/llm/autogen/v02/loggers/session_logger.py +0 -580
  475. ads/llm/autogen/v02/loggers/utils.py +0 -86
  476. ads/llm/autogen/v02/runtime_logging.py +0 -163
  477. ads/llm/chain.py +0 -268
  478. ads/llm/chat_template.py +0 -31
  479. ads/llm/deploy.py +0 -63
  480. ads/llm/guardrails/__init__.py +0 -5
  481. ads/llm/guardrails/base.py +0 -442
  482. ads/llm/guardrails/huggingface.py +0 -44
  483. ads/llm/langchain/__init__.py +0 -5
  484. ads/llm/langchain/plugins/__init__.py +0 -5
  485. ads/llm/langchain/plugins/chat_models/__init__.py +0 -5
  486. ads/llm/langchain/plugins/chat_models/oci_data_science.py +0 -1027
  487. ads/llm/langchain/plugins/embeddings/__init__.py +0 -4
  488. ads/llm/langchain/plugins/embeddings/oci_data_science_model_deployment_endpoint.py +0 -184
  489. ads/llm/langchain/plugins/llms/__init__.py +0 -5
  490. ads/llm/langchain/plugins/llms/oci_data_science_model_deployment_endpoint.py +0 -979
  491. ads/llm/requirements.txt +0 -3
  492. ads/llm/serialize.py +0 -219
  493. ads/llm/serializers/__init__.py +0 -0
  494. ads/llm/serializers/retrieval_qa.py +0 -153
  495. ads/llm/serializers/runnable_parallel.py +0 -27
  496. ads/llm/templates/score_chain.jinja2 +0 -155
  497. ads/llm/templates/tool_chat_template_hermes.jinja +0 -130
  498. ads/llm/templates/tool_chat_template_mistral_parallel.jinja +0 -94
  499. ads/model/__init__.py +0 -52
  500. ads/model/artifact.py +0 -573
  501. ads/model/artifact_downloader.py +0 -254
  502. ads/model/artifact_uploader.py +0 -267
  503. ads/model/base_properties.py +0 -238
  504. ads/model/common/.model-ignore +0 -66
  505. ads/model/common/__init__.py +0 -5
  506. ads/model/common/utils.py +0 -142
  507. ads/model/datascience_model.py +0 -2635
  508. ads/model/deployment/__init__.py +0 -20
  509. ads/model/deployment/common/__init__.py +0 -5
  510. ads/model/deployment/common/utils.py +0 -308
  511. ads/model/deployment/model_deployer.py +0 -466
  512. ads/model/deployment/model_deployment.py +0 -1846
  513. ads/model/deployment/model_deployment_infrastructure.py +0 -671
  514. ads/model/deployment/model_deployment_properties.py +0 -493
  515. ads/model/deployment/model_deployment_runtime.py +0 -838
  516. ads/model/extractor/__init__.py +0 -5
  517. ads/model/extractor/automl_extractor.py +0 -74
  518. ads/model/extractor/embedding_onnx_extractor.py +0 -80
  519. ads/model/extractor/huggingface_extractor.py +0 -88
  520. ads/model/extractor/keras_extractor.py +0 -84
  521. ads/model/extractor/lightgbm_extractor.py +0 -93
  522. ads/model/extractor/model_info_extractor.py +0 -114
  523. ads/model/extractor/model_info_extractor_factory.py +0 -105
  524. ads/model/extractor/pytorch_extractor.py +0 -87
  525. ads/model/extractor/sklearn_extractor.py +0 -112
  526. ads/model/extractor/spark_extractor.py +0 -89
  527. ads/model/extractor/tensorflow_extractor.py +0 -85
  528. ads/model/extractor/xgboost_extractor.py +0 -94
  529. ads/model/framework/__init__.py +0 -5
  530. ads/model/framework/automl_model.py +0 -178
  531. ads/model/framework/embedding_onnx_model.py +0 -438
  532. ads/model/framework/huggingface_model.py +0 -399
  533. ads/model/framework/lightgbm_model.py +0 -266
  534. ads/model/framework/pytorch_model.py +0 -266
  535. ads/model/framework/sklearn_model.py +0 -250
  536. ads/model/framework/spark_model.py +0 -326
  537. ads/model/framework/tensorflow_model.py +0 -254
  538. ads/model/framework/xgboost_model.py +0 -258
  539. ads/model/generic_model.py +0 -3518
  540. ads/model/model_artifact_boilerplate/README.md +0 -381
  541. ads/model/model_artifact_boilerplate/__init__.py +0 -5
  542. ads/model/model_artifact_boilerplate/artifact_introspection_test/__init__.py +0 -5
  543. ads/model/model_artifact_boilerplate/artifact_introspection_test/model_artifact_validate.py +0 -427
  544. ads/model/model_artifact_boilerplate/artifact_introspection_test/requirements.txt +0 -2
  545. ads/model/model_artifact_boilerplate/runtime.yaml +0 -7
  546. ads/model/model_artifact_boilerplate/score.py +0 -61
  547. ads/model/model_file_description_schema.json +0 -68
  548. ads/model/model_introspect.py +0 -331
  549. ads/model/model_metadata.py +0 -1810
  550. ads/model/model_metadata_mixin.py +0 -460
  551. ads/model/model_properties.py +0 -63
  552. ads/model/model_version_set.py +0 -739
  553. ads/model/runtime/__init__.py +0 -5
  554. ads/model/runtime/env_info.py +0 -306
  555. ads/model/runtime/model_deployment_details.py +0 -37
  556. ads/model/runtime/model_provenance_details.py +0 -58
  557. ads/model/runtime/runtime_info.py +0 -81
  558. ads/model/runtime/schemas/inference_env_info_schema.yaml +0 -16
  559. ads/model/runtime/schemas/model_provenance_schema.yaml +0 -36
  560. ads/model/runtime/schemas/training_env_info_schema.yaml +0 -16
  561. ads/model/runtime/utils.py +0 -201
  562. ads/model/serde/__init__.py +0 -5
  563. ads/model/serde/common.py +0 -40
  564. ads/model/serde/model_input.py +0 -547
  565. ads/model/serde/model_serializer.py +0 -1184
  566. ads/model/service/__init__.py +0 -5
  567. ads/model/service/oci_datascience_model.py +0 -1076
  568. ads/model/service/oci_datascience_model_deployment.py +0 -500
  569. ads/model/service/oci_datascience_model_version_set.py +0 -176
  570. ads/model/transformer/__init__.py +0 -5
  571. ads/model/transformer/onnx_transformer.py +0 -324
  572. ads/mysqldb/__init__.py +0 -5
  573. ads/mysqldb/mysql_db.py +0 -227
  574. ads/opctl/__init__.py +0 -18
  575. ads/opctl/anomaly_detection.py +0 -11
  576. ads/opctl/backend/__init__.py +0 -5
  577. ads/opctl/backend/ads_dataflow.py +0 -353
  578. ads/opctl/backend/ads_ml_job.py +0 -710
  579. ads/opctl/backend/ads_ml_pipeline.py +0 -164
  580. ads/opctl/backend/ads_model_deployment.py +0 -209
  581. ads/opctl/backend/base.py +0 -146
  582. ads/opctl/backend/local.py +0 -1053
  583. ads/opctl/backend/marketplace/__init__.py +0 -9
  584. ads/opctl/backend/marketplace/helm_helper.py +0 -173
  585. ads/opctl/backend/marketplace/local_marketplace.py +0 -271
  586. ads/opctl/backend/marketplace/marketplace_backend_runner.py +0 -71
  587. ads/opctl/backend/marketplace/marketplace_operator_interface.py +0 -44
  588. ads/opctl/backend/marketplace/marketplace_operator_runner.py +0 -24
  589. ads/opctl/backend/marketplace/marketplace_utils.py +0 -212
  590. ads/opctl/backend/marketplace/models/__init__.py +0 -5
  591. ads/opctl/backend/marketplace/models/bearer_token.py +0 -94
  592. ads/opctl/backend/marketplace/models/marketplace_type.py +0 -70
  593. ads/opctl/backend/marketplace/models/ocir_details.py +0 -56
  594. ads/opctl/backend/marketplace/prerequisite_checker.py +0 -238
  595. ads/opctl/cli.py +0 -707
  596. ads/opctl/cmds.py +0 -869
  597. ads/opctl/conda/__init__.py +0 -5
  598. ads/opctl/conda/cli.py +0 -193
  599. ads/opctl/conda/cmds.py +0 -749
  600. ads/opctl/conda/config.yaml +0 -34
  601. ads/opctl/conda/manifest_template.yaml +0 -13
  602. ads/opctl/conda/multipart_uploader.py +0 -188
  603. ads/opctl/conda/pack.py +0 -89
  604. ads/opctl/config/__init__.py +0 -5
  605. ads/opctl/config/base.py +0 -57
  606. ads/opctl/config/diagnostics/__init__.py +0 -5
  607. ads/opctl/config/diagnostics/distributed/default_requirements_config.yaml +0 -62
  608. ads/opctl/config/merger.py +0 -255
  609. ads/opctl/config/resolver.py +0 -297
  610. ads/opctl/config/utils.py +0 -79
  611. ads/opctl/config/validator.py +0 -17
  612. ads/opctl/config/versioner.py +0 -68
  613. ads/opctl/config/yaml_parsers/__init__.py +0 -7
  614. ads/opctl/config/yaml_parsers/base.py +0 -58
  615. ads/opctl/config/yaml_parsers/distributed/__init__.py +0 -7
  616. ads/opctl/config/yaml_parsers/distributed/yaml_parser.py +0 -201
  617. ads/opctl/constants.py +0 -66
  618. ads/opctl/decorator/__init__.py +0 -5
  619. ads/opctl/decorator/common.py +0 -129
  620. ads/opctl/diagnostics/__init__.py +0 -5
  621. ads/opctl/diagnostics/__main__.py +0 -25
  622. ads/opctl/diagnostics/check_distributed_job_requirements.py +0 -212
  623. ads/opctl/diagnostics/check_requirements.py +0 -144
  624. ads/opctl/diagnostics/requirement_exception.py +0 -9
  625. ads/opctl/distributed/README.md +0 -109
  626. ads/opctl/distributed/__init__.py +0 -5
  627. ads/opctl/distributed/certificates.py +0 -32
  628. ads/opctl/distributed/cli.py +0 -207
  629. ads/opctl/distributed/cmds.py +0 -731
  630. ads/opctl/distributed/common/__init__.py +0 -5
  631. ads/opctl/distributed/common/abstract_cluster_provider.py +0 -449
  632. ads/opctl/distributed/common/abstract_framework_spec_builder.py +0 -88
  633. ads/opctl/distributed/common/cluster_config_helper.py +0 -103
  634. ads/opctl/distributed/common/cluster_provider_factory.py +0 -21
  635. ads/opctl/distributed/common/cluster_runner.py +0 -54
  636. ads/opctl/distributed/common/framework_factory.py +0 -29
  637. ads/opctl/docker/Dockerfile.job +0 -103
  638. ads/opctl/docker/Dockerfile.job.arm +0 -107
  639. ads/opctl/docker/Dockerfile.job.gpu +0 -175
  640. ads/opctl/docker/base-env.yaml +0 -13
  641. ads/opctl/docker/cuda.repo +0 -6
  642. ads/opctl/docker/operator/.dockerignore +0 -0
  643. ads/opctl/docker/operator/Dockerfile +0 -41
  644. ads/opctl/docker/operator/Dockerfile.gpu +0 -85
  645. ads/opctl/docker/operator/cuda.repo +0 -6
  646. ads/opctl/docker/operator/environment.yaml +0 -8
  647. ads/opctl/forecast.py +0 -11
  648. ads/opctl/index.yaml +0 -3
  649. ads/opctl/model/__init__.py +0 -5
  650. ads/opctl/model/cli.py +0 -65
  651. ads/opctl/model/cmds.py +0 -73
  652. ads/opctl/operator/README.md +0 -4
  653. ads/opctl/operator/__init__.py +0 -31
  654. ads/opctl/operator/cli.py +0 -344
  655. ads/opctl/operator/cmd.py +0 -596
  656. ads/opctl/operator/common/__init__.py +0 -5
  657. ads/opctl/operator/common/backend_factory.py +0 -460
  658. ads/opctl/operator/common/const.py +0 -27
  659. ads/opctl/operator/common/data/synthetic.csv +0 -16001
  660. ads/opctl/operator/common/dictionary_merger.py +0 -148
  661. ads/opctl/operator/common/errors.py +0 -42
  662. ads/opctl/operator/common/operator_config.py +0 -99
  663. ads/opctl/operator/common/operator_loader.py +0 -811
  664. ads/opctl/operator/common/operator_schema.yaml +0 -130
  665. ads/opctl/operator/common/operator_yaml_generator.py +0 -152
  666. ads/opctl/operator/common/utils.py +0 -208
  667. ads/opctl/operator/lowcode/__init__.py +0 -5
  668. ads/opctl/operator/lowcode/anomaly/MLoperator +0 -16
  669. ads/opctl/operator/lowcode/anomaly/README.md +0 -207
  670. ads/opctl/operator/lowcode/anomaly/__init__.py +0 -5
  671. ads/opctl/operator/lowcode/anomaly/__main__.py +0 -103
  672. ads/opctl/operator/lowcode/anomaly/cmd.py +0 -35
  673. ads/opctl/operator/lowcode/anomaly/const.py +0 -167
  674. ads/opctl/operator/lowcode/anomaly/environment.yaml +0 -10
  675. ads/opctl/operator/lowcode/anomaly/model/__init__.py +0 -5
  676. ads/opctl/operator/lowcode/anomaly/model/anomaly_dataset.py +0 -146
  677. ads/opctl/operator/lowcode/anomaly/model/anomaly_merlion.py +0 -162
  678. ads/opctl/operator/lowcode/anomaly/model/automlx.py +0 -99
  679. ads/opctl/operator/lowcode/anomaly/model/autots.py +0 -115
  680. ads/opctl/operator/lowcode/anomaly/model/base_model.py +0 -404
  681. ads/opctl/operator/lowcode/anomaly/model/factory.py +0 -110
  682. ads/opctl/operator/lowcode/anomaly/model/isolationforest.py +0 -78
  683. ads/opctl/operator/lowcode/anomaly/model/oneclasssvm.py +0 -78
  684. ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py +0 -120
  685. ads/opctl/operator/lowcode/anomaly/model/tods.py +0 -119
  686. ads/opctl/operator/lowcode/anomaly/operator_config.py +0 -127
  687. ads/opctl/operator/lowcode/anomaly/schema.yaml +0 -401
  688. ads/opctl/operator/lowcode/anomaly/utils.py +0 -88
  689. ads/opctl/operator/lowcode/common/__init__.py +0 -5
  690. ads/opctl/operator/lowcode/common/const.py +0 -10
  691. ads/opctl/operator/lowcode/common/data.py +0 -116
  692. ads/opctl/operator/lowcode/common/errors.py +0 -47
  693. ads/opctl/operator/lowcode/common/transformations.py +0 -296
  694. ads/opctl/operator/lowcode/common/utils.py +0 -293
  695. ads/opctl/operator/lowcode/feature_store_marketplace/MLoperator +0 -13
  696. ads/opctl/operator/lowcode/feature_store_marketplace/README.md +0 -30
  697. ads/opctl/operator/lowcode/feature_store_marketplace/__init__.py +0 -5
  698. ads/opctl/operator/lowcode/feature_store_marketplace/__main__.py +0 -116
  699. ads/opctl/operator/lowcode/feature_store_marketplace/cmd.py +0 -85
  700. ads/opctl/operator/lowcode/feature_store_marketplace/const.py +0 -15
  701. ads/opctl/operator/lowcode/feature_store_marketplace/environment.yaml +0 -0
  702. ads/opctl/operator/lowcode/feature_store_marketplace/models/__init__.py +0 -4
  703. ads/opctl/operator/lowcode/feature_store_marketplace/models/apigw_config.py +0 -32
  704. ads/opctl/operator/lowcode/feature_store_marketplace/models/db_config.py +0 -43
  705. ads/opctl/operator/lowcode/feature_store_marketplace/models/mysql_config.py +0 -120
  706. ads/opctl/operator/lowcode/feature_store_marketplace/models/serializable_yaml_model.py +0 -34
  707. ads/opctl/operator/lowcode/feature_store_marketplace/operator_utils.py +0 -386
  708. ads/opctl/operator/lowcode/feature_store_marketplace/schema.yaml +0 -160
  709. ads/opctl/operator/lowcode/forecast/MLoperator +0 -25
  710. ads/opctl/operator/lowcode/forecast/README.md +0 -209
  711. ads/opctl/operator/lowcode/forecast/__init__.py +0 -5
  712. ads/opctl/operator/lowcode/forecast/__main__.py +0 -89
  713. ads/opctl/operator/lowcode/forecast/cmd.py +0 -40
  714. ads/opctl/operator/lowcode/forecast/const.py +0 -92
  715. ads/opctl/operator/lowcode/forecast/environment.yaml +0 -20
  716. ads/opctl/operator/lowcode/forecast/errors.py +0 -26
  717. ads/opctl/operator/lowcode/forecast/model/__init__.py +0 -5
  718. ads/opctl/operator/lowcode/forecast/model/arima.py +0 -279
  719. ads/opctl/operator/lowcode/forecast/model/automlx.py +0 -542
  720. ads/opctl/operator/lowcode/forecast/model/autots.py +0 -312
  721. ads/opctl/operator/lowcode/forecast/model/base_model.py +0 -863
  722. ads/opctl/operator/lowcode/forecast/model/factory.py +0 -106
  723. ads/opctl/operator/lowcode/forecast/model/forecast_datasets.py +0 -492
  724. ads/opctl/operator/lowcode/forecast/model/ml_forecast.py +0 -243
  725. ads/opctl/operator/lowcode/forecast/model/neuralprophet.py +0 -486
  726. ads/opctl/operator/lowcode/forecast/model/prophet.py +0 -445
  727. ads/opctl/operator/lowcode/forecast/model_evaluator.py +0 -244
  728. ads/opctl/operator/lowcode/forecast/operator_config.py +0 -234
  729. ads/opctl/operator/lowcode/forecast/schema.yaml +0 -506
  730. ads/opctl/operator/lowcode/forecast/utils.py +0 -413
  731. ads/opctl/operator/lowcode/forecast/whatifserve/__init__.py +0 -7
  732. ads/opctl/operator/lowcode/forecast/whatifserve/deployment_manager.py +0 -285
  733. ads/opctl/operator/lowcode/forecast/whatifserve/score.py +0 -246
  734. ads/opctl/operator/lowcode/pii/MLoperator +0 -17
  735. ads/opctl/operator/lowcode/pii/README.md +0 -208
  736. ads/opctl/operator/lowcode/pii/__init__.py +0 -5
  737. ads/opctl/operator/lowcode/pii/__main__.py +0 -78
  738. ads/opctl/operator/lowcode/pii/cmd.py +0 -39
  739. ads/opctl/operator/lowcode/pii/constant.py +0 -84
  740. ads/opctl/operator/lowcode/pii/environment.yaml +0 -17
  741. ads/opctl/operator/lowcode/pii/errors.py +0 -27
  742. ads/opctl/operator/lowcode/pii/model/__init__.py +0 -5
  743. ads/opctl/operator/lowcode/pii/model/factory.py +0 -82
  744. ads/opctl/operator/lowcode/pii/model/guardrails.py +0 -167
  745. ads/opctl/operator/lowcode/pii/model/pii.py +0 -145
  746. ads/opctl/operator/lowcode/pii/model/processor/__init__.py +0 -34
  747. ads/opctl/operator/lowcode/pii/model/processor/email_replacer.py +0 -34
  748. ads/opctl/operator/lowcode/pii/model/processor/mbi_replacer.py +0 -35
  749. ads/opctl/operator/lowcode/pii/model/processor/name_replacer.py +0 -225
  750. ads/opctl/operator/lowcode/pii/model/processor/number_replacer.py +0 -73
  751. ads/opctl/operator/lowcode/pii/model/processor/remover.py +0 -26
  752. ads/opctl/operator/lowcode/pii/model/report.py +0 -487
  753. ads/opctl/operator/lowcode/pii/operator_config.py +0 -95
  754. ads/opctl/operator/lowcode/pii/schema.yaml +0 -108
  755. ads/opctl/operator/lowcode/pii/utils.py +0 -43
  756. ads/opctl/operator/lowcode/recommender/MLoperator +0 -16
  757. ads/opctl/operator/lowcode/recommender/README.md +0 -206
  758. ads/opctl/operator/lowcode/recommender/__init__.py +0 -5
  759. ads/opctl/operator/lowcode/recommender/__main__.py +0 -82
  760. ads/opctl/operator/lowcode/recommender/cmd.py +0 -33
  761. ads/opctl/operator/lowcode/recommender/constant.py +0 -30
  762. ads/opctl/operator/lowcode/recommender/environment.yaml +0 -11
  763. ads/opctl/operator/lowcode/recommender/model/base_model.py +0 -212
  764. ads/opctl/operator/lowcode/recommender/model/factory.py +0 -56
  765. ads/opctl/operator/lowcode/recommender/model/recommender_dataset.py +0 -25
  766. ads/opctl/operator/lowcode/recommender/model/svd.py +0 -106
  767. ads/opctl/operator/lowcode/recommender/operator_config.py +0 -81
  768. ads/opctl/operator/lowcode/recommender/schema.yaml +0 -265
  769. ads/opctl/operator/lowcode/recommender/utils.py +0 -13
  770. ads/opctl/operator/runtime/__init__.py +0 -5
  771. ads/opctl/operator/runtime/const.py +0 -17
  772. ads/opctl/operator/runtime/container_runtime_schema.yaml +0 -50
  773. ads/opctl/operator/runtime/marketplace_runtime.py +0 -50
  774. ads/opctl/operator/runtime/python_marketplace_runtime_schema.yaml +0 -21
  775. ads/opctl/operator/runtime/python_runtime_schema.yaml +0 -21
  776. ads/opctl/operator/runtime/runtime.py +0 -115
  777. ads/opctl/schema.yaml.yml +0 -36
  778. ads/opctl/script.py +0 -40
  779. ads/opctl/spark/__init__.py +0 -5
  780. ads/opctl/spark/cli.py +0 -43
  781. ads/opctl/spark/cmds.py +0 -147
  782. ads/opctl/templates/diagnostic_report_template.jinja2 +0 -102
  783. ads/opctl/utils.py +0 -344
  784. ads/oracledb/__init__.py +0 -5
  785. ads/oracledb/oracle_db.py +0 -346
  786. ads/pipeline/__init__.py +0 -39
  787. ads/pipeline/ads_pipeline.py +0 -2279
  788. ads/pipeline/ads_pipeline_run.py +0 -772
  789. ads/pipeline/ads_pipeline_step.py +0 -605
  790. ads/pipeline/builders/__init__.py +0 -5
  791. ads/pipeline/builders/infrastructure/__init__.py +0 -5
  792. ads/pipeline/builders/infrastructure/custom_script.py +0 -32
  793. ads/pipeline/cli.py +0 -119
  794. ads/pipeline/extension.py +0 -291
  795. ads/pipeline/schema/__init__.py +0 -5
  796. ads/pipeline/schema/cs_step_schema.json +0 -35
  797. ads/pipeline/schema/ml_step_schema.json +0 -31
  798. ads/pipeline/schema/pipeline_schema.json +0 -71
  799. ads/pipeline/visualizer/__init__.py +0 -5
  800. ads/pipeline/visualizer/base.py +0 -570
  801. ads/pipeline/visualizer/graph_renderer.py +0 -272
  802. ads/pipeline/visualizer/text_renderer.py +0 -84
  803. ads/secrets/__init__.py +0 -11
  804. ads/secrets/adb.py +0 -386
  805. ads/secrets/auth_token.py +0 -86
  806. ads/secrets/big_data_service.py +0 -365
  807. ads/secrets/mysqldb.py +0 -149
  808. ads/secrets/oracledb.py +0 -160
  809. ads/secrets/secrets.py +0 -407
  810. ads/telemetry/__init__.py +0 -7
  811. ads/telemetry/base.py +0 -69
  812. ads/telemetry/client.py +0 -125
  813. ads/telemetry/telemetry.py +0 -257
  814. ads/templates/dataflow_pyspark.jinja2 +0 -13
  815. ads/templates/dataflow_sparksql.jinja2 +0 -22
  816. ads/templates/func.jinja2 +0 -20
  817. ads/templates/schemas/openapi.json +0 -1740
  818. ads/templates/score-pkl.jinja2 +0 -173
  819. ads/templates/score.jinja2 +0 -322
  820. ads/templates/score_embedding_onnx.jinja2 +0 -202
  821. ads/templates/score_generic.jinja2 +0 -165
  822. ads/templates/score_huggingface_pipeline.jinja2 +0 -217
  823. ads/templates/score_lightgbm.jinja2 +0 -185
  824. ads/templates/score_onnx.jinja2 +0 -407
  825. ads/templates/score_onnx_new.jinja2 +0 -473
  826. ads/templates/score_oracle_automl.jinja2 +0 -185
  827. ads/templates/score_pyspark.jinja2 +0 -154
  828. ads/templates/score_pytorch.jinja2 +0 -219
  829. ads/templates/score_scikit-learn.jinja2 +0 -184
  830. ads/templates/score_tensorflow.jinja2 +0 -184
  831. ads/templates/score_xgboost.jinja2 +0 -178
  832. ads/text_dataset/__init__.py +0 -5
  833. ads/text_dataset/backends.py +0 -211
  834. ads/text_dataset/dataset.py +0 -445
  835. ads/text_dataset/extractor.py +0 -207
  836. ads/text_dataset/options.py +0 -53
  837. ads/text_dataset/udfs.py +0 -22
  838. ads/text_dataset/utils.py +0 -49
  839. ads/type_discovery/__init__.py +0 -9
  840. ads/type_discovery/abstract_detector.py +0 -21
  841. ads/type_discovery/constant_detector.py +0 -41
  842. ads/type_discovery/continuous_detector.py +0 -54
  843. ads/type_discovery/credit_card_detector.py +0 -99
  844. ads/type_discovery/datetime_detector.py +0 -92
  845. ads/type_discovery/discrete_detector.py +0 -118
  846. ads/type_discovery/document_detector.py +0 -146
  847. ads/type_discovery/ip_detector.py +0 -68
  848. ads/type_discovery/latlon_detector.py +0 -90
  849. ads/type_discovery/phone_number_detector.py +0 -63
  850. ads/type_discovery/type_discovery_driver.py +0 -87
  851. ads/type_discovery/typed_feature.py +0 -594
  852. ads/type_discovery/unknown_detector.py +0 -41
  853. ads/type_discovery/zipcode_detector.py +0 -48
  854. ads/vault/__init__.py +0 -7
  855. ads/vault/vault.py +0 -237
  856. oracle_ads-2.13.8.dist-info/RECORD +0 -858
  857. {oracle_ads-2.13.8.dist-info → oracle_ads-2.13.9rc0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -1,445 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8; -*-
3
-
4
- # Copyright (c) 2021, 2022 Oracle and/or its affiliates.
5
- # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
-
7
- import itertools
8
- from typing import Any, Callable, Dict, Generator, List, Union
9
-
10
- import ads
11
- import ads.text_dataset.extractor as te
12
- import fsspec
13
- import pandas as pd
14
- from ads.text_dataset import backends
15
- from ads.text_dataset.options import OptionFactory, Options
16
- from ads.text_dataset.udfs import UDF
17
- from ads.text_dataset.utils import NotSupportedError
18
-
19
-
20
class DataLoader:
    r"""
    DataLoader binds engine, FileProcessor and File handler(in this case it is fsspec)
    together to produce a dataframe of parsed text from files.

    This class is expected to be used mainly from TextDatasetFactory class.

    The configuration methods (`with_processor`, `engine`, `backend`, `option`)
    return the loader itself so that calls can be chained.

    Attributes
    ----------
    processor: `ads.text_dataset.extractor.FileProcessor`
        processor that is used for loading data.

    Examples
    --------
    >>> import os
    >>> import oci
    >>> from ads.text_dataset.dataset import TextDatasetFactory as textfactory
    >>> from ads.text_dataset.options import Options
    >>> df = textfactory.format('pdf').engine('pandas').read_line(
    ...     'oci://<bucket-name>@<namespace>/<path>/*.pdf',
    ...     storage_options={"config": oci.config.from_file(os.path.join("~/.oci", "config"))},
    ... )
    >>> data_gen = textfactory.format('pdf').option(Options.FILE_NAME).backend('pdfplumber').read_text(
    ...     'oci://<bucket-name>@<namespace>/<path>/*.pdf',
    ...     storage_options={"config": oci.config.from_file(os.path.join("~/.oci", "config"))},
    ... )
    >>> textfactory.format('docx').convert_to_text(
    ...     'oci://<bucket-name>@<namespace>/<path>/*.docx',
    ...     './extracted',
    ...     storage_options={"config": oci.config.from_file(os.path.join("~/.oci", "config"))},
    ... )
    >>> textfactory.format('docx').convert_to_text(
    ...     'oci://<bucket-name>@<namespace>/<path>/*.docx',
    ...     'oci://<bucket-name>@<namespace>/<out_path>',
    ...     storage_options={"config": oci.config.from_file(os.path.join("~/.oci", "config"))},
    ... )
    >>> fields = textfactory.format('pdf').metadata_schema(
    ...     'oci://<bucket-name>@<namespace>/papers/*.pdf',
    ...     storage_options={"config": oci.config.from_file(os.path.join("~/.oci", "config"))},
    ... )
    >>> df = textfactory.format('pdf').engine('pandas').option(Options.FILE_METADATA, {'extract': ['Author']}).read_text(
    ...     'oci://<bucket-name>@<namespace>/<path>/*.pdf',
    ...     storage_options={"config": oci.config.from_file(os.path.join("~/.oci", "config"))},
    ...     total_files=10,
    ... )
    >>> df = textfactory.format('txt').engine('cudf').read_line(
    ...     'oci://<bucket-name>@<namespace>/<path>/*.log',
    ...     udf=r'^\[(\S+)\s(\S+)\s(\d+)\s(\d+\:\d+\:\d+)\s(\d+)]\s(\S+)\s(\S+)\s(\S+)\s(\S+)',
    ...     df_args={"columns":["day", "month", "date", "time", "year", "type", "method", "status", "file"]},
    ...     n_lines_per_file=10,
    ... )
    """

    def __init__(self, engine: str = None) -> None:
        """Initialize a DataLoader object.

        Parameters
        ----------
        engine : str, optional
            dataframe engine, by default None.

        Returns
        -------
        None
        """
        self.engine(engine)
        self.filemanager = fsspec
        # The factory hands back something that must be called to obtain a
        # processor (`with_processor` does the same), so instantiate it here
        # too; otherwise `backend()` and the readers receive an unbound class.
        self.processor = te.FileProcessorFactory.get_processor("txt")()
        self.options = []
        self._data = None

    def with_processor(self, processor_type: str) -> "DataLoader":
        """Set file processor.

        Parameters
        ----------
        processor_type : str
            type of processor, which corresponds to format of the file.

        Returns
        -------
        DataLoader
            this loader, to allow chaining.
        """
        self.processor = te.FileProcessorFactory.get_processor(processor_type)()
        return self

    def engine(self, eng: str) -> "DataLoader":
        """Set engine for dataloader. Can be pandas or cudf.

        Parameters
        ----------
        eng : str
            name of engine. `None` disables dataframe conversion, so the read
            methods return the raw generator.

        Returns
        -------
        DataLoader
            this loader, to allow chaining.

        Raises
        ------
        NotSupportedError
            raises error if engine passed in is not supported.
        """
        if eng is None:
            self._engine = None
            # Identity formatter: hand back the data generator untouched.
            self._format_output = lambda *args, **kwargs: args[0]
            return self

        if eng == "pandas":
            import pandas

            self._engine = pandas
            self._format_output = pandas.DataFrame
        elif eng == "cudf":
            import cudf

            self._engine = cudf
            # cuDF cannot be initialized with a generator
            self._format_output = lambda data, **kwargs: cudf.DataFrame(
                list(data), **kwargs
            )
        else:
            raise NotSupportedError("Only pandas and cudf currently.")
        return self

    def backend(self, backend: Union[str, "backends.Base"]) -> "DataLoader":
        """Set backend used for extracting text from files.

        Parameters
        ----------
        backend : (str | `ads.text_dataset.backends.Base`)
            backend for extracting text from raw files.

        Returns
        -------
        DataLoader
            this loader, to allow chaining.
        """
        self.processor.backend(backend)
        return self

    def option(self, opt: "Options", spec: Any = None) -> "DataLoader":
        """Set extraction options.

        Parameters
        ----------
        opt : `ads.text_dataset.options.Options`
            an option defined in `ads.text_dataset.options.Options`
        spec : Any, optional
            specifications that will be passed to option handler, by default None

        Returns
        -------
        DataLoader
            this loader, to allow chaining.
        """
        self.options.append((OptionFactory.option_handler(opt), spec))
        return self

    def __load_data__(
        self,
        reader: Callable,
        path: str,
        udf: Union[str, Callable] = None,
        storage_options: Dict = None,
        encoding: str = "utf-8",
        n_rows_per_file: int = None,
        total_rows: int = None,
    ) -> Generator[Union[str, List[str]], None, None]:
        """Lazily produce one record (a list) per accepted item read from the matched files.

        Parameters
        ----------
        reader : Callable
            called with a file handle; must return an iterable of text items.
        path : str
            path to data files. can have glob pattern.
        udf : (callable | str), optional
            callable applied to each item, or a regex string turned into one via
            `UDF.from_regex`. Items for which it returns None are dropped and do
            not count towards the limits. By default None (items kept as-is).
        storage_options : dict, optional
            storage options for cloud storage, by default None
        encoding : str, optional
            encoding of files, by default 'utf-8'
        n_rows_per_file : int, optional
            max number of records taken from each file, by default None
        total_rows : int, optional
            max number of records taken from all files, by default None

        Returns
        -------
        Generator
            records made of the option-handler values for the file followed by
            the (possibly multi-valued) processed item.
        """
        storage_options = storage_options if storage_options is not None else {}
        fhs = self.filemanager.open_files(
            path, mode="rb", encoding=encoding, **storage_options
        )

        if udf is None:
            fn = lambda x: x
        elif isinstance(udf, str):
            fn = UDF.from_regex(udf)
        else:
            fn = udf

        total_emitted = 0

        def limit_reached(emitted_from_file: int) -> bool:
            # True once either the global or the per-file quota is used up.
            if total_rows is not None and total_emitted >= total_rows:
                return True
            return n_rows_per_file is not None and emitted_from_file >= n_rows_per_file

        def records(fh):
            nonlocal total_emitted
            # Stop before touching the file at all when nothing more can be
            # emitted; this avoids parsing every remaining matched file.
            if limit_reached(0):
                return
            prefix = [handler(self).handle(fh, spec) for handler, spec in self.options]
            emitted_from_file = 0
            for text in reader(fh):
                content = fn(text)
                if content is None:
                    continue
                if isinstance(content, (list, tuple)):
                    yield prefix + list(content)
                else:
                    yield prefix + [content]
                emitted_from_file += 1
                total_emitted += 1
                # Leave as soon as a quota is hit instead of draining the reader.
                if limit_reached(emitted_from_file):
                    return

        return itertools.chain.from_iterable(records(fh) for fh in fhs)

    def read_line(
        self,
        path: str,
        udf: Union[str, Callable] = None,
        n_lines_per_file: int = None,
        total_lines: int = None,
        df_args: Dict = None,
        storage_options: Dict = None,
        encoding: str = "utf-8",
    ) -> Union[Generator[Union[str, List[str]], None, None], "DataFrame"]:
        """Read each file into lines. If path matches multiple files, will combine lines from all files.

        Parameters
        ----------
        path : str
            path to data files. can have glob pattern.
        udf : (callable | str), optional
            user defined function for processing each line, can be a callable or regex, by default None
        n_lines_per_file : int, optional
            max number of lines read from each file, by default None
        total_lines : int, optional
            max number of lines read from all files, by default None
        df_args : dict, optional
            arguments passed to dataframe engine (e.g. pandas), by default None
        storage_options : dict, optional
            storage options for cloud storage, by default None
        encoding : str, optional
            encoding of files, by default 'utf-8'

        Returns
        -------
        (Generator | DataFrame)
            returns either a data generator or a dataframe.
        """
        df_args = df_args if df_args is not None else {}
        self._data = self.__load_data__(
            self.processor.read_line,
            path,
            udf,
            storage_options,
            encoding,
            n_lines_per_file,
            total_lines,
        )
        return self._format_output(self._data, **df_args)

    def read_text(
        self,
        path: str,
        udf: Union[str, Callable] = None,
        total_files: int = None,
        storage_options: Dict = None,
        df_args: Dict = None,
        encoding: str = "utf-8",
    ) -> Union[Generator[Union[str, List[str]], None, None], "DataFrame"]:
        """Read each file into a text string. If path matches multiple files, each file corresponds to one record.

        Parameters
        ----------
        path : str
            path to data files. can have glob pattern.
        udf : (callable | str), optional
            user defined function for processing each file's text, can be a callable or regex, by default None
        total_files : int, optional
            max number of files to read, by default None
        storage_options : dict, optional
            storage options for cloud storage, by default None
        df_args : dict, optional
            arguments passed to dataframe engine (e.g. pandas), by default None
        encoding : str, optional
            encoding of files, by default 'utf-8'

        Returns
        -------
        (Generator | DataFrame)
            returns either a data generator or a dataframe.
        """
        df_args = df_args if df_args is not None else {}
        self._data = self.__load_data__(
            self.processor.read_text,
            path,
            udf,
            storage_options,
            encoding,
            1,  # one record per file
            total_files,
        )
        return self._format_output(self._data, **df_args)

    def convert_to_text(
        self,
        src_path: str,
        dst_path: str,
        encoding: str = "utf-8",
        storage_options: Dict = None,
    ) -> None:
        """Convert files to plain text files.

        Parameters
        ----------
        src_path : str
            path to source data file(s). can use glob pattern
        dst_path: str
            local folder or cloud storage (e.g., OCI object storage) prefix to save converted text files
        encoding: str, optional
            encoding for files, by default utf-8
        storage_options : Dict, optional
            storage options for cloud storage, by default None

        Returns
        -------
        None
        """
        storage_options = storage_options if storage_options is not None else {}
        fhs = self.filemanager.open_files(
            src_path, mode="rb", encoding=encoding, **storage_options
        )
        for fh in fhs:
            self.processor.convert_to_text(
                fh,
                dst_path,
                storage_options=storage_options,
            )

    def metadata_all(
        self, path: str, storage_options: Dict = None, encoding: str = "utf-8"
    ) -> Generator[Dict[str, Any], None, None]:
        """Get metadata of all files that matches the given path. Return a generator.

        Parameters
        ----------
        path : str
            path to data files. can use glob pattern.
        storage_options : Dict, optional
            storage options for cloud storage, by default None
        encoding : str, optional
            encoding of files, by default 'utf-8'

        Returns
        -------
        Generator
            generator of extracted metadata from files.
        """
        storage_options = storage_options if storage_options is not None else {}
        fhs = self.filemanager.open_files(
            path, mode="rb", encoding=encoding, **storage_options
        )
        return (self.processor.get_metadata(fh) for fh in fhs)

    def metadata_schema(
        self,
        path: str,
        n_files: int = 1,
        storage_options: Dict = None,
        encoding: str = "utf-8",
    ) -> List[str]:
        """
        Get available fields in metadata by looking at the first `n_files` that
        matches the given path.

        Parameters
        ----------
        path: str
            path to data files. can have glob pattern
        n_files: int, optional
            number of files to look up, default to be 1
        storage_options: dict, optional
            storage options for cloud storage, by default None
        encoding: str, optional
            encoding of files, by default utf-8

        Returns
        -------
        List[str]
            list of available fields in metadata
        """
        metadata = self.metadata_all(
            path, storage_options=storage_options, encoding=encoding
        )
        fields = set()
        # islice stops quietly when fewer than `n_files` files match; the clamp
        # keeps a negative `n_files` meaning "look at no files".
        for meta in itertools.islice(metadata, max(n_files, 0)):
            fields.update(meta.keys())
        return list(fields)

    # ----- not currently used, but in case we want to consider chaining in the future -----
    def _transform(self, udf, udf_type="fn"):  # pragma: no cover
        """Lazily apply a UDF to the currently loaded data, dropping None results.

        `udf_type` selects how `udf` is interpreted: "fn" for a callable,
        "regex" for a regular expression string.
        """
        if udf_type == "fn":
            func = UDF.from_lambda(udf)
        elif udf_type == "regex":
            func = UDF.from_regex(udf)
        else:
            raise NotImplementedError("Other types of UDF not yet supported.")

        # convert df into iterator
        if isinstance(self._data, pd.Series):
            # A Series has no `iterrows`; iterating it yields its values.
            self._data = iter(self._data)
        elif isinstance(self._data, pd.DataFrame):
            self._data = (
                row.values if len(row.values) > 1 else row.values[0]
                for _, row in self._data.iterrows()
            )

        self._data = (func(row) for row in self._data)
        self._data = (row for row in self._data if row is not None)
        return self
423
-
424
-
425
class TextDatasetFactory:
    """Entry point that builds a `DataLoader` for a given file format."""

    @staticmethod
    def format(format_name: str) -> DataLoader:
        """
        Create a fresh DataLoader and attach the FileProcessor that matches
        `format_name` (e.g. the PDF processor for 'pdf'). The processor is
        looked up through the FileProcessorFactory by `DataLoader.with_processor`.

        Parameters
        ----------
        format_name : str
            name of format

        Returns
        -------
        `ads.text_dataset.dataset.DataLoader`
            a `DataLoader` object.
        """
        loader = DataLoader()
        configured = loader.with_processor(format_name)
        return configured
@@ -1,207 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8; -*-
3
-
4
- # Copyright (c) 2021, 2022 Oracle and/or its affiliates.
5
- # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
-
7
- import logging
8
- import os
9
- from typing import Dict, Generator, List, Union
10
-
11
- from ads.text_dataset import backends
12
- from ads.text_dataset.backends import OITCC, Base, PDFPlumber, Tika
13
- from ads.text_dataset.utils import NotSupportedError
14
- from fsspec.core import OpenFile
15
-
16
- logger = logging.getLogger("ads.text_dataset")
17
-
18
-
19
class FileProcessor:
    """
    Base class for all file processors. Files are opened using the fsspec library.
    The default implementation in the base class assumes text files.

    Every read/convert/metadata call is delegated to the backend object
    selected through `backend`.

    This class is expected to be used inside `ads.text_dataset.dataset.DataLoader`.
    """

    # Maps a backend name to the backend class instantiated for it.
    backend_map = {"default": Base, "tika": Tika}

    def __init__(self, backend: Union[str, backends.Base] = "default") -> None:
        self.backend(backend)

    def backend(self, backend: Union[str, backends.Base]) -> "FileProcessor":
        """Set backend for file processor.

        Parameters
        ----------
        backend : str or `ads.text_dataset.backends.Base`
            either a key of `backend_map` (a new backend of the mapped class
            is instantiated) or a ready-made backend instance

        Returns
        -------
        FileProcessor
            this processor, so that calls can be chained

        Raises
        ------
        NotSupportedError
            when specified backend is not supported.
        """
        if isinstance(backend, str) and backend in self.backend_map:
            self._backend = self.backend_map[backend]()
        elif isinstance(backend, Base):
            self._backend = backend
        else:
            raise NotSupportedError(
                f"backend {backend} is not recognized or not a subclass of ads.text_dataset.backends.Base."
            )
        return self

    def read_line(
        self, fhandler: OpenFile, **format_reader_kwargs: Dict
    ) -> Generator[Union[str, List[str]], None, None]:
        """Yields lines from a file.

        Parameters
        ----------
        fhandler : `fsspec.core.OpenFile`
            file handler returned by `fsspec`
        **format_reader_kwargs
            extra keyword arguments forwarded unchanged to the backend

        Returns
        -------
        Generator
            a generator that yields lines from a file
        """
        return self._backend.read_line(fhandler, **format_reader_kwargs)

    def read_text(
        self, fhandler: OpenFile, **format_reader_kwargs: Dict
    ) -> Generator[Union[str, List[str]], None, None]:
        """Yield contents from the entire file.

        Parameters
        ----------
        fhandler : `fsspec.core.OpenFile`
            a file handler returned by fsspec
        **format_reader_kwargs
            extra keyword arguments forwarded unchanged to the backend

        Returns
        -------
        Generator
            a generator that yields text from a file
        """
        return self._backend.read_text(fhandler, **format_reader_kwargs)

    def convert_to_text(
        self,
        fhandler: OpenFile,
        dst_path: str,
        fname: Union[str, None] = None,
        storage_options: Union[Dict, None] = None,
    ) -> str:
        """Convert input file to a text file.

        Parameters
        ----------
        fhandler : `fsspec.core.OpenFile`
            a file handler returned by `fsspec`
        dst_path: str
            local folder or cloud storage (e.g. OCI object storage) prefix to save converted text files
        fname: str, optional
            filename for converted output, relative to dirname or prefix, by default None
        storage_options: dict, optional
            storage options for cloud storage, by default None

        Returns
        -------
        str
            path to saved output
        """
        return self._backend.convert_to_text(fhandler, dst_path, fname, storage_options)

    def get_metadata(self, fhandler: OpenFile) -> Dict:
        """Get metadata of a file.

        Parameters
        ----------
        fhandler : `fsspec.core.OpenFile`
            a file handler returned by fsspec

        Returns
        -------
        dict
            dictionary of metadata
        """
        return self._backend.get_metadata(fhandler)
134
-
135
-
136
class PDFProcessor(FileProcessor):
    """
    File processor that extracts text content from PDF documents.
    """

    # Backends selectable by name for PDF files; "default" resolves to Tika.
    backend_map = {
        "tika": Tika,
        "pdfplumber": PDFPlumber,
        "default": Tika,
    }
142
-
143
-
144
class WordProcessor(FileProcessor):
    """
    File processor that extracts text content from doc or docx files.
    """

    # Both selectable names resolve to the Tika backend.
    backend_map = {
        "default": Tika,
        "tika": Tika,
    }
150
-
151
-
152
class FileProcessorFactory:
    """Factory that manages all file processors.
    Provides functionality to get a processor corresponding to a given file type,
    or register custom processor for a specific file format.

    Examples
    --------
    >>> from ads.text_dataset.extractor import FileProcessor, FileProcessorFactory
    >>> FileProcessorFactory.get_processor('pdf')
    >>> class CustomProcessor(FileProcessor):
    ...     # custom logic here
    ...     pass
    >>> FileProcessorFactory.register('new_format', CustomProcessor)
    """

    # Maps a file format name to the processor class that handles it.
    # The mapping is shared by all users of the factory; `register` mutates it.
    processor_map = {
        "pdf": PDFProcessor,
        "docx": WordProcessor,
        "doc": WordProcessor,
        "txt": FileProcessor,
    }

    @classmethod
    def register(cls, fmt: str, processor: type) -> None:
        """Register custom file processor for a file format.

        Parameters
        ----------
        fmt : str
            file format
        processor : type
            custom processor class (not an instance); must be a subclass of
            `FileProcessor`

        Raises
        ------
        TypeError
            raised when processor is not a subclass of `FileProcessor`.
        """
        # issubclass() itself raises a generic TypeError for non-class
        # arguments, so check for a class first to keep the message uniform.
        if not (isinstance(processor, type) and issubclass(processor, FileProcessor)):
            raise TypeError("Processor must inherit from FileProcessor class.")
        cls.processor_map[fmt] = processor

    @staticmethod
    def get_processor(format):
        """Look up the processor class registered for a file format.

        Parameters
        ----------
        format : str
            file format, e.g. "pdf"

        Returns
        -------
        type
            the processor class registered for `format`; when none is
            registered a warning is logged and the generic `FileProcessor`
            class is returned instead
        """
        if format in FileProcessorFactory.processor_map:
            return FileProcessorFactory.processor_map[format]

        logger.warning(
            f"""
            Format {format} is not supported natively.
            A generic FileProcessor is returned.
            You can define and register a custom processor.
            """
        )
        return FileProcessor
@@ -1,53 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8; -*-
3
-
4
- # Copyright (c) 2021, 2022 Oracle and/or its affiliates.
5
- # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
-
7
- from enum import Enum, auto
8
- from typing import Any, List, Dict
9
-
10
- from fsspec.core import OpenFile
11
-
12
-
13
class Options(Enum):
    """Identifiers for the options that have a registered handler class."""

    # Handled by `FileOption`, which returns the file handler's path.
    FILE_NAME = auto()
    # Handled by `MetadataOption`, which returns selected metadata values.
    FILE_METADATA = auto()
16
-
17
-
18
class OptionHandler:
    """Base class for option handlers; subclasses implement `handle`."""

    def __init__(self, dataloader: "ads.text_dataset.dataset.DataLoader") -> None:
        # Kept so that subclasses can reach the dataloader.
        self.dataloader = dataloader

    def handle(self, fhandler: OpenFile, spec: Any) -> Any:
        """Produce the value of this option for one file.

        Parameters
        ----------
        fhandler : `fsspec.core.OpenFile`
            file handler of the file being processed
        spec : Any
            option-specific specification

        Raises
        ------
        NotImplementedError
            always; subclasses must override this method.
        """
        raise NotImplementedError()
24
-
25
-
26
class FileOption(OptionHandler):
    """Option handler that reports the path of the file being processed."""

    def handle(self, fhandler: OpenFile, spec: Any) -> Any:
        """Return `fhandler.path`; `spec` is not used."""
        file_path = fhandler.path
        return file_path
29
-
30
-
31
class MetadataOption(OptionHandler):
    """Option handler that extracts selected fields from a file's metadata."""

    def handle(self, fhandler: OpenFile, spec: Dict) -> List:
        """Return the metadata values named in ``spec["extract"]``.

        The metadata is obtained from the dataloader's processor. The result
        follows the order of ``spec["extract"]``; a field missing from the
        metadata yields ``None``.
        """
        processor = self.dataloader.processor
        file_metadata = processor.get_metadata(fhandler)
        wanted_fields = spec["extract"]
        return [file_metadata.get(field, None) for field in wanted_fields]
35
-
36
-
37
class OptionFactory:
    """Registry that maps each `Options` member to its handler class."""

    # Shared mapping from option to handler class; `register_option` mutates it.
    option_handlers = {
        Options.FILE_NAME: FileOption,
        Options.FILE_METADATA: MetadataOption,
    }

    @staticmethod
    def option_handler(option: Options) -> OptionHandler:
        """Return the handler registered for `option`.

        Raises
        ------
        RuntimeError
            when nothing is registered for `option`.
        """
        registered = OptionFactory.option_handlers.get(option, None)
        if registered is not None:
            return registered
        raise RuntimeError(f"Option {option} Not Recognized.")

    @classmethod
    def register_option(cls, option: Options, handler) -> None:
        """Register `handler` for `option`, replacing any existing entry."""
        cls.option_handlers[option] = handler