scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (704) hide show
  1. scitex/__init__.py +53 -15
  2. scitex/__main__.py +72 -26
  3. scitex/__version__.py +1 -1
  4. scitex/_sh.py +145 -23
  5. scitex/ai/__init__.py +30 -16
  6. scitex/ai/_gen_ai/_Anthropic.py +5 -7
  7. scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
  8. scitex/ai/_gen_ai/_DeepSeek.py +10 -2
  9. scitex/ai/_gen_ai/_Google.py +2 -2
  10. scitex/ai/_gen_ai/_Llama.py +2 -2
  11. scitex/ai/_gen_ai/_OpenAI.py +2 -2
  12. scitex/ai/_gen_ai/_PARAMS.py +51 -65
  13. scitex/ai/_gen_ai/_Perplexity.py +2 -2
  14. scitex/ai/_gen_ai/__init__.py +25 -14
  15. scitex/ai/_gen_ai/_format_output_func.py +4 -4
  16. scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
  17. scitex/ai/classification/CrossValidationExperiment.py +374 -0
  18. scitex/ai/classification/__init__.py +43 -4
  19. scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
  20. scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
  21. scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
  22. scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
  23. scitex/ai/classification/reporters/__init__.py +11 -0
  24. scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  25. scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
  26. scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
  27. scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
  28. scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
  29. scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
  30. scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
  31. scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  32. scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  33. scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  34. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  35. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  36. scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  37. scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  38. scitex/ai/classification/timeseries/__init__.py +39 -0
  39. scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
  40. scitex/ai/clustering/_umap.py +2 -2
  41. scitex/ai/feature_extraction/vit.py +1 -0
  42. scitex/ai/feature_selection/__init__.py +30 -0
  43. scitex/ai/feature_selection/feature_selection.py +364 -0
  44. scitex/ai/loss/multi_task_loss.py +1 -1
  45. scitex/ai/metrics/__init__.py +51 -4
  46. scitex/ai/metrics/_calc_bacc.py +61 -0
  47. scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
  48. scitex/ai/metrics/_calc_clf_report.py +78 -0
  49. scitex/ai/metrics/_calc_conf_mat.py +93 -0
  50. scitex/ai/metrics/_calc_feature_importance.py +183 -0
  51. scitex/ai/metrics/_calc_mcc.py +61 -0
  52. scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
  53. scitex/ai/metrics/_calc_roc_auc.py +110 -0
  54. scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
  55. scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
  56. scitex/ai/metrics/_normalize_labels.py +83 -0
  57. scitex/ai/plt/__init__.py +47 -8
  58. scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
  59. scitex/ai/plt/_plot_feature_importance.py +323 -0
  60. scitex/ai/plt/_plot_learning_curve.py +345 -0
  61. scitex/ai/plt/_plot_optuna_study.py +225 -0
  62. scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
  63. scitex/ai/plt/_plot_roc_curve.py +255 -0
  64. scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
  65. scitex/ai/training/__init__.py +2 -2
  66. scitex/ai/utils/grid_search.py +3 -3
  67. scitex/benchmark/__init__.py +52 -0
  68. scitex/benchmark/benchmark.py +400 -0
  69. scitex/benchmark/monitor.py +370 -0
  70. scitex/benchmark/profiler.py +297 -0
  71. scitex/browser/__init__.py +48 -0
  72. scitex/browser/automation/CookieHandler.py +216 -0
  73. scitex/browser/automation/__init__.py +7 -0
  74. scitex/browser/collaboration/__init__.py +55 -0
  75. scitex/browser/collaboration/auth_helpers.py +94 -0
  76. scitex/browser/collaboration/collaborative_agent.py +136 -0
  77. scitex/browser/collaboration/credential_manager.py +188 -0
  78. scitex/browser/collaboration/interactive_panel.py +400 -0
  79. scitex/browser/collaboration/persistent_browser.py +170 -0
  80. scitex/browser/collaboration/shared_session.py +383 -0
  81. scitex/browser/collaboration/standard_interactions.py +246 -0
  82. scitex/browser/collaboration/visual_feedback.py +181 -0
  83. scitex/browser/core/BrowserMixin.py +326 -0
  84. scitex/browser/core/ChromeProfileManager.py +446 -0
  85. scitex/browser/core/__init__.py +9 -0
  86. scitex/browser/debugging/__init__.py +18 -0
  87. scitex/browser/debugging/_browser_logger.py +657 -0
  88. scitex/browser/debugging/_highlight_element.py +143 -0
  89. scitex/browser/debugging/_show_grid.py +154 -0
  90. scitex/browser/interaction/__init__.py +24 -0
  91. scitex/browser/interaction/click_center.py +149 -0
  92. scitex/browser/interaction/click_with_fallbacks.py +206 -0
  93. scitex/browser/interaction/close_popups.py +498 -0
  94. scitex/browser/interaction/fill_with_fallbacks.py +209 -0
  95. scitex/browser/pdf/__init__.py +14 -0
  96. scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
  97. scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
  98. scitex/browser/remote/CaptchaHandler.py +434 -0
  99. scitex/browser/remote/ZenRowsAPIClient.py +347 -0
  100. scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
  101. scitex/browser/remote/__init__.py +11 -0
  102. scitex/browser/stealth/HumanBehavior.py +344 -0
  103. scitex/browser/stealth/StealthManager.py +1008 -0
  104. scitex/browser/stealth/__init__.py +9 -0
  105. scitex/browser/template.py +122 -0
  106. scitex/capture/__init__.py +110 -0
  107. scitex/capture/__main__.py +25 -0
  108. scitex/capture/capture.py +848 -0
  109. scitex/capture/cli.py +233 -0
  110. scitex/capture/gif.py +344 -0
  111. scitex/capture/mcp_server.py +961 -0
  112. scitex/capture/session.py +70 -0
  113. scitex/capture/utils.py +705 -0
  114. scitex/cli/__init__.py +17 -0
  115. scitex/cli/cloud.py +447 -0
  116. scitex/cli/main.py +42 -0
  117. scitex/cli/scholar.py +280 -0
  118. scitex/context/_suppress_output.py +5 -3
  119. scitex/db/__init__.py +30 -3
  120. scitex/db/__main__.py +75 -0
  121. scitex/db/_check_health.py +381 -0
  122. scitex/db/_delete_duplicates.py +25 -386
  123. scitex/db/_inspect.py +335 -114
  124. scitex/db/_inspect_optimized.py +301 -0
  125. scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
  126. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
  127. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
  128. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
  129. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
  130. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
  131. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
  132. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
  133. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
  134. scitex/db/_postgresql/__init__.py +6 -0
  135. scitex/db/_sqlite3/_SQLite3.py +210 -0
  136. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
  137. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
  138. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
  139. scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
  140. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
  141. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
  142. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
  143. scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
  144. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
  145. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
  146. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
  147. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
  148. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
  149. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
  150. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
  151. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
  152. scitex/db/_sqlite3/__init__.py +7 -0
  153. scitex/db/_sqlite3/_delete_duplicates.py +274 -0
  154. scitex/decorators/__init__.py +2 -0
  155. scitex/decorators/_cache_disk.py +13 -5
  156. scitex/decorators/_cache_disk_async.py +49 -0
  157. scitex/decorators/_deprecated.py +175 -10
  158. scitex/decorators/_timeout.py +1 -1
  159. scitex/dev/_analyze_code_flow.py +2 -2
  160. scitex/dict/_DotDict.py +73 -15
  161. scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
  162. scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
  163. scitex/dict/__init__.py +2 -0
  164. scitex/dict/_flatten.py +27 -0
  165. scitex/dsp/_crop.py +2 -2
  166. scitex/dsp/_demo_sig.py +2 -2
  167. scitex/dsp/_detect_ripples.py +2 -2
  168. scitex/dsp/_hilbert.py +2 -2
  169. scitex/dsp/_listen.py +6 -6
  170. scitex/dsp/_modulation_index.py +2 -2
  171. scitex/dsp/_pac.py +1 -1
  172. scitex/dsp/_psd.py +2 -2
  173. scitex/dsp/_resample.py +2 -1
  174. scitex/dsp/_time.py +3 -2
  175. scitex/dsp/_wavelet.py +3 -2
  176. scitex/dsp/add_noise.py +2 -2
  177. scitex/dsp/example.py +1 -0
  178. scitex/dsp/filt.py +10 -9
  179. scitex/dsp/template.py +3 -2
  180. scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
  181. scitex/dsp/utils/pac.py +2 -2
  182. scitex/dt/_normalize_timestamp.py +432 -0
  183. scitex/errors.py +572 -0
  184. scitex/gen/_DimHandler.py +2 -2
  185. scitex/gen/__init__.py +37 -7
  186. scitex/gen/_deprecated_close.py +80 -0
  187. scitex/gen/_deprecated_start.py +26 -0
  188. scitex/gen/_detect_environment.py +152 -0
  189. scitex/gen/_detect_notebook_path.py +169 -0
  190. scitex/gen/_embed.py +6 -2
  191. scitex/gen/_get_notebook_path.py +257 -0
  192. scitex/gen/_less.py +1 -1
  193. scitex/gen/_list_packages.py +2 -2
  194. scitex/gen/_norm.py +44 -9
  195. scitex/gen/_norm_cache.py +269 -0
  196. scitex/gen/_src.py +3 -5
  197. scitex/gen/_title_case.py +3 -3
  198. scitex/io/__init__.py +28 -6
  199. scitex/io/_glob.py +13 -7
  200. scitex/io/_load.py +108 -21
  201. scitex/io/_load_cache.py +303 -0
  202. scitex/io/_load_configs.py +40 -15
  203. scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
  204. scitex/io/_load_modules/_ZarrExplorer.py +114 -0
  205. scitex/io/_load_modules/_bibtex.py +207 -0
  206. scitex/io/_load_modules/_hdf5.py +53 -178
  207. scitex/io/_load_modules/_json.py +5 -3
  208. scitex/io/_load_modules/_pdf.py +871 -16
  209. scitex/io/_load_modules/_sqlite3.py +15 -0
  210. scitex/io/_load_modules/_txt.py +41 -12
  211. scitex/io/_load_modules/_yaml.py +4 -3
  212. scitex/io/_load_modules/_zarr.py +126 -0
  213. scitex/io/_save.py +429 -171
  214. scitex/io/_save_modules/__init__.py +6 -0
  215. scitex/io/_save_modules/_bibtex.py +194 -0
  216. scitex/io/_save_modules/_csv.py +8 -4
  217. scitex/io/_save_modules/_excel.py +174 -15
  218. scitex/io/_save_modules/_hdf5.py +251 -226
  219. scitex/io/_save_modules/_image.py +1 -3
  220. scitex/io/_save_modules/_json.py +49 -4
  221. scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
  222. scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
  223. scitex/io/_save_modules/_tex.py +277 -0
  224. scitex/io/_save_modules/_yaml.py +42 -3
  225. scitex/io/_save_modules/_zarr.py +160 -0
  226. scitex/io/utils/__init__.py +20 -0
  227. scitex/io/utils/h5_to_zarr.py +616 -0
  228. scitex/linalg/_geometric_median.py +6 -2
  229. scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
  230. scitex/logging/__init__.py +122 -0
  231. scitex/logging/_config.py +158 -0
  232. scitex/logging/_context.py +103 -0
  233. scitex/logging/_formatters.py +128 -0
  234. scitex/logging/_handlers.py +64 -0
  235. scitex/logging/_levels.py +35 -0
  236. scitex/logging/_logger.py +163 -0
  237. scitex/logging/_print_capture.py +95 -0
  238. scitex/ml/__init__.py +69 -0
  239. scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
  240. scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
  241. scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
  242. scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
  243. scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
  244. scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
  245. scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
  246. scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
  247. scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
  248. scitex/ml/_gen_ai/__init__.py +43 -0
  249. scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
  250. scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
  251. scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
  252. scitex/ml/activation/__init__.py +8 -0
  253. scitex/ml/activation/_define.py +11 -0
  254. scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
  255. scitex/ml/classification/CrossValidationExperiment.py +374 -0
  256. scitex/ml/classification/__init__.py +46 -0
  257. scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
  258. scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
  259. scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
  260. scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
  261. scitex/ml/classification/reporters/__init__.py +11 -0
  262. scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  263. scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
  264. scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
  265. scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
  266. scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
  267. scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
  268. scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
  269. scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  270. scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  271. scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  272. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  273. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  274. scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  275. scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  276. scitex/ml/classification/timeseries/__init__.py +39 -0
  277. scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
  278. scitex/ml/clustering/__init__.py +11 -0
  279. scitex/ml/clustering/_pca.py +115 -0
  280. scitex/ml/clustering/_umap.py +376 -0
  281. scitex/ml/feature_extraction/__init__.py +56 -0
  282. scitex/ml/feature_extraction/vit.py +149 -0
  283. scitex/ml/feature_selection/__init__.py +30 -0
  284. scitex/ml/feature_selection/feature_selection.py +364 -0
  285. scitex/ml/loss/_L1L2Losses.py +34 -0
  286. scitex/ml/loss/__init__.py +12 -0
  287. scitex/ml/loss/multi_task_loss.py +47 -0
  288. scitex/ml/metrics/__init__.py +56 -0
  289. scitex/ml/metrics/_calc_bacc.py +61 -0
  290. scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
  291. scitex/ml/metrics/_calc_clf_report.py +78 -0
  292. scitex/ml/metrics/_calc_conf_mat.py +93 -0
  293. scitex/ml/metrics/_calc_feature_importance.py +183 -0
  294. scitex/ml/metrics/_calc_mcc.py +61 -0
  295. scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
  296. scitex/ml/metrics/_calc_roc_auc.py +110 -0
  297. scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
  298. scitex/ml/metrics/_calc_silhouette_score.py +503 -0
  299. scitex/ml/metrics/_normalize_labels.py +83 -0
  300. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
  301. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
  302. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
  303. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
  304. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
  305. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
  306. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
  307. scitex/ml/optim/__init__.py +13 -0
  308. scitex/ml/optim/_get_set.py +31 -0
  309. scitex/ml/optim/_optimizers.py +71 -0
  310. scitex/ml/plt/__init__.py +60 -0
  311. scitex/ml/plt/_plot_conf_mat.py +663 -0
  312. scitex/ml/plt/_plot_feature_importance.py +323 -0
  313. scitex/ml/plt/_plot_learning_curve.py +345 -0
  314. scitex/ml/plt/_plot_optuna_study.py +225 -0
  315. scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
  316. scitex/ml/plt/_plot_roc_curve.py +255 -0
  317. scitex/ml/sk/__init__.py +11 -0
  318. scitex/ml/sk/_clf.py +58 -0
  319. scitex/ml/sk/_to_sktime.py +100 -0
  320. scitex/ml/sklearn/__init__.py +26 -0
  321. scitex/ml/sklearn/clf.py +58 -0
  322. scitex/ml/sklearn/to_sktime.py +100 -0
  323. scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
  324. scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
  325. scitex/ml/training/__init__.py +7 -0
  326. scitex/ml/utils/__init__.py +22 -0
  327. scitex/ml/utils/_check_params.py +50 -0
  328. scitex/ml/utils/_default_dataset.py +46 -0
  329. scitex/ml/utils/_format_samples_for_sktime.py +26 -0
  330. scitex/ml/utils/_label_encoder.py +134 -0
  331. scitex/ml/utils/_merge_labels.py +22 -0
  332. scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
  333. scitex/ml/utils/_under_sample.py +51 -0
  334. scitex/ml/utils/_verify_n_gpus.py +16 -0
  335. scitex/ml/utils/grid_search.py +148 -0
  336. scitex/nn/_BNet.py +15 -9
  337. scitex/nn/_Filters.py +2 -2
  338. scitex/nn/_ModulationIndex.py +2 -2
  339. scitex/nn/_PAC.py +1 -1
  340. scitex/nn/_Spectrogram.py +12 -3
  341. scitex/nn/__init__.py +9 -10
  342. scitex/path/__init__.py +18 -0
  343. scitex/path/_clean.py +4 -0
  344. scitex/path/_find.py +9 -4
  345. scitex/path/_symlink.py +348 -0
  346. scitex/path/_version.py +4 -3
  347. scitex/pd/__init__.py +2 -0
  348. scitex/pd/_get_unique.py +99 -0
  349. scitex/plt/__init__.py +114 -5
  350. scitex/plt/_subplots/_AxesWrapper.py +1 -3
  351. scitex/plt/_subplots/_AxisWrapper.py +7 -3
  352. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
  353. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
  354. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
  355. scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
  356. scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
  357. scitex/plt/_subplots/_FigWrapper.py +62 -6
  358. scitex/plt/_subplots/_export_as_csv.py +43 -27
  359. scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
  360. scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
  361. scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
  362. scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
  363. scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
  364. scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
  365. scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
  366. scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
  367. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
  368. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
  369. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
  370. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
  371. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
  372. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
  373. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
  374. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
  375. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
  376. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
  377. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
  378. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
  379. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
  380. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
  381. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
  382. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
  383. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
  384. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
  385. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
  386. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
  387. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
  388. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
  389. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
  390. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
  391. scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
  392. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
  393. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
  394. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
  395. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
  396. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
  397. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
  398. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
  399. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
  400. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
  401. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
  402. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
  403. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
  404. scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
  405. scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
  406. scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
  407. scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
  408. scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
  409. scitex/plt/ax/_style/_hide_spines.py +1 -3
  410. scitex/plt/ax/_style/_rotate_labels.py +180 -76
  411. scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
  412. scitex/plt/ax/_style/_set_meta.py +11 -4
  413. scitex/plt/ax/_style/_set_supxyt.py +3 -3
  414. scitex/plt/ax/_style/_set_xyt.py +3 -3
  415. scitex/plt/ax/_style/_share_axes.py +2 -2
  416. scitex/plt/color/__init__.py +4 -4
  417. scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
  418. scitex/plt/utils/_configure_mpl.py +99 -86
  419. scitex/plt/utils/_histogram_utils.py +1 -3
  420. scitex/plt/utils/_is_valid_axis.py +1 -3
  421. scitex/plt/utils/_scitex_config.py +1 -0
  422. scitex/repro/__init__.py +75 -0
  423. scitex/{reproduce → repro}/_gen_ID.py +1 -1
  424. scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
  425. scitex/repro_rng/_RandomStateManager.py +590 -0
  426. scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  427. scitex/repro_rng/__init__.py +39 -0
  428. scitex/reproduce/__init__.py +25 -13
  429. scitex/reproduce/_hash_array.py +22 -0
  430. scitex/resource/_get_processor_usages.py +4 -4
  431. scitex/resource/_get_specs.py +2 -2
  432. scitex/resource/_log_processor_usages.py +2 -2
  433. scitex/rng/_RandomStateManager.py +590 -0
  434. scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  435. scitex/rng/__init__.py +39 -0
  436. scitex/scholar/__init__.py +309 -19
  437. scitex/scholar/__main__.py +319 -0
  438. scitex/scholar/auth/ScholarAuthManager.py +308 -0
  439. scitex/scholar/auth/__init__.py +12 -0
  440. scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
  441. scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
  442. scitex/scholar/auth/core/StrategyResolver.py +309 -0
  443. scitex/scholar/auth/core/__init__.py +16 -0
  444. scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
  445. scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
  446. scitex/scholar/auth/gateway/__init__.py +38 -0
  447. scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
  448. scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
  449. scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
  450. scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
  451. scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
  452. scitex/scholar/auth/providers/__init__.py +18 -0
  453. scitex/scholar/auth/session/AuthCacheManager.py +189 -0
  454. scitex/scholar/auth/session/SessionManager.py +159 -0
  455. scitex/scholar/auth/session/__init__.py +11 -0
  456. scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
  457. scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
  458. scitex/scholar/auth/sso/SSOAutomator.py +180 -0
  459. scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
  460. scitex/scholar/auth/sso/__init__.py +15 -0
  461. scitex/scholar/browser/ScholarBrowserManager.py +705 -0
  462. scitex/scholar/browser/__init__.py +38 -0
  463. scitex/scholar/browser/utils/__init__.py +13 -0
  464. scitex/scholar/browser/utils/click_and_wait.py +205 -0
  465. scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
  466. scitex/scholar/browser/utils/wait_redirects.py +732 -0
  467. scitex/scholar/config/PublisherRules.py +132 -0
  468. scitex/scholar/config/ScholarConfig.py +126 -0
  469. scitex/scholar/config/__init__.py +17 -0
  470. scitex/scholar/core/Paper.py +627 -0
  471. scitex/scholar/core/Papers.py +722 -0
  472. scitex/scholar/core/Scholar.py +1975 -0
  473. scitex/scholar/core/__init__.py +9 -0
  474. scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
  475. scitex/scholar/impact_factor/__init__.py +20 -0
  476. scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
  477. scitex/scholar/impact_factor/estimation/__init__.py +40 -0
  478. scitex/scholar/impact_factor/estimation/build_database.py +0 -0
  479. scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
  480. scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
  481. scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
  482. scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
  483. scitex/scholar/integration/__init__.py +59 -0
  484. scitex/scholar/integration/base.py +502 -0
  485. scitex/scholar/integration/mendeley/__init__.py +22 -0
  486. scitex/scholar/integration/mendeley/exporter.py +166 -0
  487. scitex/scholar/integration/mendeley/importer.py +236 -0
  488. scitex/scholar/integration/mendeley/linker.py +79 -0
  489. scitex/scholar/integration/mendeley/mapper.py +212 -0
  490. scitex/scholar/integration/zotero/__init__.py +27 -0
  491. scitex/scholar/integration/zotero/__main__.py +264 -0
  492. scitex/scholar/integration/zotero/exporter.py +351 -0
  493. scitex/scholar/integration/zotero/importer.py +372 -0
  494. scitex/scholar/integration/zotero/linker.py +415 -0
  495. scitex/scholar/integration/zotero/mapper.py +286 -0
  496. scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
  497. scitex/scholar/metadata_engines/__init__.py +21 -0
  498. scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
  499. scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
  500. scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
  501. scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
  502. scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
  503. scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
  504. scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
  505. scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
  506. scitex/scholar/metadata_engines/individual/__init__.py +7 -0
  507. scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
  508. scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
  509. scitex/scholar/metadata_engines/utils/__init__.py +30 -0
  510. scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
  511. scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
  512. scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
  513. scitex/scholar/pdf_download/__init__.py +5 -0
  514. scitex/scholar/pdf_download/strategies/__init__.py +38 -0
  515. scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
  516. scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
  517. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
  518. scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
  519. scitex/scholar/pdf_download/strategies/response_body.py +207 -0
  520. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
  521. scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
  522. scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
  523. scitex/scholar/pipelines/__init__.py +49 -0
  524. scitex/scholar/storage/BibTeXHandler.py +1018 -0
  525. scitex/scholar/storage/PaperIO.py +468 -0
  526. scitex/scholar/storage/ScholarLibrary.py +182 -0
  527. scitex/scholar/storage/_DeduplicationManager.py +548 -0
  528. scitex/scholar/storage/_LibraryCacheManager.py +724 -0
  529. scitex/scholar/storage/_LibraryManager.py +1835 -0
  530. scitex/scholar/storage/__init__.py +28 -0
  531. scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
  532. scitex/scholar/url_finder/__init__.py +7 -0
  533. scitex/scholar/url_finder/strategies/__init__.py +33 -0
  534. scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
  535. scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
  536. scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
  537. scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
  538. scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
  539. scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
  540. scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
  541. scitex/scholar/utils/__init__.py +22 -0
  542. scitex/scholar/utils/bibtex/__init__.py +9 -0
  543. scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
  544. scitex/scholar/utils/cleanup/__init__.py +8 -0
  545. scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
  546. scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
  547. scitex/scholar/utils/text/_TextNormalizer.py +407 -0
  548. scitex/scholar/utils/text/__init__.py +9 -0
  549. scitex/scholar/zotero/__init__.py +38 -0
  550. scitex/session/__init__.py +51 -0
  551. scitex/session/_lifecycle.py +736 -0
  552. scitex/session/_manager.py +102 -0
  553. scitex/session/template.py +122 -0
  554. scitex/stats/__init__.py +30 -26
  555. scitex/stats/correct/__init__.py +21 -0
  556. scitex/stats/correct/_correct_bonferroni.py +551 -0
  557. scitex/stats/correct/_correct_fdr.py +634 -0
  558. scitex/stats/correct/_correct_holm.py +548 -0
  559. scitex/stats/correct/_correct_sidak.py +499 -0
  560. scitex/stats/descriptive/__init__.py +85 -0
  561. scitex/stats/descriptive/_circular.py +540 -0
  562. scitex/stats/descriptive/_describe.py +219 -0
  563. scitex/stats/descriptive/_nan.py +518 -0
  564. scitex/stats/descriptive/_real.py +189 -0
  565. scitex/stats/effect_sizes/__init__.py +41 -0
  566. scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
  567. scitex/stats/effect_sizes/_cohens_d.py +342 -0
  568. scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
  569. scitex/stats/effect_sizes/_eta_squared.py +302 -0
  570. scitex/stats/effect_sizes/_prob_superiority.py +296 -0
  571. scitex/stats/posthoc/__init__.py +19 -0
  572. scitex/stats/posthoc/_dunnett.py +463 -0
  573. scitex/stats/posthoc/_games_howell.py +383 -0
  574. scitex/stats/posthoc/_tukey_hsd.py +367 -0
  575. scitex/stats/power/__init__.py +19 -0
  576. scitex/stats/power/_power.py +433 -0
  577. scitex/stats/template.py +119 -0
  578. scitex/stats/utils/__init__.py +62 -0
  579. scitex/stats/utils/_effect_size.py +985 -0
  580. scitex/stats/utils/_formatters.py +270 -0
  581. scitex/stats/utils/_normalizers.py +927 -0
  582. scitex/stats/utils/_power.py +433 -0
  583. scitex/stats_v01/_EffectSizeCalculator.py +488 -0
  584. scitex/stats_v01/_StatisticalValidator.py +411 -0
  585. scitex/stats_v01/__init__.py +60 -0
  586. scitex/stats_v01/_additional_tests.py +415 -0
  587. scitex/{stats → stats_v01}/_p2stars.py +19 -5
  588. scitex/stats_v01/_two_sample_tests.py +141 -0
  589. scitex/stats_v01/desc/__init__.py +83 -0
  590. scitex/stats_v01/desc/_circular.py +540 -0
  591. scitex/stats_v01/desc/_describe.py +219 -0
  592. scitex/stats_v01/desc/_nan.py +518 -0
  593. scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
  594. scitex/stats_v01/desc/_real.py +189 -0
  595. scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
  596. scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
  597. scitex/str/__init__.py +1 -3
  598. scitex/str/_clean_path.py +6 -2
  599. scitex/str/_latex_fallback.py +267 -160
  600. scitex/str/_parse.py +44 -36
  601. scitex/str/_printc.py +1 -3
  602. scitex/template/__init__.py +87 -0
  603. scitex/template/_create_project.py +267 -0
  604. scitex/template/create_pip_project.py +80 -0
  605. scitex/template/create_research.py +80 -0
  606. scitex/template/create_singularity.py +80 -0
  607. scitex/units.py +291 -0
  608. scitex/utils/_compress_hdf5.py +14 -3
  609. scitex/utils/_email.py +21 -2
  610. scitex/utils/_grid.py +6 -4
  611. scitex/utils/_notify.py +13 -10
  612. scitex/utils/_verify_scitex_format.py +589 -0
  613. scitex/utils/_verify_scitex_format_v01.py +370 -0
  614. scitex/utils/template.py +122 -0
  615. scitex/web/_search_pubmed.py +62 -16
  616. scitex-2.1.0.dist-info/LICENSE +21 -0
  617. scitex-2.1.0.dist-info/METADATA +677 -0
  618. scitex-2.1.0.dist-info/RECORD +919 -0
  619. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
  620. scitex-2.1.0.dist-info/entry_points.txt +3 -0
  621. scitex/ai/__Classifiers.py +0 -101
  622. scitex/ai/classification/classification_reporter.py +0 -1137
  623. scitex/ai/classification/classifiers.py +0 -101
  624. scitex/ai/classification_reporter.py +0 -1161
  625. scitex/ai/genai/__init__.py +0 -277
  626. scitex/ai/genai/anthropic_provider.py +0 -320
  627. scitex/ai/genai/anthropic_refactored.py +0 -109
  628. scitex/ai/genai/auth_manager.py +0 -200
  629. scitex/ai/genai/base_provider.py +0 -291
  630. scitex/ai/genai/chat_history.py +0 -307
  631. scitex/ai/genai/cost_tracker.py +0 -276
  632. scitex/ai/genai/deepseek_provider.py +0 -251
  633. scitex/ai/genai/google_provider.py +0 -228
  634. scitex/ai/genai/groq_provider.py +0 -248
  635. scitex/ai/genai/image_processor.py +0 -250
  636. scitex/ai/genai/llama_provider.py +0 -214
  637. scitex/ai/genai/mock_provider.py +0 -127
  638. scitex/ai/genai/model_registry.py +0 -304
  639. scitex/ai/genai/openai_provider.py +0 -293
  640. scitex/ai/genai/perplexity_provider.py +0 -205
  641. scitex/ai/genai/provider_base.py +0 -302
  642. scitex/ai/genai/provider_factory.py +0 -370
  643. scitex/ai/genai/response_handler.py +0 -235
  644. scitex/ai/layer/_Pass.py +0 -21
  645. scitex/ai/layer/__init__.py +0 -10
  646. scitex/ai/layer/_switch.py +0 -8
  647. scitex/ai/metrics/_bACC.py +0 -51
  648. scitex/ai/plt/_learning_curve.py +0 -194
  649. scitex/ai/plt/_optuna_study.py +0 -111
  650. scitex/ai/plt/aucs/__init__.py +0 -2
  651. scitex/ai/plt/aucs/example.py +0 -60
  652. scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
  653. scitex/ai/plt/aucs/roc_auc.py +0 -246
  654. scitex/ai/sampling/undersample.py +0 -29
  655. scitex/db/_SQLite3.py +0 -2136
  656. scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
  657. scitex/gen/_close.py +0 -222
  658. scitex/gen/_start.py +0 -451
  659. scitex/general/__init__.py +0 -5
  660. scitex/io/_load_modules/_db.py +0 -24
  661. scitex/life/__init__.py +0 -10
  662. scitex/life/_monitor_rain.py +0 -49
  663. scitex/reproduce/_fix_seeds.py +0 -45
  664. scitex/res/__init__.py +0 -5
  665. scitex/scholar/_local_search.py +0 -454
  666. scitex/scholar/_paper.py +0 -244
  667. scitex/scholar/_pdf_downloader.py +0 -325
  668. scitex/scholar/_search.py +0 -393
  669. scitex/scholar/_vector_search.py +0 -370
  670. scitex/scholar/_web_sources.py +0 -457
  671. scitex/stats/desc/__init__.py +0 -40
  672. scitex-2.0.0.dist-info/METADATA +0 -307
  673. scitex-2.0.0.dist-info/RECORD +0 -572
  674. scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
  675. /scitex/ai/{act → activation}/__init__.py +0 -0
  676. /scitex/ai/{act → activation}/_define.py +0 -0
  677. /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
  678. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
  679. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
  680. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
  681. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
  682. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
  683. /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
  684. /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
  685. /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
  686. /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
  687. /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
  688. /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
  689. /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
  690. /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
  691. /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
  692. /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
  693. /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
  694. /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
  695. /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
  696. /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
  697. /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
  698. /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
  699. /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
  700. /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
  701. /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
  702. /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
  703. /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
  704. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,627 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-10-07 10:47:02 (ywatanabe)"
4
+ # File: /home/ywatanabe/proj/scitex_repo/src/scitex/scholar/core/Paper.py
5
+ # ----------------------------------------
6
+ from __future__ import annotations
7
+ import os
8
+ __FILE__ = (
9
+ "./src/scitex/scholar/core/Paper.py"
10
+ )
11
+ __DIR__ = os.path.dirname(__FILE__)
12
+ # ----------------------------------------
13
+
14
+ """
15
+ Type-safe metadata structures for Scholar papers with runtime validation.
16
+
17
+ This module uses Pydantic for:
18
+ - Runtime type validation
19
+ - Automatic type coercion
20
+ - JSON key aliasing (e.g., "2025" -> y2025)
21
+ - Clean serialization/deserialization
22
+ """
23
+
24
+ from typing import Any, Dict, List, Optional
25
+
26
+ from pydantic import BaseModel, Field, field_validator, model_validator
27
+
28
+
29
class IDMetadata(BaseModel):
    """Identification metadata with source tracking.

    Every identifier field has a companion ``<field>_engines`` list used
    for source tracking; the lists default to empty.
    """

    # Pydantic v2 configuration (replaces the deprecated v1-style
    # ``class Config``): allow population by field name as well as alias,
    # and re-run validation on attribute assignment.
    model_config = {"populate_by_name": True, "validate_assignment": True}

    doi: Optional[str] = None
    doi_engines: List[str] = Field(default_factory=list)

    arxiv_id: Optional[str] = None
    arxiv_id_engines: List[str] = Field(default_factory=list)

    pmid: Optional[str] = None
    pmid_engines: List[str] = Field(default_factory=list)

    corpus_id: Optional[str] = None
    corpus_id_engines: List[str] = Field(default_factory=list)

    semantic_id: Optional[str] = None
    semantic_id_engines: List[str] = Field(default_factory=list)

    ieee_id: Optional[str] = None
    ieee_id_engines: List[str] = Field(default_factory=list)

    scholar_id: Optional[str] = None
    scholar_id_engines: List[str] = Field(default_factory=list)
56
+
57
+
58
class BasicMetadata(BaseModel):
    """Basic bibliographic metadata with source tracking.

    Every value field has a companion ``<field>_engines`` list used for
    source tracking; the lists default to empty.
    """

    # Pydantic v2 configuration (replaces the deprecated v1-style
    # ``class Config``): allow population by field name as well as alias,
    # and re-run validation on attribute assignment.
    model_config = {"populate_by_name": True, "validate_assignment": True}

    title: Optional[str] = None
    title_engines: List[str] = Field(default_factory=list)

    authors: Optional[List[str]] = None
    authors_engines: List[str] = Field(default_factory=list)

    year: Optional[int] = None
    year_engines: List[str] = Field(default_factory=list)

    abstract: Optional[str] = None
    abstract_engines: List[str] = Field(default_factory=list)

    keywords: Optional[List[str]] = None
    keywords_engines: List[str] = Field(default_factory=list)

    type: Optional[str] = None  # article, conference, preprint, etc.
    type_engines: List[str] = Field(default_factory=list)

    @field_validator("year")
    @classmethod
    def validate_year(cls, v: Optional[int]) -> Optional[int]:
        """Validate that ``year`` is reasonable.

        Returns:
            The value unchanged; ``None`` is accepted as "unknown".

        Raises:
            ValueError: If the year lies outside 1900-2100 (inclusive).
        """
        if v is not None and not 1900 <= v <= 2100:
            raise ValueError(
                f"Year {v} is outside reasonable range (1900-2100)"
            )
        return v
92
+
93
+
94
class CitationCountMetadata(BaseModel):
    """Citation count metadata with yearly breakdown and source tracking.

    Yearly counts are stored under Python-safe names (``y2025``) and
    aliased to the bare year (``"2025"``) for JSON input and output.
    """

    # Pydantic v2 configuration (replaces the deprecated v1-style
    # ``class Config``). ``populate_by_name`` allows both "2025" and
    # "y2025" as input keys; ``validate_assignment`` re-runs validation on
    # attribute assignment.
    model_config = {"populate_by_name": True, "validate_assignment": True}

    total: Optional[int] = None
    total_engines: List[str] = Field(default_factory=list)

    # Yearly counts - Field(alias=...) maps JSON "2025" to Python y2025.
    y2025: Optional[int] = Field(None, alias="2025")
    y2025_engines: List[str] = Field(
        default_factory=list, alias="2025_engines"
    )

    y2024: Optional[int] = Field(None, alias="2024")
    y2024_engines: List[str] = Field(
        default_factory=list, alias="2024_engines"
    )

    y2023: Optional[int] = Field(None, alias="2023")
    y2023_engines: List[str] = Field(
        default_factory=list, alias="2023_engines"
    )

    y2022: Optional[int] = Field(None, alias="2022")
    y2022_engines: List[str] = Field(
        default_factory=list, alias="2022_engines"
    )

    y2021: Optional[int] = Field(None, alias="2021")
    y2021_engines: List[str] = Field(
        default_factory=list, alias="2021_engines"
    )

    y2020: Optional[int] = Field(None, alias="2020")
    y2020_engines: List[str] = Field(
        default_factory=list, alias="2020_engines"
    )

    y2019: Optional[int] = Field(None, alias="2019")
    y2019_engines: List[str] = Field(
        default_factory=list, alias="2019_engines"
    )

    y2018: Optional[int] = Field(None, alias="2018")
    y2018_engines: List[str] = Field(
        default_factory=list, alias="2018_engines"
    )

    y2017: Optional[int] = Field(None, alias="2017")
    y2017_engines: List[str] = Field(
        default_factory=list, alias="2017_engines"
    )

    y2016: Optional[int] = Field(None, alias="2016")
    y2016_engines: List[str] = Field(
        default_factory=list, alias="2016_engines"
    )

    y2015: Optional[int] = Field(None, alias="2015")
    y2015_engines: List[str] = Field(
        default_factory=list, alias="2015_engines"
    )

    @field_validator(
        "total",
        "y2025",
        "y2024",
        "y2023",
        "y2022",
        "y2021",
        "y2020",
        "y2019",
        "y2018",
        "y2017",
        "y2016",
        "y2015",
    )
    @classmethod
    def validate_citation_counts(cls, v: Optional[int]) -> Optional[int]:
        """Validate that a citation count is non-negative.

        Returns:
            The value unchanged; ``None`` is accepted as "unknown".

        Raises:
            ValueError: If the count is negative.
        """
        if v is not None and v < 0:
            raise ValueError(f"Citation count cannot be negative: {v}")
        return v

    def model_dump(self, **kwargs) -> Dict[str, Any]:
        """Serialize to a dict, always using the field aliases as keys.

        Any ``by_alias`` value supplied by the caller is overridden so the
        yearly fields are emitted as ``"2025"`` rather than ``"y2025"``.
        All other keyword arguments are forwarded to Pydantic unchanged.
        """
        kwargs["by_alias"] = True
        return super().model_dump(**kwargs)
187
+
188
+
189
class PublicationMetadata(BaseModel):
    """Publication venue metadata with source tracking.

    Every value field has a companion ``<field>_engines`` list used for
    source tracking; the lists default to empty.
    """

    # Pydantic v2 configuration (replaces the deprecated v1-style
    # ``class Config``): allow population by field name as well as alias,
    # and re-run validation on attribute assignment.
    model_config = {"populate_by_name": True, "validate_assignment": True}

    journal: Optional[str] = None
    journal_engines: List[str] = Field(default_factory=list)

    short_journal: Optional[str] = None
    short_journal_engines: List[str] = Field(default_factory=list)

    impact_factor: Optional[float] = None
    impact_factor_engines: List[str] = Field(default_factory=list)

    issn: Optional[str] = None
    issn_engines: List[str] = Field(default_factory=list)

    volume: Optional[str] = None
    volume_engines: List[str] = Field(default_factory=list)

    issue: Optional[str] = None
    issue_engines: List[str] = Field(default_factory=list)

    first_page: Optional[str] = None
    first_page_engines: List[str] = Field(default_factory=list)

    last_page: Optional[str] = None
    last_page_engines: List[str] = Field(default_factory=list)

    pages: Optional[str] = None
    pages_engines: List[str] = Field(default_factory=list)

    publisher: Optional[str] = None
    publisher_engines: List[str] = Field(default_factory=list)

    @field_validator("impact_factor")
    @classmethod
    def validate_impact_factor(cls, v: Optional[float]) -> Optional[float]:
        """Validate that the impact factor is non-negative.

        Returns:
            The value unchanged; ``None`` is accepted as "unknown".

        Raises:
            ValueError: If the impact factor is negative.
        """
        if v is not None and v < 0:
            raise ValueError(f"Impact factor cannot be negative: {v}")
        return v
233
+
234
+
235
class URLMetadata(BaseModel):
    """URL metadata with source tracking.

    Value fields have a companion ``*_engines`` list used for source
    tracking; the lists default to empty.
    """

    # Pydantic v2 configuration (replaces the deprecated v1-style
    # ``class Config``): allow population by field name as well as alias,
    # and re-run validation on attribute assignment.
    model_config = {"populate_by_name": True, "validate_assignment": True}

    doi: Optional[str] = None
    doi_engines: List[str] = Field(default_factory=list)

    publisher: Optional[str] = None
    publisher_engines: List[str] = Field(default_factory=list)

    arxiv: Optional[str] = None
    arxiv_engines: List[str] = Field(default_factory=list)

    corpus_id: Optional[str] = None
    corpus_id_engines: List[str] = Field(default_factory=list)

    openurl_query: Optional[str] = None
    # NOTE: the companion list is named ``openurl_engines`` rather than
    # ``openurl_query_engines``; the name is kept because it is part of
    # the serialized schema.
    openurl_engines: List[str] = Field(default_factory=list)

    openurl_resolved: List[str] = Field(default_factory=list)
    openurl_resolved_engines: List[str] = Field(default_factory=list)

    pdfs: List[Dict[str, str]] = Field(default_factory=list)
    pdfs_engines: List[str] = Field(default_factory=list)

    supplementary_files: List[str] = Field(default_factory=list)
    supplementary_files_engines: List[str] = Field(default_factory=list)

    additional_files: List[str] = Field(default_factory=list)
    additional_files_engines: List[str] = Field(default_factory=list)
268
+
269
+
270
class PathMetadata(BaseModel):
    """Local file path metadata with source tracking.

    Each path list has a companion ``<field>_engines`` list used for
    source tracking; all lists default to empty.
    """

    # Pydantic v2 configuration (replaces the deprecated v1-style
    # ``class Config``): allow population by field name as well as alias,
    # and re-run validation on attribute assignment.
    model_config = {"populate_by_name": True, "validate_assignment": True}

    pdfs: List[str] = Field(default_factory=list)
    pdfs_engines: List[str] = Field(default_factory=list)

    supplementary_files: List[str] = Field(default_factory=list)
    supplementary_files_engines: List[str] = Field(default_factory=list)

    additional_files: List[str] = Field(default_factory=list)
    additional_files_engines: List[str] = Field(default_factory=list)
285
+
286
+
287
class SystemMetadata(BaseModel):
    """System tracking metadata (which engines were used to search).

    Each flag is tri-state: ``True``, ``False`` or ``None`` (the default).
    """

    # Pydantic v2 configuration (replaces the deprecated v1-style
    # ``class Config``): allow population by field name as well as alias,
    # and re-run validation on attribute assignment.
    model_config = {"populate_by_name": True, "validate_assignment": True}

    searched_by_arXiv: Optional[bool] = None
    searched_by_CrossRef: Optional[bool] = None
    searched_by_CrossRefLocal: Optional[bool] = None
    searched_by_OpenAlex: Optional[bool] = None
    searched_by_PubMed: Optional[bool] = None
    searched_by_Semantic_Scholar: Optional[bool] = None
    searched_by_URL: Optional[bool] = None
301
+
302
+
303
def _engines_with_sync_marker(engines: List[str]) -> List[str]:
    """Return a copy of *engines* with "PaperMetadataStructure" appended if absent."""
    tagged = list(engines)
    if "PaperMetadataStructure" not in tagged:
        tagged.append("PaperMetadataStructure")
    return tagged


class PaperMetadataStructure(BaseModel):
    """Complete paper metadata structure with nested typed sections."""

    # Pydantic v2 configuration (replaces the deprecated v1-style
    # ``class Config``): allow population by field name as well as alias,
    # and re-run validation on attribute assignment.
    model_config = {"populate_by_name": True, "validate_assignment": True}

    id: IDMetadata = Field(default_factory=IDMetadata)
    basic: BasicMetadata = Field(default_factory=BasicMetadata)
    citation_count: CitationCountMetadata = Field(
        default_factory=CitationCountMetadata
    )
    publication: PublicationMetadata = Field(
        default_factory=PublicationMetadata
    )
    url: URLMetadata = Field(default_factory=URLMetadata)
    path: PathMetadata = Field(default_factory=PathMetadata)
    system: SystemMetadata = Field(default_factory=SystemMetadata)

    @model_validator(mode="after")
    def sync_ids_and_urls(self):
        """Automatically sync ID and URL fields with source tracking.

        Generates URLs from IDs and vice versa for:
        - DOI <-> url.doi
        - arXiv ID <-> url.arxiv
        - Corpus ID <-> url.corpus_id

        When a value is derived and the source side has a non-empty
        engines list, that list is copied to the derived side and tagged
        with "PaperMetadataStructure".

        Returns:
            ``self``, as required for an ``after`` model validator.
        """
        # NOTE(review): the nesting of the engine-propagation checks was
        # reconstructed from a flattened diff; confirm that the marker is
        # meant to be added only when the source engines list is non-empty.
        marker = "PaperMetadataStructure"
        ids = self.id
        urls = self.url

        # DOI sync
        if ids.doi and not urls.doi:
            urls.doi = f"https://doi.org/{ids.doi}"
            if ids.doi_engines and marker not in urls.doi_engines:
                urls.doi_engines = _engines_with_sync_marker(ids.doi_engines)
        elif urls.doi and not ids.doi:
            if "doi.org/" in urls.doi:
                ids.doi = urls.doi.split("doi.org/")[-1]
                if urls.doi_engines and marker not in ids.doi_engines:
                    ids.doi_engines = _engines_with_sync_marker(
                        urls.doi_engines
                    )
        elif ids.doi and urls.doi:
            # Both present: only normalise the URL scheme to https.
            if not urls.doi.startswith("https://"):
                if urls.doi.startswith("http://"):
                    urls.doi = "https://" + urls.doi[7:]
                else:
                    urls.doi = f"https://doi.org/{ids.doi}"

        # arXiv sync
        if ids.arxiv_id and not urls.arxiv:
            urls.arxiv = f"https://arxiv.org/abs/{ids.arxiv_id}"
            if ids.arxiv_id_engines and marker not in urls.arxiv_engines:
                urls.arxiv_engines = _engines_with_sync_marker(
                    ids.arxiv_id_engines
                )
        elif urls.arxiv and not ids.arxiv_id:
            if "arxiv.org/abs/" in urls.arxiv:
                # Drop any query string or fragment after the identifier.
                tail = urls.arxiv.split("arxiv.org/abs/")[-1]
                ids.arxiv_id = tail.split("?")[0].split("#")[0]
                if urls.arxiv_engines and marker not in ids.arxiv_id_engines:
                    ids.arxiv_id_engines = _engines_with_sync_marker(
                        urls.arxiv_engines
                    )

        # Corpus ID sync
        if ids.corpus_id and not urls.corpus_id:
            corpus_id_clean = str(ids.corpus_id).replace("CorpusId:", "")
            urls.corpus_id = (
                f"https://www.semanticscholar.org/paper/{corpus_id_clean}"
            )
            if ids.corpus_id_engines and marker not in urls.corpus_id_engines:
                urls.corpus_id_engines = _engines_with_sync_marker(
                    ids.corpus_id_engines
                )
        elif urls.corpus_id and not ids.corpus_id:
            if "semanticscholar.org/paper/" in urls.corpus_id:
                # Drop any query string or fragment after the identifier.
                tail = urls.corpus_id.split("semanticscholar.org/paper/")[-1]
                ids.corpus_id = tail.split("?")[0].split("#")[0]
                if urls.corpus_id_engines and marker not in ids.corpus_id_engines:
                    ids.corpus_id_engines = _engines_with_sync_marker(
                        urls.corpus_id_engines
                    )

        return self

    def set_doi(self, doi: str):
        """Set DOI and automatically sync URL.

        Use this method instead of direct assignment for automatic sync.
        A falsy ``doi`` is stored on ``id.doi`` but leaves ``url.doi``
        untouched; the engines lists are not modified.
        """
        self.id.doi = doi
        if doi:
            self.url.doi = f"https://doi.org/{doi}"

    def set_doi_url(self, url: str):
        """Set DOI URL and automatically extract/sync DOI.

        Use this method instead of direct assignment for automatic sync.
        ``id.doi`` is only updated when ``url`` contains ``"doi.org/"``;
        the engines lists are not modified.
        """
        self.url.doi = url
        if url and "doi.org/" in url:
            self.id.doi = url.split("doi.org/")[-1]

    def model_dump(self, **kwargs) -> Dict[str, Any]:
        """Serialize to a dict, using aliases in every nested section.

        Any ``by_alias`` value supplied by the caller is overridden.
        Serialization is delegated to Pydantic so that options such as
        ``include``/``exclude`` apply to this model's own fields instead
        of being forwarded verbatim to each nested section.
        """
        kwargs["by_alias"] = True
        return super().model_dump(**kwargs)
423
+
424
+
425
class ContainerMetadata(BaseModel):
    """Container metadata for system tracking."""

    # Pydantic v2 configuration (replaces the deprecated v1-style
    # ``class Config``): allow population by field name as well as alias,
    # and re-run validation on attribute assignment.
    model_config = {"populate_by_name": True, "validate_assignment": True}

    scitex_id: Optional[str] = None
    library_id: Optional[str] = None
    created_at: Optional[str] = None
    created_by: Optional[str] = None
    updated_at: Optional[str] = None
    projects: List[str] = Field(default_factory=list)
    master_storage_path: Optional[str] = None
    readable_name: Optional[str] = None
    metadata_file: Optional[str] = None
    pdf_downloaded_at: Optional[str] = None
    pdf_size_bytes: Optional[int] = None

    @field_validator("pdf_size_bytes")
    @classmethod
    def validate_pdf_size(cls, v: Optional[int]) -> Optional[int]:
        """Validate that the PDF size is non-negative.

        Returns:
            The value unchanged; ``None`` is accepted as "unknown".

        Raises:
            ValueError: If the size is negative.
        """
        if v is not None and v < 0:
            raise ValueError(f"PDF size cannot be negative: {v}")
        return v
451
+
452
+
453
class Paper(BaseModel):
    """Complete paper with metadata and container."""

    # Pydantic v2 configuration (replaces the deprecated v1-style
    # ``class Config``): allow population by field name as well as alias,
    # and re-run validation on attribute assignment.
    model_config = {"populate_by_name": True, "validate_assignment": True}

    metadata: PaperMetadataStructure = Field(
        default_factory=PaperMetadataStructure
    )
    container: ContainerMetadata = Field(default_factory=ContainerMetadata)

    def model_dump(self, **kwargs) -> Dict[str, Any]:
        """Serialize to a dict, using aliases in all nested models.

        Any ``by_alias`` value supplied by the caller is overridden.
        Serialization is delegated to Pydantic so that options such as
        ``include``/``exclude`` apply to ``metadata``/``container`` at this
        level instead of being forwarded verbatim to the nested models.
        """
        kwargs["by_alias"] = True
        return super().model_dump(**kwargs)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "Paper":
        """Create from dictionary (for loading from JSON).

        Uses Pydantic's model_validate which handles:
        - Type validation
        - Type coercion (e.g., "2024" -> 2024)
        - Field aliases (e.g., "2025" -> y2025)
        """
        return cls.model_validate(data)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for JSON serialization.

        Alias for model_dump() for backward compatibility.
        """
        return self.model_dump()
491
+
492
+
493
if __name__ == "__main__":
    # Self-contained demonstration of the Paper model; prints to stdout.
    import json

    # Imported locally: only the demo needs the concrete exception type.
    from pydantic import ValidationError

    print("=" * 80)
    print("Paper Class - Pydantic Type-Safe Metadata with Runtime Validation")
    print("=" * 80)

    # 1. Create empty paper
    print("\n1. Create empty Paper:")
    paper = Paper()
    print(f" Empty paper created: {type(paper).__name__}")

    # 2. Set basic metadata
    print("\n2. Set basic metadata:")
    paper.metadata.basic.title = "Attention Is All You Need"
    paper.metadata.basic.authors = [
        "Vaswani, Ashish",
        "Shazeer, Noam",
        "Parmar, Niki",
    ]
    paper.metadata.basic.year = 2017
    paper.metadata.basic.abstract = "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks."
    paper.metadata.basic.keywords = [
        "transformer",
        "attention",
        "neural networks",
    ]
    print(f" Title: {paper.metadata.basic.title}")
    print(f" Authors: {paper.metadata.basic.authors[:2]}...")
    print(f" Year: {paper.metadata.basic.year}")

    # 3. Set DOI (auto-syncs URL)
    print("\n3. Set DOI (auto-syncs DOI URL):")
    paper.metadata.set_doi("10.48550/arXiv.1706.03762")
    print(f" DOI: {paper.metadata.id.doi}")
    print(f" DOI URL (auto-synced): {paper.metadata.url.doi}")

    # 4. Set publication details
    print("\n4. Set publication details:")
    paper.metadata.publication.journal = "NeurIPS"
    paper.metadata.publication.volume = "30"
    paper.metadata.publication.impact_factor = 12.345
    print(f" Journal: {paper.metadata.publication.journal}")
    print(f" Volume: {paper.metadata.publication.volume}")
    print(f" Impact Factor: {paper.metadata.publication.impact_factor}")

    # 5. Set citation counts with year breakdown
    print("\n5. Set citation counts:")
    paper.metadata.citation_count.total = 85432
    paper.metadata.citation_count.y2024 = 15234
    paper.metadata.citation_count.y2023 = 18765
    print(f" Total citations: {paper.metadata.citation_count.total}")
    print(f" 2024 citations: {paper.metadata.citation_count.y2024}")
    print(f" 2023 citations: {paper.metadata.citation_count.y2023}")

    # 6. Set container metadata
    print("\n6. Set container metadata:")
    paper.container.projects = ["transformers_research", "nlp_2024"]
    paper.container.library_id = "ABC12345"
    paper.container.readable_name = "Vaswani-2017-NeurIPS"
    print(f" Projects: {paper.container.projects}")
    print(f" Library ID: {paper.container.library_id}")
    print(f" Readable name: {paper.container.readable_name}")

    # 7. Demonstrate type validation
    print("\n7. Type validation (validate_assignment=True):")
    print(" ✓ Automatic type coercion: year='2017' -> 2017 (int)")
    paper.metadata.basic.year = "2017"  # String coerced to int
    print(
        f" Result: {paper.metadata.basic.year} (type: {type(paper.metadata.basic.year).__name__})"
    )

    # Only ValidationError counts as a correct rejection; any other
    # exception is a real failure and is allowed to propagate.
    print(" ✓ Range validation: year must be 1900-2100")
    try:
        paper.metadata.basic.year = 1800  # Too old
        print(" ERROR: Should have raised ValidationError")
    except ValidationError as e:
        print(f" Correctly rejected: {type(e).__name__}")

    print(" ✓ Non-negative validation: citations cannot be negative")
    try:
        paper.metadata.citation_count.total = -100
        print(" ERROR: Should have raised ValidationError")
    except ValidationError as e:
        print(f" Correctly rejected: {type(e).__name__}")

    # Reset to valid value
    paper.metadata.basic.year = 2017
    paper.metadata.citation_count.total = 85432

    # 8. Serialize to JSON (with aliases)
    print("\n8. Serialize to JSON with field aliases:")
    paper_dict = paper.to_dict()
    print(" Year fields use numeric keys in JSON:")
    print(
        f" '2024': {paper_dict['metadata']['citation_count'].get('2024')}"
    )
    print(
        f" '2023': {paper_dict['metadata']['citation_count'].get('2023')}"
    )

    # 9. Create from dictionary
    print("\n9. Load from dictionary (from_dict):")
    sample_data = {
        "metadata": {
            "basic": {
                "title": "BERT: Pre-training of Deep Bidirectional Transformers",
                "year": 2019,
            },
            "id": {"doi": "10.18653/v1/N19-1423"},
            "citation_count": {
                "2024": 5678,  # Numeric key maps to y2024
                "total": 45000,
            },
        }
    }

    paper2 = Paper.from_dict(sample_data)
    print(f" Title: {paper2.metadata.basic.title}")
    print(f" Year: {paper2.metadata.basic.year}")
    print(f" DOI: {paper2.metadata.id.doi}")
    print(f" DOI URL (auto-synced): {paper2.metadata.url.doi}")
    print(f" 2024 citations: {paper2.metadata.citation_count.y2024}")

    # 10. Show JSON structure
    print("\n10. Full JSON structure (first 500 chars):")
    json_str = json.dumps(paper.to_dict(), indent=2)
    print(f" {json_str[:500]}...")

    print("\n" + "=" * 80)
    print("✅ Paper class demonstration complete!")
    print("=" * 80)
626
+
627
+ # EOF