scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
Files changed (704)
  1. scitex/__init__.py +53 -15
  2. scitex/__main__.py +72 -26
  3. scitex/__version__.py +1 -1
  4. scitex/_sh.py +145 -23
  5. scitex/ai/__init__.py +30 -16
  6. scitex/ai/_gen_ai/_Anthropic.py +5 -7
  7. scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
  8. scitex/ai/_gen_ai/_DeepSeek.py +10 -2
  9. scitex/ai/_gen_ai/_Google.py +2 -2
  10. scitex/ai/_gen_ai/_Llama.py +2 -2
  11. scitex/ai/_gen_ai/_OpenAI.py +2 -2
  12. scitex/ai/_gen_ai/_PARAMS.py +51 -65
  13. scitex/ai/_gen_ai/_Perplexity.py +2 -2
  14. scitex/ai/_gen_ai/__init__.py +25 -14
  15. scitex/ai/_gen_ai/_format_output_func.py +4 -4
  16. scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
  17. scitex/ai/classification/CrossValidationExperiment.py +374 -0
  18. scitex/ai/classification/__init__.py +43 -4
  19. scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
  20. scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
  21. scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
  22. scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
  23. scitex/ai/classification/reporters/__init__.py +11 -0
  24. scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  25. scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
  26. scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
  27. scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
  28. scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
  29. scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
  30. scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
  31. scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  32. scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  33. scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  34. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  35. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  36. scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  37. scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  38. scitex/ai/classification/timeseries/__init__.py +39 -0
  39. scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
  40. scitex/ai/clustering/_umap.py +2 -2
  41. scitex/ai/feature_extraction/vit.py +1 -0
  42. scitex/ai/feature_selection/__init__.py +30 -0
  43. scitex/ai/feature_selection/feature_selection.py +364 -0
  44. scitex/ai/loss/multi_task_loss.py +1 -1
  45. scitex/ai/metrics/__init__.py +51 -4
  46. scitex/ai/metrics/_calc_bacc.py +61 -0
  47. scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
  48. scitex/ai/metrics/_calc_clf_report.py +78 -0
  49. scitex/ai/metrics/_calc_conf_mat.py +93 -0
  50. scitex/ai/metrics/_calc_feature_importance.py +183 -0
  51. scitex/ai/metrics/_calc_mcc.py +61 -0
  52. scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
  53. scitex/ai/metrics/_calc_roc_auc.py +110 -0
  54. scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
  55. scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
  56. scitex/ai/metrics/_normalize_labels.py +83 -0
  57. scitex/ai/plt/__init__.py +47 -8
  58. scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
  59. scitex/ai/plt/_plot_feature_importance.py +323 -0
  60. scitex/ai/plt/_plot_learning_curve.py +345 -0
  61. scitex/ai/plt/_plot_optuna_study.py +225 -0
  62. scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
  63. scitex/ai/plt/_plot_roc_curve.py +255 -0
  64. scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
  65. scitex/ai/training/__init__.py +2 -2
  66. scitex/ai/utils/grid_search.py +3 -3
  67. scitex/benchmark/__init__.py +52 -0
  68. scitex/benchmark/benchmark.py +400 -0
  69. scitex/benchmark/monitor.py +370 -0
  70. scitex/benchmark/profiler.py +297 -0
  71. scitex/browser/__init__.py +48 -0
  72. scitex/browser/automation/CookieHandler.py +216 -0
  73. scitex/browser/automation/__init__.py +7 -0
  74. scitex/browser/collaboration/__init__.py +55 -0
  75. scitex/browser/collaboration/auth_helpers.py +94 -0
  76. scitex/browser/collaboration/collaborative_agent.py +136 -0
  77. scitex/browser/collaboration/credential_manager.py +188 -0
  78. scitex/browser/collaboration/interactive_panel.py +400 -0
  79. scitex/browser/collaboration/persistent_browser.py +170 -0
  80. scitex/browser/collaboration/shared_session.py +383 -0
  81. scitex/browser/collaboration/standard_interactions.py +246 -0
  82. scitex/browser/collaboration/visual_feedback.py +181 -0
  83. scitex/browser/core/BrowserMixin.py +326 -0
  84. scitex/browser/core/ChromeProfileManager.py +446 -0
  85. scitex/browser/core/__init__.py +9 -0
  86. scitex/browser/debugging/__init__.py +18 -0
  87. scitex/browser/debugging/_browser_logger.py +657 -0
  88. scitex/browser/debugging/_highlight_element.py +143 -0
  89. scitex/browser/debugging/_show_grid.py +154 -0
  90. scitex/browser/interaction/__init__.py +24 -0
  91. scitex/browser/interaction/click_center.py +149 -0
  92. scitex/browser/interaction/click_with_fallbacks.py +206 -0
  93. scitex/browser/interaction/close_popups.py +498 -0
  94. scitex/browser/interaction/fill_with_fallbacks.py +209 -0
  95. scitex/browser/pdf/__init__.py +14 -0
  96. scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
  97. scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
  98. scitex/browser/remote/CaptchaHandler.py +434 -0
  99. scitex/browser/remote/ZenRowsAPIClient.py +347 -0
  100. scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
  101. scitex/browser/remote/__init__.py +11 -0
  102. scitex/browser/stealth/HumanBehavior.py +344 -0
  103. scitex/browser/stealth/StealthManager.py +1008 -0
  104. scitex/browser/stealth/__init__.py +9 -0
  105. scitex/browser/template.py +122 -0
  106. scitex/capture/__init__.py +110 -0
  107. scitex/capture/__main__.py +25 -0
  108. scitex/capture/capture.py +848 -0
  109. scitex/capture/cli.py +233 -0
  110. scitex/capture/gif.py +344 -0
  111. scitex/capture/mcp_server.py +961 -0
  112. scitex/capture/session.py +70 -0
  113. scitex/capture/utils.py +705 -0
  114. scitex/cli/__init__.py +17 -0
  115. scitex/cli/cloud.py +447 -0
  116. scitex/cli/main.py +42 -0
  117. scitex/cli/scholar.py +280 -0
  118. scitex/context/_suppress_output.py +5 -3
  119. scitex/db/__init__.py +30 -3
  120. scitex/db/__main__.py +75 -0
  121. scitex/db/_check_health.py +381 -0
  122. scitex/db/_delete_duplicates.py +25 -386
  123. scitex/db/_inspect.py +335 -114
  124. scitex/db/_inspect_optimized.py +301 -0
  125. scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
  126. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
  127. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
  128. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
  129. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
  130. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
  131. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
  132. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
  133. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
  134. scitex/db/_postgresql/__init__.py +6 -0
  135. scitex/db/_sqlite3/_SQLite3.py +210 -0
  136. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
  137. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
  138. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
  139. scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
  140. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
  141. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
  142. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
  143. scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
  144. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
  145. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
  146. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
  147. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
  148. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
  149. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
  150. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
  151. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
  152. scitex/db/_sqlite3/__init__.py +7 -0
  153. scitex/db/_sqlite3/_delete_duplicates.py +274 -0
  154. scitex/decorators/__init__.py +2 -0
  155. scitex/decorators/_cache_disk.py +13 -5
  156. scitex/decorators/_cache_disk_async.py +49 -0
  157. scitex/decorators/_deprecated.py +175 -10
  158. scitex/decorators/_timeout.py +1 -1
  159. scitex/dev/_analyze_code_flow.py +2 -2
  160. scitex/dict/_DotDict.py +73 -15
  161. scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
  162. scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
  163. scitex/dict/__init__.py +2 -0
  164. scitex/dict/_flatten.py +27 -0
  165. scitex/dsp/_crop.py +2 -2
  166. scitex/dsp/_demo_sig.py +2 -2
  167. scitex/dsp/_detect_ripples.py +2 -2
  168. scitex/dsp/_hilbert.py +2 -2
  169. scitex/dsp/_listen.py +6 -6
  170. scitex/dsp/_modulation_index.py +2 -2
  171. scitex/dsp/_pac.py +1 -1
  172. scitex/dsp/_psd.py +2 -2
  173. scitex/dsp/_resample.py +2 -1
  174. scitex/dsp/_time.py +3 -2
  175. scitex/dsp/_wavelet.py +3 -2
  176. scitex/dsp/add_noise.py +2 -2
  177. scitex/dsp/example.py +1 -0
  178. scitex/dsp/filt.py +10 -9
  179. scitex/dsp/template.py +3 -2
  180. scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
  181. scitex/dsp/utils/pac.py +2 -2
  182. scitex/dt/_normalize_timestamp.py +432 -0
  183. scitex/errors.py +572 -0
  184. scitex/gen/_DimHandler.py +2 -2
  185. scitex/gen/__init__.py +37 -7
  186. scitex/gen/_deprecated_close.py +80 -0
  187. scitex/gen/_deprecated_start.py +26 -0
  188. scitex/gen/_detect_environment.py +152 -0
  189. scitex/gen/_detect_notebook_path.py +169 -0
  190. scitex/gen/_embed.py +6 -2
  191. scitex/gen/_get_notebook_path.py +257 -0
  192. scitex/gen/_less.py +1 -1
  193. scitex/gen/_list_packages.py +2 -2
  194. scitex/gen/_norm.py +44 -9
  195. scitex/gen/_norm_cache.py +269 -0
  196. scitex/gen/_src.py +3 -5
  197. scitex/gen/_title_case.py +3 -3
  198. scitex/io/__init__.py +28 -6
  199. scitex/io/_glob.py +13 -7
  200. scitex/io/_load.py +108 -21
  201. scitex/io/_load_cache.py +303 -0
  202. scitex/io/_load_configs.py +40 -15
  203. scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
  204. scitex/io/_load_modules/_ZarrExplorer.py +114 -0
  205. scitex/io/_load_modules/_bibtex.py +207 -0
  206. scitex/io/_load_modules/_hdf5.py +53 -178
  207. scitex/io/_load_modules/_json.py +5 -3
  208. scitex/io/_load_modules/_pdf.py +871 -16
  209. scitex/io/_load_modules/_sqlite3.py +15 -0
  210. scitex/io/_load_modules/_txt.py +41 -12
  211. scitex/io/_load_modules/_yaml.py +4 -3
  212. scitex/io/_load_modules/_zarr.py +126 -0
  213. scitex/io/_save.py +429 -171
  214. scitex/io/_save_modules/__init__.py +6 -0
  215. scitex/io/_save_modules/_bibtex.py +194 -0
  216. scitex/io/_save_modules/_csv.py +8 -4
  217. scitex/io/_save_modules/_excel.py +174 -15
  218. scitex/io/_save_modules/_hdf5.py +251 -226
  219. scitex/io/_save_modules/_image.py +1 -3
  220. scitex/io/_save_modules/_json.py +49 -4
  221. scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
  222. scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
  223. scitex/io/_save_modules/_tex.py +277 -0
  224. scitex/io/_save_modules/_yaml.py +42 -3
  225. scitex/io/_save_modules/_zarr.py +160 -0
  226. scitex/io/utils/__init__.py +20 -0
  227. scitex/io/utils/h5_to_zarr.py +616 -0
  228. scitex/linalg/_geometric_median.py +6 -2
  229. scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
  230. scitex/logging/__init__.py +122 -0
  231. scitex/logging/_config.py +158 -0
  232. scitex/logging/_context.py +103 -0
  233. scitex/logging/_formatters.py +128 -0
  234. scitex/logging/_handlers.py +64 -0
  235. scitex/logging/_levels.py +35 -0
  236. scitex/logging/_logger.py +163 -0
  237. scitex/logging/_print_capture.py +95 -0
  238. scitex/ml/__init__.py +69 -0
  239. scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
  240. scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
  241. scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
  242. scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
  243. scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
  244. scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
  245. scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
  246. scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
  247. scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
  248. scitex/ml/_gen_ai/__init__.py +43 -0
  249. scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
  250. scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
  251. scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
  252. scitex/ml/activation/__init__.py +8 -0
  253. scitex/ml/activation/_define.py +11 -0
  254. scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
  255. scitex/ml/classification/CrossValidationExperiment.py +374 -0
  256. scitex/ml/classification/__init__.py +46 -0
  257. scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
  258. scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
  259. scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
  260. scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
  261. scitex/ml/classification/reporters/__init__.py +11 -0
  262. scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  263. scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
  264. scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
  265. scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
  266. scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
  267. scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
  268. scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
  269. scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  270. scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  271. scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  272. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  273. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  274. scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  275. scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  276. scitex/ml/classification/timeseries/__init__.py +39 -0
  277. scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
  278. scitex/ml/clustering/__init__.py +11 -0
  279. scitex/ml/clustering/_pca.py +115 -0
  280. scitex/ml/clustering/_umap.py +376 -0
  281. scitex/ml/feature_extraction/__init__.py +56 -0
  282. scitex/ml/feature_extraction/vit.py +149 -0
  283. scitex/ml/feature_selection/__init__.py +30 -0
  284. scitex/ml/feature_selection/feature_selection.py +364 -0
  285. scitex/ml/loss/_L1L2Losses.py +34 -0
  286. scitex/ml/loss/__init__.py +12 -0
  287. scitex/ml/loss/multi_task_loss.py +47 -0
  288. scitex/ml/metrics/__init__.py +56 -0
  289. scitex/ml/metrics/_calc_bacc.py +61 -0
  290. scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
  291. scitex/ml/metrics/_calc_clf_report.py +78 -0
  292. scitex/ml/metrics/_calc_conf_mat.py +93 -0
  293. scitex/ml/metrics/_calc_feature_importance.py +183 -0
  294. scitex/ml/metrics/_calc_mcc.py +61 -0
  295. scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
  296. scitex/ml/metrics/_calc_roc_auc.py +110 -0
  297. scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
  298. scitex/ml/metrics/_calc_silhouette_score.py +503 -0
  299. scitex/ml/metrics/_normalize_labels.py +83 -0
  300. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
  301. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
  302. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
  303. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
  304. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
  305. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
  306. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
  307. scitex/ml/optim/__init__.py +13 -0
  308. scitex/ml/optim/_get_set.py +31 -0
  309. scitex/ml/optim/_optimizers.py +71 -0
  310. scitex/ml/plt/__init__.py +60 -0
  311. scitex/ml/plt/_plot_conf_mat.py +663 -0
  312. scitex/ml/plt/_plot_feature_importance.py +323 -0
  313. scitex/ml/plt/_plot_learning_curve.py +345 -0
  314. scitex/ml/plt/_plot_optuna_study.py +225 -0
  315. scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
  316. scitex/ml/plt/_plot_roc_curve.py +255 -0
  317. scitex/ml/sk/__init__.py +11 -0
  318. scitex/ml/sk/_clf.py +58 -0
  319. scitex/ml/sk/_to_sktime.py +100 -0
  320. scitex/ml/sklearn/__init__.py +26 -0
  321. scitex/ml/sklearn/clf.py +58 -0
  322. scitex/ml/sklearn/to_sktime.py +100 -0
  323. scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
  324. scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
  325. scitex/ml/training/__init__.py +7 -0
  326. scitex/ml/utils/__init__.py +22 -0
  327. scitex/ml/utils/_check_params.py +50 -0
  328. scitex/ml/utils/_default_dataset.py +46 -0
  329. scitex/ml/utils/_format_samples_for_sktime.py +26 -0
  330. scitex/ml/utils/_label_encoder.py +134 -0
  331. scitex/ml/utils/_merge_labels.py +22 -0
  332. scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
  333. scitex/ml/utils/_under_sample.py +51 -0
  334. scitex/ml/utils/_verify_n_gpus.py +16 -0
  335. scitex/ml/utils/grid_search.py +148 -0
  336. scitex/nn/_BNet.py +15 -9
  337. scitex/nn/_Filters.py +2 -2
  338. scitex/nn/_ModulationIndex.py +2 -2
  339. scitex/nn/_PAC.py +1 -1
  340. scitex/nn/_Spectrogram.py +12 -3
  341. scitex/nn/__init__.py +9 -10
  342. scitex/path/__init__.py +18 -0
  343. scitex/path/_clean.py +4 -0
  344. scitex/path/_find.py +9 -4
  345. scitex/path/_symlink.py +348 -0
  346. scitex/path/_version.py +4 -3
  347. scitex/pd/__init__.py +2 -0
  348. scitex/pd/_get_unique.py +99 -0
  349. scitex/plt/__init__.py +114 -5
  350. scitex/plt/_subplots/_AxesWrapper.py +1 -3
  351. scitex/plt/_subplots/_AxisWrapper.py +7 -3
  352. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
  353. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
  354. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
  355. scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
  356. scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
  357. scitex/plt/_subplots/_FigWrapper.py +62 -6
  358. scitex/plt/_subplots/_export_as_csv.py +43 -27
  359. scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
  360. scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
  361. scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
  362. scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
  363. scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
  364. scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
  365. scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
  366. scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
  367. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
  368. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
  369. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
  370. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
  371. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
  372. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
  373. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
  374. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
  375. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
  376. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
  377. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
  378. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
  379. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
  380. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
  381. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
  382. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
  383. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
  384. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
  385. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
  386. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
  387. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
  388. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
  389. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
  390. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
  391. scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
  392. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
  393. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
  394. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
  395. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
  396. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
  397. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
  398. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
  399. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
  400. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
  401. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
  402. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
  403. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
  404. scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
  405. scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
  406. scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
  407. scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
  408. scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
  409. scitex/plt/ax/_style/_hide_spines.py +1 -3
  410. scitex/plt/ax/_style/_rotate_labels.py +180 -76
  411. scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
  412. scitex/plt/ax/_style/_set_meta.py +11 -4
  413. scitex/plt/ax/_style/_set_supxyt.py +3 -3
  414. scitex/plt/ax/_style/_set_xyt.py +3 -3
  415. scitex/plt/ax/_style/_share_axes.py +2 -2
  416. scitex/plt/color/__init__.py +4 -4
  417. scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
  418. scitex/plt/utils/_configure_mpl.py +99 -86
  419. scitex/plt/utils/_histogram_utils.py +1 -3
  420. scitex/plt/utils/_is_valid_axis.py +1 -3
  421. scitex/plt/utils/_scitex_config.py +1 -0
  422. scitex/repro/__init__.py +75 -0
  423. scitex/{reproduce → repro}/_gen_ID.py +1 -1
  424. scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
  425. scitex/repro_rng/_RandomStateManager.py +590 -0
  426. scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  427. scitex/repro_rng/__init__.py +39 -0
  428. scitex/reproduce/__init__.py +25 -13
  429. scitex/reproduce/_hash_array.py +22 -0
  430. scitex/resource/_get_processor_usages.py +4 -4
  431. scitex/resource/_get_specs.py +2 -2
  432. scitex/resource/_log_processor_usages.py +2 -2
  433. scitex/rng/_RandomStateManager.py +590 -0
  434. scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  435. scitex/rng/__init__.py +39 -0
  436. scitex/scholar/__init__.py +309 -19
  437. scitex/scholar/__main__.py +319 -0
  438. scitex/scholar/auth/ScholarAuthManager.py +308 -0
  439. scitex/scholar/auth/__init__.py +12 -0
  440. scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
  441. scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
  442. scitex/scholar/auth/core/StrategyResolver.py +309 -0
  443. scitex/scholar/auth/core/__init__.py +16 -0
  444. scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
  445. scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
  446. scitex/scholar/auth/gateway/__init__.py +38 -0
  447. scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
  448. scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
  449. scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
  450. scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
  451. scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
  452. scitex/scholar/auth/providers/__init__.py +18 -0
  453. scitex/scholar/auth/session/AuthCacheManager.py +189 -0
  454. scitex/scholar/auth/session/SessionManager.py +159 -0
  455. scitex/scholar/auth/session/__init__.py +11 -0
  456. scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
  457. scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
  458. scitex/scholar/auth/sso/SSOAutomator.py +180 -0
  459. scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
  460. scitex/scholar/auth/sso/__init__.py +15 -0
  461. scitex/scholar/browser/ScholarBrowserManager.py +705 -0
  462. scitex/scholar/browser/__init__.py +38 -0
  463. scitex/scholar/browser/utils/__init__.py +13 -0
  464. scitex/scholar/browser/utils/click_and_wait.py +205 -0
  465. scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
  466. scitex/scholar/browser/utils/wait_redirects.py +732 -0
  467. scitex/scholar/config/PublisherRules.py +132 -0
  468. scitex/scholar/config/ScholarConfig.py +126 -0
  469. scitex/scholar/config/__init__.py +17 -0
  470. scitex/scholar/core/Paper.py +627 -0
  471. scitex/scholar/core/Papers.py +722 -0
  472. scitex/scholar/core/Scholar.py +1975 -0
  473. scitex/scholar/core/__init__.py +9 -0
  474. scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
  475. scitex/scholar/impact_factor/__init__.py +20 -0
  476. scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
  477. scitex/scholar/impact_factor/estimation/__init__.py +40 -0
  478. scitex/scholar/impact_factor/estimation/build_database.py +0 -0
  479. scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
  480. scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
  481. scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
  482. scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
  483. scitex/scholar/integration/__init__.py +59 -0
  484. scitex/scholar/integration/base.py +502 -0
  485. scitex/scholar/integration/mendeley/__init__.py +22 -0
  486. scitex/scholar/integration/mendeley/exporter.py +166 -0
  487. scitex/scholar/integration/mendeley/importer.py +236 -0
  488. scitex/scholar/integration/mendeley/linker.py +79 -0
  489. scitex/scholar/integration/mendeley/mapper.py +212 -0
  490. scitex/scholar/integration/zotero/__init__.py +27 -0
  491. scitex/scholar/integration/zotero/__main__.py +264 -0
  492. scitex/scholar/integration/zotero/exporter.py +351 -0
  493. scitex/scholar/integration/zotero/importer.py +372 -0
  494. scitex/scholar/integration/zotero/linker.py +415 -0
  495. scitex/scholar/integration/zotero/mapper.py +286 -0
  496. scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
  497. scitex/scholar/metadata_engines/__init__.py +21 -0
  498. scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
  499. scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
  500. scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
  501. scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
  502. scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
  503. scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
  504. scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
  505. scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
  506. scitex/scholar/metadata_engines/individual/__init__.py +7 -0
  507. scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
  508. scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
  509. scitex/scholar/metadata_engines/utils/__init__.py +30 -0
  510. scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
  511. scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
  512. scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
  513. scitex/scholar/pdf_download/__init__.py +5 -0
  514. scitex/scholar/pdf_download/strategies/__init__.py +38 -0
  515. scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
  516. scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
  517. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
  518. scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
  519. scitex/scholar/pdf_download/strategies/response_body.py +207 -0
  520. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
  521. scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
  522. scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
  523. scitex/scholar/pipelines/__init__.py +49 -0
  524. scitex/scholar/storage/BibTeXHandler.py +1018 -0
  525. scitex/scholar/storage/PaperIO.py +468 -0
  526. scitex/scholar/storage/ScholarLibrary.py +182 -0
  527. scitex/scholar/storage/_DeduplicationManager.py +548 -0
  528. scitex/scholar/storage/_LibraryCacheManager.py +724 -0
  529. scitex/scholar/storage/_LibraryManager.py +1835 -0
  530. scitex/scholar/storage/__init__.py +28 -0
  531. scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
  532. scitex/scholar/url_finder/__init__.py +7 -0
  533. scitex/scholar/url_finder/strategies/__init__.py +33 -0
  534. scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
  535. scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
  536. scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
  537. scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
  538. scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
  539. scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
  540. scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
  541. scitex/scholar/utils/__init__.py +22 -0
  542. scitex/scholar/utils/bibtex/__init__.py +9 -0
  543. scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
  544. scitex/scholar/utils/cleanup/__init__.py +8 -0
  545. scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
  546. scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
  547. scitex/scholar/utils/text/_TextNormalizer.py +407 -0
  548. scitex/scholar/utils/text/__init__.py +9 -0
  549. scitex/scholar/zotero/__init__.py +38 -0
  550. scitex/session/__init__.py +51 -0
  551. scitex/session/_lifecycle.py +736 -0
  552. scitex/session/_manager.py +102 -0
  553. scitex/session/template.py +122 -0
  554. scitex/stats/__init__.py +30 -26
  555. scitex/stats/correct/__init__.py +21 -0
  556. scitex/stats/correct/_correct_bonferroni.py +551 -0
  557. scitex/stats/correct/_correct_fdr.py +634 -0
  558. scitex/stats/correct/_correct_holm.py +548 -0
  559. scitex/stats/correct/_correct_sidak.py +499 -0
  560. scitex/stats/descriptive/__init__.py +85 -0
  561. scitex/stats/descriptive/_circular.py +540 -0
  562. scitex/stats/descriptive/_describe.py +219 -0
  563. scitex/stats/descriptive/_nan.py +518 -0
  564. scitex/stats/descriptive/_real.py +189 -0
  565. scitex/stats/effect_sizes/__init__.py +41 -0
  566. scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
  567. scitex/stats/effect_sizes/_cohens_d.py +342 -0
  568. scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
  569. scitex/stats/effect_sizes/_eta_squared.py +302 -0
  570. scitex/stats/effect_sizes/_prob_superiority.py +296 -0
  571. scitex/stats/posthoc/__init__.py +19 -0
  572. scitex/stats/posthoc/_dunnett.py +463 -0
  573. scitex/stats/posthoc/_games_howell.py +383 -0
  574. scitex/stats/posthoc/_tukey_hsd.py +367 -0
  575. scitex/stats/power/__init__.py +19 -0
  576. scitex/stats/power/_power.py +433 -0
  577. scitex/stats/template.py +119 -0
  578. scitex/stats/utils/__init__.py +62 -0
  579. scitex/stats/utils/_effect_size.py +985 -0
  580. scitex/stats/utils/_formatters.py +270 -0
  581. scitex/stats/utils/_normalizers.py +927 -0
  582. scitex/stats/utils/_power.py +433 -0
  583. scitex/stats_v01/_EffectSizeCalculator.py +488 -0
  584. scitex/stats_v01/_StatisticalValidator.py +411 -0
  585. scitex/stats_v01/__init__.py +60 -0
  586. scitex/stats_v01/_additional_tests.py +415 -0
  587. scitex/{stats → stats_v01}/_p2stars.py +19 -5
  588. scitex/stats_v01/_two_sample_tests.py +141 -0
  589. scitex/stats_v01/desc/__init__.py +83 -0
  590. scitex/stats_v01/desc/_circular.py +540 -0
  591. scitex/stats_v01/desc/_describe.py +219 -0
  592. scitex/stats_v01/desc/_nan.py +518 -0
  593. scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
  594. scitex/stats_v01/desc/_real.py +189 -0
  595. scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
  596. scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
  597. scitex/str/__init__.py +1 -3
  598. scitex/str/_clean_path.py +6 -2
  599. scitex/str/_latex_fallback.py +267 -160
  600. scitex/str/_parse.py +44 -36
  601. scitex/str/_printc.py +1 -3
  602. scitex/template/__init__.py +87 -0
  603. scitex/template/_create_project.py +267 -0
  604. scitex/template/create_pip_project.py +80 -0
  605. scitex/template/create_research.py +80 -0
  606. scitex/template/create_singularity.py +80 -0
  607. scitex/units.py +291 -0
  608. scitex/utils/_compress_hdf5.py +14 -3
  609. scitex/utils/_email.py +21 -2
  610. scitex/utils/_grid.py +6 -4
  611. scitex/utils/_notify.py +13 -10
  612. scitex/utils/_verify_scitex_format.py +589 -0
  613. scitex/utils/_verify_scitex_format_v01.py +370 -0
  614. scitex/utils/template.py +122 -0
  615. scitex/web/_search_pubmed.py +62 -16
  616. scitex-2.1.0.dist-info/LICENSE +21 -0
  617. scitex-2.1.0.dist-info/METADATA +677 -0
  618. scitex-2.1.0.dist-info/RECORD +919 -0
  619. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
  620. scitex-2.1.0.dist-info/entry_points.txt +3 -0
  621. scitex/ai/__Classifiers.py +0 -101
  622. scitex/ai/classification/classification_reporter.py +0 -1137
  623. scitex/ai/classification/classifiers.py +0 -101
  624. scitex/ai/classification_reporter.py +0 -1161
  625. scitex/ai/genai/__init__.py +0 -277
  626. scitex/ai/genai/anthropic_provider.py +0 -320
  627. scitex/ai/genai/anthropic_refactored.py +0 -109
  628. scitex/ai/genai/auth_manager.py +0 -200
  629. scitex/ai/genai/base_provider.py +0 -291
  630. scitex/ai/genai/chat_history.py +0 -307
  631. scitex/ai/genai/cost_tracker.py +0 -276
  632. scitex/ai/genai/deepseek_provider.py +0 -251
  633. scitex/ai/genai/google_provider.py +0 -228
  634. scitex/ai/genai/groq_provider.py +0 -248
  635. scitex/ai/genai/image_processor.py +0 -250
  636. scitex/ai/genai/llama_provider.py +0 -214
  637. scitex/ai/genai/mock_provider.py +0 -127
  638. scitex/ai/genai/model_registry.py +0 -304
  639. scitex/ai/genai/openai_provider.py +0 -293
  640. scitex/ai/genai/perplexity_provider.py +0 -205
  641. scitex/ai/genai/provider_base.py +0 -302
  642. scitex/ai/genai/provider_factory.py +0 -370
  643. scitex/ai/genai/response_handler.py +0 -235
  644. scitex/ai/layer/_Pass.py +0 -21
  645. scitex/ai/layer/__init__.py +0 -10
  646. scitex/ai/layer/_switch.py +0 -8
  647. scitex/ai/metrics/_bACC.py +0 -51
  648. scitex/ai/plt/_learning_curve.py +0 -194
  649. scitex/ai/plt/_optuna_study.py +0 -111
  650. scitex/ai/plt/aucs/__init__.py +0 -2
  651. scitex/ai/plt/aucs/example.py +0 -60
  652. scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
  653. scitex/ai/plt/aucs/roc_auc.py +0 -246
  654. scitex/ai/sampling/undersample.py +0 -29
  655. scitex/db/_SQLite3.py +0 -2136
  656. scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
  657. scitex/gen/_close.py +0 -222
  658. scitex/gen/_start.py +0 -451
  659. scitex/general/__init__.py +0 -5
  660. scitex/io/_load_modules/_db.py +0 -24
  661. scitex/life/__init__.py +0 -10
  662. scitex/life/_monitor_rain.py +0 -49
  663. scitex/reproduce/_fix_seeds.py +0 -45
  664. scitex/res/__init__.py +0 -5
  665. scitex/scholar/_local_search.py +0 -454
  666. scitex/scholar/_paper.py +0 -244
  667. scitex/scholar/_pdf_downloader.py +0 -325
  668. scitex/scholar/_search.py +0 -393
  669. scitex/scholar/_vector_search.py +0 -370
  670. scitex/scholar/_web_sources.py +0 -457
  671. scitex/stats/desc/__init__.py +0 -40
  672. scitex-2.0.0.dist-info/METADATA +0 -307
  673. scitex-2.0.0.dist-info/RECORD +0 -572
  674. scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
  675. /scitex/ai/{act → activation}/__init__.py +0 -0
  676. /scitex/ai/{act → activation}/_define.py +0 -0
  677. /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
  678. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
  679. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
  680. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
  681. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
  682. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
  683. /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
  684. /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
  685. /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
  686. /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
  687. /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
  688. /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
  689. /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
  690. /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
  691. /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
  692. /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
  693. /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
  694. /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
  695. /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
  696. /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
  697. /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
  698. /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
  699. /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
  700. /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
  701. /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
  702. /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
  703. /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
  704. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1018 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Timestamp: "2025-08-22 23:01:42 (ywatanabe)"
4
+ # File: /home/ywatanabe/proj/SciTeX-Code/src/scitex/scholar/storage/_BibTeXHandler.py
5
+ # ----------------------------------------
6
+ from __future__ import annotations
7
+ import os
8
+ __FILE__ = __file__
9
+ __DIR__ = os.path.dirname(__FILE__)
10
+ # ----------------------------------------
11
+ import tempfile
12
+ from pathlib import Path
13
+ from typing import Any, Dict, List, Optional, Union
14
+
15
+ from scitex import logging
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ class BibTeXHandler:
21
+ """Handles BibTeX parsing and conversion to Paper objects."""
22
+
23
+ def __init__(self, project: str = None, config=None):
24
+ self.name = self.__class__.__name__
25
+ self.project = project
26
+ self.config = config
27
+
28
def _extract_primitive(self, value):
    """Return ``value`` with a top-level DotDict converted to a plain dict.

    Only a DotDict instance is converted (shallowly, via ``dict()``);
    ``None``, plain dicts and every other value are returned unchanged.
    """
    from scitex.dict import DotDict

    return dict(value) if isinstance(value, DotDict) else value
42
+
43
def papers_from_bibtex(
    self, bibtex_input: Union[str, Path]
) -> List["Paper"]:
    """Create Papers from either a BibTeX file path or raw BibTeX text.

    The input is stringified and classified heuristically:

    * Text that starts with ``@`` (ignoring surrounding whitespace) or
      contains a line starting with ``@`` is always treated as content.
    * Otherwise, a string shorter than 500 characters is treated as a
      path when it ends in ``.bib``/``.bibtex``, contains a path
      separator, starts with ``~`` or ``.``, or names an existing file.

    Args:
        bibtex_input: File path or BibTeX content.

    Returns:
        Papers produced by the file-based or text-based loader.
    """
    text = str(bibtex_input)

    def _resembles_path() -> bool:
        # Long strings are assumed to be content, never a path.
        if len(text) >= 500:
            return False
        return (
            text.endswith((".bib", ".bibtex"))
            or "/" in text
            or "\\" in text
            or text.startswith(("~", "."))
            or os.path.exists(os.path.expanduser(text))
        )

    # An entry marker overrides any path-like appearance.
    looks_like_bibtex = "\n@" in text or text.strip().startswith("@")

    if looks_like_bibtex or not _resembles_path():
        return self._papers_from_bibtex_text(text)
    return self._papers_from_bibtex_file(text)
69
+
70
+ def _papers_from_bibtex_file(
71
+ self, file_path: Union[str, Path]
72
+ ) -> List["Paper"]:
73
+ """Create Papers from a BibTeX file."""
74
+ bibtex_path = Path(os.path.expanduser(str(file_path)))
75
+ if not bibtex_path.exists():
76
+ raise ValueError(f"BibTeX file not found: {bibtex_path}")
77
+
78
+ from scitex.io import load
79
+
80
+ entries = load(str(bibtex_path))
81
+
82
+ papers = []
83
+ for entry in entries:
84
+ paper = self.paper_from_bibtex_entry(entry)
85
+ if paper:
86
+ papers.append(paper)
87
+
88
+ logger.info(f"Created {len(papers)} papers from BibTeX file")
89
+ return papers
90
+
91
def _papers_from_bibtex_text(self, bibtex_content: str) -> List["Paper"]:
    """Parse raw BibTeX text and convert its entries to Papers.

    The text is written to a temporary ``.bib`` file so that the same
    ``scitex.io.load`` path used for files can parse it; the temporary
    file is removed whether or not loading succeeds.

    Args:
        bibtex_content: BibTeX source text.

    Returns:
        One Paper per entry that ``paper_from_bibtex_entry`` accepts
        (falsy conversion results are dropped).
    """
    # delete=False: the file must outlive the handle so it can be
    # reopened by path; it is unlinked explicitly below.
    handle = tempfile.NamedTemporaryFile(
        mode="w", suffix=".bib", delete=False
    )
    with handle:
        handle.write(bibtex_content)
    scratch_path = handle.name

    try:
        from scitex.io import load

        raw_entries = load(scratch_path)
    finally:
        os.unlink(scratch_path)

    papers = list(
        filter(None, map(self.paper_from_bibtex_entry, raw_entries))
    )

    logger.info(f"Created {len(papers)} papers from BibTeX text")
    return papers
114
+
115
def paper_from_bibtex_entry(
    self, entry: Dict[str, Any]
) -> Optional["Paper"]:
    """Convert one parsed BibTeX entry into a Paper.

    Args:
        entry: Mapping with an optional ``"fields"`` mapping (BibTeX
            field name -> value) plus optional ``"entry_type"`` and
            ``"key"`` items.

    Returns:
        The populated Paper, or None when the entry has no title.
    """
    fields = entry.get("fields", {})
    title = fields.get("title", "")
    if not title:
        return None

    # Imported only once an entry is known to be convertible.
    import json

    from ..core.Paper import Paper

    author_str = fields.get("author", "")
    authors = (
        [name.strip() for name in author_str.split(" and ")]
        if author_str
        else []
    )

    # A malformed year (e.g. "in press") must not abort the import of
    # the whole file; the entry is kept without a year instead.
    year = None
    raw_year = fields.get("year")
    if raw_year:
        try:
            year = int(raw_year)
        except (ValueError, TypeError):
            logger.warning(f"Ignoring non-numeric BibTeX year: {raw_year!r}")

    raw_keywords = fields.get("keywords")
    keywords = raw_keywords.split(", ") if raw_keywords else []

    # Citation count is either a JSON object with a "total" item (as
    # found in enriched BibTeX files) or a plain integer.
    citation_total = None
    if "citation_count" in fields:
        cc_raw = fields["citation_count"]
        try:
            if isinstance(cc_raw, str) and cc_raw.strip().startswith("{"):
                citation_total = json.loads(cc_raw).get("total")
            else:
                citation_total = int(cc_raw)
        except (ValueError, TypeError):
            # Unparseable counts are ignored (json.JSONDecodeError is a
            # ValueError subclass).
            pass

    paper = Paper()

    # Basic metadata
    paper.metadata.basic.title = title
    paper.metadata.basic.authors = authors
    paper.metadata.basic.abstract = fields.get("abstract", "")
    paper.metadata.basic.year = year
    paper.metadata.basic.keywords = keywords

    # Identifiers
    if fields.get("doi"):
        paper.metadata.set_doi(fields["doi"])
    paper.metadata.id.pmid = fields.get("pmid")
    paper.metadata.id.arxiv_id = fields.get("eprint")

    # Publication metadata. Volume, issue and publisher are read from
    # the entry; BibTeX conventionally stores the issue under "number".
    paper.metadata.publication.journal = fields.get("journal")
    paper.metadata.publication.volume = fields.get("volume")
    paper.metadata.publication.issue = (
        fields.get("number") or fields.get("issue")
    )
    paper.metadata.publication.publisher = fields.get("publisher")

    if citation_total is not None:
        paper.metadata.citation_count.total = citation_total

    # Impact factor: accept only digits-and-dots strings, and tolerate
    # ones that still fail to parse (e.g. "1.2.3").
    if "journal_impact_factor" in fields:
        impact_str = str(fields["journal_impact_factor"])
        if impact_str.replace(".", "").isdigit():
            try:
                paper.metadata.publication.impact_factor = float(impact_str)
            except ValueError:
                pass

    # The entry's "url" field is recorded as a PDF URL candidate.
    if fields.get("url"):
        paper.metadata.url.pdfs.append(
            {"url": fields["url"], "source": "bibtex"}
        )

    paper.container.projects = [self.project] if self.project else []

    # Keep the raw entry so it can be written back out later.
    paper._original_bibtex_fields = fields.copy()
    paper._bibtex_entry_type = entry.get("entry_type", "misc")
    paper._bibtex_key = entry.get("key", "")

    self._handle_enriched_metadata(paper, fields)

    return paper
233
+
234
+ def _handle_enriched_metadata(
235
+ self, paper: "Paper", fields: Dict[str, Any]
236
+ ) -> None:
237
+ """Handle enriched metadata from BibTeX fields."""
238
+ if "citation_count" in fields:
239
+ try:
240
+ citation_str = str(fields["citation_count"]).replace(",", "")
241
+ paper.citation_count.total = int(citation_str)
242
+ paper.citation_count.total_engines = fields.get(
243
+ "citation_count_source", "bibtex"
244
+ )
245
+ except (ValueError, AttributeError):
246
+ pass
247
+
248
+ for field_name in fields:
249
+ if "impact_factor" in field_name and "JCR" in field_name:
250
+ try:
251
+ paper.publication.impact_factor = float(fields[field_name])
252
+ paper.publication.impact_factor_engines = fields.get(
253
+ "impact_factor_source", "bibtex"
254
+ )
255
+ break
256
+ except (ValueError, AttributeError):
257
+ pass
258
+
259
+ for field_name in fields:
260
+ if "quartile" in field_name and "JCR" in field_name:
261
+ try:
262
+ # Store in system or publication section
263
+ paper.publication["journal_quartile"] = fields[field_name]
264
+ break
265
+ except AttributeError:
266
+ pass
267
+
268
+ if "volume" in fields:
269
+ try:
270
+ paper.publication.volume = fields["volume"]
271
+ except AttributeError:
272
+ pass
273
+ if "pages" in fields:
274
+ try:
275
+ # Split pages into first_page and last_page
276
+ pages = fields["pages"]
277
+ if pages and "-" in str(pages):
278
+ first, last = str(pages).split("-", 1)
279
+ paper.publication.first_page = first.strip()
280
+ paper.publication.last_page = last.strip()
281
+ else:
282
+ paper.publication.first_page = pages
283
+ except AttributeError:
284
+ pass
285
+
286
def paper_to_bibtex_entry(self, paper: "Paper") -> Dict[str, Any]:
    """Convert a Paper into a BibTeX entry dictionary.

    Args:
        paper: Paper to export.

    Returns:
        Dict with ``"entry_type"``, ``"key"`` and ``"fields"`` items.
        Fields taken from the paper's metadata win over same-named
        fields remembered from the original BibTeX entry.
    """
    basic = paper.metadata.basic
    ids = paper.metadata.id
    publication = paper.metadata.publication

    # Entry type: a journal forces "article", a booktitle forces
    # "inproceedings", otherwise the remembered type (or "misc").
    entry_type = getattr(paper, "_bibtex_entry_type", "misc")
    if publication.journal:
        entry_type = "article"
    elif getattr(paper, "booktitle", None):
        entry_type = "inproceedings"

    # Citation key: reuse the stored key when it is non-empty; an empty
    # key would produce the invalid entry "@type{,". Otherwise build
    # "<family name>-<year>", handling both "First Last" and
    # "Last, First" author formats and blank author strings.
    surname = "Unknown"
    if basic.authors:
        lead_author = str(basic.authors[0])
        family = lead_author.split(",", 1)[0] if "," in lead_author else lead_author
        tokens = family.split()
        if tokens:
            surname = tokens[-1]
    year = basic.year or "NoYear"
    key = getattr(paper, "_bibtex_key", None) or f"{surname}-{year}"

    fields = {}

    # Basic fields
    if basic.title:
        fields["title"] = basic.title
    if basic.authors:
        fields["author"] = " and ".join(basic.authors)
    if basic.year:
        fields["year"] = str(basic.year)
    if basic.abstract:
        fields["abstract"] = basic.abstract
    if basic.keywords:
        fields["keywords"] = ", ".join(basic.keywords)

    # Identifiers
    if ids.doi:
        fields["doi"] = ids.doi
    if ids.pmid:
        fields["pmid"] = ids.pmid
    if ids.arxiv_id:
        fields["eprint"] = ids.arxiv_id

    # Publication info
    if publication.journal:
        fields["journal"] = publication.journal
    if publication.volume:
        fields["volume"] = publication.volume
    if publication.pages:
        fields["pages"] = publication.pages

    # Metrics: a missing or zero citation count is not exported.
    citation_total = paper.metadata.citation_count.total
    if citation_total is not None and citation_total != 0:
        fields["citation_count"] = str(int(citation_total))

    impact_factor = publication.impact_factor
    if impact_factor is not None:
        fields["journal_impact_factor"] = str(impact_factor)

    # URL: only the first PDF record is exported.
    pdf_records = paper.metadata.url.pdfs
    if pdf_records:
        pdf_url = pdf_records[0].get("url")
        if pdf_url:
            fields["url"] = str(pdf_url)

    # Original BibTeX fields fill gaps but never override current values.
    for name, value in getattr(paper, "_original_bibtex_fields", {}).items():
        fields.setdefault(name, value)

    return {
        "entry_type": entry_type,
        "key": key,
        "fields": fields,
    }
359
+
360
def papers_to_bibtex(
    self, papers: Union[List["Paper"], "Papers"], output_path: Optional[Union[str, Path]] = None
) -> str:
    """Convert a Papers collection to BibTeX text, optionally saving it.

    Args:
        papers: Object with a ``papers`` attribute, or a list of Paper
            objects.
        output_path: Optional destination file. Parent directories are
            created and the file is written as UTF-8.

    Returns:
        BibTeX content as a string (empty when there are no papers).
    """
    paper_list = papers.papers if hasattr(papers, "papers") else papers

    bibtex_lines = []
    for paper in paper_list:
        entry = self.paper_to_bibtex_entry(paper)
        bibtex_lines.append(f"@{entry['entry_type']}{{{entry['key']},")
        for field, value in entry["fields"].items():
            # NOTE(review): every brace in the value is escaped, which
            # also affects braces used for case protection or LaTeX
            # commands -- confirm this is intended.
            value = str(value).replace("{", "\\{").replace("}", "\\}")
            bibtex_lines.append(f" {field} = {{{value}}},")
        bibtex_lines.append("}\n")

    bibtex_content = "\n".join(bibtex_lines)

    if output_path:
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        # Explicit UTF-8: the platform default encoding may be unable
        # to represent non-ASCII names and titles.
        output_path.write_text(bibtex_content, encoding="utf-8")
        logger.success(f"Saved BibTeX to {output_path}")

    return bibtex_content
408
+
409
def merge_bibtex_files(
    self,
    file_paths: List[Union[str, Path]],
    output_path: Optional[Union[str, Path]] = None,
    dedup_strategy: str = "smart",
    return_details: bool = False
) -> Union["Papers", Dict[str, Any]]:
    """Merge several BibTeX files, handling duplicate papers.

    Files that fail to load are logged and skipped, so one bad file
    does not abort the merge.

    Args:
        file_paths: BibTeX files to merge.
        output_path: Optional path to save the merged BibTeX.
        dedup_strategy: ``'keep_all'`` skips deduplication; ``'smart'``
            merges the metadata of duplicates; any other value (the
            documented one being ``'keep_first'``) keeps the first
            occurrence of each duplicate.
        return_details: If True, return a dict instead of the Papers.

    Returns:
        The merged Papers collection, or a dict with ``'papers'``,
        ``'file_papers'`` and ``'stats'`` when ``return_details`` is True.
    """
    from ..core.Papers import Papers

    all_papers = []
    # Papers keyed by source file stem.
    # NOTE(review): files sharing a stem in different directories
    # overwrite each other here -- confirm whether that matters.
    file_papers = {}
    duplicate_stats = {
        'total_input': 0,
        'duplicates_found': 0,
        'duplicates_merged': 0,
        'unique_papers': 0,
        'files_processed': []
    }

    for file_path in file_paths:
        file_path = Path(file_path)
        try:
            papers = self.papers_from_bibtex(file_path)
        except Exception as e:
            # Deliberately broad: loading is best-effort per file.
            logger.warning(f"Failed to load {file_path}: {e}")
            continue
        all_papers.extend(papers)
        file_papers[file_path.stem] = papers
        duplicate_stats['total_input'] += len(papers)
        duplicate_stats['files_processed'].append(file_path)
        logger.info(f"Loaded {len(papers)} papers from {file_path}")

    if dedup_strategy == "keep_all":
        unique_papers = all_papers
        # No deduplication pass runs here, so record the count directly
        # to keep the log line and returned stats accurate.
        duplicate_stats['unique_papers'] = len(all_papers)
    else:
        unique_papers = self._deduplicate_papers(
            all_papers,
            strategy=dedup_strategy,
            stats=duplicate_stats
        )
    merged_papers = Papers(unique_papers)

    if output_path:
        self.papers_to_bibtex_with_sources(
            merged_papers,
            output_path,
            source_files=duplicate_stats['files_processed'],
            file_papers=file_papers,
            stats=duplicate_stats
        )

    logger.info(f"Merge complete: {duplicate_stats['unique_papers']} unique papers "
                f"from {duplicate_stats['total_input']} total "
                f"({duplicate_stats['duplicates_found']} duplicates)")

    if return_details:
        return {
            "papers": merged_papers,
            "file_papers": file_papers,
            "stats": duplicate_stats
        }
    return merged_papers
486
+
487
+ def _deduplicate_papers(
488
+ self,
489
+ papers: List["Paper"],
490
+ strategy: str = "smart",
491
+ stats: Optional[Dict] = None
492
+ ) -> List["Paper"]:
493
+ """Deduplicate a list of papers based on strategy.
494
+
495
+ Args:
496
+ papers: List of Paper objects
497
+ strategy: 'smart' or 'keep_first'
498
+ stats: Optional dict to track statistics
499
+
500
+ Returns:
501
+ List of unique papers
502
+ """
503
+ if not stats:
504
+ stats = {'duplicates_found': 0, 'duplicates_merged': 0}
505
+
506
+ unique_papers = []
507
+ paper_index = {} # Track papers by DOI and title
508
+
509
+ for paper in papers:
510
+ # Create keys for indexing
511
+ doi = paper.metadata.id.doi
512
+ doi_key = doi.lower() if doi else None
513
+ title = paper.metadata.basic.title
514
+ title_key = self._normalize_title(title) if title else None
515
+
516
+ is_duplicate = False
517
+ merge_with = None
518
+
519
+ # Check by DOI first (most reliable)
520
+ if doi_key and doi_key in paper_index:
521
+ is_duplicate = True
522
+ merge_with = paper_index[doi_key]
523
+
524
+ # Check by title if no DOI match
525
+ elif title_key and title_key in paper_index:
526
+ existing = paper_index[title_key]
527
+ if self._are_same_paper(existing, paper):
528
+ is_duplicate = True
529
+ merge_with = existing
530
+
531
+ if is_duplicate and merge_with:
532
+ stats['duplicates_found'] += 1
533
+
534
+ if strategy == "smart":
535
+ # Merge metadata from both papers
536
+ merged = self._merge_paper_metadata(merge_with, paper)
537
+ # Update the paper in our list
538
+ idx = unique_papers.index(merge_with)
539
+ unique_papers[idx] = merged
540
+ # Update index
541
+ if doi_key:
542
+ paper_index[doi_key] = merged
543
+ if title_key:
544
+ paper_index[title_key] = merged
545
+ stats['duplicates_merged'] += 1
546
+ # else: keep_first - do nothing
547
+
548
+ else:
549
+ # New unique paper
550
+ unique_papers.append(paper)
551
+ if doi_key:
552
+ paper_index[doi_key] = paper
553
+ if title_key:
554
+ paper_index[title_key] = paper
555
+
556
+ stats['unique_papers'] = len(unique_papers)
557
+ return unique_papers
558
+
559
+ def _normalize_title(self, title: str) -> str:
560
+ """Normalize title for comparison."""
561
+ if not title:
562
+ return ""
563
+ # Remove punctuation, lowercase, collapse whitespace
564
+ import re
565
+ normalized = re.sub(r'[^\w\s]', '', title.lower())
566
+ normalized = ' '.join(normalized.split())
567
+ return normalized
568
+
569
+ def _are_same_paper(self, paper1: "Paper", paper2: "Paper") -> bool:
570
+ """Determine if two papers are the same based on metadata."""
571
+ # If both have DOIs and they match
572
+ doi1 = paper1.metadata.id.doi
573
+ doi2 = paper2.metadata.id.doi
574
+ if doi1 and doi2:
575
+ return doi1.lower() == doi2.lower()
576
+
577
+ # Check title similarity
578
+ title1_raw = paper1.metadata.basic.title
579
+ title2_raw = paper2.metadata.basic.title
580
+ if title1_raw and title2_raw:
581
+ title1 = self._normalize_title(title1_raw)
582
+ title2 = self._normalize_title(title2_raw)
583
+
584
+ if title1 == title2:
585
+ # Check year (allow 1 year difference for online vs print)
586
+ year1 = paper1.metadata.basic.year
587
+ year2 = paper2.metadata.basic.year
588
+ if year1 and year2:
589
+ if abs(year1 - year2) <= 1:
590
+ return True
591
+ else:
592
+ # No year to compare, assume same if title matches
593
+ return True
594
+
595
+ return False
596
+
597
+ def _merge_paper_metadata(self, paper1: "Paper", paper2: "Paper") -> "Paper":
598
+ """Merge metadata from two papers, keeping the most complete information."""
599
+ from copy import deepcopy
600
+
601
+ # Calculate completeness score for each paper
602
+ score1 = sum([
603
+ 1 for field in [
604
+ paper1.metadata.id.doi, paper1.metadata.basic.abstract,
605
+ paper1.metadata.publication.journal,
606
+ paper1.metadata.citation_count.total,
607
+ paper1.metadata.url.pdfs, paper1.metadata.basic.authors
608
+ ] if field
609
+ ])
610
+ score2 = sum([
611
+ 1 for field in [
612
+ paper2.metadata.id.doi, paper2.metadata.basic.abstract,
613
+ paper2.metadata.publication.journal,
614
+ paper2.metadata.citation_count.total,
615
+ paper2.metadata.url.pdfs, paper2.metadata.basic.authors
616
+ ] if field
617
+ ])
618
+
619
+ # Start with the more complete paper
620
+ if score1 >= score2:
621
+ merged = deepcopy(paper1)
622
+ donor = paper2
623
+ else:
624
+ merged = deepcopy(paper2)
625
+ donor = paper1
626
+
627
+ # Fill in missing fields from donor
628
+ if not merged.metadata.id.doi and donor.metadata.id.doi:
629
+ merged.metadata.set_doi(donor.metadata.id.doi)
630
+ if not merged.metadata.basic.abstract and donor.metadata.basic.abstract:
631
+ merged.metadata.basic.abstract = donor.metadata.basic.abstract
632
+ if not merged.metadata.publication.journal and donor.metadata.publication.journal:
633
+ merged.metadata.publication.journal = donor.metadata.publication.journal
634
+ if not merged.metadata.publication.publisher and donor.metadata.publication.publisher:
635
+ merged.metadata.publication.publisher = donor.metadata.publication.publisher
636
+ if not merged.metadata.publication.volume and donor.metadata.publication.volume:
637
+ merged.metadata.publication.volume = donor.metadata.publication.volume
638
+ if not merged.metadata.publication.issue and donor.metadata.publication.issue:
639
+ merged.metadata.publication.issue = donor.metadata.publication.issue
640
+ if not merged.metadata.publication.pages and donor.metadata.publication.pages:
641
+ merged.metadata.publication.pages = donor.metadata.publication.pages
642
+ # Merge PDF URLs (union)
643
+ for donor_pdf in donor.metadata.url.pdfs:
644
+ if not any(p.get("url") == donor_pdf.get("url") for p in merged.metadata.url.pdfs):
645
+ merged.metadata.url.pdfs.append(donor_pdf)
646
+ if not merged.metadata.url.publisher and donor.metadata.url.publisher:
647
+ merged.metadata.url.publisher = donor.metadata.url.publisher
648
+
649
+ # Take maximum citation count
650
+ donor_cc = donor.metadata.citation_count.total or 0
651
+ merged_cc = merged.metadata.citation_count.total or 0
652
+
653
+ if donor_cc > merged_cc:
654
+ merged.metadata.citation_count.total = donor_cc
655
+
656
+ # Merge authors (union, preserving order)
657
+ if donor.metadata.basic.authors and not merged.metadata.basic.authors:
658
+ merged.metadata.basic.authors = donor.metadata.basic.authors
659
+ elif donor.metadata.basic.authors and merged.metadata.basic.authors:
660
+ # Add unique authors from donor
661
+ for author in donor.metadata.basic.authors:
662
+ if author not in merged.metadata.basic.authors:
663
+ merged.metadata.basic.authors.append(author)
664
+
665
+ # Merge keywords (union)
666
+ donor_keywords = donor.metadata.basic.keywords
667
+ merged_keywords = merged.metadata.basic.keywords
668
+ if donor_keywords:
669
+ if merged_keywords:
670
+ all_keywords = list(set(merged_keywords + donor_keywords))
671
+ merged.metadata.basic.keywords = sorted(all_keywords)
672
+ else:
673
+ merged.metadata.basic.keywords = donor_keywords
674
+
675
+ return merged
676
+
677
def papers_to_bibtex_with_sources(
    self,
    papers: Union[List["Paper"], "Papers"],
    output_path: Union[str, Path],
    source_files: Optional[List[Path]] = None,
    file_papers: Optional[Dict[str, List["Paper"]]] = None,
    stats: Optional[Dict] = None
) -> str:
    """Save papers to BibTeX with source file comments and SciTeX header.

    Each paper in ``papers`` is written exactly once. When ``file_papers``
    is given, a paper is placed under the first source section whose
    titles contain its title; anything left over goes into a trailing
    "Merged/Unassigned Entries" section.

    Args:
        papers: Papers collection (or plain list of papers) to save
        output_path: Path to save the BibTeX file
        source_files: List of source file paths (listed in the header)
        file_papers: Dict mapping source file names to their papers
        stats: Merge statistics (keys read: total_input, unique_papers,
            duplicates_found, duplicates_merged)

    Returns:
        BibTeX content as string
    """
    from datetime import datetime

    # Accept either a Papers-like object (has .papers) or a plain list
    paper_list = papers.papers if hasattr(papers, "papers") else papers
    output_path = Path(output_path)

    rule = "% ============================================================"

    # Generate header
    bibtex_lines = [
        rule,
        "% SciTeX Scholar - Merged BibTeX File",
        f"% Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        "% Author: Yusuke Watanabe (ywatanabe@scitex.ai)",
        rule,
    ]

    if source_files:
        bibtex_lines.append("%")
        bibtex_lines.append("% Source Files:")
        for i, source_file in enumerate(source_files, 1):
            # Path() so that plain string paths are accepted as well
            bibtex_lines.append(f"% {i}. {Path(source_file).name}")

    if stats:
        bibtex_lines.append("%")
        bibtex_lines.append("% Merge Statistics:")
        bibtex_lines.append(f"% Total entries loaded: {stats.get('total_input', 0)}")
        bibtex_lines.append(f"% Unique entries: {stats.get('unique_papers', len(paper_list))}")
        bibtex_lines.append(f"% Duplicates found: {stats.get('duplicates_found', 0)}")
        if stats.get('duplicates_merged'):
            bibtex_lines.append(f"% Duplicates merged: {stats['duplicates_merged']}")

    bibtex_lines.append(rule)
    bibtex_lines.append("")

    def append_entry(paper):
        entry = self.paper_to_bibtex_entry(paper)
        bibtex_lines.append(self._format_bibtex_entry(entry))

    if file_papers:
        # Positions in paper_list already written. Tracking positions
        # (not titles) guarantees every paper is written exactly once,
        # even when titles repeat within or across source files.
        emitted = set()

        for source_name, source_papers in file_papers.items():
            # Add section comment
            bibtex_lines.append("")
            bibtex_lines.append(rule)
            bibtex_lines.append(f"% Source: {source_name}.bib")
            bibtex_lines.append(f"% Entries: {len(source_papers)}")
            bibtex_lines.append(rule)
            bibtex_lines.append("")

            # Titles of this source that still await a matching paper
            pending_titles = {
                p.metadata.basic.title
                for p in source_papers
                if p.metadata.basic.title
            }
            for idx, paper in enumerate(paper_list):
                if idx in emitted:
                    continue
                title = paper.metadata.basic.title
                if title and title in pending_titles:
                    append_entry(paper)
                    emitted.add(idx)
                    # One paper per source title within a section
                    pending_titles.discard(title)

        # Everything not matched to a source (no title, unknown title,
        # or a further paper sharing an already-used title)
        unassigned = [
            paper for idx, paper in enumerate(paper_list)
            if idx not in emitted
        ]
        if unassigned:
            bibtex_lines.append("")
            bibtex_lines.append(rule)
            bibtex_lines.append("% Merged/Unassigned Entries")
            bibtex_lines.append(f"% Entries: {len(unassigned)}")
            bibtex_lines.append(rule)
            bibtex_lines.append("")
            for paper in unassigned:
                append_entry(paper)
    else:
        # No source tracking, just convert all papers
        for paper in paper_list:
            append_entry(paper)

    bibtex_content = "\n".join(bibtex_lines)

    # Save to file; explicit UTF-8 so non-ASCII names/titles do not depend
    # on the platform's locale encoding
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(bibtex_content, encoding="utf-8")
    logger.success(f"Saved merged BibTeX to {output_path}")

    return bibtex_content
788
+
789
+ def _format_bibtex_entry(self, entry: Dict) -> str:
790
+ """Format a single BibTeX entry."""
791
+ lines = []
792
+ entry_type = entry["entry_type"]
793
+ key = entry["key"]
794
+ fields = entry["fields"]
795
+
796
+ lines.append(f"@{entry_type}{{{key},")
797
+ for field, value in fields.items():
798
+ # Escape special characters in BibTeX
799
+ value = str(value).replace("{", "\\{").replace("}", "\\}")
800
+ lines.append(f" {field} = {{{value}}},")
801
+ lines.append("}\n")
802
+
803
+ return "\n".join(lines)
804
+
805
+ # =========================================================================
806
+ # Bibliography Directory Management
807
+ # =========================================================================
808
+
809
def setup_project_bibliography(
    self,
    project: str,
    bibtex_files: Optional[List[Union[str, Path]]] = None,
) -> Optional[Path]:
    """Setup info/bibliography directory structure for a project.

    Creates:
    - info/bibliography/
    - info/bibliography/*.bib (symlinks to source files)
    - info/bibliography/combined.bib (merged unique entries)
    - info/{project}.bib -> bibliography/combined.bib

    Args:
        project: Project name
        bibtex_files: Optional list of BibTeX files to include

    Returns:
        Whatever update_combined_bibliography() returns: the path to
        combined.bib, or None when it had nothing to merge.

    Raises:
        ValueError: If no config is set on this instance.
    """
    if not self.config:
        raise ValueError("Config required for project bibliography management")

    # Get project directory
    project_dir = self.config.path_manager.get_library_project_dir(project)
    bib_dir = project_dir / "info" / "bibliography"
    bib_dir.mkdir(parents=True, exist_ok=True)

    logger.info(f"Setting up bibliography for project: {project}")

    # Link provided BibTeX files
    for bib_file in bibtex_files or []:
        bib_file = Path(bib_file)
        if not bib_file.exists():
            logger.warning(f"BibTeX file not found, skipping: {bib_file}")
            continue

        link_name = bib_dir / f"{bib_file.stem}.bib"

        # A dangling symlink reports exists() == False, yet symlink_to()
        # would raise FileExistsError on it; remove it first.
        if link_name.is_symlink() and not link_name.exists():
            link_name.unlink()

        if not link_name.exists():
            link_name.symlink_to(bib_file.absolute())
            logger.info(f"Linked: {link_name.name} -> {bib_file}")

    # Merge all BibTeX files in bibliography directory
    combined_path = self.update_combined_bibliography(project)

    # Create convenience symlink at project root. The target is relative,
    # so it resolves against info/ and survives moving the project dir.
    project_bib_link = project_dir / "info" / f"{project}.bib"
    if project_bib_link.exists() or project_bib_link.is_symlink():
        project_bib_link.unlink()
    project_bib_link.symlink_to("bibliography/combined.bib")
    logger.success(f"Created {project}.bib -> bibliography/combined.bib")

    return combined_path
860
+
861
def update_combined_bibliography(self, project: str) -> Optional[Path]:
    """Update combined.bib with all BibTeX files in bibliography directory.

    Args:
        project: Project name

    Returns:
        Path to updated combined.bib, or None when the bibliography
        directory does not exist or contains no BibTeX files to merge.

    Raises:
        ValueError: If no config is set on this instance.
    """
    if not self.config:
        raise ValueError("Config required for project bibliography management")

    project_dir = self.config.path_manager.get_library_project_dir(project)
    bib_dir = project_dir / "info" / "bibliography"

    if not bib_dir.exists():
        logger.warning(f"Bibliography directory not found: {bib_dir}")
        return None

    # Find all BibTeX files, excluding generated outputs. Sorted because
    # glob() order is filesystem-dependent and the merge should be
    # reproducible between runs.
    generated_names = {"combined.bib", "merged.bib"}
    bib_files = []
    for candidate in sorted(bib_dir.glob("*.bib")):
        if candidate.name in generated_names:
            continue
        if not candidate.is_file():
            # e.g. a symlink whose target has been moved or deleted
            logger.warning(f"Skipping unreadable BibTeX file: {candidate}")
            continue
        bib_files.append(candidate)

    if not bib_files:
        logger.warning("No BibTeX files found in bibliography directory")
        return None

    logger.info(f"Merging {len(bib_files)} BibTeX files...")

    # Merge files
    combined_path = bib_dir / "combined.bib"
    merged_papers = self.merge_bibtex_files(
        bib_files,
        output_path=combined_path,
        dedup_strategy="smart"
    )

    logger.success(
        f"Updated combined.bib: {len(merged_papers)} unique papers "
        f"from {len(bib_files)} files"
    )

    return combined_path
906
+
907
def export_project_bibliography(
    self,
    project: str,
    output_path: Optional[Union[str, Path]] = None,
    include_all_entries: bool = True
) -> Optional[Path]:
    """Export all papers from project library to BibTeX file.

    This creates a BibTeX file from ALL papers in the project library,
    not just from existing BibTeX files. Useful for exporting the complete
    project bibliography after downloads and enrichment.

    Papers are discovered through the symlinks located directly in the
    project directory; each link target is expected to contain a
    metadata.json file.

    Args:
        project: Project name
        output_path: Optional output path (default: info/bibliography/library_export.bib)
        include_all_entries: If True, export all papers; if False, only papers with PDFs

    Returns:
        Path to exported BibTeX file, or None when no papers were found.

    Raises:
        ValueError: If no config is set on this instance.
    """
    if not self.config:
        raise ValueError("Config required for project bibliography export")

    project_dir = self.config.path_manager.get_library_project_dir(project)

    # Default output path
    if output_path is None:
        bib_dir = project_dir / "info" / "bibliography"
        bib_dir.mkdir(parents=True, exist_ok=True)
        output_path = bib_dir / "library_export.bib"
    else:
        output_path = Path(output_path)

    logger.info(f"Exporting project bibliography: {project}")

    # With a custom output path the project directory may not exist;
    # iterdir() would raise instead of reporting "nothing to export".
    if not project_dir.is_dir():
        logger.warning("No papers found to export")
        return None

    # Collect all papers from project symlinks
    from ..core.Paper import Paper
    papers = []

    # Sorted so the exported entry order is reproducible between runs
    for item in sorted(project_dir.iterdir()):
        if not item.is_symlink():
            continue

        # Best effort per entry: a single bad paper must not abort the export
        try:
            # Resolve symlink to master directory
            master_path = item.resolve()
            if not master_path.exists():
                logger.warning(f"Broken symlink: {item.name}")
                continue

            # Load metadata.json
            metadata_file = master_path / "metadata.json"
            if not metadata_file.exists():
                logger.warning(f"No metadata: {master_path.name}")
                continue

            # Check for PDF if filtering
            if not include_all_entries and not any(master_path.glob("*.pdf")):
                continue

            # Load paper
            paper = Paper.from_file(metadata_file)
            if paper:
                papers.append(paper)

        except Exception as e:
            logger.warning(f"Error loading {item.name}: {e}")
            continue

    logger.info(f"Found {len(papers)} papers in project library")

    if not papers:
        logger.warning("No papers found to export")
        return None

    # Convert to BibTeX
    from datetime import datetime

    rule = "% ============================================================"
    filter_label = 'All papers' if include_all_entries else 'Papers with PDFs only'

    # Project info header
    bibtex_content = [
        rule,
        "% SciTeX Scholar - Project Library Export",
        f"% Project: {project}",
        f"% Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"% Entries: {len(papers)}",
        f"% Filter: {filter_label}",
        rule,
        "",
    ]

    # Add papers
    for paper in papers:
        entry = self.paper_to_bibtex_entry(paper)
        bibtex_content.append(self._format_bibtex_entry(entry))

    # Write to file; explicit UTF-8 so non-ASCII names/titles do not depend
    # on the platform's locale encoding
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text("\n".join(bibtex_content), encoding="utf-8")

    logger.success(f"Exported {len(papers)} papers to: {output_path}")

    # Update combined.bib to include this export
    self.update_combined_bibliography(project)

    return output_path
1017
+
1018
+ # EOF