scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (704) hide show
  1. scitex/__init__.py +53 -15
  2. scitex/__main__.py +72 -26
  3. scitex/__version__.py +1 -1
  4. scitex/_sh.py +145 -23
  5. scitex/ai/__init__.py +30 -16
  6. scitex/ai/_gen_ai/_Anthropic.py +5 -7
  7. scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
  8. scitex/ai/_gen_ai/_DeepSeek.py +10 -2
  9. scitex/ai/_gen_ai/_Google.py +2 -2
  10. scitex/ai/_gen_ai/_Llama.py +2 -2
  11. scitex/ai/_gen_ai/_OpenAI.py +2 -2
  12. scitex/ai/_gen_ai/_PARAMS.py +51 -65
  13. scitex/ai/_gen_ai/_Perplexity.py +2 -2
  14. scitex/ai/_gen_ai/__init__.py +25 -14
  15. scitex/ai/_gen_ai/_format_output_func.py +4 -4
  16. scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
  17. scitex/ai/classification/CrossValidationExperiment.py +374 -0
  18. scitex/ai/classification/__init__.py +43 -4
  19. scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
  20. scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
  21. scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
  22. scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
  23. scitex/ai/classification/reporters/__init__.py +11 -0
  24. scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  25. scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
  26. scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
  27. scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
  28. scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
  29. scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
  30. scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
  31. scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  32. scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  33. scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  34. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  35. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  36. scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  37. scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  38. scitex/ai/classification/timeseries/__init__.py +39 -0
  39. scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
  40. scitex/ai/clustering/_umap.py +2 -2
  41. scitex/ai/feature_extraction/vit.py +1 -0
  42. scitex/ai/feature_selection/__init__.py +30 -0
  43. scitex/ai/feature_selection/feature_selection.py +364 -0
  44. scitex/ai/loss/multi_task_loss.py +1 -1
  45. scitex/ai/metrics/__init__.py +51 -4
  46. scitex/ai/metrics/_calc_bacc.py +61 -0
  47. scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
  48. scitex/ai/metrics/_calc_clf_report.py +78 -0
  49. scitex/ai/metrics/_calc_conf_mat.py +93 -0
  50. scitex/ai/metrics/_calc_feature_importance.py +183 -0
  51. scitex/ai/metrics/_calc_mcc.py +61 -0
  52. scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
  53. scitex/ai/metrics/_calc_roc_auc.py +110 -0
  54. scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
  55. scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
  56. scitex/ai/metrics/_normalize_labels.py +83 -0
  57. scitex/ai/plt/__init__.py +47 -8
  58. scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
  59. scitex/ai/plt/_plot_feature_importance.py +323 -0
  60. scitex/ai/plt/_plot_learning_curve.py +345 -0
  61. scitex/ai/plt/_plot_optuna_study.py +225 -0
  62. scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
  63. scitex/ai/plt/_plot_roc_curve.py +255 -0
  64. scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
  65. scitex/ai/training/__init__.py +2 -2
  66. scitex/ai/utils/grid_search.py +3 -3
  67. scitex/benchmark/__init__.py +52 -0
  68. scitex/benchmark/benchmark.py +400 -0
  69. scitex/benchmark/monitor.py +370 -0
  70. scitex/benchmark/profiler.py +297 -0
  71. scitex/browser/__init__.py +48 -0
  72. scitex/browser/automation/CookieHandler.py +216 -0
  73. scitex/browser/automation/__init__.py +7 -0
  74. scitex/browser/collaboration/__init__.py +55 -0
  75. scitex/browser/collaboration/auth_helpers.py +94 -0
  76. scitex/browser/collaboration/collaborative_agent.py +136 -0
  77. scitex/browser/collaboration/credential_manager.py +188 -0
  78. scitex/browser/collaboration/interactive_panel.py +400 -0
  79. scitex/browser/collaboration/persistent_browser.py +170 -0
  80. scitex/browser/collaboration/shared_session.py +383 -0
  81. scitex/browser/collaboration/standard_interactions.py +246 -0
  82. scitex/browser/collaboration/visual_feedback.py +181 -0
  83. scitex/browser/core/BrowserMixin.py +326 -0
  84. scitex/browser/core/ChromeProfileManager.py +446 -0
  85. scitex/browser/core/__init__.py +9 -0
  86. scitex/browser/debugging/__init__.py +18 -0
  87. scitex/browser/debugging/_browser_logger.py +657 -0
  88. scitex/browser/debugging/_highlight_element.py +143 -0
  89. scitex/browser/debugging/_show_grid.py +154 -0
  90. scitex/browser/interaction/__init__.py +24 -0
  91. scitex/browser/interaction/click_center.py +149 -0
  92. scitex/browser/interaction/click_with_fallbacks.py +206 -0
  93. scitex/browser/interaction/close_popups.py +498 -0
  94. scitex/browser/interaction/fill_with_fallbacks.py +209 -0
  95. scitex/browser/pdf/__init__.py +14 -0
  96. scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
  97. scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
  98. scitex/browser/remote/CaptchaHandler.py +434 -0
  99. scitex/browser/remote/ZenRowsAPIClient.py +347 -0
  100. scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
  101. scitex/browser/remote/__init__.py +11 -0
  102. scitex/browser/stealth/HumanBehavior.py +344 -0
  103. scitex/browser/stealth/StealthManager.py +1008 -0
  104. scitex/browser/stealth/__init__.py +9 -0
  105. scitex/browser/template.py +122 -0
  106. scitex/capture/__init__.py +110 -0
  107. scitex/capture/__main__.py +25 -0
  108. scitex/capture/capture.py +848 -0
  109. scitex/capture/cli.py +233 -0
  110. scitex/capture/gif.py +344 -0
  111. scitex/capture/mcp_server.py +961 -0
  112. scitex/capture/session.py +70 -0
  113. scitex/capture/utils.py +705 -0
  114. scitex/cli/__init__.py +17 -0
  115. scitex/cli/cloud.py +447 -0
  116. scitex/cli/main.py +42 -0
  117. scitex/cli/scholar.py +280 -0
  118. scitex/context/_suppress_output.py +5 -3
  119. scitex/db/__init__.py +30 -3
  120. scitex/db/__main__.py +75 -0
  121. scitex/db/_check_health.py +381 -0
  122. scitex/db/_delete_duplicates.py +25 -386
  123. scitex/db/_inspect.py +335 -114
  124. scitex/db/_inspect_optimized.py +301 -0
  125. scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
  126. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
  127. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
  128. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
  129. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
  130. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
  131. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
  132. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
  133. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
  134. scitex/db/_postgresql/__init__.py +6 -0
  135. scitex/db/_sqlite3/_SQLite3.py +210 -0
  136. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
  137. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
  138. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
  139. scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
  140. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
  141. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
  142. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
  143. scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
  144. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
  145. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
  146. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
  147. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
  148. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
  149. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
  150. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
  151. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
  152. scitex/db/_sqlite3/__init__.py +7 -0
  153. scitex/db/_sqlite3/_delete_duplicates.py +274 -0
  154. scitex/decorators/__init__.py +2 -0
  155. scitex/decorators/_cache_disk.py +13 -5
  156. scitex/decorators/_cache_disk_async.py +49 -0
  157. scitex/decorators/_deprecated.py +175 -10
  158. scitex/decorators/_timeout.py +1 -1
  159. scitex/dev/_analyze_code_flow.py +2 -2
  160. scitex/dict/_DotDict.py +73 -15
  161. scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
  162. scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
  163. scitex/dict/__init__.py +2 -0
  164. scitex/dict/_flatten.py +27 -0
  165. scitex/dsp/_crop.py +2 -2
  166. scitex/dsp/_demo_sig.py +2 -2
  167. scitex/dsp/_detect_ripples.py +2 -2
  168. scitex/dsp/_hilbert.py +2 -2
  169. scitex/dsp/_listen.py +6 -6
  170. scitex/dsp/_modulation_index.py +2 -2
  171. scitex/dsp/_pac.py +1 -1
  172. scitex/dsp/_psd.py +2 -2
  173. scitex/dsp/_resample.py +2 -1
  174. scitex/dsp/_time.py +3 -2
  175. scitex/dsp/_wavelet.py +3 -2
  176. scitex/dsp/add_noise.py +2 -2
  177. scitex/dsp/example.py +1 -0
  178. scitex/dsp/filt.py +10 -9
  179. scitex/dsp/template.py +3 -2
  180. scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
  181. scitex/dsp/utils/pac.py +2 -2
  182. scitex/dt/_normalize_timestamp.py +432 -0
  183. scitex/errors.py +572 -0
  184. scitex/gen/_DimHandler.py +2 -2
  185. scitex/gen/__init__.py +37 -7
  186. scitex/gen/_deprecated_close.py +80 -0
  187. scitex/gen/_deprecated_start.py +26 -0
  188. scitex/gen/_detect_environment.py +152 -0
  189. scitex/gen/_detect_notebook_path.py +169 -0
  190. scitex/gen/_embed.py +6 -2
  191. scitex/gen/_get_notebook_path.py +257 -0
  192. scitex/gen/_less.py +1 -1
  193. scitex/gen/_list_packages.py +2 -2
  194. scitex/gen/_norm.py +44 -9
  195. scitex/gen/_norm_cache.py +269 -0
  196. scitex/gen/_src.py +3 -5
  197. scitex/gen/_title_case.py +3 -3
  198. scitex/io/__init__.py +28 -6
  199. scitex/io/_glob.py +13 -7
  200. scitex/io/_load.py +108 -21
  201. scitex/io/_load_cache.py +303 -0
  202. scitex/io/_load_configs.py +40 -15
  203. scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
  204. scitex/io/_load_modules/_ZarrExplorer.py +114 -0
  205. scitex/io/_load_modules/_bibtex.py +207 -0
  206. scitex/io/_load_modules/_hdf5.py +53 -178
  207. scitex/io/_load_modules/_json.py +5 -3
  208. scitex/io/_load_modules/_pdf.py +871 -16
  209. scitex/io/_load_modules/_sqlite3.py +15 -0
  210. scitex/io/_load_modules/_txt.py +41 -12
  211. scitex/io/_load_modules/_yaml.py +4 -3
  212. scitex/io/_load_modules/_zarr.py +126 -0
  213. scitex/io/_save.py +429 -171
  214. scitex/io/_save_modules/__init__.py +6 -0
  215. scitex/io/_save_modules/_bibtex.py +194 -0
  216. scitex/io/_save_modules/_csv.py +8 -4
  217. scitex/io/_save_modules/_excel.py +174 -15
  218. scitex/io/_save_modules/_hdf5.py +251 -226
  219. scitex/io/_save_modules/_image.py +1 -3
  220. scitex/io/_save_modules/_json.py +49 -4
  221. scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
  222. scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
  223. scitex/io/_save_modules/_tex.py +277 -0
  224. scitex/io/_save_modules/_yaml.py +42 -3
  225. scitex/io/_save_modules/_zarr.py +160 -0
  226. scitex/io/utils/__init__.py +20 -0
  227. scitex/io/utils/h5_to_zarr.py +616 -0
  228. scitex/linalg/_geometric_median.py +6 -2
  229. scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
  230. scitex/logging/__init__.py +122 -0
  231. scitex/logging/_config.py +158 -0
  232. scitex/logging/_context.py +103 -0
  233. scitex/logging/_formatters.py +128 -0
  234. scitex/logging/_handlers.py +64 -0
  235. scitex/logging/_levels.py +35 -0
  236. scitex/logging/_logger.py +163 -0
  237. scitex/logging/_print_capture.py +95 -0
  238. scitex/ml/__init__.py +69 -0
  239. scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
  240. scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
  241. scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
  242. scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
  243. scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
  244. scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
  245. scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
  246. scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
  247. scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
  248. scitex/ml/_gen_ai/__init__.py +43 -0
  249. scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
  250. scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
  251. scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
  252. scitex/ml/activation/__init__.py +8 -0
  253. scitex/ml/activation/_define.py +11 -0
  254. scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
  255. scitex/ml/classification/CrossValidationExperiment.py +374 -0
  256. scitex/ml/classification/__init__.py +46 -0
  257. scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
  258. scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
  259. scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
  260. scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
  261. scitex/ml/classification/reporters/__init__.py +11 -0
  262. scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  263. scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
  264. scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
  265. scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
  266. scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
  267. scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
  268. scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
  269. scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  270. scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  271. scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  272. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  273. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  274. scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  275. scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  276. scitex/ml/classification/timeseries/__init__.py +39 -0
  277. scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
  278. scitex/ml/clustering/__init__.py +11 -0
  279. scitex/ml/clustering/_pca.py +115 -0
  280. scitex/ml/clustering/_umap.py +376 -0
  281. scitex/ml/feature_extraction/__init__.py +56 -0
  282. scitex/ml/feature_extraction/vit.py +149 -0
  283. scitex/ml/feature_selection/__init__.py +30 -0
  284. scitex/ml/feature_selection/feature_selection.py +364 -0
  285. scitex/ml/loss/_L1L2Losses.py +34 -0
  286. scitex/ml/loss/__init__.py +12 -0
  287. scitex/ml/loss/multi_task_loss.py +47 -0
  288. scitex/ml/metrics/__init__.py +56 -0
  289. scitex/ml/metrics/_calc_bacc.py +61 -0
  290. scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
  291. scitex/ml/metrics/_calc_clf_report.py +78 -0
  292. scitex/ml/metrics/_calc_conf_mat.py +93 -0
  293. scitex/ml/metrics/_calc_feature_importance.py +183 -0
  294. scitex/ml/metrics/_calc_mcc.py +61 -0
  295. scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
  296. scitex/ml/metrics/_calc_roc_auc.py +110 -0
  297. scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
  298. scitex/ml/metrics/_calc_silhouette_score.py +503 -0
  299. scitex/ml/metrics/_normalize_labels.py +83 -0
  300. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
  301. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
  302. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
  303. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
  304. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
  305. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
  306. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
  307. scitex/ml/optim/__init__.py +13 -0
  308. scitex/ml/optim/_get_set.py +31 -0
  309. scitex/ml/optim/_optimizers.py +71 -0
  310. scitex/ml/plt/__init__.py +60 -0
  311. scitex/ml/plt/_plot_conf_mat.py +663 -0
  312. scitex/ml/plt/_plot_feature_importance.py +323 -0
  313. scitex/ml/plt/_plot_learning_curve.py +345 -0
  314. scitex/ml/plt/_plot_optuna_study.py +225 -0
  315. scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
  316. scitex/ml/plt/_plot_roc_curve.py +255 -0
  317. scitex/ml/sk/__init__.py +11 -0
  318. scitex/ml/sk/_clf.py +58 -0
  319. scitex/ml/sk/_to_sktime.py +100 -0
  320. scitex/ml/sklearn/__init__.py +26 -0
  321. scitex/ml/sklearn/clf.py +58 -0
  322. scitex/ml/sklearn/to_sktime.py +100 -0
  323. scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
  324. scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
  325. scitex/ml/training/__init__.py +7 -0
  326. scitex/ml/utils/__init__.py +22 -0
  327. scitex/ml/utils/_check_params.py +50 -0
  328. scitex/ml/utils/_default_dataset.py +46 -0
  329. scitex/ml/utils/_format_samples_for_sktime.py +26 -0
  330. scitex/ml/utils/_label_encoder.py +134 -0
  331. scitex/ml/utils/_merge_labels.py +22 -0
  332. scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
  333. scitex/ml/utils/_under_sample.py +51 -0
  334. scitex/ml/utils/_verify_n_gpus.py +16 -0
  335. scitex/ml/utils/grid_search.py +148 -0
  336. scitex/nn/_BNet.py +15 -9
  337. scitex/nn/_Filters.py +2 -2
  338. scitex/nn/_ModulationIndex.py +2 -2
  339. scitex/nn/_PAC.py +1 -1
  340. scitex/nn/_Spectrogram.py +12 -3
  341. scitex/nn/__init__.py +9 -10
  342. scitex/path/__init__.py +18 -0
  343. scitex/path/_clean.py +4 -0
  344. scitex/path/_find.py +9 -4
  345. scitex/path/_symlink.py +348 -0
  346. scitex/path/_version.py +4 -3
  347. scitex/pd/__init__.py +2 -0
  348. scitex/pd/_get_unique.py +99 -0
  349. scitex/plt/__init__.py +114 -5
  350. scitex/plt/_subplots/_AxesWrapper.py +1 -3
  351. scitex/plt/_subplots/_AxisWrapper.py +7 -3
  352. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
  353. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
  354. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
  355. scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
  356. scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
  357. scitex/plt/_subplots/_FigWrapper.py +62 -6
  358. scitex/plt/_subplots/_export_as_csv.py +43 -27
  359. scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
  360. scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
  361. scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
  362. scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
  363. scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
  364. scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
  365. scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
  366. scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
  367. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
  368. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
  369. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
  370. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
  371. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
  372. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
  373. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
  374. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
  375. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
  376. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
  377. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
  378. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
  379. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
  380. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
  381. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
  382. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
  383. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
  384. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
  385. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
  386. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
  387. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
  388. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
  389. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
  390. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
  391. scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
  392. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
  393. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
  394. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
  395. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
  396. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
  397. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
  398. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
  399. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
  400. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
  401. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
  402. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
  403. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
  404. scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
  405. scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
  406. scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
  407. scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
  408. scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
  409. scitex/plt/ax/_style/_hide_spines.py +1 -3
  410. scitex/plt/ax/_style/_rotate_labels.py +180 -76
  411. scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
  412. scitex/plt/ax/_style/_set_meta.py +11 -4
  413. scitex/plt/ax/_style/_set_supxyt.py +3 -3
  414. scitex/plt/ax/_style/_set_xyt.py +3 -3
  415. scitex/plt/ax/_style/_share_axes.py +2 -2
  416. scitex/plt/color/__init__.py +4 -4
  417. scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
  418. scitex/plt/utils/_configure_mpl.py +99 -86
  419. scitex/plt/utils/_histogram_utils.py +1 -3
  420. scitex/plt/utils/_is_valid_axis.py +1 -3
  421. scitex/plt/utils/_scitex_config.py +1 -0
  422. scitex/repro/__init__.py +75 -0
  423. scitex/{reproduce → repro}/_gen_ID.py +1 -1
  424. scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
  425. scitex/repro_rng/_RandomStateManager.py +590 -0
  426. scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  427. scitex/repro_rng/__init__.py +39 -0
  428. scitex/reproduce/__init__.py +25 -13
  429. scitex/reproduce/_hash_array.py +22 -0
  430. scitex/resource/_get_processor_usages.py +4 -4
  431. scitex/resource/_get_specs.py +2 -2
  432. scitex/resource/_log_processor_usages.py +2 -2
  433. scitex/rng/_RandomStateManager.py +590 -0
  434. scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  435. scitex/rng/__init__.py +39 -0
  436. scitex/scholar/__init__.py +309 -19
  437. scitex/scholar/__main__.py +319 -0
  438. scitex/scholar/auth/ScholarAuthManager.py +308 -0
  439. scitex/scholar/auth/__init__.py +12 -0
  440. scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
  441. scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
  442. scitex/scholar/auth/core/StrategyResolver.py +309 -0
  443. scitex/scholar/auth/core/__init__.py +16 -0
  444. scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
  445. scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
  446. scitex/scholar/auth/gateway/__init__.py +38 -0
  447. scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
  448. scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
  449. scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
  450. scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
  451. scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
  452. scitex/scholar/auth/providers/__init__.py +18 -0
  453. scitex/scholar/auth/session/AuthCacheManager.py +189 -0
  454. scitex/scholar/auth/session/SessionManager.py +159 -0
  455. scitex/scholar/auth/session/__init__.py +11 -0
  456. scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
  457. scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
  458. scitex/scholar/auth/sso/SSOAutomator.py +180 -0
  459. scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
  460. scitex/scholar/auth/sso/__init__.py +15 -0
  461. scitex/scholar/browser/ScholarBrowserManager.py +705 -0
  462. scitex/scholar/browser/__init__.py +38 -0
  463. scitex/scholar/browser/utils/__init__.py +13 -0
  464. scitex/scholar/browser/utils/click_and_wait.py +205 -0
  465. scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
  466. scitex/scholar/browser/utils/wait_redirects.py +732 -0
  467. scitex/scholar/config/PublisherRules.py +132 -0
  468. scitex/scholar/config/ScholarConfig.py +126 -0
  469. scitex/scholar/config/__init__.py +17 -0
  470. scitex/scholar/core/Paper.py +627 -0
  471. scitex/scholar/core/Papers.py +722 -0
  472. scitex/scholar/core/Scholar.py +1975 -0
  473. scitex/scholar/core/__init__.py +9 -0
  474. scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
  475. scitex/scholar/impact_factor/__init__.py +20 -0
  476. scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
  477. scitex/scholar/impact_factor/estimation/__init__.py +40 -0
  478. scitex/scholar/impact_factor/estimation/build_database.py +0 -0
  479. scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
  480. scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
  481. scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
  482. scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
  483. scitex/scholar/integration/__init__.py +59 -0
  484. scitex/scholar/integration/base.py +502 -0
  485. scitex/scholar/integration/mendeley/__init__.py +22 -0
  486. scitex/scholar/integration/mendeley/exporter.py +166 -0
  487. scitex/scholar/integration/mendeley/importer.py +236 -0
  488. scitex/scholar/integration/mendeley/linker.py +79 -0
  489. scitex/scholar/integration/mendeley/mapper.py +212 -0
  490. scitex/scholar/integration/zotero/__init__.py +27 -0
  491. scitex/scholar/integration/zotero/__main__.py +264 -0
  492. scitex/scholar/integration/zotero/exporter.py +351 -0
  493. scitex/scholar/integration/zotero/importer.py +372 -0
  494. scitex/scholar/integration/zotero/linker.py +415 -0
  495. scitex/scholar/integration/zotero/mapper.py +286 -0
  496. scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
  497. scitex/scholar/metadata_engines/__init__.py +21 -0
  498. scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
  499. scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
  500. scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
  501. scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
  502. scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
  503. scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
  504. scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
  505. scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
  506. scitex/scholar/metadata_engines/individual/__init__.py +7 -0
  507. scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
  508. scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
  509. scitex/scholar/metadata_engines/utils/__init__.py +30 -0
  510. scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
  511. scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
  512. scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
  513. scitex/scholar/pdf_download/__init__.py +5 -0
  514. scitex/scholar/pdf_download/strategies/__init__.py +38 -0
  515. scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
  516. scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
  517. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
  518. scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
  519. scitex/scholar/pdf_download/strategies/response_body.py +207 -0
  520. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
  521. scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
  522. scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
  523. scitex/scholar/pipelines/__init__.py +49 -0
  524. scitex/scholar/storage/BibTeXHandler.py +1018 -0
  525. scitex/scholar/storage/PaperIO.py +468 -0
  526. scitex/scholar/storage/ScholarLibrary.py +182 -0
  527. scitex/scholar/storage/_DeduplicationManager.py +548 -0
  528. scitex/scholar/storage/_LibraryCacheManager.py +724 -0
  529. scitex/scholar/storage/_LibraryManager.py +1835 -0
  530. scitex/scholar/storage/__init__.py +28 -0
  531. scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
  532. scitex/scholar/url_finder/__init__.py +7 -0
  533. scitex/scholar/url_finder/strategies/__init__.py +33 -0
  534. scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
  535. scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
  536. scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
  537. scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
  538. scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
  539. scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
  540. scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
  541. scitex/scholar/utils/__init__.py +22 -0
  542. scitex/scholar/utils/bibtex/__init__.py +9 -0
  543. scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
  544. scitex/scholar/utils/cleanup/__init__.py +8 -0
  545. scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
  546. scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
  547. scitex/scholar/utils/text/_TextNormalizer.py +407 -0
  548. scitex/scholar/utils/text/__init__.py +9 -0
  549. scitex/scholar/zotero/__init__.py +38 -0
  550. scitex/session/__init__.py +51 -0
  551. scitex/session/_lifecycle.py +736 -0
  552. scitex/session/_manager.py +102 -0
  553. scitex/session/template.py +122 -0
  554. scitex/stats/__init__.py +30 -26
  555. scitex/stats/correct/__init__.py +21 -0
  556. scitex/stats/correct/_correct_bonferroni.py +551 -0
  557. scitex/stats/correct/_correct_fdr.py +634 -0
  558. scitex/stats/correct/_correct_holm.py +548 -0
  559. scitex/stats/correct/_correct_sidak.py +499 -0
  560. scitex/stats/descriptive/__init__.py +85 -0
  561. scitex/stats/descriptive/_circular.py +540 -0
  562. scitex/stats/descriptive/_describe.py +219 -0
  563. scitex/stats/descriptive/_nan.py +518 -0
  564. scitex/stats/descriptive/_real.py +189 -0
  565. scitex/stats/effect_sizes/__init__.py +41 -0
  566. scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
  567. scitex/stats/effect_sizes/_cohens_d.py +342 -0
  568. scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
  569. scitex/stats/effect_sizes/_eta_squared.py +302 -0
  570. scitex/stats/effect_sizes/_prob_superiority.py +296 -0
  571. scitex/stats/posthoc/__init__.py +19 -0
  572. scitex/stats/posthoc/_dunnett.py +463 -0
  573. scitex/stats/posthoc/_games_howell.py +383 -0
  574. scitex/stats/posthoc/_tukey_hsd.py +367 -0
  575. scitex/stats/power/__init__.py +19 -0
  576. scitex/stats/power/_power.py +433 -0
  577. scitex/stats/template.py +119 -0
  578. scitex/stats/utils/__init__.py +62 -0
  579. scitex/stats/utils/_effect_size.py +985 -0
  580. scitex/stats/utils/_formatters.py +270 -0
  581. scitex/stats/utils/_normalizers.py +927 -0
  582. scitex/stats/utils/_power.py +433 -0
  583. scitex/stats_v01/_EffectSizeCalculator.py +488 -0
  584. scitex/stats_v01/_StatisticalValidator.py +411 -0
  585. scitex/stats_v01/__init__.py +60 -0
  586. scitex/stats_v01/_additional_tests.py +415 -0
  587. scitex/{stats → stats_v01}/_p2stars.py +19 -5
  588. scitex/stats_v01/_two_sample_tests.py +141 -0
  589. scitex/stats_v01/desc/__init__.py +83 -0
  590. scitex/stats_v01/desc/_circular.py +540 -0
  591. scitex/stats_v01/desc/_describe.py +219 -0
  592. scitex/stats_v01/desc/_nan.py +518 -0
  593. scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
  594. scitex/stats_v01/desc/_real.py +189 -0
  595. scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
  596. scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
  597. scitex/str/__init__.py +1 -3
  598. scitex/str/_clean_path.py +6 -2
  599. scitex/str/_latex_fallback.py +267 -160
  600. scitex/str/_parse.py +44 -36
  601. scitex/str/_printc.py +1 -3
  602. scitex/template/__init__.py +87 -0
  603. scitex/template/_create_project.py +267 -0
  604. scitex/template/create_pip_project.py +80 -0
  605. scitex/template/create_research.py +80 -0
  606. scitex/template/create_singularity.py +80 -0
  607. scitex/units.py +291 -0
  608. scitex/utils/_compress_hdf5.py +14 -3
  609. scitex/utils/_email.py +21 -2
  610. scitex/utils/_grid.py +6 -4
  611. scitex/utils/_notify.py +13 -10
  612. scitex/utils/_verify_scitex_format.py +589 -0
  613. scitex/utils/_verify_scitex_format_v01.py +370 -0
  614. scitex/utils/template.py +122 -0
  615. scitex/web/_search_pubmed.py +62 -16
  616. scitex-2.1.0.dist-info/LICENSE +21 -0
  617. scitex-2.1.0.dist-info/METADATA +677 -0
  618. scitex-2.1.0.dist-info/RECORD +919 -0
  619. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
  620. scitex-2.1.0.dist-info/entry_points.txt +3 -0
  621. scitex/ai/__Classifiers.py +0 -101
  622. scitex/ai/classification/classification_reporter.py +0 -1137
  623. scitex/ai/classification/classifiers.py +0 -101
  624. scitex/ai/classification_reporter.py +0 -1161
  625. scitex/ai/genai/__init__.py +0 -277
  626. scitex/ai/genai/anthropic_provider.py +0 -320
  627. scitex/ai/genai/anthropic_refactored.py +0 -109
  628. scitex/ai/genai/auth_manager.py +0 -200
  629. scitex/ai/genai/base_provider.py +0 -291
  630. scitex/ai/genai/chat_history.py +0 -307
  631. scitex/ai/genai/cost_tracker.py +0 -276
  632. scitex/ai/genai/deepseek_provider.py +0 -251
  633. scitex/ai/genai/google_provider.py +0 -228
  634. scitex/ai/genai/groq_provider.py +0 -248
  635. scitex/ai/genai/image_processor.py +0 -250
  636. scitex/ai/genai/llama_provider.py +0 -214
  637. scitex/ai/genai/mock_provider.py +0 -127
  638. scitex/ai/genai/model_registry.py +0 -304
  639. scitex/ai/genai/openai_provider.py +0 -293
  640. scitex/ai/genai/perplexity_provider.py +0 -205
  641. scitex/ai/genai/provider_base.py +0 -302
  642. scitex/ai/genai/provider_factory.py +0 -370
  643. scitex/ai/genai/response_handler.py +0 -235
  644. scitex/ai/layer/_Pass.py +0 -21
  645. scitex/ai/layer/__init__.py +0 -10
  646. scitex/ai/layer/_switch.py +0 -8
  647. scitex/ai/metrics/_bACC.py +0 -51
  648. scitex/ai/plt/_learning_curve.py +0 -194
  649. scitex/ai/plt/_optuna_study.py +0 -111
  650. scitex/ai/plt/aucs/__init__.py +0 -2
  651. scitex/ai/plt/aucs/example.py +0 -60
  652. scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
  653. scitex/ai/plt/aucs/roc_auc.py +0 -246
  654. scitex/ai/sampling/undersample.py +0 -29
  655. scitex/db/_SQLite3.py +0 -2136
  656. scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
  657. scitex/gen/_close.py +0 -222
  658. scitex/gen/_start.py +0 -451
  659. scitex/general/__init__.py +0 -5
  660. scitex/io/_load_modules/_db.py +0 -24
  661. scitex/life/__init__.py +0 -10
  662. scitex/life/_monitor_rain.py +0 -49
  663. scitex/reproduce/_fix_seeds.py +0 -45
  664. scitex/res/__init__.py +0 -5
  665. scitex/scholar/_local_search.py +0 -454
  666. scitex/scholar/_paper.py +0 -244
  667. scitex/scholar/_pdf_downloader.py +0 -325
  668. scitex/scholar/_search.py +0 -393
  669. scitex/scholar/_vector_search.py +0 -370
  670. scitex/scholar/_web_sources.py +0 -457
  671. scitex/stats/desc/__init__.py +0 -40
  672. scitex-2.0.0.dist-info/METADATA +0 -307
  673. scitex-2.0.0.dist-info/RECORD +0 -572
  674. scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
  675. /scitex/ai/{act → activation}/__init__.py +0 -0
  676. /scitex/ai/{act → activation}/_define.py +0 -0
  677. /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
  678. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
  679. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
  680. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
  681. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
  682. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
  683. /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
  684. /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
  685. /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
  686. /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
  687. /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
  688. /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
  689. /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
  690. /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
  691. /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
  692. /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
  693. /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
  694. /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
  695. /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
  696. /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
  697. /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
  698. /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
  699. /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
  700. /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
  701. /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
  702. /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
  703. /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
  704. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
@@ -1,454 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- # Time-stamp: "2024-12-06 10:15:00"
4
- # Author: Claude
5
- # Filename: _local_search.py
6
-
7
- """
8
- Local search engine for PDF papers and documents.
9
- """
10
-
11
- import os
12
- import re
13
- from pathlib import Path
14
- from typing import List, Optional, Dict, Any, Tuple
15
- import logging
16
- import hashlib
17
- from datetime import datetime
18
- import json
19
-
20
- from ._paper import Paper
21
-
22
-
23
- logger = logging.getLogger(__name__)
24
-
25
-
26
- class LocalSearchEngine:
27
- """Search engine for local PDF papers and documents."""
28
-
29
- def __init__(
30
- self,
31
- index_path: Optional[Path] = None,
32
- cache_metadata: bool = True,
33
- ):
34
- """Initialize local search engine.
35
-
36
- Parameters
37
- ----------
38
- index_path : Path, optional
39
- Path to save/load the metadata index
40
- cache_metadata : bool
41
- Whether to cache extracted metadata
42
- """
43
- self.index_path = Path(index_path) if index_path else None
44
- self.cache_metadata = cache_metadata
45
-
46
- # Metadata cache: file_path -> metadata
47
- self.metadata_cache: Dict[str, Dict[str, Any]] = {}
48
-
49
- # Load existing cache if available
50
- if self.index_path and self.index_path.exists():
51
- self.load_cache()
52
-
53
- # PDF reader (lazy loaded)
54
- self._pdf_reader = None
55
-
56
- def _get_pdf_reader(self):
57
- """Lazy load PDF reader."""
58
- if self._pdf_reader is None:
59
- try:
60
- import fitz # PyMuPDF
61
- self._pdf_reader = "pymupdf"
62
- except ImportError:
63
- try:
64
- import PyPDF2
65
- self._pdf_reader = "pypdf2"
66
- except ImportError:
67
- logger.warning("No PDF reader available (install pymupdf or PyPDF2)")
68
- self._pdf_reader = "none"
69
-
70
- return self._pdf_reader
71
-
72
- def search(
73
- self,
74
- query: str,
75
- paths: List[Path],
76
- recursive: bool = True,
77
- file_pattern: str = "*.pdf",
78
- max_results: Optional[int] = None,
79
- ) -> List[Tuple[Paper, float]]:
80
- """Search for papers in local directories.
81
-
82
- Parameters
83
- ----------
84
- query : str
85
- Search query
86
- paths : List[Path]
87
- Directories to search
88
- recursive : bool
89
- Whether to search recursively
90
- file_pattern : str
91
- File pattern to match (e.g., "*.pdf")
92
- max_results : int, optional
93
- Maximum number of results
94
-
95
- Returns
96
- -------
97
- List[Tuple[Paper, float]]
98
- List of (paper, relevance_score) tuples
99
- """
100
- results = []
101
- query_lower = query.lower()
102
- query_terms = set(query_lower.split())
103
-
104
- # Find all matching files
105
- pdf_files = []
106
- for path in paths:
107
- path = Path(path)
108
- if not path.exists():
109
- logger.warning(f"Path does not exist: {path}")
110
- continue
111
-
112
- if path.is_file():
113
- pdf_files.append(path)
114
- else:
115
- if recursive:
116
- pdf_files.extend(path.rglob(file_pattern))
117
- else:
118
- pdf_files.extend(path.glob(file_pattern))
119
-
120
- # Search through files
121
- for pdf_path in pdf_files:
122
- try:
123
- # Get metadata (from cache or extract)
124
- metadata = self._get_pdf_metadata(pdf_path)
125
-
126
- if not metadata:
127
- continue
128
-
129
- # Calculate relevance score
130
- score = self._calculate_relevance(query_lower, query_terms, metadata)
131
-
132
- if score > 0:
133
- # Create Paper object
134
- paper = self._create_paper_from_metadata(pdf_path, metadata)
135
- results.append((paper, score))
136
-
137
- if max_results and len(results) >= max_results:
138
- break
139
-
140
- except Exception as e:
141
- logger.error(f"Error processing {pdf_path}: {e}")
142
- continue
143
-
144
- # Sort by relevance score
145
- results.sort(key=lambda x: x[1], reverse=True)
146
-
147
- if max_results:
148
- results = results[:max_results]
149
-
150
- return results
151
-
152
- def _get_pdf_metadata(self, pdf_path: Path) -> Optional[Dict[str, Any]]:
153
- """Extract or retrieve metadata from PDF."""
154
- # Check cache first
155
- cache_key = str(pdf_path.absolute())
156
-
157
- if self.cache_metadata and cache_key in self.metadata_cache:
158
- cached = self.metadata_cache[cache_key]
159
- # Check if file has been modified
160
- if cached.get("mtime") == pdf_path.stat().st_mtime:
161
- return cached
162
-
163
- # Extract metadata
164
- metadata = self._extract_pdf_metadata(pdf_path)
165
-
166
- if metadata and self.cache_metadata:
167
- # Add file modification time
168
- metadata["mtime"] = pdf_path.stat().st_mtime
169
- self.metadata_cache[cache_key] = metadata
170
- # Save cache periodically
171
- if len(self.metadata_cache) % 10 == 0:
172
- self.save_cache()
173
-
174
- return metadata
175
-
176
- def _extract_pdf_metadata(self, pdf_path: Path) -> Optional[Dict[str, Any]]:
177
- """Extract metadata from PDF file."""
178
- reader_type = self._get_pdf_reader()
179
-
180
- metadata = {
181
- "filename": pdf_path.name,
182
- "path": str(pdf_path),
183
- "size": pdf_path.stat().st_size,
184
- "modified": datetime.fromtimestamp(pdf_path.stat().st_mtime).isoformat(),
185
- }
186
-
187
- if reader_type == "none":
188
- # Fallback: use filename
189
- metadata["title"] = pdf_path.stem.replace("_", " ").replace("-", " ")
190
- metadata["content"] = metadata["title"]
191
- return metadata
192
-
193
- try:
194
- if reader_type == "pymupdf":
195
- import fitz
196
-
197
- with fitz.open(pdf_path) as doc:
198
- # Get document metadata
199
- info = doc.metadata
200
- metadata["title"] = info.get("title", "") or pdf_path.stem
201
- metadata["author"] = info.get("author", "")
202
- metadata["subject"] = info.get("subject", "")
203
- metadata["keywords"] = info.get("keywords", "")
204
-
205
- # Extract text from first few pages
206
- text_parts = []
207
- for i in range(min(3, len(doc))): # First 3 pages
208
- page = doc[i]
209
- text = page.get_text()
210
- if text:
211
- text_parts.append(text)
212
-
213
- metadata["content"] = " ".join(text_parts)[:5000] # Limit content
214
-
215
- # Try to extract abstract
216
- full_text = " ".join(text_parts)
217
- abstract = self._extract_abstract(full_text)
218
- if abstract:
219
- metadata["abstract"] = abstract
220
-
221
- elif reader_type == "pypdf2":
222
- import PyPDF2
223
-
224
- with open(pdf_path, "rb") as f:
225
- reader = PyPDF2.PdfReader(f)
226
-
227
- # Get metadata
228
- info = reader.metadata
229
- if info:
230
- metadata["title"] = info.get("/Title", "") or pdf_path.stem
231
- metadata["author"] = info.get("/Author", "")
232
- metadata["subject"] = info.get("/Subject", "")
233
- metadata["keywords"] = info.get("/Keywords", "")
234
-
235
- # Extract text from first few pages
236
- text_parts = []
237
- for i in range(min(3, len(reader.pages))):
238
- page = reader.pages[i]
239
- text = page.extract_text()
240
- if text:
241
- text_parts.append(text)
242
-
243
- metadata["content"] = " ".join(text_parts)[:5000]
244
-
245
- # Try to extract abstract
246
- full_text = " ".join(text_parts)
247
- abstract = self._extract_abstract(full_text)
248
- if abstract:
249
- metadata["abstract"] = abstract
250
-
251
- except Exception as e:
252
- logger.error(f"Error extracting metadata from {pdf_path}: {e}")
253
- # Fallback to filename
254
- metadata["title"] = pdf_path.stem.replace("_", " ").replace("-", " ")
255
- metadata["content"] = metadata["title"]
256
-
257
- return metadata
258
-
259
- def _extract_abstract(self, text: str) -> Optional[str]:
260
- """Try to extract abstract from text."""
261
- # Look for abstract section
262
- patterns = [
263
- r"abstract[:\s]*(.+?)(?=introduction|keywords|1\.|1\s+introduction)",
264
- r"summary[:\s]*(.+?)(?=introduction|keywords|1\.|1\s+introduction)",
265
- ]
266
-
267
- for pattern in patterns:
268
- match = re.search(pattern, text, re.IGNORECASE | re.DOTALL)
269
- if match:
270
- abstract = match.group(1).strip()
271
- # Clean up
272
- abstract = re.sub(r"\s+", " ", abstract)
273
- if len(abstract) > 100: # Reasonable abstract length
274
- return abstract[:1000] # Limit length
275
-
276
- return None
277
-
278
- def _calculate_relevance(
279
- self,
280
- query_lower: str,
281
- query_terms: set,
282
- metadata: Dict[str, Any]
283
- ) -> float:
284
- """Calculate relevance score for a document."""
285
- score = 0.0
286
-
287
- # Search in different fields with different weights
288
- fields = [
289
- ("title", 5.0),
290
- ("abstract", 3.0),
291
- ("keywords", 3.0),
292
- ("author", 2.0),
293
- ("subject", 2.0),
294
- ("content", 1.0),
295
- ("filename", 1.0),
296
- ]
297
-
298
- for field, weight in fields:
299
- field_value = metadata.get(field, "").lower()
300
- if not field_value:
301
- continue
302
-
303
- # Exact match
304
- if query_lower in field_value:
305
- score += weight * 2
306
-
307
- # Term matches
308
- field_terms = set(field_value.split())
309
- matching_terms = query_terms & field_terms
310
- if matching_terms:
311
- score += weight * len(matching_terms) / len(query_terms)
312
-
313
- return score
314
-
315
- def _create_paper_from_metadata(
316
- self,
317
- pdf_path: Path,
318
- metadata: Dict[str, Any]
319
- ) -> Paper:
320
- """Create Paper object from metadata."""
321
- # Parse authors
322
- authors = []
323
- author_str = metadata.get("author", "")
324
- if author_str:
325
- # Try to split authors
326
- if ";" in author_str:
327
- authors = [a.strip() for a in author_str.split(";")]
328
- elif "," in author_str and author_str.count(",") > 1:
329
- authors = [a.strip() for a in author_str.split(",")]
330
- else:
331
- authors = [author_str.strip()]
332
-
333
- # Parse keywords
334
- keywords = []
335
- keyword_str = metadata.get("keywords", "")
336
- if keyword_str:
337
- keywords = [k.strip() for k in re.split(r"[;,]", keyword_str) if k.strip()]
338
-
339
- # Create paper
340
- paper = Paper(
341
- title=metadata.get("title", pdf_path.stem),
342
- authors=authors,
343
- abstract=metadata.get("abstract", metadata.get("content", "")[:500]),
344
- source="local",
345
- keywords=keywords,
346
- pdf_path=pdf_path,
347
- metadata={
348
- "filename": metadata.get("filename"),
349
- "size": metadata.get("size"),
350
- "modified": metadata.get("modified"),
351
- "subject": metadata.get("subject"),
352
- },
353
- )
354
-
355
- return paper
356
-
357
- def build_index(self, paths: List[Path], recursive: bool = True) -> int:
358
- """Build metadata index for given paths.
359
-
360
- Parameters
361
- ----------
362
- paths : List[Path]
363
- Directories to index
364
- recursive : bool
365
- Whether to search recursively
366
-
367
- Returns
368
- -------
369
- int
370
- Number of files indexed
371
- """
372
- count = 0
373
-
374
- for path in paths:
375
- path = Path(path)
376
- if not path.exists():
377
- continue
378
-
379
- if path.is_file() and path.suffix.lower() == ".pdf":
380
- pdf_files = [path]
381
- else:
382
- if recursive:
383
- pdf_files = list(path.rglob("*.pdf"))
384
- else:
385
- pdf_files = list(path.glob("*.pdf"))
386
-
387
- for pdf_path in pdf_files:
388
- try:
389
- metadata = self._get_pdf_metadata(pdf_path)
390
- if metadata:
391
- count += 1
392
- if count % 10 == 0:
393
- logger.info(f"Indexed {count} files...")
394
- except Exception as e:
395
- logger.error(f"Error indexing {pdf_path}: {e}")
396
-
397
- # Save cache
398
- if self.cache_metadata:
399
- self.save_cache()
400
-
401
- logger.info(f"Indexed {count} PDF files")
402
- return count
403
-
404
- def save_cache(self) -> None:
405
- """Save metadata cache to disk."""
406
- if not self.index_path:
407
- return
408
-
409
- self.index_path.parent.mkdir(parents=True, exist_ok=True)
410
-
411
- with open(self.index_path, "w") as f:
412
- json.dump(self.metadata_cache, f, indent=2)
413
-
414
- logger.debug(f"Saved cache with {len(self.metadata_cache)} entries")
415
-
416
- def load_cache(self) -> None:
417
- """Load metadata cache from disk."""
418
- if not self.index_path or not self.index_path.exists():
419
- return
420
-
421
- try:
422
- with open(self.index_path, "r") as f:
423
- self.metadata_cache = json.load(f)
424
- logger.debug(f"Loaded cache with {len(self.metadata_cache)} entries")
425
- except Exception as e:
426
- logger.error(f"Error loading cache: {e}")
427
-
428
- def clear_cache(self) -> None:
429
- """Clear the metadata cache."""
430
- self.metadata_cache.clear()
431
- if self.index_path and self.index_path.exists():
432
- self.index_path.unlink()
433
-
434
-
435
- # Example usage
436
- if __name__ == "__main__":
437
- # Create local search engine
438
- engine = LocalSearchEngine()
439
-
440
- # Search in current directory
441
- results = engine.search("machine learning", [Path(".")], max_results=5)
442
-
443
- print(f"Found {len(results)} papers:")
444
- for paper, score in results:
445
- print(f"\nScore: {score:.2f}")
446
- print(f"Title: {paper.title}")
447
- print(f"Path: {paper.pdf_path}")
448
- if paper.authors:
449
- print(f"Authors: {', '.join(paper.authors)}")
450
-
451
- # Build index for faster future searches
452
- print("\nBuilding index...")
453
- count = engine.build_index([Path(".")])
454
- print(f"Indexed {count} files")
scitex/scholar/_paper.py DELETED
@@ -1,244 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- # Time-stamp: "2024-12-06 10:00:00"
4
- # Author: Claude
5
- # Filename: _paper.py
6
-
7
- """
8
- Paper class for representing scientific papers.
9
- """
10
-
11
- from typing import Dict, Optional, List, Any
12
- from datetime import datetime
13
- from pathlib import Path
14
- import json
15
-
16
-
17
- class Paper:
18
- """Represents a scientific paper with metadata and content."""
19
-
20
- def __init__(
21
- self,
22
- title: str,
23
- authors: List[str],
24
- abstract: str,
25
- source: str, # 'pubmed', 'arxiv', 'local', etc.
26
- year: Optional[int] = None,
27
- doi: Optional[str] = None,
28
- pmid: Optional[str] = None,
29
- arxiv_id: Optional[str] = None,
30
- journal: Optional[str] = None,
31
- keywords: Optional[List[str]] = None,
32
- pdf_path: Optional[Path] = None,
33
- embedding: Optional[Any] = None, # numpy array or tensor
34
- metadata: Optional[Dict[str, Any]] = None,
35
- ):
36
- """Initialize a Paper object.
37
-
38
- Parameters
39
- ----------
40
- title : str
41
- Paper title
42
- authors : List[str]
43
- List of author names
44
- abstract : str
45
- Paper abstract
46
- source : str
47
- Source of the paper (pubmed, arxiv, local, etc.)
48
- year : int, optional
49
- Publication year
50
- doi : str, optional
51
- Digital Object Identifier
52
- pmid : str, optional
53
- PubMed ID
54
- arxiv_id : str, optional
55
- arXiv identifier
56
- journal : str, optional
57
- Journal name
58
- keywords : List[str], optional
59
- Keywords/tags
60
- pdf_path : Path, optional
61
- Path to local PDF file
62
- embedding : Any, optional
63
- Vector embedding of the paper
64
- metadata : Dict[str, Any], optional
65
- Additional metadata
66
- """
67
- self.title = title
68
- self.authors = authors
69
- self.abstract = abstract
70
- self.source = source
71
- self.year = year
72
- self.doi = doi
73
- self.pmid = pmid
74
- self.arxiv_id = arxiv_id
75
- self.journal = journal
76
- self.keywords = keywords or []
77
- self.pdf_path = Path(pdf_path) if pdf_path else None
78
- self.embedding = embedding
79
- self.metadata = metadata or {}
80
- self.retrieved_at = datetime.now()
81
-
82
- def __repr__(self) -> str:
83
- """String representation of the paper."""
84
- return f"Paper(title='{self.title[:50]}...', authors={len(self.authors)}, source='{self.source}')"
85
-
86
- def __str__(self) -> str:
87
- """Human-readable string representation."""
88
- authors_str = ", ".join(self.authors[:3])
89
- if len(self.authors) > 3:
90
- authors_str += f" et al. ({len(self.authors)} authors)"
91
-
92
- parts = [
93
- f"Title: {self.title}",
94
- f"Authors: {authors_str}",
95
- f"Year: {self.year or 'Unknown'}",
96
- f"Source: {self.source}",
97
- ]
98
-
99
- if self.journal:
100
- parts.append(f"Journal: {self.journal}")
101
- if self.doi:
102
- parts.append(f"DOI: {self.doi}")
103
- if self.pmid:
104
- parts.append(f"PMID: {self.pmid}")
105
- if self.arxiv_id:
106
- parts.append(f"arXiv: {self.arxiv_id}")
107
-
108
- return "\n".join(parts)
109
-
110
- def to_dict(self) -> Dict[str, Any]:
111
- """Convert paper to dictionary representation."""
112
- return {
113
- "title": self.title,
114
- "authors": self.authors,
115
- "abstract": self.abstract,
116
- "source": self.source,
117
- "year": self.year,
118
- "doi": self.doi,
119
- "pmid": self.pmid,
120
- "arxiv_id": self.arxiv_id,
121
- "journal": self.journal,
122
- "keywords": self.keywords,
123
- "pdf_path": str(self.pdf_path) if self.pdf_path else None,
124
- "has_embedding": self.embedding is not None,
125
- "metadata": self.metadata,
126
- "retrieved_at": self.retrieved_at.isoformat(),
127
- }
128
-
129
- @classmethod
130
- def from_dict(cls, data: Dict[str, Any]) -> "Paper":
131
- """Create Paper from dictionary representation."""
132
- # Remove computed fields
133
- data = data.copy()
134
- data.pop("has_embedding", None)
135
- data.pop("retrieved_at", None)
136
-
137
- # Convert pdf_path back to Path if present
138
- if data.get("pdf_path"):
139
- data["pdf_path"] = Path(data["pdf_path"])
140
-
141
- return cls(**data)
142
-
143
- def to_bibtex(self) -> str:
144
- """Generate BibTeX entry for the paper."""
145
- # Generate citation key
146
- first_author = self.authors[0].split()[-1] if self.authors else "Unknown"
147
- year = self.year or "0000"
148
- title_word = self.title.split()[0].lower() if self.title else "paper"
149
- cite_key = f"{first_author}{year}{title_word}"
150
-
151
- # Determine entry type
152
- if self.arxiv_id:
153
- entry_type = "@misc"
154
- else:
155
- entry_type = "@article"
156
-
157
- # Build BibTeX entry
158
- lines = [f"{entry_type}{{{cite_key},"]
159
- lines.append(f' title = "{{{self.title}}}",')
160
-
161
- if self.authors:
162
- authors_str = " and ".join(self.authors)
163
- lines.append(f' author = "{{{authors_str}}}",')
164
-
165
- if self.year:
166
- lines.append(f' year = "{{{self.year}}}",')
167
-
168
- if self.journal:
169
- lines.append(f' journal = "{{{self.journal}}}",')
170
-
171
- if self.doi:
172
- lines.append(f' doi = "{{{self.doi}}}",')
173
-
174
- if self.arxiv_id:
175
- lines.append(f' eprint = "{{{self.arxiv_id}}}",')
176
- lines.append(' archivePrefix = "{arXiv}",')
177
-
178
- lines.append("}")
179
-
180
- return "\n".join(lines)
181
-
182
- def get_identifier(self) -> str:
183
- """Get a unique identifier for the paper."""
184
- if self.doi:
185
- return f"doi:{self.doi}"
186
- elif self.pmid:
187
- return f"pmid:{self.pmid}"
188
- elif self.arxiv_id:
189
- return f"arxiv:{self.arxiv_id}"
190
- else:
191
- # Create a hash from title and authors
192
- import hashlib
193
- content = f"{self.title}_{';'.join(self.authors)}"
194
- return f"hash:{hashlib.md5(content.encode()).hexdigest()[:12]}"
195
-
196
- def has_pdf(self) -> bool:
197
- """Check if paper has an associated PDF file."""
198
- return self.pdf_path is not None and self.pdf_path.exists()
199
-
200
- def similarity_score(self, other: "Paper") -> float:
201
- """Calculate similarity score with another paper (0-1)."""
202
- score = 0.0
203
- weights = {"title": 0.4, "authors": 0.3, "abstract": 0.3}
204
-
205
- # Title similarity (simple word overlap)
206
- title_words1 = set(self.title.lower().split())
207
- title_words2 = set(other.title.lower().split())
208
- if title_words1 and title_words2:
209
- title_overlap = len(title_words1 & title_words2) / len(title_words1 | title_words2)
210
- score += weights["title"] * title_overlap
211
-
212
- # Author similarity
213
- authors1 = set(self.authors)
214
- authors2 = set(other.authors)
215
- if authors1 and authors2:
216
- author_overlap = len(authors1 & authors2) / len(authors1 | authors2)
217
- score += weights["authors"] * author_overlap
218
-
219
- # Abstract similarity (simple word overlap)
220
- abstract_words1 = set(self.abstract.lower().split())
221
- abstract_words2 = set(other.abstract.lower().split())
222
- if abstract_words1 and abstract_words2:
223
- abstract_overlap = len(abstract_words1 & abstract_words2) / len(abstract_words1 | abstract_words2)
224
- score += weights["abstract"] * abstract_overlap
225
-
226
- return score
227
-
228
-
229
- # Example usage
230
- if __name__ == "__main__":
231
- paper = Paper(
232
- title="Deep Learning for Scientific Discovery",
233
- authors=["John Doe", "Jane Smith", "Bob Johnson"],
234
- abstract="This paper explores the application of deep learning...",
235
- source="arxiv",
236
- year=2024,
237
- arxiv_id="2401.12345",
238
- keywords=["deep learning", "scientific computing", "AI"],
239
- )
240
-
241
- print(paper)
242
- print("\nBibTeX:")
243
- print(paper.to_bibtex())
244
- print("\nIdentifier:", paper.get_identifier())