scitex 2.0.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (572) hide show
  1. scitex/__init__.py +73 -0
  2. scitex/__main__.py +89 -0
  3. scitex/__version__.py +14 -0
  4. scitex/_sh.py +59 -0
  5. scitex/ai/_LearningCurveLogger.py +583 -0
  6. scitex/ai/__Classifiers.py +101 -0
  7. scitex/ai/__init__.py +55 -0
  8. scitex/ai/_gen_ai/_Anthropic.py +173 -0
  9. scitex/ai/_gen_ai/_BaseGenAI.py +336 -0
  10. scitex/ai/_gen_ai/_DeepSeek.py +175 -0
  11. scitex/ai/_gen_ai/_Google.py +161 -0
  12. scitex/ai/_gen_ai/_Groq.py +97 -0
  13. scitex/ai/_gen_ai/_Llama.py +142 -0
  14. scitex/ai/_gen_ai/_OpenAI.py +230 -0
  15. scitex/ai/_gen_ai/_PARAMS.py +565 -0
  16. scitex/ai/_gen_ai/_Perplexity.py +191 -0
  17. scitex/ai/_gen_ai/__init__.py +32 -0
  18. scitex/ai/_gen_ai/_calc_cost.py +78 -0
  19. scitex/ai/_gen_ai/_format_output_func.py +183 -0
  20. scitex/ai/_gen_ai/_genai_factory.py +71 -0
  21. scitex/ai/act/__init__.py +8 -0
  22. scitex/ai/act/_define.py +11 -0
  23. scitex/ai/classification/__init__.py +7 -0
  24. scitex/ai/classification/classification_reporter.py +1137 -0
  25. scitex/ai/classification/classifier_server.py +131 -0
  26. scitex/ai/classification/classifiers.py +101 -0
  27. scitex/ai/classification_reporter.py +1161 -0
  28. scitex/ai/classifier_server.py +131 -0
  29. scitex/ai/clustering/__init__.py +11 -0
  30. scitex/ai/clustering/_pca.py +115 -0
  31. scitex/ai/clustering/_umap.py +376 -0
  32. scitex/ai/early_stopping.py +149 -0
  33. scitex/ai/feature_extraction/__init__.py +56 -0
  34. scitex/ai/feature_extraction/vit.py +148 -0
  35. scitex/ai/genai/__init__.py +277 -0
  36. scitex/ai/genai/anthropic.py +177 -0
  37. scitex/ai/genai/anthropic_provider.py +320 -0
  38. scitex/ai/genai/anthropic_refactored.py +109 -0
  39. scitex/ai/genai/auth_manager.py +200 -0
  40. scitex/ai/genai/base_genai.py +336 -0
  41. scitex/ai/genai/base_provider.py +291 -0
  42. scitex/ai/genai/calc_cost.py +78 -0
  43. scitex/ai/genai/chat_history.py +307 -0
  44. scitex/ai/genai/cost_tracker.py +276 -0
  45. scitex/ai/genai/deepseek.py +188 -0
  46. scitex/ai/genai/deepseek_provider.py +251 -0
  47. scitex/ai/genai/format_output_func.py +183 -0
  48. scitex/ai/genai/genai_factory.py +71 -0
  49. scitex/ai/genai/google.py +169 -0
  50. scitex/ai/genai/google_provider.py +228 -0
  51. scitex/ai/genai/groq.py +104 -0
  52. scitex/ai/genai/groq_provider.py +248 -0
  53. scitex/ai/genai/image_processor.py +250 -0
  54. scitex/ai/genai/llama.py +155 -0
  55. scitex/ai/genai/llama_provider.py +214 -0
  56. scitex/ai/genai/mock_provider.py +127 -0
  57. scitex/ai/genai/model_registry.py +304 -0
  58. scitex/ai/genai/openai.py +230 -0
  59. scitex/ai/genai/openai_provider.py +293 -0
  60. scitex/ai/genai/params.py +565 -0
  61. scitex/ai/genai/perplexity.py +202 -0
  62. scitex/ai/genai/perplexity_provider.py +205 -0
  63. scitex/ai/genai/provider_base.py +302 -0
  64. scitex/ai/genai/provider_factory.py +370 -0
  65. scitex/ai/genai/response_handler.py +235 -0
  66. scitex/ai/layer/_Pass.py +21 -0
  67. scitex/ai/layer/__init__.py +10 -0
  68. scitex/ai/layer/_switch.py +8 -0
  69. scitex/ai/loss/_L1L2Losses.py +34 -0
  70. scitex/ai/loss/__init__.py +12 -0
  71. scitex/ai/loss/multi_task_loss.py +47 -0
  72. scitex/ai/metrics/__init__.py +9 -0
  73. scitex/ai/metrics/_bACC.py +51 -0
  74. scitex/ai/metrics/silhoute_score_block.py +496 -0
  75. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
  76. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
  77. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
  78. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
  79. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
  80. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
  81. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
  82. scitex/ai/optim/__init__.py +13 -0
  83. scitex/ai/optim/_get_set.py +31 -0
  84. scitex/ai/optim/_optimizers.py +71 -0
  85. scitex/ai/plt/__init__.py +21 -0
  86. scitex/ai/plt/_conf_mat.py +592 -0
  87. scitex/ai/plt/_learning_curve.py +194 -0
  88. scitex/ai/plt/_optuna_study.py +111 -0
  89. scitex/ai/plt/aucs/__init__.py +2 -0
  90. scitex/ai/plt/aucs/example.py +60 -0
  91. scitex/ai/plt/aucs/pre_rec_auc.py +223 -0
  92. scitex/ai/plt/aucs/roc_auc.py +246 -0
  93. scitex/ai/sampling/undersample.py +29 -0
  94. scitex/ai/sk/__init__.py +11 -0
  95. scitex/ai/sk/_clf.py +58 -0
  96. scitex/ai/sk/_to_sktime.py +100 -0
  97. scitex/ai/sklearn/__init__.py +26 -0
  98. scitex/ai/sklearn/clf.py +58 -0
  99. scitex/ai/sklearn/to_sktime.py +100 -0
  100. scitex/ai/training/__init__.py +7 -0
  101. scitex/ai/training/early_stopping.py +150 -0
  102. scitex/ai/training/learning_curve_logger.py +555 -0
  103. scitex/ai/utils/__init__.py +22 -0
  104. scitex/ai/utils/_check_params.py +50 -0
  105. scitex/ai/utils/_default_dataset.py +46 -0
  106. scitex/ai/utils/_format_samples_for_sktime.py +26 -0
  107. scitex/ai/utils/_label_encoder.py +134 -0
  108. scitex/ai/utils/_merge_labels.py +22 -0
  109. scitex/ai/utils/_sliding_window_data_augmentation.py +11 -0
  110. scitex/ai/utils/_under_sample.py +51 -0
  111. scitex/ai/utils/_verify_n_gpus.py +16 -0
  112. scitex/ai/utils/grid_search.py +148 -0
  113. scitex/context/__init__.py +9 -0
  114. scitex/context/_suppress_output.py +38 -0
  115. scitex/db/_BaseMixins/_BaseBackupMixin.py +30 -0
  116. scitex/db/_BaseMixins/_BaseBatchMixin.py +31 -0
  117. scitex/db/_BaseMixins/_BaseBlobMixin.py +81 -0
  118. scitex/db/_BaseMixins/_BaseConnectionMixin.py +43 -0
  119. scitex/db/_BaseMixins/_BaseImportExportMixin.py +39 -0
  120. scitex/db/_BaseMixins/_BaseIndexMixin.py +29 -0
  121. scitex/db/_BaseMixins/_BaseMaintenanceMixin.py +33 -0
  122. scitex/db/_BaseMixins/_BaseQueryMixin.py +52 -0
  123. scitex/db/_BaseMixins/_BaseRowMixin.py +32 -0
  124. scitex/db/_BaseMixins/_BaseSchemaMixin.py +44 -0
  125. scitex/db/_BaseMixins/_BaseTableMixin.py +66 -0
  126. scitex/db/_BaseMixins/_BaseTransactionMixin.py +52 -0
  127. scitex/db/_BaseMixins/__init__.py +30 -0
  128. scitex/db/_PostgreSQL.py +126 -0
  129. scitex/db/_PostgreSQLMixins/_BackupMixin.py +166 -0
  130. scitex/db/_PostgreSQLMixins/_BatchMixin.py +82 -0
  131. scitex/db/_PostgreSQLMixins/_BlobMixin.py +231 -0
  132. scitex/db/_PostgreSQLMixins/_ConnectionMixin.py +92 -0
  133. scitex/db/_PostgreSQLMixins/_ImportExportMixin.py +59 -0
  134. scitex/db/_PostgreSQLMixins/_IndexMixin.py +64 -0
  135. scitex/db/_PostgreSQLMixins/_MaintenanceMixin.py +175 -0
  136. scitex/db/_PostgreSQLMixins/_QueryMixin.py +108 -0
  137. scitex/db/_PostgreSQLMixins/_RowMixin.py +75 -0
  138. scitex/db/_PostgreSQLMixins/_SchemaMixin.py +126 -0
  139. scitex/db/_PostgreSQLMixins/_TableMixin.py +176 -0
  140. scitex/db/_PostgreSQLMixins/_TransactionMixin.py +57 -0
  141. scitex/db/_PostgreSQLMixins/__init__.py +34 -0
  142. scitex/db/_SQLite3.py +2136 -0
  143. scitex/db/_SQLite3Mixins/_BatchMixin.py +243 -0
  144. scitex/db/_SQLite3Mixins/_BlobMixin.py +229 -0
  145. scitex/db/_SQLite3Mixins/_ConnectionMixin.py +108 -0
  146. scitex/db/_SQLite3Mixins/_ImportExportMixin.py +80 -0
  147. scitex/db/_SQLite3Mixins/_IndexMixin.py +32 -0
  148. scitex/db/_SQLite3Mixins/_MaintenanceMixin.py +176 -0
  149. scitex/db/_SQLite3Mixins/_QueryMixin.py +83 -0
  150. scitex/db/_SQLite3Mixins/_RowMixin.py +75 -0
  151. scitex/db/_SQLite3Mixins/_TableMixin.py +183 -0
  152. scitex/db/_SQLite3Mixins/_TransactionMixin.py +71 -0
  153. scitex/db/_SQLite3Mixins/__init__.py +30 -0
  154. scitex/db/__init__.py +14 -0
  155. scitex/db/_delete_duplicates.py +397 -0
  156. scitex/db/_inspect.py +163 -0
  157. scitex/decorators/__init__.py +54 -0
  158. scitex/decorators/_auto_order.py +172 -0
  159. scitex/decorators/_batch_fn.py +127 -0
  160. scitex/decorators/_cache_disk.py +32 -0
  161. scitex/decorators/_cache_mem.py +12 -0
  162. scitex/decorators/_combined.py +98 -0
  163. scitex/decorators/_converters.py +282 -0
  164. scitex/decorators/_deprecated.py +26 -0
  165. scitex/decorators/_not_implemented.py +30 -0
  166. scitex/decorators/_numpy_fn.py +86 -0
  167. scitex/decorators/_pandas_fn.py +121 -0
  168. scitex/decorators/_preserve_doc.py +19 -0
  169. scitex/decorators/_signal_fn.py +95 -0
  170. scitex/decorators/_timeout.py +55 -0
  171. scitex/decorators/_torch_fn.py +136 -0
  172. scitex/decorators/_wrap.py +39 -0
  173. scitex/decorators/_xarray_fn.py +88 -0
  174. scitex/dev/__init__.py +15 -0
  175. scitex/dev/_analyze_code_flow.py +284 -0
  176. scitex/dev/_reload.py +59 -0
  177. scitex/dict/_DotDict.py +442 -0
  178. scitex/dict/__init__.py +18 -0
  179. scitex/dict/_listed_dict.py +42 -0
  180. scitex/dict/_pop_keys.py +36 -0
  181. scitex/dict/_replace.py +13 -0
  182. scitex/dict/_safe_merge.py +62 -0
  183. scitex/dict/_to_str.py +32 -0
  184. scitex/dsp/__init__.py +72 -0
  185. scitex/dsp/_crop.py +122 -0
  186. scitex/dsp/_demo_sig.py +331 -0
  187. scitex/dsp/_detect_ripples.py +212 -0
  188. scitex/dsp/_ensure_3d.py +18 -0
  189. scitex/dsp/_hilbert.py +78 -0
  190. scitex/dsp/_listen.py +702 -0
  191. scitex/dsp/_misc.py +30 -0
  192. scitex/dsp/_mne.py +32 -0
  193. scitex/dsp/_modulation_index.py +79 -0
  194. scitex/dsp/_pac.py +319 -0
  195. scitex/dsp/_psd.py +102 -0
  196. scitex/dsp/_resample.py +65 -0
  197. scitex/dsp/_time.py +36 -0
  198. scitex/dsp/_transform.py +68 -0
  199. scitex/dsp/_wavelet.py +212 -0
  200. scitex/dsp/add_noise.py +111 -0
  201. scitex/dsp/example.py +253 -0
  202. scitex/dsp/filt.py +155 -0
  203. scitex/dsp/norm.py +18 -0
  204. scitex/dsp/params.py +51 -0
  205. scitex/dsp/reference.py +43 -0
  206. scitex/dsp/template.py +25 -0
  207. scitex/dsp/utils/__init__.py +15 -0
  208. scitex/dsp/utils/_differential_bandpass_filters.py +120 -0
  209. scitex/dsp/utils/_ensure_3d.py +18 -0
  210. scitex/dsp/utils/_ensure_even_len.py +10 -0
  211. scitex/dsp/utils/_zero_pad.py +48 -0
  212. scitex/dsp/utils/filter.py +408 -0
  213. scitex/dsp/utils/pac.py +177 -0
  214. scitex/dt/__init__.py +8 -0
  215. scitex/dt/_linspace.py +130 -0
  216. scitex/etc/__init__.py +15 -0
  217. scitex/etc/wait_key.py +34 -0
  218. scitex/gen/_DimHandler.py +196 -0
  219. scitex/gen/_TimeStamper.py +244 -0
  220. scitex/gen/__init__.py +95 -0
  221. scitex/gen/_alternate_kwarg.py +13 -0
  222. scitex/gen/_cache.py +11 -0
  223. scitex/gen/_check_host.py +34 -0
  224. scitex/gen/_ci.py +12 -0
  225. scitex/gen/_close.py +222 -0
  226. scitex/gen/_embed.py +78 -0
  227. scitex/gen/_inspect_module.py +257 -0
  228. scitex/gen/_is_ipython.py +12 -0
  229. scitex/gen/_less.py +48 -0
  230. scitex/gen/_list_packages.py +139 -0
  231. scitex/gen/_mat2py.py +88 -0
  232. scitex/gen/_norm.py +170 -0
  233. scitex/gen/_paste.py +18 -0
  234. scitex/gen/_print_config.py +84 -0
  235. scitex/gen/_shell.py +48 -0
  236. scitex/gen/_src.py +111 -0
  237. scitex/gen/_start.py +451 -0
  238. scitex/gen/_symlink.py +55 -0
  239. scitex/gen/_symlog.py +27 -0
  240. scitex/gen/_tee.py +238 -0
  241. scitex/gen/_title2path.py +60 -0
  242. scitex/gen/_title_case.py +88 -0
  243. scitex/gen/_to_even.py +84 -0
  244. scitex/gen/_to_odd.py +34 -0
  245. scitex/gen/_to_rank.py +39 -0
  246. scitex/gen/_transpose.py +37 -0
  247. scitex/gen/_type.py +78 -0
  248. scitex/gen/_var_info.py +73 -0
  249. scitex/gen/_wrap.py +17 -0
  250. scitex/gen/_xml2dict.py +76 -0
  251. scitex/gen/misc.py +730 -0
  252. scitex/gen/path.py +0 -0
  253. scitex/general/__init__.py +5 -0
  254. scitex/gists/_SigMacro_processFigure_S.py +128 -0
  255. scitex/gists/_SigMacro_toBlue.py +172 -0
  256. scitex/gists/__init__.py +12 -0
  257. scitex/io/_H5Explorer.py +292 -0
  258. scitex/io/__init__.py +82 -0
  259. scitex/io/_cache.py +101 -0
  260. scitex/io/_flush.py +24 -0
  261. scitex/io/_glob.py +103 -0
  262. scitex/io/_json2md.py +113 -0
  263. scitex/io/_load.py +168 -0
  264. scitex/io/_load_configs.py +146 -0
  265. scitex/io/_load_modules/__init__.py +38 -0
  266. scitex/io/_load_modules/_catboost.py +66 -0
  267. scitex/io/_load_modules/_con.py +20 -0
  268. scitex/io/_load_modules/_db.py +24 -0
  269. scitex/io/_load_modules/_docx.py +42 -0
  270. scitex/io/_load_modules/_eeg.py +110 -0
  271. scitex/io/_load_modules/_hdf5.py +196 -0
  272. scitex/io/_load_modules/_image.py +19 -0
  273. scitex/io/_load_modules/_joblib.py +19 -0
  274. scitex/io/_load_modules/_json.py +18 -0
  275. scitex/io/_load_modules/_markdown.py +103 -0
  276. scitex/io/_load_modules/_matlab.py +37 -0
  277. scitex/io/_load_modules/_numpy.py +39 -0
  278. scitex/io/_load_modules/_optuna.py +155 -0
  279. scitex/io/_load_modules/_pandas.py +69 -0
  280. scitex/io/_load_modules/_pdf.py +31 -0
  281. scitex/io/_load_modules/_pickle.py +24 -0
  282. scitex/io/_load_modules/_torch.py +16 -0
  283. scitex/io/_load_modules/_txt.py +126 -0
  284. scitex/io/_load_modules/_xml.py +49 -0
  285. scitex/io/_load_modules/_yaml.py +23 -0
  286. scitex/io/_mv_to_tmp.py +19 -0
  287. scitex/io/_path.py +286 -0
  288. scitex/io/_reload.py +78 -0
  289. scitex/io/_save.py +539 -0
  290. scitex/io/_save_modules/__init__.py +66 -0
  291. scitex/io/_save_modules/_catboost.py +22 -0
  292. scitex/io/_save_modules/_csv.py +89 -0
  293. scitex/io/_save_modules/_excel.py +49 -0
  294. scitex/io/_save_modules/_hdf5.py +249 -0
  295. scitex/io/_save_modules/_html.py +48 -0
  296. scitex/io/_save_modules/_image.py +140 -0
  297. scitex/io/_save_modules/_joblib.py +25 -0
  298. scitex/io/_save_modules/_json.py +25 -0
  299. scitex/io/_save_modules/_listed_dfs_as_csv.py +57 -0
  300. scitex/io/_save_modules/_listed_scalars_as_csv.py +42 -0
  301. scitex/io/_save_modules/_matlab.py +24 -0
  302. scitex/io/_save_modules/_mp4.py +29 -0
  303. scitex/io/_save_modules/_numpy.py +57 -0
  304. scitex/io/_save_modules/_optuna_study_as_csv_and_pngs.py +38 -0
  305. scitex/io/_save_modules/_pickle.py +45 -0
  306. scitex/io/_save_modules/_plotly.py +27 -0
  307. scitex/io/_save_modules/_text.py +23 -0
  308. scitex/io/_save_modules/_torch.py +26 -0
  309. scitex/io/_save_modules/_yaml.py +29 -0
  310. scitex/life/__init__.py +10 -0
  311. scitex/life/_monitor_rain.py +49 -0
  312. scitex/linalg/__init__.py +17 -0
  313. scitex/linalg/_distance.py +63 -0
  314. scitex/linalg/_geometric_median.py +64 -0
  315. scitex/linalg/_misc.py +73 -0
  316. scitex/nn/_AxiswiseDropout.py +27 -0
  317. scitex/nn/_BNet.py +126 -0
  318. scitex/nn/_BNet_Res.py +164 -0
  319. scitex/nn/_ChannelGainChanger.py +44 -0
  320. scitex/nn/_DropoutChannels.py +50 -0
  321. scitex/nn/_Filters.py +489 -0
  322. scitex/nn/_FreqGainChanger.py +110 -0
  323. scitex/nn/_GaussianFilter.py +48 -0
  324. scitex/nn/_Hilbert.py +111 -0
  325. scitex/nn/_MNet_1000.py +157 -0
  326. scitex/nn/_ModulationIndex.py +221 -0
  327. scitex/nn/_PAC.py +414 -0
  328. scitex/nn/_PSD.py +40 -0
  329. scitex/nn/_ResNet1D.py +120 -0
  330. scitex/nn/_SpatialAttention.py +25 -0
  331. scitex/nn/_Spectrogram.py +161 -0
  332. scitex/nn/_SwapChannels.py +50 -0
  333. scitex/nn/_TransposeLayer.py +19 -0
  334. scitex/nn/_Wavelet.py +183 -0
  335. scitex/nn/__init__.py +63 -0
  336. scitex/os/__init__.py +8 -0
  337. scitex/os/_mv.py +50 -0
  338. scitex/parallel/__init__.py +8 -0
  339. scitex/parallel/_run.py +151 -0
  340. scitex/path/__init__.py +33 -0
  341. scitex/path/_clean.py +52 -0
  342. scitex/path/_find.py +108 -0
  343. scitex/path/_get_module_path.py +51 -0
  344. scitex/path/_get_spath.py +35 -0
  345. scitex/path/_getsize.py +18 -0
  346. scitex/path/_increment_version.py +87 -0
  347. scitex/path/_mk_spath.py +51 -0
  348. scitex/path/_path.py +19 -0
  349. scitex/path/_split.py +23 -0
  350. scitex/path/_this_path.py +19 -0
  351. scitex/path/_version.py +101 -0
  352. scitex/pd/__init__.py +41 -0
  353. scitex/pd/_find_indi.py +126 -0
  354. scitex/pd/_find_pval.py +113 -0
  355. scitex/pd/_force_df.py +154 -0
  356. scitex/pd/_from_xyz.py +71 -0
  357. scitex/pd/_ignore_SettingWithCopyWarning.py +34 -0
  358. scitex/pd/_melt_cols.py +81 -0
  359. scitex/pd/_merge_columns.py +221 -0
  360. scitex/pd/_mv.py +63 -0
  361. scitex/pd/_replace.py +62 -0
  362. scitex/pd/_round.py +93 -0
  363. scitex/pd/_slice.py +63 -0
  364. scitex/pd/_sort.py +91 -0
  365. scitex/pd/_to_numeric.py +53 -0
  366. scitex/pd/_to_xy.py +59 -0
  367. scitex/pd/_to_xyz.py +110 -0
  368. scitex/plt/__init__.py +36 -0
  369. scitex/plt/_subplots/_AxesWrapper.py +182 -0
  370. scitex/plt/_subplots/_AxisWrapper.py +249 -0
  371. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +414 -0
  372. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +896 -0
  373. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +368 -0
  374. scitex/plt/_subplots/_AxisWrapperMixins/_TrackingMixin.py +185 -0
  375. scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +16 -0
  376. scitex/plt/_subplots/_FigWrapper.py +226 -0
  377. scitex/plt/_subplots/_SubplotsWrapper.py +171 -0
  378. scitex/plt/_subplots/__init__.py +111 -0
  379. scitex/plt/_subplots/_export_as_csv.py +232 -0
  380. scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +61 -0
  381. scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +90 -0
  382. scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +49 -0
  383. scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +46 -0
  384. scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +39 -0
  385. scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +125 -0
  386. scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +72 -0
  387. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +34 -0
  388. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +36 -0
  389. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +79 -0
  390. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +59 -0
  391. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +32 -0
  392. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +79 -0
  393. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +75 -0
  394. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +64 -0
  395. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +44 -0
  396. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +70 -0
  397. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +66 -0
  398. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +95 -0
  399. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +67 -0
  400. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +52 -0
  401. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +46 -0
  402. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +46 -0
  403. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +46 -0
  404. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +46 -0
  405. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +44 -0
  406. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +103 -0
  407. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +82 -0
  408. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +58 -0
  409. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +117 -0
  410. scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +30 -0
  411. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +51 -0
  412. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +93 -0
  413. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +94 -0
  414. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +92 -0
  415. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +65 -0
  416. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +59 -0
  417. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +58 -0
  418. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +45 -0
  419. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +70 -0
  420. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +75 -0
  421. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +75 -0
  422. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +155 -0
  423. scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +64 -0
  424. scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +77 -0
  425. scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +210 -0
  426. scitex/plt/_subplots/_export_as_csv_formatters/verify_formatters.py +342 -0
  427. scitex/plt/_subplots/_export_as_csv_formatters.py +115 -0
  428. scitex/plt/_tpl.py +28 -0
  429. scitex/plt/ax/__init__.py +114 -0
  430. scitex/plt/ax/_plot/__init__.py +53 -0
  431. scitex/plt/ax/_plot/_plot_circular_hist.py +124 -0
  432. scitex/plt/ax/_plot/_plot_conf_mat.py +136 -0
  433. scitex/plt/ax/_plot/_plot_cube.py +57 -0
  434. scitex/plt/ax/_plot/_plot_ecdf.py +84 -0
  435. scitex/plt/ax/_plot/_plot_fillv.py +55 -0
  436. scitex/plt/ax/_plot/_plot_heatmap.py +266 -0
  437. scitex/plt/ax/_plot/_plot_image.py +94 -0
  438. scitex/plt/ax/_plot/_plot_joyplot.py +76 -0
  439. scitex/plt/ax/_plot/_plot_raster.py +172 -0
  440. scitex/plt/ax/_plot/_plot_rectangle.py +69 -0
  441. scitex/plt/ax/_plot/_plot_scatter_hist.py +133 -0
  442. scitex/plt/ax/_plot/_plot_shaded_line.py +142 -0
  443. scitex/plt/ax/_plot/_plot_statistical_shaded_line.py +221 -0
  444. scitex/plt/ax/_plot/_plot_violin.py +343 -0
  445. scitex/plt/ax/_style/__init__.py +38 -0
  446. scitex/plt/ax/_style/_add_marginal_ax.py +44 -0
  447. scitex/plt/ax/_style/_add_panel.py +92 -0
  448. scitex/plt/ax/_style/_extend.py +64 -0
  449. scitex/plt/ax/_style/_force_aspect.py +37 -0
  450. scitex/plt/ax/_style/_format_label.py +23 -0
  451. scitex/plt/ax/_style/_hide_spines.py +84 -0
  452. scitex/plt/ax/_style/_map_ticks.py +182 -0
  453. scitex/plt/ax/_style/_rotate_labels.py +215 -0
  454. scitex/plt/ax/_style/_sci_note.py +279 -0
  455. scitex/plt/ax/_style/_set_log_scale.py +299 -0
  456. scitex/plt/ax/_style/_set_meta.py +261 -0
  457. scitex/plt/ax/_style/_set_n_ticks.py +37 -0
  458. scitex/plt/ax/_style/_set_size.py +16 -0
  459. scitex/plt/ax/_style/_set_supxyt.py +116 -0
  460. scitex/plt/ax/_style/_set_ticks.py +276 -0
  461. scitex/plt/ax/_style/_set_xyt.py +121 -0
  462. scitex/plt/ax/_style/_share_axes.py +264 -0
  463. scitex/plt/ax/_style/_shift.py +139 -0
  464. scitex/plt/ax/_style/_show_spines.py +333 -0
  465. scitex/plt/color/_PARAMS.py +70 -0
  466. scitex/plt/color/__init__.py +52 -0
  467. scitex/plt/color/_add_hue_col.py +41 -0
  468. scitex/plt/color/_colors.py +205 -0
  469. scitex/plt/color/_get_colors_from_cmap.py +134 -0
  470. scitex/plt/color/_interpolate.py +29 -0
  471. scitex/plt/color/_vizualize_colors.py +54 -0
  472. scitex/plt/utils/__init__.py +44 -0
  473. scitex/plt/utils/_calc_bacc_from_conf_mat.py +46 -0
  474. scitex/plt/utils/_calc_nice_ticks.py +101 -0
  475. scitex/plt/utils/_close.py +68 -0
  476. scitex/plt/utils/_colorbar.py +96 -0
  477. scitex/plt/utils/_configure_mpl.py +295 -0
  478. scitex/plt/utils/_histogram_utils.py +132 -0
  479. scitex/plt/utils/_im2grid.py +70 -0
  480. scitex/plt/utils/_is_valid_axis.py +78 -0
  481. scitex/plt/utils/_mk_colorbar.py +65 -0
  482. scitex/plt/utils/_mk_patches.py +26 -0
  483. scitex/plt/utils/_scientific_captions.py +638 -0
  484. scitex/plt/utils/_scitex_config.py +223 -0
  485. scitex/reproduce/__init__.py +14 -0
  486. scitex/reproduce/_fix_seeds.py +45 -0
  487. scitex/reproduce/_gen_ID.py +55 -0
  488. scitex/reproduce/_gen_timestamp.py +35 -0
  489. scitex/res/__init__.py +5 -0
  490. scitex/resource/__init__.py +13 -0
  491. scitex/resource/_get_processor_usages.py +281 -0
  492. scitex/resource/_get_specs.py +280 -0
  493. scitex/resource/_log_processor_usages.py +190 -0
  494. scitex/resource/_utils/__init__.py +31 -0
  495. scitex/resource/_utils/_get_env_info.py +481 -0
  496. scitex/resource/limit_ram.py +33 -0
  497. scitex/scholar/__init__.py +24 -0
  498. scitex/scholar/_local_search.py +454 -0
  499. scitex/scholar/_paper.py +244 -0
  500. scitex/scholar/_pdf_downloader.py +325 -0
  501. scitex/scholar/_search.py +393 -0
  502. scitex/scholar/_vector_search.py +370 -0
  503. scitex/scholar/_web_sources.py +457 -0
  504. scitex/stats/__init__.py +31 -0
  505. scitex/stats/_calc_partial_corr.py +17 -0
  506. scitex/stats/_corr_test_multi.py +94 -0
  507. scitex/stats/_corr_test_wrapper.py +115 -0
  508. scitex/stats/_describe_wrapper.py +90 -0
  509. scitex/stats/_multiple_corrections.py +63 -0
  510. scitex/stats/_nan_stats.py +93 -0
  511. scitex/stats/_p2stars.py +116 -0
  512. scitex/stats/_p2stars_wrapper.py +56 -0
  513. scitex/stats/_statistical_tests.py +73 -0
  514. scitex/stats/desc/__init__.py +40 -0
  515. scitex/stats/desc/_describe.py +189 -0
  516. scitex/stats/desc/_nan.py +289 -0
  517. scitex/stats/desc/_real.py +94 -0
  518. scitex/stats/multiple/__init__.py +14 -0
  519. scitex/stats/multiple/_bonferroni_correction.py +72 -0
  520. scitex/stats/multiple/_fdr_correction.py +400 -0
  521. scitex/stats/multiple/_multicompair.py +28 -0
  522. scitex/stats/tests/__corr_test.py +277 -0
  523. scitex/stats/tests/__corr_test_multi.py +343 -0
  524. scitex/stats/tests/__corr_test_single.py +277 -0
  525. scitex/stats/tests/__init__.py +22 -0
  526. scitex/stats/tests/_brunner_munzel_test.py +192 -0
  527. scitex/stats/tests/_nocorrelation_test.py +28 -0
  528. scitex/stats/tests/_smirnov_grubbs.py +98 -0
  529. scitex/str/__init__.py +113 -0
  530. scitex/str/_clean_path.py +75 -0
  531. scitex/str/_color_text.py +52 -0
  532. scitex/str/_decapitalize.py +58 -0
  533. scitex/str/_factor_out_digits.py +281 -0
  534. scitex/str/_format_plot_text.py +498 -0
  535. scitex/str/_grep.py +48 -0
  536. scitex/str/_latex.py +155 -0
  537. scitex/str/_latex_fallback.py +471 -0
  538. scitex/str/_mask_api.py +39 -0
  539. scitex/str/_mask_api_key.py +8 -0
  540. scitex/str/_parse.py +158 -0
  541. scitex/str/_print_block.py +47 -0
  542. scitex/str/_print_debug.py +68 -0
  543. scitex/str/_printc.py +62 -0
  544. scitex/str/_readable_bytes.py +38 -0
  545. scitex/str/_remove_ansi.py +23 -0
  546. scitex/str/_replace.py +134 -0
  547. scitex/str/_search.py +125 -0
  548. scitex/str/_squeeze_space.py +36 -0
  549. scitex/tex/__init__.py +10 -0
  550. scitex/tex/_preview.py +103 -0
  551. scitex/tex/_to_vec.py +116 -0
  552. scitex/torch/__init__.py +18 -0
  553. scitex/torch/_apply_to.py +34 -0
  554. scitex/torch/_nan_funcs.py +77 -0
  555. scitex/types/_ArrayLike.py +44 -0
  556. scitex/types/_ColorLike.py +21 -0
  557. scitex/types/__init__.py +14 -0
  558. scitex/types/_is_listed_X.py +70 -0
  559. scitex/utils/__init__.py +22 -0
  560. scitex/utils/_compress_hdf5.py +116 -0
  561. scitex/utils/_email.py +120 -0
  562. scitex/utils/_grid.py +148 -0
  563. scitex/utils/_notify.py +247 -0
  564. scitex/utils/_search.py +121 -0
  565. scitex/web/__init__.py +38 -0
  566. scitex/web/_search_pubmed.py +438 -0
  567. scitex/web/_summarize_url.py +158 -0
  568. scitex-2.0.0.dist-info/METADATA +307 -0
  569. scitex-2.0.0.dist-info/RECORD +572 -0
  570. scitex-2.0.0.dist-info/WHEEL +6 -0
  571. scitex-2.0.0.dist-info/licenses/LICENSE +7 -0
  572. scitex-2.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,325 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Time-stamp: "2024-12-06 10:20:00"
4
+ # Author: Claude
5
+ # Filename: _pdf_downloader.py
6
+
7
+ """
8
+ PDF downloader for scientific papers.
9
+ """
10
+
11
+ import asyncio
12
+ import aiohttp
13
+ from pathlib import Path
14
+ from typing import Optional, Dict, Any, List
15
+ import logging
16
+ import re
17
+ from urllib.parse import urlparse, quote
18
+
19
+ from ._paper import Paper
20
+
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ class PDFDownloader:
26
+ """Download PDFs for scientific papers."""
27
+
28
+ def __init__(
29
+ self,
30
+ download_dir: Optional[Path] = None,
31
+ timeout: int = 30,
32
+ max_concurrent: int = 3,
33
+ ):
34
+ """Initialize PDF downloader.
35
+
36
+ Parameters
37
+ ----------
38
+ download_dir : Path, optional
39
+ Directory to save PDFs (default: current directory)
40
+ timeout : int
41
+ Download timeout in seconds
42
+ max_concurrent : int
43
+ Maximum concurrent downloads
44
+ """
45
+ self.download_dir = Path(download_dir) if download_dir else Path.cwd()
46
+ self.download_dir.mkdir(parents=True, exist_ok=True)
47
+ self.timeout = timeout
48
+ self.max_concurrent = max_concurrent
49
+
50
+ # Headers for requests
51
+ self.headers = {
52
+ "User-Agent": "Mozilla/5.0 (compatible; SciTeX Scholar/1.0; +https://github.com/ywatanabe/scitex)"
53
+ }
54
+
55
async def download_paper(
    self,
    paper: Paper,
    session: Optional[aiohttp.ClientSession] = None,
    force: bool = False,
) -> Optional[Path]:
    """Download PDF for a single paper.

    An existing PDF (either ``paper.pdf_path`` or the generated target
    path inside ``self.download_dir``) is reused unless ``force`` is set.
    On success ``paper.pdf_path`` is updated to the saved file.

    Parameters
    ----------
    paper : Paper
        Paper to download
    session : aiohttp.ClientSession, optional
        Session for connection pooling. When omitted, a temporary session
        is created for this call and closed before returning.
    force : bool
        Force re-download even if file exists

    Returns
    -------
    Path or None
        Path to downloaded PDF, or None if no URL is available, the
        server does not answer with status 200, the payload does not
        start with the ``%PDF`` magic bytes, or any error occurs while
        downloading or saving (errors are logged, not raised).
    """
    # Reuse the PDF the paper already points at
    if paper.pdf_path and paper.pdf_path.exists() and not force:
        logger.info(f"PDF already exists: {paper.pdf_path}")
        return paper.pdf_path

    pdf_path = self.download_dir / self._generate_filename(paper)

    # Reuse a file left by an earlier download
    if pdf_path.exists() and not force:
        paper.pdf_path = pdf_path
        logger.info(f"PDF already downloaded: {pdf_path}")
        return pdf_path

    pdf_url = self._get_pdf_url(paper)
    if not pdf_url:
        logger.warning(f"No PDF URL available for: {paper.title}")
        return None

    # Only close the session if this call created it
    close_session = session is None
    if close_session:
        session = aiohttp.ClientSession()

    try:
        logger.info(f"Downloading PDF from: {pdf_url}")

        async with session.get(
            pdf_url,
            headers=self.headers,
            timeout=aiohttp.ClientTimeout(total=self.timeout),
        ) as response:
            if response.status != 200:
                logger.error(f"Failed to download PDF (status {response.status}): {paper.title}")
                return None
            content = await response.read()

        # Verify it's a PDF (landing pages and paywalls return HTML)
        if not content.startswith(b"%PDF"):
            logger.error(f"Downloaded content is not a PDF for: {paper.title}")
            return None

        # Write to a sibling temp file and rename it into place, so an
        # interrupted write never leaves a truncated file at pdf_path
        # that the "already downloaded" check above would later accept.
        tmp_path = pdf_path.with_name(pdf_path.name + ".part")
        try:
            tmp_path.write_bytes(content)
            tmp_path.replace(pdf_path)
        except OSError:
            # Best-effort cleanup; the original error is what matters
            try:
                tmp_path.unlink()
            except OSError:
                pass
            raise

        paper.pdf_path = pdf_path
        logger.info(f"Downloaded PDF to: {pdf_path}")
        return pdf_path

    except asyncio.TimeoutError:
        logger.error(f"Timeout downloading PDF: {paper.title}")
        return None
    except Exception as e:
        logger.error(f"Error downloading PDF: {e}")
        return None
    finally:
        if close_session:
            await session.close()
140
+
141
async def download_papers(
    self,
    papers: List[Paper],
    force: bool = False,
    progress_callback: Optional[callable] = None,
) -> Dict[str, Path]:
    """Download PDFs for multiple papers concurrently.

    At most ``self.max_concurrent`` downloads run at the same time, and all
    of them share a single HTTP session.

    Parameters
    ----------
    papers : List[Paper]
        Papers to download
    force : bool
        Force re-download even if files exist
    progress_callback : callable, optional
        Callback function(completed, total). It is invoked once after each
        paper has been attempted; ``completed`` is the number of PDFs
        obtained so far, ``total`` is ``len(papers)``.

    Returns
    -------
    Dict[str, Path]
        Mapping of paper identifiers to PDF paths. Papers whose download
        failed are omitted.
    """
    results: Dict[str, Path] = {}

    # Nothing to do: do not open an HTTP session for an empty batch.
    if not papers:
        return results

    total = len(papers)

    # Bounds the number of downloads in flight at any time.
    semaphore = asyncio.Semaphore(self.max_concurrent)

    async with aiohttp.ClientSession() as session:

        async def download_with_semaphore(paper):
            async with semaphore:
                path = await self.download_paper(paper, session, force)
                if path:
                    results[paper.get_identifier()] = path

            if progress_callback:
                progress_callback(len(results), total)

            return path

        # return_exceptions=True keeps one failing paper from cancelling
        # the rest of the batch.
        outcomes = await asyncio.gather(
            *(download_with_semaphore(paper) for paper in papers),
            return_exceptions=True,
        )

    # gather() hands exceptions back as values; surface them instead of
    # dropping them silently.
    for paper, outcome in zip(papers, outcomes):
        if isinstance(outcome, Exception):
            logger.error(f"Error downloading PDF for {paper.title}: {outcome}")

    return results
185
+
186
def _generate_filename(self, paper: Paper) -> str:
    """Build a filesystem-friendly PDF filename from a paper's title and year.

    Parameters
    ----------
    paper : Paper
        Paper whose ``title`` and ``year`` attributes are used

    Returns
    -------
    str
        ``"<year>_<title>.pdf"`` when the paper has a year, otherwise
        ``"<title>.pdf"``. The title part is limited to 100 characters and
        is ``"untitled"`` when the paper has no usable title.
    """
    # Keep only word characters, whitespace and hyphens, then collapse every
    # run of whitespace/hyphens into a single hyphen.
    title = re.sub(r"[^\w\s-]", "", paper.title or "")
    title = re.sub(r"[-\s]+", "-", title)
    title = title[:100]  # Limit length

    # A missing or punctuation-only title would otherwise produce names such
    # as ".pdf" or "2017_.pdf".
    if not title.strip("-"):
        title = "untitled"

    # Add year if available
    if paper.year:
        return f"{paper.year}_{title}.pdf"
    return f"{title}.pdf"
200
+
201
def _get_pdf_url(self, paper: Paper) -> Optional[str]:
    """Get PDF URL for a paper.

    Candidates are tried in this order: an explicit ``pdf_url`` entry in the
    paper's metadata, a source-specific URL (arXiv, PubMed), and finally a
    DOI-based URL.

    Parameters
    ----------
    paper : Paper
        Paper to resolve

    Returns
    -------
    str or None
        URL to try, or None when no candidate could be built
    """
    # An explicit URL wins, but only when it actually holds a value: a
    # "pdf_url" key set to None or "" must not block the fallbacks below.
    metadata_url = (paper.metadata or {}).get("pdf_url")
    if metadata_url:
        return metadata_url

    # Source-specific URL generation
    if paper.source == "arxiv" and paper.arxiv_id:
        # arXiv PDF URL
        return f"https://arxiv.org/pdf/{paper.arxiv_id}.pdf"

    elif paper.source == "pubmed" and paper.pmid:
        # PubMed Central PDF (if available)
        # Note: This requires checking PMC availability
        # NOTE(review): whatever the PMC lookup returns is final, so a
        # PubMed paper never reaches the DOI branch below - confirm this
        # is intended.
        return self._get_pmc_pdf_url(paper.pmid)

    elif paper.doi:
        # Try Sci-Hub (for educational purposes only)
        # Note: Use responsibly and check local regulations
        return f"https://sci-hub.se/{paper.doi}"

    return None
223
+
224
def _get_pmc_pdf_url(self, pmid: str) -> Optional[str]:
    """Resolve a PMID to a PubMed Central PDF URL (placeholder).

    The lookup is not implemented yet, so this always returns None.

    Parameters
    ----------
    pmid : str
        PubMed identifier (currently unused)

    Returns
    -------
    str or None
        Always None for now
    """
    # A real implementation needs a PMC API round trip:
    #   1. ask PMC whether full text exists for this PMID,
    #   2. obtain the matching PMC ID,
    #   3. build the PDF URL from that PMC ID.
    return None
233
+
234
async def download_from_url(
    self,
    url: str,
    filename: Optional[str] = None,
    session: Optional[aiohttp.ClientSession] = None,
) -> Optional[Path]:
    """Download PDF from a direct URL.

    Parameters
    ----------
    url : str
        PDF URL
    filename : str, optional
        Filename to save as. When omitted, the last path segment of the URL
        is used if it ends in ``.pdf`` (case-insensitive); otherwise
        ``downloaded_paper.pdf`` is used.
    session : aiohttp.ClientSession, optional
        Session for connection pooling. When omitted, a temporary session is
        created and closed before returning.

    Returns
    -------
    Path or None
        Path to downloaded PDF, or None if the request failed, timed out,
        or the response body was not a PDF
    """
    if not filename:
        # Extract filename from URL
        filename = Path(urlparse(url).path).name
        if not filename.lower().endswith(".pdf"):
            filename = "downloaded_paper.pdf"

    pdf_path = self.download_dir / filename

    # Only close the session if it was created here.
    close_session = session is None
    if close_session:
        session = aiohttp.ClientSession()

    try:
        async with session.get(
            url,
            headers=self.headers,
            timeout=aiohttp.ClientTimeout(total=self.timeout),
        ) as response:
            if response.status != 200:
                logger.error(f"Failed to download from {url} (status {response.status})")
                return None

            content = await response.read()

        # Verify it's a PDF (same check as download_paper) so an HTML error
        # page is never written to disk with a .pdf name.
        if not content.startswith(b"%PDF"):
            logger.error(f"Downloaded content is not a PDF: {url}")
            return None

        # Save PDF
        with open(pdf_path, "wb") as f:
            f.write(content)

        logger.info(f"Downloaded PDF to: {pdf_path}")
        return pdf_path

    except asyncio.TimeoutError:
        # str(TimeoutError()) is empty, so report timeouts explicitly.
        logger.error(f"Timeout downloading from {url}")
        return None
    except Exception as e:
        logger.error(f"Error downloading from {url}: {e}")
        return None
    finally:
        if close_session:
            await session.close()
295
+
296
+
297
+ # Example usage
298
if __name__ == "__main__":

    async def main():
        """Demo: fetch one paper via its metadata, then one via a direct URL."""
        downloader = PDFDownloader(download_dir=Path("./papers"))

        # Paper described by metadata only; the URL is derived from arxiv_id.
        example = Paper(
            title="Attention Is All You Need",
            authors=["Ashish Vaswani", "Noam Shazeer", "et al."],
            abstract="The dominant sequence transduction models...",
            source="arxiv",
            year=2017,
            arxiv_id="1706.03762",
        )

        saved = await downloader.download_paper(example)
        if saved:
            print(f"Downloaded to: {saved}")

        # Same paper, fetched from an explicit URL under a chosen filename.
        direct = await downloader.download_from_url(
            "https://arxiv.org/pdf/1706.03762.pdf",
            "attention_paper.pdf",
        )
        if direct:
            print(f"Downloaded from URL to: {direct}")

    # Run example
    asyncio.run(main())
@@ -0,0 +1,393 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Time-stamp: "2024-12-06 10:25:00"
4
+ # Author: Claude
5
+ # Filename: _search.py
6
+
7
+ """
8
+ Unified search interface for SciTeX Scholar.
9
+ """
10
+
11
+ import os
12
+ import asyncio
13
+ from pathlib import Path
14
+ from typing import List, Optional, Union, Dict, Any, Tuple
15
+ import logging
16
+
17
+ from ._paper import Paper
18
+ from ._vector_search import VectorSearchEngine
19
+ from ._web_sources import search_all_sources
20
+ from ._local_search import LocalSearchEngine
21
+ from ._pdf_downloader import PDFDownloader
22
+
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
def get_scholar_dir() -> Path:
    """Return the SciTeX Scholar data directory, creating it when missing.

    The location is read from the ``SciTeX_SCHOLAR_DIR`` environment
    variable and defaults to ``~/.scitex/scholar``. A leading ``~`` is
    expanded to the user's home directory.

    Returns
    -------
    Path
        Existing directory (parents are created as needed)
    """
    configured = os.getenv("SciTeX_SCHOLAR_DIR", "~/.scitex/scholar")
    resolved = Path(configured).expanduser()
    resolved.mkdir(parents=True, exist_ok=True)
    return resolved
33
+
34
+
35
async def search(
    query: str,
    web: bool = True,
    local: Optional[List[Union[str, Path]]] = None,
    max_results: int = 20,
    download_pdfs: bool = False,
    use_vector_search: bool = True,
    web_sources: Optional[List[str]] = None,
) -> List[Paper]:
    """Find scientific papers across web sources and local directories.

    Results from both origins are merged, de-duplicated, optionally
    re-ranked by vector similarity, and cut to ``max_results``.

    Parameters
    ----------
    query : str
        Search query
    web : bool
        Whether to query web sources (PubMed, arXiv, etc.)
    local : List[str or Path], optional
        Local directories to search. ``None`` or an empty list disables
        local search; ``~`` in a path is expanded.
    max_results : int
        Maximum number of papers returned. Each web source is asked for
        ``max(5, max_results // 3)`` papers.
    download_pdfs : bool
        Whether to download PDFs for the returned papers (only honoured
        when ``web`` is true)
    use_vector_search : bool
        Whether to rank by vector similarity; otherwise the merged order
        is kept
    web_sources : List[str], optional
        Web sources to search (default: all available)

    Returns
    -------
    List[Paper]
        Papers matching the query

    Examples
    --------
    >>> import asyncio
    >>> import scitex.scholar
    >>>
    >>> # Web only (no local directories)
    >>> papers = asyncio.run(scitex.scholar.search("deep learning"))
    >>>
    >>> # Specific local directories only
    >>> papers = asyncio.run(scitex.scholar.search(
    ...     "neural networks",
    ...     web=False,
    ...     local=["./papers", "~/Documents/papers"]
    ... ))
    >>>
    >>> # Web and local together, fetching PDFs
    >>> papers = asyncio.run(scitex.scholar.search(
    ...     "transformer architecture",
    ...     local=["./my_papers"],
    ...     download_pdfs=True
    ... ))
    """
    scholar_dir = get_scholar_dir()
    candidates = []

    # Web sources
    if web:
        per_source = max(5, max_results // 3)
        from_web = await _search_web_sources(
            query,
            max_results_per_source=per_source,
            sources=web_sources,
        )
        candidates.extend(from_web)
        logger.info(f"Found {len(from_web)} papers from web sources")

    # Local directories (skipped for None / empty list)
    if local:
        roots = [Path(entry).expanduser() for entry in local]
        from_local = await _search_local_sources(
            query,
            roots,
            max_results=max_results,
        )
        candidates.extend(from_local)
        logger.info(f"Found {len(from_local)} papers from local sources")

    # Collapse papers that share an identifier or a near-identical title
    papers = _deduplicate_papers(candidates)

    if use_vector_search and papers:
        # Re-rank by embedding similarity
        papers = await _apply_vector_search(query, papers, max_results, scholar_dir)
    else:
        # Keep merged order, just truncate
        papers = papers[:max_results]

    if download_pdfs and web:
        await _download_pdfs(papers, scholar_dir / "pdfs")

    return papers
130
+
131
+
132
async def _search_web_sources(
    query: str,
    max_results_per_source: int,
    sources: Optional[List[str]] = None
) -> List[Paper]:
    """Query the web sources and flatten their results into one list.

    Any exception is logged and turned into an empty result.
    """
    try:
        by_source = await search_all_sources(
            query,
            max_results_per_source=max_results_per_source,
            sources=sources,
        )
        # Concatenate the per-source lists, keeping the mapping's order.
        return [paper for batch in by_source.values() for paper in batch]
    except Exception as e:
        logger.error(f"Error in web search: {e}")
        return []
153
+
154
+
155
async def _search_local_sources(
    query: str,
    paths: List[Path],
    max_results: int
) -> List[Paper]:
    """Search the given local paths recursively and return matching papers.

    Uses a ``LocalSearchEngine`` whose index lives in the scholar directory.
    Scores returned by the engine are discarded. Any exception is logged
    and turned into an empty result.
    """
    try:
        index_file = get_scholar_dir() / "local_index.json"
        engine = LocalSearchEngine(index_path=index_file, cache_metadata=True)

        hits = engine.search(
            query,
            paths,
            recursive=True,
            max_results=max_results,
        )
        return [paper for paper, _score in hits]
    except Exception as e:
        logger.error(f"Error in local search: {e}")
        return []
180
+
181
+
182
async def _apply_vector_search(
    query: str,
    papers: List[Paper],
    max_results: int,
    scholar_dir: Path
) -> List[Paper]:
    """Rank papers by vector similarity to the query.

    The papers are added to the vector index stored under ``scholar_dir``,
    the index is queried for the top ``max_results`` hits, and the updated
    index is saved. On any exception the error is logged and the first
    ``max_results`` input papers are returned unchanged.
    """
    try:
        engine = VectorSearchEngine(
            index_path=scholar_dir / "vector_index.pkl",
            embedding_dim=384,  # Using smaller model by default
            similarity_metric="cosine",
        )

        # Register every candidate with the engine before querying.
        for candidate in papers:
            engine.add_paper(candidate, update_embedding=True)

        ranked = engine.search(query, top_k=max_results)

        # Persist whatever the calls above added to the index.
        engine.save_index()

        return [paper for paper, _score in ranked]
    except Exception as e:
        logger.error(f"Error in vector search: {e}")
        # Fallback to original order
        return papers[:max_results]
211
+
212
+
213
async def _download_pdfs(papers: List[Paper], download_dir: Path) -> None:
    """Fetch PDFs for arXiv/PubMed papers that do not have one yet.

    Progress is logged after every fifth reported count and when the count
    reaches the total. Any exception is logged and suppressed.
    """
    try:
        downloader = PDFDownloader(download_dir=download_dir)

        # Only papers without a PDF and from a supported source qualify.
        pending = [
            paper for paper in papers
            if not paper.has_pdf() and paper.source in ["arxiv", "pubmed"]
        ]
        if not pending:
            return

        logger.info(f"Downloading PDFs for {len(pending)} papers...")

        def report(completed, total):
            if completed % 5 == 0 or completed == total:
                logger.info(f"Downloaded {completed}/{total} PDFs")

        await downloader.download_papers(pending, progress_callback=report)
    except Exception as e:
        logger.error(f"Error downloading PDFs: {e}")
237
+
238
+
239
def _deduplicate_papers(papers: List[Paper]) -> List[Paper]:
    """Drop repeated papers, keeping the first occurrence of each.

    A paper is dropped when its identifier matches an already kept paper,
    or when its ``similarity_score`` against any kept paper exceeds 0.8.
    An empty input is returned as-is.
    """
    if not papers:
        return papers

    kept = []
    kept_ids = set()

    for candidate in papers:
        ident = candidate.get_identifier()
        if ident in kept_ids:
            continue  # exact identifier already kept

        # Strictly above the 80% similarity threshold counts as duplicate.
        if any(candidate.similarity_score(prior) > 0.8 for prior in kept):
            continue

        kept.append(candidate)
        kept_ids.add(ident)

    return kept
265
+
266
+
267
def build_index(
    paths: Optional[List[Union[str, Path]]] = None,
    recursive: bool = True,
    build_vector_index: bool = True,
) -> Dict[str, Any]:
    """Index local papers for search, optionally with vector embeddings.

    Parameters
    ----------
    paths : List[str or Path], optional
        Paths to index (default: current directory); ``~`` is expanded
    recursive : bool
        Whether to descend into sub-directories
    build_vector_index : bool
        Whether to also build vector embeddings (only done when at least
        one file was indexed)

    Returns
    -------
    Dict[str, Any]
        Index statistics: ``local_files_indexed`` always, plus
        ``vector_embeddings_created`` and the vector engine's statistics
        when embeddings were built

    Examples
    --------
    >>> import scitex.scholar
    >>>
    >>> # Current directory
    >>> stats = scitex.scholar.build_index()
    >>>
    >>> # Several directories
    >>> stats = scitex.scholar.build_index([
    ...     "./papers",
    ...     "~/Documents/research"
    ... ])
    """
    roots = [Path(".")] if paths is None else [Path(p).expanduser() for p in paths]

    scholar_dir = get_scholar_dir()
    stats = {}

    # Local (file-based) index
    logger.info(f"Building local search index for {len(roots)} paths...")
    local_engine = LocalSearchEngine(
        index_path=scholar_dir / "local_index.json",
        cache_metadata=True,
    )

    indexed_count = local_engine.build_index(roots, recursive=recursive)
    stats["local_files_indexed"] = indexed_count

    # Vector index, only when requested and there is something to embed
    if build_vector_index and indexed_count > 0:
        logger.info("Building vector embeddings...")

        # Query each root with "*" and no result cap to collect its papers.
        all_papers = [
            paper
            for root in roots
            for paper, _score in local_engine.search("*", [root], max_results=None)
        ]

        vector_engine = VectorSearchEngine(
            index_path=scholar_dir / "vector_index.pkl",
            embedding_dim=384,
            similarity_metric="cosine",
        )

        # Embed each paper, logging progress every 10 papers.
        total = len(all_papers)
        for done, paper in enumerate(all_papers, start=1):
            vector_engine.add_paper(paper, update_embedding=True)
            if done % 10 == 0:
                logger.info(f"Generated embeddings for {done}/{total} papers")

        vector_engine.save_index()
        stats["vector_embeddings_created"] = total
        stats.update(vector_engine.get_statistics())

    logger.info(f"Index building complete: {stats}")
    return stats
349
+
350
+
351
+ # Synchronous wrapper for convenience
352
def search_sync(
    query: str,
    web: bool = True,
    local: Optional[List[Union[str, Path]]] = None,
    max_results: int = 20,
    download_pdfs: bool = False,
    use_vector_search: bool = True,
    web_sources: Optional[List[str]] = None,
) -> List[Paper]:
    """Blocking counterpart of `search`.

    Runs the `search` coroutine to completion with ``asyncio.run`` and
    returns its result. See `search` for parameter documentation.

    Examples
    --------
    >>> import scitex.scholar
    >>>
    >>> # Web only
    >>> papers = scitex.scholar.search_sync("machine learning")
    >>>
    >>> # Web plus local directories
    >>> papers = scitex.scholar.search_sync(
    ...     "deep learning",
    ...     local=["./papers", "~/Documents/research"]
    ... )
    >>>
    >>> # Local directories only
    >>> papers = scitex.scholar.search_sync(
    ...     "neural networks",
    ...     web=False,
    ...     local=["./my_papers"]
    ... )
    """
    # Forward every argument unchanged to the async implementation.
    pending = search(
        query,
        web=web,
        local=local,
        max_results=max_results,
        download_pdfs=download_pdfs,
        use_vector_search=use_vector_search,
        web_sources=web_sources,
    )
    return asyncio.run(pending)