scitex 2.0.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (572) hide show
  1. scitex/__init__.py +73 -0
  2. scitex/__main__.py +89 -0
  3. scitex/__version__.py +14 -0
  4. scitex/_sh.py +59 -0
  5. scitex/ai/_LearningCurveLogger.py +583 -0
  6. scitex/ai/__Classifiers.py +101 -0
  7. scitex/ai/__init__.py +55 -0
  8. scitex/ai/_gen_ai/_Anthropic.py +173 -0
  9. scitex/ai/_gen_ai/_BaseGenAI.py +336 -0
  10. scitex/ai/_gen_ai/_DeepSeek.py +175 -0
  11. scitex/ai/_gen_ai/_Google.py +161 -0
  12. scitex/ai/_gen_ai/_Groq.py +97 -0
  13. scitex/ai/_gen_ai/_Llama.py +142 -0
  14. scitex/ai/_gen_ai/_OpenAI.py +230 -0
  15. scitex/ai/_gen_ai/_PARAMS.py +565 -0
  16. scitex/ai/_gen_ai/_Perplexity.py +191 -0
  17. scitex/ai/_gen_ai/__init__.py +32 -0
  18. scitex/ai/_gen_ai/_calc_cost.py +78 -0
  19. scitex/ai/_gen_ai/_format_output_func.py +183 -0
  20. scitex/ai/_gen_ai/_genai_factory.py +71 -0
  21. scitex/ai/act/__init__.py +8 -0
  22. scitex/ai/act/_define.py +11 -0
  23. scitex/ai/classification/__init__.py +7 -0
  24. scitex/ai/classification/classification_reporter.py +1137 -0
  25. scitex/ai/classification/classifier_server.py +131 -0
  26. scitex/ai/classification/classifiers.py +101 -0
  27. scitex/ai/classification_reporter.py +1161 -0
  28. scitex/ai/classifier_server.py +131 -0
  29. scitex/ai/clustering/__init__.py +11 -0
  30. scitex/ai/clustering/_pca.py +115 -0
  31. scitex/ai/clustering/_umap.py +376 -0
  32. scitex/ai/early_stopping.py +149 -0
  33. scitex/ai/feature_extraction/__init__.py +56 -0
  34. scitex/ai/feature_extraction/vit.py +148 -0
  35. scitex/ai/genai/__init__.py +277 -0
  36. scitex/ai/genai/anthropic.py +177 -0
  37. scitex/ai/genai/anthropic_provider.py +320 -0
  38. scitex/ai/genai/anthropic_refactored.py +109 -0
  39. scitex/ai/genai/auth_manager.py +200 -0
  40. scitex/ai/genai/base_genai.py +336 -0
  41. scitex/ai/genai/base_provider.py +291 -0
  42. scitex/ai/genai/calc_cost.py +78 -0
  43. scitex/ai/genai/chat_history.py +307 -0
  44. scitex/ai/genai/cost_tracker.py +276 -0
  45. scitex/ai/genai/deepseek.py +188 -0
  46. scitex/ai/genai/deepseek_provider.py +251 -0
  47. scitex/ai/genai/format_output_func.py +183 -0
  48. scitex/ai/genai/genai_factory.py +71 -0
  49. scitex/ai/genai/google.py +169 -0
  50. scitex/ai/genai/google_provider.py +228 -0
  51. scitex/ai/genai/groq.py +104 -0
  52. scitex/ai/genai/groq_provider.py +248 -0
  53. scitex/ai/genai/image_processor.py +250 -0
  54. scitex/ai/genai/llama.py +155 -0
  55. scitex/ai/genai/llama_provider.py +214 -0
  56. scitex/ai/genai/mock_provider.py +127 -0
  57. scitex/ai/genai/model_registry.py +304 -0
  58. scitex/ai/genai/openai.py +230 -0
  59. scitex/ai/genai/openai_provider.py +293 -0
  60. scitex/ai/genai/params.py +565 -0
  61. scitex/ai/genai/perplexity.py +202 -0
  62. scitex/ai/genai/perplexity_provider.py +205 -0
  63. scitex/ai/genai/provider_base.py +302 -0
  64. scitex/ai/genai/provider_factory.py +370 -0
  65. scitex/ai/genai/response_handler.py +235 -0
  66. scitex/ai/layer/_Pass.py +21 -0
  67. scitex/ai/layer/__init__.py +10 -0
  68. scitex/ai/layer/_switch.py +8 -0
  69. scitex/ai/loss/_L1L2Losses.py +34 -0
  70. scitex/ai/loss/__init__.py +12 -0
  71. scitex/ai/loss/multi_task_loss.py +47 -0
  72. scitex/ai/metrics/__init__.py +9 -0
  73. scitex/ai/metrics/_bACC.py +51 -0
  74. scitex/ai/metrics/silhoute_score_block.py +496 -0
  75. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
  76. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
  77. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
  78. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
  79. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
  80. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
  81. scitex/ai/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
  82. scitex/ai/optim/__init__.py +13 -0
  83. scitex/ai/optim/_get_set.py +31 -0
  84. scitex/ai/optim/_optimizers.py +71 -0
  85. scitex/ai/plt/__init__.py +21 -0
  86. scitex/ai/plt/_conf_mat.py +592 -0
  87. scitex/ai/plt/_learning_curve.py +194 -0
  88. scitex/ai/plt/_optuna_study.py +111 -0
  89. scitex/ai/plt/aucs/__init__.py +2 -0
  90. scitex/ai/plt/aucs/example.py +60 -0
  91. scitex/ai/plt/aucs/pre_rec_auc.py +223 -0
  92. scitex/ai/plt/aucs/roc_auc.py +246 -0
  93. scitex/ai/sampling/undersample.py +29 -0
  94. scitex/ai/sk/__init__.py +11 -0
  95. scitex/ai/sk/_clf.py +58 -0
  96. scitex/ai/sk/_to_sktime.py +100 -0
  97. scitex/ai/sklearn/__init__.py +26 -0
  98. scitex/ai/sklearn/clf.py +58 -0
  99. scitex/ai/sklearn/to_sktime.py +100 -0
  100. scitex/ai/training/__init__.py +7 -0
  101. scitex/ai/training/early_stopping.py +150 -0
  102. scitex/ai/training/learning_curve_logger.py +555 -0
  103. scitex/ai/utils/__init__.py +22 -0
  104. scitex/ai/utils/_check_params.py +50 -0
  105. scitex/ai/utils/_default_dataset.py +46 -0
  106. scitex/ai/utils/_format_samples_for_sktime.py +26 -0
  107. scitex/ai/utils/_label_encoder.py +134 -0
  108. scitex/ai/utils/_merge_labels.py +22 -0
  109. scitex/ai/utils/_sliding_window_data_augmentation.py +11 -0
  110. scitex/ai/utils/_under_sample.py +51 -0
  111. scitex/ai/utils/_verify_n_gpus.py +16 -0
  112. scitex/ai/utils/grid_search.py +148 -0
  113. scitex/context/__init__.py +9 -0
  114. scitex/context/_suppress_output.py +38 -0
  115. scitex/db/_BaseMixins/_BaseBackupMixin.py +30 -0
  116. scitex/db/_BaseMixins/_BaseBatchMixin.py +31 -0
  117. scitex/db/_BaseMixins/_BaseBlobMixin.py +81 -0
  118. scitex/db/_BaseMixins/_BaseConnectionMixin.py +43 -0
  119. scitex/db/_BaseMixins/_BaseImportExportMixin.py +39 -0
  120. scitex/db/_BaseMixins/_BaseIndexMixin.py +29 -0
  121. scitex/db/_BaseMixins/_BaseMaintenanceMixin.py +33 -0
  122. scitex/db/_BaseMixins/_BaseQueryMixin.py +52 -0
  123. scitex/db/_BaseMixins/_BaseRowMixin.py +32 -0
  124. scitex/db/_BaseMixins/_BaseSchemaMixin.py +44 -0
  125. scitex/db/_BaseMixins/_BaseTableMixin.py +66 -0
  126. scitex/db/_BaseMixins/_BaseTransactionMixin.py +52 -0
  127. scitex/db/_BaseMixins/__init__.py +30 -0
  128. scitex/db/_PostgreSQL.py +126 -0
  129. scitex/db/_PostgreSQLMixins/_BackupMixin.py +166 -0
  130. scitex/db/_PostgreSQLMixins/_BatchMixin.py +82 -0
  131. scitex/db/_PostgreSQLMixins/_BlobMixin.py +231 -0
  132. scitex/db/_PostgreSQLMixins/_ConnectionMixin.py +92 -0
  133. scitex/db/_PostgreSQLMixins/_ImportExportMixin.py +59 -0
  134. scitex/db/_PostgreSQLMixins/_IndexMixin.py +64 -0
  135. scitex/db/_PostgreSQLMixins/_MaintenanceMixin.py +175 -0
  136. scitex/db/_PostgreSQLMixins/_QueryMixin.py +108 -0
  137. scitex/db/_PostgreSQLMixins/_RowMixin.py +75 -0
  138. scitex/db/_PostgreSQLMixins/_SchemaMixin.py +126 -0
  139. scitex/db/_PostgreSQLMixins/_TableMixin.py +176 -0
  140. scitex/db/_PostgreSQLMixins/_TransactionMixin.py +57 -0
  141. scitex/db/_PostgreSQLMixins/__init__.py +34 -0
  142. scitex/db/_SQLite3.py +2136 -0
  143. scitex/db/_SQLite3Mixins/_BatchMixin.py +243 -0
  144. scitex/db/_SQLite3Mixins/_BlobMixin.py +229 -0
  145. scitex/db/_SQLite3Mixins/_ConnectionMixin.py +108 -0
  146. scitex/db/_SQLite3Mixins/_ImportExportMixin.py +80 -0
  147. scitex/db/_SQLite3Mixins/_IndexMixin.py +32 -0
  148. scitex/db/_SQLite3Mixins/_MaintenanceMixin.py +176 -0
  149. scitex/db/_SQLite3Mixins/_QueryMixin.py +83 -0
  150. scitex/db/_SQLite3Mixins/_RowMixin.py +75 -0
  151. scitex/db/_SQLite3Mixins/_TableMixin.py +183 -0
  152. scitex/db/_SQLite3Mixins/_TransactionMixin.py +71 -0
  153. scitex/db/_SQLite3Mixins/__init__.py +30 -0
  154. scitex/db/__init__.py +14 -0
  155. scitex/db/_delete_duplicates.py +397 -0
  156. scitex/db/_inspect.py +163 -0
  157. scitex/decorators/__init__.py +54 -0
  158. scitex/decorators/_auto_order.py +172 -0
  159. scitex/decorators/_batch_fn.py +127 -0
  160. scitex/decorators/_cache_disk.py +32 -0
  161. scitex/decorators/_cache_mem.py +12 -0
  162. scitex/decorators/_combined.py +98 -0
  163. scitex/decorators/_converters.py +282 -0
  164. scitex/decorators/_deprecated.py +26 -0
  165. scitex/decorators/_not_implemented.py +30 -0
  166. scitex/decorators/_numpy_fn.py +86 -0
  167. scitex/decorators/_pandas_fn.py +121 -0
  168. scitex/decorators/_preserve_doc.py +19 -0
  169. scitex/decorators/_signal_fn.py +95 -0
  170. scitex/decorators/_timeout.py +55 -0
  171. scitex/decorators/_torch_fn.py +136 -0
  172. scitex/decorators/_wrap.py +39 -0
  173. scitex/decorators/_xarray_fn.py +88 -0
  174. scitex/dev/__init__.py +15 -0
  175. scitex/dev/_analyze_code_flow.py +284 -0
  176. scitex/dev/_reload.py +59 -0
  177. scitex/dict/_DotDict.py +442 -0
  178. scitex/dict/__init__.py +18 -0
  179. scitex/dict/_listed_dict.py +42 -0
  180. scitex/dict/_pop_keys.py +36 -0
  181. scitex/dict/_replace.py +13 -0
  182. scitex/dict/_safe_merge.py +62 -0
  183. scitex/dict/_to_str.py +32 -0
  184. scitex/dsp/__init__.py +72 -0
  185. scitex/dsp/_crop.py +122 -0
  186. scitex/dsp/_demo_sig.py +331 -0
  187. scitex/dsp/_detect_ripples.py +212 -0
  188. scitex/dsp/_ensure_3d.py +18 -0
  189. scitex/dsp/_hilbert.py +78 -0
  190. scitex/dsp/_listen.py +702 -0
  191. scitex/dsp/_misc.py +30 -0
  192. scitex/dsp/_mne.py +32 -0
  193. scitex/dsp/_modulation_index.py +79 -0
  194. scitex/dsp/_pac.py +319 -0
  195. scitex/dsp/_psd.py +102 -0
  196. scitex/dsp/_resample.py +65 -0
  197. scitex/dsp/_time.py +36 -0
  198. scitex/dsp/_transform.py +68 -0
  199. scitex/dsp/_wavelet.py +212 -0
  200. scitex/dsp/add_noise.py +111 -0
  201. scitex/dsp/example.py +253 -0
  202. scitex/dsp/filt.py +155 -0
  203. scitex/dsp/norm.py +18 -0
  204. scitex/dsp/params.py +51 -0
  205. scitex/dsp/reference.py +43 -0
  206. scitex/dsp/template.py +25 -0
  207. scitex/dsp/utils/__init__.py +15 -0
  208. scitex/dsp/utils/_differential_bandpass_filters.py +120 -0
  209. scitex/dsp/utils/_ensure_3d.py +18 -0
  210. scitex/dsp/utils/_ensure_even_len.py +10 -0
  211. scitex/dsp/utils/_zero_pad.py +48 -0
  212. scitex/dsp/utils/filter.py +408 -0
  213. scitex/dsp/utils/pac.py +177 -0
  214. scitex/dt/__init__.py +8 -0
  215. scitex/dt/_linspace.py +130 -0
  216. scitex/etc/__init__.py +15 -0
  217. scitex/etc/wait_key.py +34 -0
  218. scitex/gen/_DimHandler.py +196 -0
  219. scitex/gen/_TimeStamper.py +244 -0
  220. scitex/gen/__init__.py +95 -0
  221. scitex/gen/_alternate_kwarg.py +13 -0
  222. scitex/gen/_cache.py +11 -0
  223. scitex/gen/_check_host.py +34 -0
  224. scitex/gen/_ci.py +12 -0
  225. scitex/gen/_close.py +222 -0
  226. scitex/gen/_embed.py +78 -0
  227. scitex/gen/_inspect_module.py +257 -0
  228. scitex/gen/_is_ipython.py +12 -0
  229. scitex/gen/_less.py +48 -0
  230. scitex/gen/_list_packages.py +139 -0
  231. scitex/gen/_mat2py.py +88 -0
  232. scitex/gen/_norm.py +170 -0
  233. scitex/gen/_paste.py +18 -0
  234. scitex/gen/_print_config.py +84 -0
  235. scitex/gen/_shell.py +48 -0
  236. scitex/gen/_src.py +111 -0
  237. scitex/gen/_start.py +451 -0
  238. scitex/gen/_symlink.py +55 -0
  239. scitex/gen/_symlog.py +27 -0
  240. scitex/gen/_tee.py +238 -0
  241. scitex/gen/_title2path.py +60 -0
  242. scitex/gen/_title_case.py +88 -0
  243. scitex/gen/_to_even.py +84 -0
  244. scitex/gen/_to_odd.py +34 -0
  245. scitex/gen/_to_rank.py +39 -0
  246. scitex/gen/_transpose.py +37 -0
  247. scitex/gen/_type.py +78 -0
  248. scitex/gen/_var_info.py +73 -0
  249. scitex/gen/_wrap.py +17 -0
  250. scitex/gen/_xml2dict.py +76 -0
  251. scitex/gen/misc.py +730 -0
  252. scitex/gen/path.py +0 -0
  253. scitex/general/__init__.py +5 -0
  254. scitex/gists/_SigMacro_processFigure_S.py +128 -0
  255. scitex/gists/_SigMacro_toBlue.py +172 -0
  256. scitex/gists/__init__.py +12 -0
  257. scitex/io/_H5Explorer.py +292 -0
  258. scitex/io/__init__.py +82 -0
  259. scitex/io/_cache.py +101 -0
  260. scitex/io/_flush.py +24 -0
  261. scitex/io/_glob.py +103 -0
  262. scitex/io/_json2md.py +113 -0
  263. scitex/io/_load.py +168 -0
  264. scitex/io/_load_configs.py +146 -0
  265. scitex/io/_load_modules/__init__.py +38 -0
  266. scitex/io/_load_modules/_catboost.py +66 -0
  267. scitex/io/_load_modules/_con.py +20 -0
  268. scitex/io/_load_modules/_db.py +24 -0
  269. scitex/io/_load_modules/_docx.py +42 -0
  270. scitex/io/_load_modules/_eeg.py +110 -0
  271. scitex/io/_load_modules/_hdf5.py +196 -0
  272. scitex/io/_load_modules/_image.py +19 -0
  273. scitex/io/_load_modules/_joblib.py +19 -0
  274. scitex/io/_load_modules/_json.py +18 -0
  275. scitex/io/_load_modules/_markdown.py +103 -0
  276. scitex/io/_load_modules/_matlab.py +37 -0
  277. scitex/io/_load_modules/_numpy.py +39 -0
  278. scitex/io/_load_modules/_optuna.py +155 -0
  279. scitex/io/_load_modules/_pandas.py +69 -0
  280. scitex/io/_load_modules/_pdf.py +31 -0
  281. scitex/io/_load_modules/_pickle.py +24 -0
  282. scitex/io/_load_modules/_torch.py +16 -0
  283. scitex/io/_load_modules/_txt.py +126 -0
  284. scitex/io/_load_modules/_xml.py +49 -0
  285. scitex/io/_load_modules/_yaml.py +23 -0
  286. scitex/io/_mv_to_tmp.py +19 -0
  287. scitex/io/_path.py +286 -0
  288. scitex/io/_reload.py +78 -0
  289. scitex/io/_save.py +539 -0
  290. scitex/io/_save_modules/__init__.py +66 -0
  291. scitex/io/_save_modules/_catboost.py +22 -0
  292. scitex/io/_save_modules/_csv.py +89 -0
  293. scitex/io/_save_modules/_excel.py +49 -0
  294. scitex/io/_save_modules/_hdf5.py +249 -0
  295. scitex/io/_save_modules/_html.py +48 -0
  296. scitex/io/_save_modules/_image.py +140 -0
  297. scitex/io/_save_modules/_joblib.py +25 -0
  298. scitex/io/_save_modules/_json.py +25 -0
  299. scitex/io/_save_modules/_listed_dfs_as_csv.py +57 -0
  300. scitex/io/_save_modules/_listed_scalars_as_csv.py +42 -0
  301. scitex/io/_save_modules/_matlab.py +24 -0
  302. scitex/io/_save_modules/_mp4.py +29 -0
  303. scitex/io/_save_modules/_numpy.py +57 -0
  304. scitex/io/_save_modules/_optuna_study_as_csv_and_pngs.py +38 -0
  305. scitex/io/_save_modules/_pickle.py +45 -0
  306. scitex/io/_save_modules/_plotly.py +27 -0
  307. scitex/io/_save_modules/_text.py +23 -0
  308. scitex/io/_save_modules/_torch.py +26 -0
  309. scitex/io/_save_modules/_yaml.py +29 -0
  310. scitex/life/__init__.py +10 -0
  311. scitex/life/_monitor_rain.py +49 -0
  312. scitex/linalg/__init__.py +17 -0
  313. scitex/linalg/_distance.py +63 -0
  314. scitex/linalg/_geometric_median.py +64 -0
  315. scitex/linalg/_misc.py +73 -0
  316. scitex/nn/_AxiswiseDropout.py +27 -0
  317. scitex/nn/_BNet.py +126 -0
  318. scitex/nn/_BNet_Res.py +164 -0
  319. scitex/nn/_ChannelGainChanger.py +44 -0
  320. scitex/nn/_DropoutChannels.py +50 -0
  321. scitex/nn/_Filters.py +489 -0
  322. scitex/nn/_FreqGainChanger.py +110 -0
  323. scitex/nn/_GaussianFilter.py +48 -0
  324. scitex/nn/_Hilbert.py +111 -0
  325. scitex/nn/_MNet_1000.py +157 -0
  326. scitex/nn/_ModulationIndex.py +221 -0
  327. scitex/nn/_PAC.py +414 -0
  328. scitex/nn/_PSD.py +40 -0
  329. scitex/nn/_ResNet1D.py +120 -0
  330. scitex/nn/_SpatialAttention.py +25 -0
  331. scitex/nn/_Spectrogram.py +161 -0
  332. scitex/nn/_SwapChannels.py +50 -0
  333. scitex/nn/_TransposeLayer.py +19 -0
  334. scitex/nn/_Wavelet.py +183 -0
  335. scitex/nn/__init__.py +63 -0
  336. scitex/os/__init__.py +8 -0
  337. scitex/os/_mv.py +50 -0
  338. scitex/parallel/__init__.py +8 -0
  339. scitex/parallel/_run.py +151 -0
  340. scitex/path/__init__.py +33 -0
  341. scitex/path/_clean.py +52 -0
  342. scitex/path/_find.py +108 -0
  343. scitex/path/_get_module_path.py +51 -0
  344. scitex/path/_get_spath.py +35 -0
  345. scitex/path/_getsize.py +18 -0
  346. scitex/path/_increment_version.py +87 -0
  347. scitex/path/_mk_spath.py +51 -0
  348. scitex/path/_path.py +19 -0
  349. scitex/path/_split.py +23 -0
  350. scitex/path/_this_path.py +19 -0
  351. scitex/path/_version.py +101 -0
  352. scitex/pd/__init__.py +41 -0
  353. scitex/pd/_find_indi.py +126 -0
  354. scitex/pd/_find_pval.py +113 -0
  355. scitex/pd/_force_df.py +154 -0
  356. scitex/pd/_from_xyz.py +71 -0
  357. scitex/pd/_ignore_SettingWithCopyWarning.py +34 -0
  358. scitex/pd/_melt_cols.py +81 -0
  359. scitex/pd/_merge_columns.py +221 -0
  360. scitex/pd/_mv.py +63 -0
  361. scitex/pd/_replace.py +62 -0
  362. scitex/pd/_round.py +93 -0
  363. scitex/pd/_slice.py +63 -0
  364. scitex/pd/_sort.py +91 -0
  365. scitex/pd/_to_numeric.py +53 -0
  366. scitex/pd/_to_xy.py +59 -0
  367. scitex/pd/_to_xyz.py +110 -0
  368. scitex/plt/__init__.py +36 -0
  369. scitex/plt/_subplots/_AxesWrapper.py +182 -0
  370. scitex/plt/_subplots/_AxisWrapper.py +249 -0
  371. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +414 -0
  372. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +896 -0
  373. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +368 -0
  374. scitex/plt/_subplots/_AxisWrapperMixins/_TrackingMixin.py +185 -0
  375. scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +16 -0
  376. scitex/plt/_subplots/_FigWrapper.py +226 -0
  377. scitex/plt/_subplots/_SubplotsWrapper.py +171 -0
  378. scitex/plt/_subplots/__init__.py +111 -0
  379. scitex/plt/_subplots/_export_as_csv.py +232 -0
  380. scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +61 -0
  381. scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +90 -0
  382. scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +49 -0
  383. scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +46 -0
  384. scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +39 -0
  385. scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +125 -0
  386. scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +72 -0
  387. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +34 -0
  388. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +36 -0
  389. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +79 -0
  390. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +59 -0
  391. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +32 -0
  392. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +79 -0
  393. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +75 -0
  394. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +64 -0
  395. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +44 -0
  396. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +70 -0
  397. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +66 -0
  398. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +95 -0
  399. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +67 -0
  400. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +52 -0
  401. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +46 -0
  402. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +46 -0
  403. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +46 -0
  404. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +46 -0
  405. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +44 -0
  406. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +103 -0
  407. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +82 -0
  408. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +58 -0
  409. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +117 -0
  410. scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +30 -0
  411. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +51 -0
  412. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +93 -0
  413. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +94 -0
  414. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +92 -0
  415. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +65 -0
  416. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +59 -0
  417. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +58 -0
  418. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +45 -0
  419. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +70 -0
  420. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +75 -0
  421. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +75 -0
  422. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +155 -0
  423. scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +64 -0
  424. scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +77 -0
  425. scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +210 -0
  426. scitex/plt/_subplots/_export_as_csv_formatters/verify_formatters.py +342 -0
  427. scitex/plt/_subplots/_export_as_csv_formatters.py +115 -0
  428. scitex/plt/_tpl.py +28 -0
  429. scitex/plt/ax/__init__.py +114 -0
  430. scitex/plt/ax/_plot/__init__.py +53 -0
  431. scitex/plt/ax/_plot/_plot_circular_hist.py +124 -0
  432. scitex/plt/ax/_plot/_plot_conf_mat.py +136 -0
  433. scitex/plt/ax/_plot/_plot_cube.py +57 -0
  434. scitex/plt/ax/_plot/_plot_ecdf.py +84 -0
  435. scitex/plt/ax/_plot/_plot_fillv.py +55 -0
  436. scitex/plt/ax/_plot/_plot_heatmap.py +266 -0
  437. scitex/plt/ax/_plot/_plot_image.py +94 -0
  438. scitex/plt/ax/_plot/_plot_joyplot.py +76 -0
  439. scitex/plt/ax/_plot/_plot_raster.py +172 -0
  440. scitex/plt/ax/_plot/_plot_rectangle.py +69 -0
  441. scitex/plt/ax/_plot/_plot_scatter_hist.py +133 -0
  442. scitex/plt/ax/_plot/_plot_shaded_line.py +142 -0
  443. scitex/plt/ax/_plot/_plot_statistical_shaded_line.py +221 -0
  444. scitex/plt/ax/_plot/_plot_violin.py +343 -0
  445. scitex/plt/ax/_style/__init__.py +38 -0
  446. scitex/plt/ax/_style/_add_marginal_ax.py +44 -0
  447. scitex/plt/ax/_style/_add_panel.py +92 -0
  448. scitex/plt/ax/_style/_extend.py +64 -0
  449. scitex/plt/ax/_style/_force_aspect.py +37 -0
  450. scitex/plt/ax/_style/_format_label.py +23 -0
  451. scitex/plt/ax/_style/_hide_spines.py +84 -0
  452. scitex/plt/ax/_style/_map_ticks.py +182 -0
  453. scitex/plt/ax/_style/_rotate_labels.py +215 -0
  454. scitex/plt/ax/_style/_sci_note.py +279 -0
  455. scitex/plt/ax/_style/_set_log_scale.py +299 -0
  456. scitex/plt/ax/_style/_set_meta.py +261 -0
  457. scitex/plt/ax/_style/_set_n_ticks.py +37 -0
  458. scitex/plt/ax/_style/_set_size.py +16 -0
  459. scitex/plt/ax/_style/_set_supxyt.py +116 -0
  460. scitex/plt/ax/_style/_set_ticks.py +276 -0
  461. scitex/plt/ax/_style/_set_xyt.py +121 -0
  462. scitex/plt/ax/_style/_share_axes.py +264 -0
  463. scitex/plt/ax/_style/_shift.py +139 -0
  464. scitex/plt/ax/_style/_show_spines.py +333 -0
  465. scitex/plt/color/_PARAMS.py +70 -0
  466. scitex/plt/color/__init__.py +52 -0
  467. scitex/plt/color/_add_hue_col.py +41 -0
  468. scitex/plt/color/_colors.py +205 -0
  469. scitex/plt/color/_get_colors_from_cmap.py +134 -0
  470. scitex/plt/color/_interpolate.py +29 -0
  471. scitex/plt/color/_vizualize_colors.py +54 -0
  472. scitex/plt/utils/__init__.py +44 -0
  473. scitex/plt/utils/_calc_bacc_from_conf_mat.py +46 -0
  474. scitex/plt/utils/_calc_nice_ticks.py +101 -0
  475. scitex/plt/utils/_close.py +68 -0
  476. scitex/plt/utils/_colorbar.py +96 -0
  477. scitex/plt/utils/_configure_mpl.py +295 -0
  478. scitex/plt/utils/_histogram_utils.py +132 -0
  479. scitex/plt/utils/_im2grid.py +70 -0
  480. scitex/plt/utils/_is_valid_axis.py +78 -0
  481. scitex/plt/utils/_mk_colorbar.py +65 -0
  482. scitex/plt/utils/_mk_patches.py +26 -0
  483. scitex/plt/utils/_scientific_captions.py +638 -0
  484. scitex/plt/utils/_scitex_config.py +223 -0
  485. scitex/reproduce/__init__.py +14 -0
  486. scitex/reproduce/_fix_seeds.py +45 -0
  487. scitex/reproduce/_gen_ID.py +55 -0
  488. scitex/reproduce/_gen_timestamp.py +35 -0
  489. scitex/res/__init__.py +5 -0
  490. scitex/resource/__init__.py +13 -0
  491. scitex/resource/_get_processor_usages.py +281 -0
  492. scitex/resource/_get_specs.py +280 -0
  493. scitex/resource/_log_processor_usages.py +190 -0
  494. scitex/resource/_utils/__init__.py +31 -0
  495. scitex/resource/_utils/_get_env_info.py +481 -0
  496. scitex/resource/limit_ram.py +33 -0
  497. scitex/scholar/__init__.py +24 -0
  498. scitex/scholar/_local_search.py +454 -0
  499. scitex/scholar/_paper.py +244 -0
  500. scitex/scholar/_pdf_downloader.py +325 -0
  501. scitex/scholar/_search.py +393 -0
  502. scitex/scholar/_vector_search.py +370 -0
  503. scitex/scholar/_web_sources.py +457 -0
  504. scitex/stats/__init__.py +31 -0
  505. scitex/stats/_calc_partial_corr.py +17 -0
  506. scitex/stats/_corr_test_multi.py +94 -0
  507. scitex/stats/_corr_test_wrapper.py +115 -0
  508. scitex/stats/_describe_wrapper.py +90 -0
  509. scitex/stats/_multiple_corrections.py +63 -0
  510. scitex/stats/_nan_stats.py +93 -0
  511. scitex/stats/_p2stars.py +116 -0
  512. scitex/stats/_p2stars_wrapper.py +56 -0
  513. scitex/stats/_statistical_tests.py +73 -0
  514. scitex/stats/desc/__init__.py +40 -0
  515. scitex/stats/desc/_describe.py +189 -0
  516. scitex/stats/desc/_nan.py +289 -0
  517. scitex/stats/desc/_real.py +94 -0
  518. scitex/stats/multiple/__init__.py +14 -0
  519. scitex/stats/multiple/_bonferroni_correction.py +72 -0
  520. scitex/stats/multiple/_fdr_correction.py +400 -0
  521. scitex/stats/multiple/_multicompair.py +28 -0
  522. scitex/stats/tests/__corr_test.py +277 -0
  523. scitex/stats/tests/__corr_test_multi.py +343 -0
  524. scitex/stats/tests/__corr_test_single.py +277 -0
  525. scitex/stats/tests/__init__.py +22 -0
  526. scitex/stats/tests/_brunner_munzel_test.py +192 -0
  527. scitex/stats/tests/_nocorrelation_test.py +28 -0
  528. scitex/stats/tests/_smirnov_grubbs.py +98 -0
  529. scitex/str/__init__.py +113 -0
  530. scitex/str/_clean_path.py +75 -0
  531. scitex/str/_color_text.py +52 -0
  532. scitex/str/_decapitalize.py +58 -0
  533. scitex/str/_factor_out_digits.py +281 -0
  534. scitex/str/_format_plot_text.py +498 -0
  535. scitex/str/_grep.py +48 -0
  536. scitex/str/_latex.py +155 -0
  537. scitex/str/_latex_fallback.py +471 -0
  538. scitex/str/_mask_api.py +39 -0
  539. scitex/str/_mask_api_key.py +8 -0
  540. scitex/str/_parse.py +158 -0
  541. scitex/str/_print_block.py +47 -0
  542. scitex/str/_print_debug.py +68 -0
  543. scitex/str/_printc.py +62 -0
  544. scitex/str/_readable_bytes.py +38 -0
  545. scitex/str/_remove_ansi.py +23 -0
  546. scitex/str/_replace.py +134 -0
  547. scitex/str/_search.py +125 -0
  548. scitex/str/_squeeze_space.py +36 -0
  549. scitex/tex/__init__.py +10 -0
  550. scitex/tex/_preview.py +103 -0
  551. scitex/tex/_to_vec.py +116 -0
  552. scitex/torch/__init__.py +18 -0
  553. scitex/torch/_apply_to.py +34 -0
  554. scitex/torch/_nan_funcs.py +77 -0
  555. scitex/types/_ArrayLike.py +44 -0
  556. scitex/types/_ColorLike.py +21 -0
  557. scitex/types/__init__.py +14 -0
  558. scitex/types/_is_listed_X.py +70 -0
  559. scitex/utils/__init__.py +22 -0
  560. scitex/utils/_compress_hdf5.py +116 -0
  561. scitex/utils/_email.py +120 -0
  562. scitex/utils/_grid.py +148 -0
  563. scitex/utils/_notify.py +247 -0
  564. scitex/utils/_search.py +121 -0
  565. scitex/web/__init__.py +38 -0
  566. scitex/web/_search_pubmed.py +438 -0
  567. scitex/web/_summarize_url.py +158 -0
  568. scitex-2.0.0.dist-info/METADATA +307 -0
  569. scitex-2.0.0.dist-info/RECORD +572 -0
  570. scitex-2.0.0.dist-info/WHEEL +6 -0
  571. scitex-2.0.0.dist-info/licenses/LICENSE +7 -0
  572. scitex-2.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,438 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Time-stamp: "2024-11-13 14:30:43 (ywatanabe)"
4
+ # File: ./scitex_repo/src/scitex/web/_search_pubmed.py
5
+
6
+ """
7
+ 1. Functionality:
8
+ - Searches PubMed database for scientific articles
9
+ - Retrieves detailed information about matched articles
10
+ - Displays article metadata including title, authors, journal, year, and abstract
11
+ 2. Input:
12
+ - Search query string (e.g., "epilepsy prediction")
13
+ - Optional parameters for batch size and result limit
14
+ 3. Output:
15
+ - Formatted article information displayed to stdout
16
+ - BibTeX file with official citations
17
+ 4. Prerequisites:
18
+ - Internet connection
19
+ - requests package
20
+ - scitex package
21
+ """
22
+
23
+ """Imports"""
24
+ import argparse
25
+ import asyncio
26
+ import xml.etree.ElementTree as ET
27
+ from typing import Any, Dict, List, Union
28
+
29
+ import aiohttp
30
+ import scitex
31
+ import pandas as pd
32
+ import requests
33
+
34
+ """Functions & Classes"""
35
+
36
+
37
def _search_pubmed(query: str, retmax: int = 300) -> Dict[str, Any]:
    """Run an ESearch query against PubMed and return the decoded JSON reply.

    Parameters
    ----------
    query : str
        Search expression, sent as the ``term`` request parameter.
    retmax : int
        Value sent as the ``retmax`` request parameter.

    Returns
    -------
    Dict[str, Any]
        The decoded JSON body when the HTTP status is OK. An empty dict when
        the status is not OK or when ``requests`` raises a
        ``RequestException``; a red message is printed in both failure cases.
    """
    try:
        base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
        endpoint = f"{base_url}esearch.fcgi"
        # "usehistory" asks the service to keep the result set server-side.
        query_params = {
            "db": "pubmed",
            "term": query,
            "retmax": retmax,
            "retmode": "json",
            "usehistory": "y",
        }

        reply = requests.get(endpoint, params=query_params, timeout=10)
        if reply.ok:
            return reply.json()

        scitex.str.printc("PubMed API request failed", c="red")
    except requests.exceptions.RequestException as err:
        scitex.str.printc(f"Network error: {err}", c="red")

    # Shared failure exit for both the bad-status and the network-error path.
    return {}
57
+
58
+
59
def _fetch_details(
    webenv: str, query_key: str, retstart: int = 0, retmax: int = 100
) -> Dict[str, Any]:
    """Fetches abstracts (XML) and summary metadata (JSON) for a stored search.

    Two requests are issued against the same server-side result set:
    ``efetch.fcgi`` for the abstract XML and ``esummary.fcgi`` for the JSON
    metadata.

    Parameters
    ----------
    webenv : str
        Value sent as the ``WebEnv`` request parameter.
    query_key : str
        Value sent as the ``query_key`` request parameter.
    retstart : int
        Value sent as the ``retstart`` request parameter (default 0).
    retmax : int
        Value sent as the ``retmax`` request parameter (default 100).

    Returns
    -------
    Dict[str, Any]
        ``{"abstracts": <XML text>, "details": <decoded JSON>}`` when both
        requests succeed. An empty dict when either request fails (network
        error, timeout, non-OK status) or the summary body is not valid JSON.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    # Without a timeout a stalled connection would block the caller forever.
    timeout_sec = 30

    # Fetch abstracts
    efetch_url = f"{base_url}efetch.fcgi"
    efetch_params = {
        "db": "pubmed",
        "query_key": query_key,
        "WebEnv": webenv,
        "retstart": retstart,
        "retmax": retmax,
        "retmode": "xml",
        "rettype": "abstract",
        "field": "abstract,mesh",
    }

    # Fetch metadata
    fetch_url = f"{base_url}esummary.fcgi"
    params = {
        "db": "pubmed",
        "query_key": query_key,
        "WebEnv": webenv,
        "retstart": retstart,
        "retmax": retmax,
        "retmode": "json",
    }

    try:
        abstract_response = requests.get(
            efetch_url, params=efetch_params, timeout=timeout_sec
        )
        if not abstract_response.ok:
            # No point in issuing the second request once the first failed.
            return {}

        details_response = requests.get(
            fetch_url, params=params, timeout=timeout_sec
        )
        if not details_response.ok:
            return {}

        details = details_response.json()
    except (requests.exceptions.RequestException, ValueError):
        # Network failures and undecodable JSON are reported the same way as
        # a bad HTTP status: an empty result.
        return {}

    return {
        "abstracts": abstract_response.text,
        "details": details,
    }
111
+
112
+
113
def _parse_abstract_xml(xml_text: str) -> Dict[str, tuple]:
    """Parses a PubMed XML response into per-article abstract data.

    Parameters
    ----------
    xml_text : str
        XML response from PubMed

    Returns
    -------
    Dict[str, tuple]
        Dictionary mapping each PMID to ``(abstract, keywords, doi)`` where
        ``abstract`` is a string (all ``AbstractText`` sections joined by a
        space, ``""`` if absent), ``keywords`` is the list of MeSH descriptor
        names and ``doi`` is a string (``""`` if absent). Empty or
        whitespace-only input yields an empty dict.

    Raises
    ------
    xml.etree.ElementTree.ParseError
        If ``xml_text`` is non-empty but not well-formed XML.
    """
    if not xml_text or not xml_text.strip():
        return {}

    root = ET.fromstring(xml_text)
    results = {}

    for article in root.findall(".//PubmedArticle"):
        pmid_element = article.find(".//PMID")
        pmid = ""
        if pmid_element is not None and pmid_element.text:
            pmid = pmid_element.text.strip()
        if not pmid:
            # An entry without an identifier cannot be keyed; skip it.
            continue

        # Structured abstracts carry several AbstractText sections, and any
        # section may contain inline markup; itertext() collects the text
        # around and inside nested tags, which ``.text`` alone would cut off.
        sections = []
        for section in article.findall(".//Abstract/AbstractText"):
            section_text = "".join(section.itertext()).strip()
            if section_text:
                sections.append(section_text)
        abstract = " ".join(sections)

        # DOI: restrict the lookup to the article's own ID list so that a DOI
        # of a cited work (ReferenceList) is never picked up by mistake, then
        # fall back to the electronic location ID.
        doi_element = article.find(
            "PubmedData/ArticleIdList/ArticleId[@IdType='doi']"
        )
        if doi_element is None:
            doi_element = article.find(".//Article/ELocationID[@EIdType='doi']")
        doi = ""
        if doi_element is not None and doi_element.text:
            doi = doi_element.text.strip()

        # Get MeSH terms
        mesh_terms = article.findall(".//MeshHeading/DescriptorName")
        keywords = [term.text for term in mesh_terms if term.text]

        results[pmid] = (abstract, keywords, doi)

    return results
146
+
147
+
148
def _get_citation(pmid: str) -> str:
    """Requests a citation for one article from the NCBI efetch endpoint.

    Parameters
    ----------
    pmid : str
        PubMed ID

    Returns
    -------
    str
        Response body of the efetch request, or "" when the request fails
        (HTTP error status, timeout or connection problem). Callers treat
        "" as "no citation available" and build an entry themselves.
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    cite_url = f"{base_url}efetch.fcgi"
    params = {
        "db": "pubmed",
        "id": pmid,
        # NOTE(review): confirm that efetch really serves BibTeX for
        # rettype="bibtex"; otherwise this returns some other format.
        "rettype": "bibtex",
        "retmode": "text",
    }
    try:
        # A timeout keeps one stalled connection from hanging the export.
        response = requests.get(cite_url, params=params, timeout=30)
    except requests.RequestException:
        return ""
    return response.text if response.ok else ""
171
+
172
+
173
def get_crossref_metrics(doi: str) -> Dict[str, Any]:
    """Get article metrics from CrossRef using DOI.

    Parameters
    ----------
    doi : str
        DOI of the article (unencoded)

    Returns
    -------
    Dict[str, Any]
        Dictionary with the keys "citations", "type", "publisher",
        "references" and "doi"; an empty dictionary when the lookup fails
        (network error, HTTP error status or unparsable body)
    """
    from urllib.parse import quote

    base_url = "https://api.crossref.org/works/"
    headers = {"User-Agent": "mailto:your.email@example.com"}  # Replace with your email

    # DOIs may contain characters such as "#", "?" or "<" that would change
    # the meaning of the URL, so the DOI is percent-encoded ("/" is kept).
    work_url = f"{base_url}{quote(doi, safe='/')}"

    try:
        response = requests.get(work_url, headers=headers, timeout=30)
    except requests.RequestException:
        return {}
    if not response.ok:
        return {}

    try:
        data = response.json().get("message", {})
    except ValueError:
        # Body was not valid JSON
        return {}

    return {
        "citations": data.get("is-referenced-by-count", 0),
        "type": data.get("type", ""),
        "publisher": data.get("publisher", ""),
        "references": len(data.get("reference", [])),
        "doi": data.get("DOI", ""),
    }
189
+
190
+
191
def save_bibtex(
    papers: Dict[str, Any], abstracts: Dict[str, tuple], output_file: str
) -> None:
    """Saves paper metadata as BibTeX file with abstracts.

    Parameters
    ----------
    papers : Dict[str, Any]
        Dictionary of paper metadata keyed by PMID; the "uids" key is skipped
    abstracts : Dict[str, tuple]
        Dictionary of PMIDs to ``(abstract, keywords, doi)`` tuples
    output_file : str
        Output file path
    """
    # Used when a PMID has no parsed abstract data: abstract, keywords, doi
    default_data = ("", [], "")

    with open(output_file, "w", encoding="utf-8") as bibtex_file:
        for pmid, paper in papers.items():
            if pmid == "uids":
                continue

            citation = _get_citation(pmid)
            if citation:
                bibtex_file.write(citation)
                # Keep consecutive entries from running into each other.
                if not citation.endswith("\n"):
                    bibtex_file.write("\n")
            else:
                bibtex_entry = format_bibtex(
                    paper, pmid, abstracts.get(pmid, default_data)
                )
                bibtex_file.write(bibtex_entry + "\n")

    # Report the path; formatting the file object would print its repr.
    scitex.str.printc(f"Saved to: {output_file}", c="yellow")
221
+
222
+
223
def format_bibtex(paper: Dict[str, Any], pmid: str, abstract_data: tuple) -> str:
    """Builds a BibTeX ``@article`` entry from PubMed summary metadata.

    Parameters
    ----------
    paper : Dict[str, Any]
        Metadata of one article; the keys "source", "authors", "pubdate"
        and "title" are read, each with a fallback when absent
    pmid : str
        PubMed ID
    abstract_data : tuple
        ``(abstract, keywords, doi)``; an empty value is treated as
        ``("", [], "")``

    Returns
    -------
    str
        BibTeX entry whose key has the form
        ``First.Last_year_firstword_secondword``
    """
    abstract, keywords, doi = abstract_data or ("", [], "")

    # CrossRef is only queried when a DOI is known
    crossref_metrics = get_crossref_metrics(doi) if doi else {}
    journal = paper.get("source", "Unknown Journal")

    # `or` also covers an author list that is present but empty
    authors = paper.get("authors") or [{"name": "Unknown"}]
    author_names = " and ".join(author["name"] for author in authors)
    pubdate_parts = paper.get("pubdate", "").split()
    year = pubdate_parts[0] if pubdate_parts else ""
    title = paper.get("title", "No Title")

    # Name formatting: first and last whitespace-separated token of the
    # first author's name, reduced to alphanumeric characters
    name_parts = authors[0]["name"].split() or ["Unknown"]
    clean_first_name = "".join(c for c in name_parts[0] if c.isalnum())
    clean_last_name = "".join(c for c in name_parts[-1] if c.isalnum())

    # Title words: lower-cased alphanumerics of the first two words,
    # "" for any word the title does not have
    title_words = title.split()
    key_words = [
        "".join(c.lower() for c in word if c.isalnum()) for word in title_words[:2]
    ]
    key_words += [""] * (2 - len(key_words))
    first_title_word, second_title_word = key_words

    citation_key = f"{clean_first_name}.{clean_last_name}_{year}_{first_title_word}_{second_title_word}"
    keyword_text = ", ".join(keywords)

    entry = f"""@article{{{citation_key},
    author = {{{author_names}}},
    title = {{{title}}},
    journal = {{{journal}}},
    year = {{{year}}},
    pmid = {{{pmid}}},
    doi = {{{doi}}},
    publisher = {{{crossref_metrics.get('publisher', '')}}},
    references = {{{crossref_metrics.get('references', 0)}}},
    keywords = {{{keyword_text}}},
    abstract = {{{abstract}}}
}}
"""
    return entry
268
+
269
+
270
async def fetch_async(
    session: aiohttp.ClientSession, url: str, params: Dict
) -> Union[Dict, str]:
    """Performs one GET request and decodes the body according to retmode.

    Returns the parsed JSON when ``params["retmode"]`` is "json", the raw
    text for any other retmode, and an empty dict when the response status
    is not 200.
    """
    async with session.get(url, params=params) as reply:
        if reply.status != 200:
            return {}
        if params.get("retmode") == "json":
            return await reply.json()
        # "xml" and every other retmode are handed back as text
        return await reply.text()
282
+
283
+
284
async def batch__fetch_details(pmids: List[str], batch_size: int = 20) -> List[Dict]:
    """Requests abstracts and summaries for PMIDs in concurrent batches.

    Parameters
    ----------
    pmids : List[str]
        List of PubMed IDs
    batch_size : int, optional
        Number of PMIDs sent per request

    Returns
    -------
    List[Dict]
        Responses in request order; for every batch the efetch (XML)
        result is immediately followed by the esummary (JSON) result
    """
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
    efetch_url = f"{base_url}efetch.fcgi"
    esummary_url = f"{base_url}esummary.fcgi"

    async with aiohttp.ClientSession() as session:
        pending = []
        for start in range(0, len(pmids), batch_size):
            id_list = ",".join(pmids[start : start + batch_size])

            abstract_query = {
                "db": "pubmed",
                "id": id_list,
                "retmode": "xml",
                "rettype": "abstract",
            }
            summary_query = {
                "db": "pubmed",
                "id": id_list,
                "retmode": "json",
            }

            # Two coroutines per batch, awaited together below
            pending.append(fetch_async(session, efetch_url, abstract_query))
            pending.append(fetch_async(session, esummary_url, summary_query))

        return await asyncio.gather(*pending)
327
+
328
+
329
def search_pubmed(query: str, n_entries: int = 10) -> int:
    """Searches PubMed and writes the hits to ``pubmed_<query>.bib``.

    Parameters
    ----------
    query : str
        Search query; spaces are replaced by "_" in the output file name
    n_entries : int, optional
        Maximum number of found PMIDs to fetch and export

    Returns
    -------
    int
        Exit status: 0 after the file was written, 1 when the search
        returned nothing
    """
    search_results = _search_pubmed(query)
    if not search_results:
        return 1

    pmids = search_results["esearchresult"]["idlist"]
    output_file = f"pubmed_{query.replace(' ', '_')}.bib"

    # Fetch all batches concurrently; results alternate between the XML
    # (abstracts) and the JSON (summary) response of each batch.
    results = asyncio.run(batch__fetch_details(pmids[:n_entries]))

    default_data = ("", [], "")  # abstract, keywords, doi
    bibtex_entries = []
    for xml_response, json_response in zip(results[::2], results[1::2]):
        # A failed XML fetch must not leave `abstracts` unbound or carry
        # over the previous batch's data.
        if isinstance(xml_response, str) and xml_response.strip():
            abstracts = _parse_abstract_xml(xml_response)
        else:
            abstracts = {}

        if not (isinstance(json_response, dict) and "result" in json_response):
            continue

        for pmid, paper in json_response["result"].items():
            if pmid == "uids":
                continue
            citation = _get_citation(pmid)
            if not citation:
                citation = format_bibtex(
                    paper, pmid, abstracts.get(pmid, default_data)
                )
            bibtex_entries.append(citation)

    # Write all entries at once, in a single pass over the results
    with open(output_file, "w", encoding="utf-8") as f:
        f.write("\n".join(bibtex_entries))

    return 0
387
+
388
+
389
def parse_args() -> argparse.Namespace:
    """Parses the command-line options of the PubMed search tool.

    Returns
    -------
    argparse.Namespace
        Namespace with ``query`` (str) and ``n_entries`` (int)
    """
    parser = argparse.ArgumentParser(
        description="PubMed article search and retrieval tool"
    )
    parser.add_argument(
        "--query",
        "-q",
        type=str,
        # The help text promises this default; without it the query is None.
        default="epilepsy prediction",
        help='Search query (default: "epilepsy prediction")',
    )
    parser.add_argument(
        "--n_entries",
        "-n",
        type=int,
        default=10,
        help="Maximum number of entries to export (default: %(default)s)",
    )
    args = parser.parse_args()
    scitex.str.printc(args, c="yellow")
    return args
409
+
410
+
411
def run_main() -> None:
    """Script entry point: start a scitex session, run the search, close."""
    global CONFIG
    import sys

    import matplotlib.pyplot as plt
    import scitex

    # scitex.gen.start hands back the session config plus replacement
    # stdout/stderr objects, which are installed on `sys` here.
    session = scitex.gen.start(
        sys,
        verbose=False,
    )
    CONFIG, sys.stdout, sys.stderr, plt, _cc = session

    cli_args = parse_args()
    status = search_pubmed(cli_args.query, cli_args.n_entries)

    scitex.gen.close(
        CONFIG,
        verbose=False,
        notify=False,
        message="",
        exit_status=status,
    )
433
+
434
+
435
+ if __name__ == "__main__":
436
+ run_main()
437
+
438
+ # EOF
@@ -0,0 +1,158 @@
1
+ #!./env/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ # Time-stamp: "2024-07-29 21:43:30 (ywatanabe)"
4
+ # ./src/scitex/web/_crawl.py
5
+
6
+
7
+ import requests
8
+ from bs4 import BeautifulSoup
9
+ import urllib.parse
10
+ from concurrent.futures import ThreadPoolExecutor, as_completed
11
+ import json
12
+ from tqdm import tqdm
13
+ import scitex
14
+ from pprint import pprint
15
+
16
+ try:
17
+ from readability import Document
18
+ except ImportError:
19
+ try:
20
+ from readability.readability import Document
21
+ except ImportError:
22
+ Document = None
23
+
24
+ import re
25
+
26
+
27
+ # def crawl_url(url, max_depth=1):
28
+ # print("\nCrawling...")
29
+ # visited = set()
30
+ # to_visit = [(url, 0)]
31
+ # contents = {}
32
+
33
+ # while to_visit:
34
+ # current_url, depth = to_visit.pop(0)
35
+ # if current_url in visited or depth > max_depth:
36
+ # continue
37
+
38
+ # try:
39
+ # response = requests.get(current_url)
40
+ # if response.status_code == 200:
41
+ # visited.add(current_url)
42
+ # contents[current_url] = response.text
43
+ # soup = BeautifulSoup(response.text, "html.parser")
44
+
45
+ # for link in soup.find_all("a", href=True):
46
+ # absolute_link = urllib.parse.urljoin(
47
+ # current_url, link["href"]
48
+ # )
49
+ # if absolute_link not in visited:
50
+ # to_visit.append((absolute_link, depth + 1))
51
+
52
+ # except requests.RequestException:
53
+ # pass
54
+
55
+ # return visited, contents
56
+
57
+
58
def extract_main_content(html):
    """Reduces an HTML page to whitespace-normalised plain text.

    When the readability ``Document`` class is available, only its summary
    of the page is kept; otherwise the whole page is stripped of tags and
    cut to the first 5000 characters.
    """

    def _plain_text(markup):
        # Drop tags, then collapse every run of whitespace to one space
        untagged = re.sub("<[^<]+?>", "", markup)
        return " ".join(untagged.split())

    if Document is not None:
        return _plain_text(Document(html).summary())

    # Fallback without readability: limit to first 5000 chars
    return _plain_text(html)[:5000]
72
+
73
+
74
def crawl_url(url, max_depth=1):
    """Crawls breadth-first from ``url`` and collects the pages' main text.

    Parameters
    ----------
    url : str
        Start URL (depth 0)
    max_depth : int, optional
        Largest link distance from the start URL that is still fetched

    Returns
    -------
    tuple
        ``(visited, contents)``: the set of URLs that answered with status
        200 and a dict mapping each of them to its extracted main content
    """
    from collections import deque

    print("\nCrawling...")
    visited = set()
    contents = {}
    attempted = set()  # every URL already requested, successful or not
    to_visit = deque([(url, 0)])  # deque: O(1) pops from the left

    while to_visit:
        current_url, depth = to_visit.popleft()
        if current_url in attempted or depth > max_depth:
            continue
        # Remember failures too, so a dead link is requested only once.
        attempted.add(current_url)

        try:
            # A timeout keeps one stalled server from blocking the crawl.
            response = requests.get(current_url, timeout=30)
        except requests.RequestException:
            # Best-effort crawl: unreachable or invalid URLs are skipped.
            continue

        if response.status_code != 200:
            continue

        visited.add(current_url)
        contents[current_url] = extract_main_content(response.text)

        if depth >= max_depth:
            # Links from here would exceed max_depth and never be fetched.
            continue

        soup = BeautifulSoup(response.text, "html.parser")
        for link in soup.find_all("a", href=True):
            absolute_link = urllib.parse.urljoin(current_url, link["href"])
            if absolute_link not in attempted:
                to_visit.append((absolute_link, depth + 1))

    return visited, contents
102
+
103
+
104
def crawl_to_json(start_url):
    """Crawls ``start_url`` and returns one-line page summaries as JSON.

    "https://" is prepended when ``start_url`` does not start with "http".
    Each crawled page is summarised by a "gpt-4o-mini" GenAI instance in a
    thread pool. The returned JSON string holds "start_url" and
    "crawled_pages", a list of {"url", "content"} objects in the order the
    summaries completed.
    """
    start_url = start_url if start_url.startswith("http") else "https://" + start_url
    crawled_urls, contents = crawl_url(start_url)

    print("\nSummalizing as json...")

    def _summarize_page(page_url):
        # One model instance per page
        model = scitex.ai.GenAI("gpt-4o-mini")
        summary = model(f"Summarize this page in 1 line:\n\n{contents[page_url]}")
        return {"url": page_url, "content": summary}

    with ThreadPoolExecutor() as pool:
        pending = [pool.submit(_summarize_page, page_url) for page_url in crawled_urls]
        progress = tqdm(
            as_completed(pending),
            total=len(crawled_urls),
            desc="Processing URLs",
        )
        crawled_pages = [finished.result() for finished in progress]

    return json.dumps(
        {"start_url": start_url, "crawled_pages": crawled_pages}, indent=2
    )
131
+
132
+
133
def summarize_all(json_contents):
    """Asks a "gpt-4o-mini" GenAI instance for a 5-bullet summary of the JSON."""
    prompt = f"Summarize this json file with 5 bullet points:\n\n{json_contents}"
    return scitex.ai.GenAI("gpt-4o-mini")(prompt)
137
+
138
+
139
def summarize_url(start_url):
    """Crawls ``start_url``, prints and returns the overall summary.

    Returns a ``(summary, json_result)`` pair: the summary over all pages
    and the JSON string with the per-page summaries it was built from.
    """
    pages_json = crawl_to_json(start_url)
    overall_summary = summarize_all(pages_json)
    pprint(overall_summary)
    return overall_summary, pages_json
145
+
146
+
147
+ main = summarize_url
148
+
149
if __name__ == "__main__":
    import argparse
    import scitex

    parser = argparse.ArgumentParser(description="")
    # The URL is mandatory: without it main() would be called with None
    # and fail on `start_url.startswith`.
    parser.add_argument(
        "--url", "-u", type=str, required=True, help="Start URL to crawl and summarize"
    )
    args = parser.parse_args()
    scitex.gen.print_block(args, c="yellow")

    main(args.url)