scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (704)
  1. scitex/__init__.py +53 -15
  2. scitex/__main__.py +72 -26
  3. scitex/__version__.py +1 -1
  4. scitex/_sh.py +145 -23
  5. scitex/ai/__init__.py +30 -16
  6. scitex/ai/_gen_ai/_Anthropic.py +5 -7
  7. scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
  8. scitex/ai/_gen_ai/_DeepSeek.py +10 -2
  9. scitex/ai/_gen_ai/_Google.py +2 -2
  10. scitex/ai/_gen_ai/_Llama.py +2 -2
  11. scitex/ai/_gen_ai/_OpenAI.py +2 -2
  12. scitex/ai/_gen_ai/_PARAMS.py +51 -65
  13. scitex/ai/_gen_ai/_Perplexity.py +2 -2
  14. scitex/ai/_gen_ai/__init__.py +25 -14
  15. scitex/ai/_gen_ai/_format_output_func.py +4 -4
  16. scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
  17. scitex/ai/classification/CrossValidationExperiment.py +374 -0
  18. scitex/ai/classification/__init__.py +43 -4
  19. scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
  20. scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
  21. scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
  22. scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
  23. scitex/ai/classification/reporters/__init__.py +11 -0
  24. scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  25. scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
  26. scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
  27. scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
  28. scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
  29. scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
  30. scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
  31. scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  32. scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  33. scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  34. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  35. scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  36. scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  37. scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  38. scitex/ai/classification/timeseries/__init__.py +39 -0
  39. scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
  40. scitex/ai/clustering/_umap.py +2 -2
  41. scitex/ai/feature_extraction/vit.py +1 -0
  42. scitex/ai/feature_selection/__init__.py +30 -0
  43. scitex/ai/feature_selection/feature_selection.py +364 -0
  44. scitex/ai/loss/multi_task_loss.py +1 -1
  45. scitex/ai/metrics/__init__.py +51 -4
  46. scitex/ai/metrics/_calc_bacc.py +61 -0
  47. scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
  48. scitex/ai/metrics/_calc_clf_report.py +78 -0
  49. scitex/ai/metrics/_calc_conf_mat.py +93 -0
  50. scitex/ai/metrics/_calc_feature_importance.py +183 -0
  51. scitex/ai/metrics/_calc_mcc.py +61 -0
  52. scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
  53. scitex/ai/metrics/_calc_roc_auc.py +110 -0
  54. scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
  55. scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
  56. scitex/ai/metrics/_normalize_labels.py +83 -0
  57. scitex/ai/plt/__init__.py +47 -8
  58. scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
  59. scitex/ai/plt/_plot_feature_importance.py +323 -0
  60. scitex/ai/plt/_plot_learning_curve.py +345 -0
  61. scitex/ai/plt/_plot_optuna_study.py +225 -0
  62. scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
  63. scitex/ai/plt/_plot_roc_curve.py +255 -0
  64. scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
  65. scitex/ai/training/__init__.py +2 -2
  66. scitex/ai/utils/grid_search.py +3 -3
  67. scitex/benchmark/__init__.py +52 -0
  68. scitex/benchmark/benchmark.py +400 -0
  69. scitex/benchmark/monitor.py +370 -0
  70. scitex/benchmark/profiler.py +297 -0
  71. scitex/browser/__init__.py +48 -0
  72. scitex/browser/automation/CookieHandler.py +216 -0
  73. scitex/browser/automation/__init__.py +7 -0
  74. scitex/browser/collaboration/__init__.py +55 -0
  75. scitex/browser/collaboration/auth_helpers.py +94 -0
  76. scitex/browser/collaboration/collaborative_agent.py +136 -0
  77. scitex/browser/collaboration/credential_manager.py +188 -0
  78. scitex/browser/collaboration/interactive_panel.py +400 -0
  79. scitex/browser/collaboration/persistent_browser.py +170 -0
  80. scitex/browser/collaboration/shared_session.py +383 -0
  81. scitex/browser/collaboration/standard_interactions.py +246 -0
  82. scitex/browser/collaboration/visual_feedback.py +181 -0
  83. scitex/browser/core/BrowserMixin.py +326 -0
  84. scitex/browser/core/ChromeProfileManager.py +446 -0
  85. scitex/browser/core/__init__.py +9 -0
  86. scitex/browser/debugging/__init__.py +18 -0
  87. scitex/browser/debugging/_browser_logger.py +657 -0
  88. scitex/browser/debugging/_highlight_element.py +143 -0
  89. scitex/browser/debugging/_show_grid.py +154 -0
  90. scitex/browser/interaction/__init__.py +24 -0
  91. scitex/browser/interaction/click_center.py +149 -0
  92. scitex/browser/interaction/click_with_fallbacks.py +206 -0
  93. scitex/browser/interaction/close_popups.py +498 -0
  94. scitex/browser/interaction/fill_with_fallbacks.py +209 -0
  95. scitex/browser/pdf/__init__.py +14 -0
  96. scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
  97. scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
  98. scitex/browser/remote/CaptchaHandler.py +434 -0
  99. scitex/browser/remote/ZenRowsAPIClient.py +347 -0
  100. scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
  101. scitex/browser/remote/__init__.py +11 -0
  102. scitex/browser/stealth/HumanBehavior.py +344 -0
  103. scitex/browser/stealth/StealthManager.py +1008 -0
  104. scitex/browser/stealth/__init__.py +9 -0
  105. scitex/browser/template.py +122 -0
  106. scitex/capture/__init__.py +110 -0
  107. scitex/capture/__main__.py +25 -0
  108. scitex/capture/capture.py +848 -0
  109. scitex/capture/cli.py +233 -0
  110. scitex/capture/gif.py +344 -0
  111. scitex/capture/mcp_server.py +961 -0
  112. scitex/capture/session.py +70 -0
  113. scitex/capture/utils.py +705 -0
  114. scitex/cli/__init__.py +17 -0
  115. scitex/cli/cloud.py +447 -0
  116. scitex/cli/main.py +42 -0
  117. scitex/cli/scholar.py +280 -0
  118. scitex/context/_suppress_output.py +5 -3
  119. scitex/db/__init__.py +30 -3
  120. scitex/db/__main__.py +75 -0
  121. scitex/db/_check_health.py +381 -0
  122. scitex/db/_delete_duplicates.py +25 -386
  123. scitex/db/_inspect.py +335 -114
  124. scitex/db/_inspect_optimized.py +301 -0
  125. scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
  126. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
  127. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
  128. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
  129. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
  130. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
  131. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
  132. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
  133. scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
  134. scitex/db/_postgresql/__init__.py +6 -0
  135. scitex/db/_sqlite3/_SQLite3.py +210 -0
  136. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
  137. scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
  138. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
  139. scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
  140. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
  141. scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
  142. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
  143. scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
  144. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
  145. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
  146. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
  147. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
  148. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
  149. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
  150. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
  151. scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
  152. scitex/db/_sqlite3/__init__.py +7 -0
  153. scitex/db/_sqlite3/_delete_duplicates.py +274 -0
  154. scitex/decorators/__init__.py +2 -0
  155. scitex/decorators/_cache_disk.py +13 -5
  156. scitex/decorators/_cache_disk_async.py +49 -0
  157. scitex/decorators/_deprecated.py +175 -10
  158. scitex/decorators/_timeout.py +1 -1
  159. scitex/dev/_analyze_code_flow.py +2 -2
  160. scitex/dict/_DotDict.py +73 -15
  161. scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
  162. scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
  163. scitex/dict/__init__.py +2 -0
  164. scitex/dict/_flatten.py +27 -0
  165. scitex/dsp/_crop.py +2 -2
  166. scitex/dsp/_demo_sig.py +2 -2
  167. scitex/dsp/_detect_ripples.py +2 -2
  168. scitex/dsp/_hilbert.py +2 -2
  169. scitex/dsp/_listen.py +6 -6
  170. scitex/dsp/_modulation_index.py +2 -2
  171. scitex/dsp/_pac.py +1 -1
  172. scitex/dsp/_psd.py +2 -2
  173. scitex/dsp/_resample.py +2 -1
  174. scitex/dsp/_time.py +3 -2
  175. scitex/dsp/_wavelet.py +3 -2
  176. scitex/dsp/add_noise.py +2 -2
  177. scitex/dsp/example.py +1 -0
  178. scitex/dsp/filt.py +10 -9
  179. scitex/dsp/template.py +3 -2
  180. scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
  181. scitex/dsp/utils/pac.py +2 -2
  182. scitex/dt/_normalize_timestamp.py +432 -0
  183. scitex/errors.py +572 -0
  184. scitex/gen/_DimHandler.py +2 -2
  185. scitex/gen/__init__.py +37 -7
  186. scitex/gen/_deprecated_close.py +80 -0
  187. scitex/gen/_deprecated_start.py +26 -0
  188. scitex/gen/_detect_environment.py +152 -0
  189. scitex/gen/_detect_notebook_path.py +169 -0
  190. scitex/gen/_embed.py +6 -2
  191. scitex/gen/_get_notebook_path.py +257 -0
  192. scitex/gen/_less.py +1 -1
  193. scitex/gen/_list_packages.py +2 -2
  194. scitex/gen/_norm.py +44 -9
  195. scitex/gen/_norm_cache.py +269 -0
  196. scitex/gen/_src.py +3 -5
  197. scitex/gen/_title_case.py +3 -3
  198. scitex/io/__init__.py +28 -6
  199. scitex/io/_glob.py +13 -7
  200. scitex/io/_load.py +108 -21
  201. scitex/io/_load_cache.py +303 -0
  202. scitex/io/_load_configs.py +40 -15
  203. scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
  204. scitex/io/_load_modules/_ZarrExplorer.py +114 -0
  205. scitex/io/_load_modules/_bibtex.py +207 -0
  206. scitex/io/_load_modules/_hdf5.py +53 -178
  207. scitex/io/_load_modules/_json.py +5 -3
  208. scitex/io/_load_modules/_pdf.py +871 -16
  209. scitex/io/_load_modules/_sqlite3.py +15 -0
  210. scitex/io/_load_modules/_txt.py +41 -12
  211. scitex/io/_load_modules/_yaml.py +4 -3
  212. scitex/io/_load_modules/_zarr.py +126 -0
  213. scitex/io/_save.py +429 -171
  214. scitex/io/_save_modules/__init__.py +6 -0
  215. scitex/io/_save_modules/_bibtex.py +194 -0
  216. scitex/io/_save_modules/_csv.py +8 -4
  217. scitex/io/_save_modules/_excel.py +174 -15
  218. scitex/io/_save_modules/_hdf5.py +251 -226
  219. scitex/io/_save_modules/_image.py +1 -3
  220. scitex/io/_save_modules/_json.py +49 -4
  221. scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
  222. scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
  223. scitex/io/_save_modules/_tex.py +277 -0
  224. scitex/io/_save_modules/_yaml.py +42 -3
  225. scitex/io/_save_modules/_zarr.py +160 -0
  226. scitex/io/utils/__init__.py +20 -0
  227. scitex/io/utils/h5_to_zarr.py +616 -0
  228. scitex/linalg/_geometric_median.py +6 -2
  229. scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
  230. scitex/logging/__init__.py +122 -0
  231. scitex/logging/_config.py +158 -0
  232. scitex/logging/_context.py +103 -0
  233. scitex/logging/_formatters.py +128 -0
  234. scitex/logging/_handlers.py +64 -0
  235. scitex/logging/_levels.py +35 -0
  236. scitex/logging/_logger.py +163 -0
  237. scitex/logging/_print_capture.py +95 -0
  238. scitex/ml/__init__.py +69 -0
  239. scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
  240. scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
  241. scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
  242. scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
  243. scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
  244. scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
  245. scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
  246. scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
  247. scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
  248. scitex/ml/_gen_ai/__init__.py +43 -0
  249. scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
  250. scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
  251. scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
  252. scitex/ml/activation/__init__.py +8 -0
  253. scitex/ml/activation/_define.py +11 -0
  254. scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
  255. scitex/ml/classification/CrossValidationExperiment.py +374 -0
  256. scitex/ml/classification/__init__.py +46 -0
  257. scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
  258. scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
  259. scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
  260. scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
  261. scitex/ml/classification/reporters/__init__.py +11 -0
  262. scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
  263. scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
  264. scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
  265. scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
  266. scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
  267. scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
  268. scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
  269. scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
  270. scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
  271. scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
  272. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
  273. scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
  274. scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
  275. scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
  276. scitex/ml/classification/timeseries/__init__.py +39 -0
  277. scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
  278. scitex/ml/clustering/__init__.py +11 -0
  279. scitex/ml/clustering/_pca.py +115 -0
  280. scitex/ml/clustering/_umap.py +376 -0
  281. scitex/ml/feature_extraction/__init__.py +56 -0
  282. scitex/ml/feature_extraction/vit.py +149 -0
  283. scitex/ml/feature_selection/__init__.py +30 -0
  284. scitex/ml/feature_selection/feature_selection.py +364 -0
  285. scitex/ml/loss/_L1L2Losses.py +34 -0
  286. scitex/ml/loss/__init__.py +12 -0
  287. scitex/ml/loss/multi_task_loss.py +47 -0
  288. scitex/ml/metrics/__init__.py +56 -0
  289. scitex/ml/metrics/_calc_bacc.py +61 -0
  290. scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
  291. scitex/ml/metrics/_calc_clf_report.py +78 -0
  292. scitex/ml/metrics/_calc_conf_mat.py +93 -0
  293. scitex/ml/metrics/_calc_feature_importance.py +183 -0
  294. scitex/ml/metrics/_calc_mcc.py +61 -0
  295. scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
  296. scitex/ml/metrics/_calc_roc_auc.py +110 -0
  297. scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
  298. scitex/ml/metrics/_calc_silhouette_score.py +503 -0
  299. scitex/ml/metrics/_normalize_labels.py +83 -0
  300. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
  301. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
  302. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
  303. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
  304. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
  305. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
  306. scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
  307. scitex/ml/optim/__init__.py +13 -0
  308. scitex/ml/optim/_get_set.py +31 -0
  309. scitex/ml/optim/_optimizers.py +71 -0
  310. scitex/ml/plt/__init__.py +60 -0
  311. scitex/ml/plt/_plot_conf_mat.py +663 -0
  312. scitex/ml/plt/_plot_feature_importance.py +323 -0
  313. scitex/ml/plt/_plot_learning_curve.py +345 -0
  314. scitex/ml/plt/_plot_optuna_study.py +225 -0
  315. scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
  316. scitex/ml/plt/_plot_roc_curve.py +255 -0
  317. scitex/ml/sk/__init__.py +11 -0
  318. scitex/ml/sk/_clf.py +58 -0
  319. scitex/ml/sk/_to_sktime.py +100 -0
  320. scitex/ml/sklearn/__init__.py +26 -0
  321. scitex/ml/sklearn/clf.py +58 -0
  322. scitex/ml/sklearn/to_sktime.py +100 -0
  323. scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
  324. scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
  325. scitex/ml/training/__init__.py +7 -0
  326. scitex/ml/utils/__init__.py +22 -0
  327. scitex/ml/utils/_check_params.py +50 -0
  328. scitex/ml/utils/_default_dataset.py +46 -0
  329. scitex/ml/utils/_format_samples_for_sktime.py +26 -0
  330. scitex/ml/utils/_label_encoder.py +134 -0
  331. scitex/ml/utils/_merge_labels.py +22 -0
  332. scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
  333. scitex/ml/utils/_under_sample.py +51 -0
  334. scitex/ml/utils/_verify_n_gpus.py +16 -0
  335. scitex/ml/utils/grid_search.py +148 -0
  336. scitex/nn/_BNet.py +15 -9
  337. scitex/nn/_Filters.py +2 -2
  338. scitex/nn/_ModulationIndex.py +2 -2
  339. scitex/nn/_PAC.py +1 -1
  340. scitex/nn/_Spectrogram.py +12 -3
  341. scitex/nn/__init__.py +9 -10
  342. scitex/path/__init__.py +18 -0
  343. scitex/path/_clean.py +4 -0
  344. scitex/path/_find.py +9 -4
  345. scitex/path/_symlink.py +348 -0
  346. scitex/path/_version.py +4 -3
  347. scitex/pd/__init__.py +2 -0
  348. scitex/pd/_get_unique.py +99 -0
  349. scitex/plt/__init__.py +114 -5
  350. scitex/plt/_subplots/_AxesWrapper.py +1 -3
  351. scitex/plt/_subplots/_AxisWrapper.py +7 -3
  352. scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
  353. scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
  354. scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
  355. scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
  356. scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
  357. scitex/plt/_subplots/_FigWrapper.py +62 -6
  358. scitex/plt/_subplots/_export_as_csv.py +43 -27
  359. scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
  360. scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
  361. scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
  362. scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
  363. scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
  364. scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
  365. scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
  366. scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
  367. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
  368. scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
  369. scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
  370. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
  371. scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
  372. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
  373. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
  374. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
  375. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
  376. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
  377. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
  378. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
  379. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
  380. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
  381. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
  382. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
  383. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
  384. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
  385. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
  386. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
  387. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
  388. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
  389. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
  390. scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
  391. scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
  392. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
  393. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
  394. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
  395. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
  396. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
  397. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
  398. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
  399. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
  400. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
  401. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
  402. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
  403. scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
  404. scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
  405. scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
  406. scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
  407. scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
  408. scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
  409. scitex/plt/ax/_style/_hide_spines.py +1 -3
  410. scitex/plt/ax/_style/_rotate_labels.py +180 -76
  411. scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
  412. scitex/plt/ax/_style/_set_meta.py +11 -4
  413. scitex/plt/ax/_style/_set_supxyt.py +3 -3
  414. scitex/plt/ax/_style/_set_xyt.py +3 -3
  415. scitex/plt/ax/_style/_share_axes.py +2 -2
  416. scitex/plt/color/__init__.py +4 -4
  417. scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
  418. scitex/plt/utils/_configure_mpl.py +99 -86
  419. scitex/plt/utils/_histogram_utils.py +1 -3
  420. scitex/plt/utils/_is_valid_axis.py +1 -3
  421. scitex/plt/utils/_scitex_config.py +1 -0
  422. scitex/repro/__init__.py +75 -0
  423. scitex/{reproduce → repro}/_gen_ID.py +1 -1
  424. scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
  425. scitex/repro_rng/_RandomStateManager.py +590 -0
  426. scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  427. scitex/repro_rng/__init__.py +39 -0
  428. scitex/reproduce/__init__.py +25 -13
  429. scitex/reproduce/_hash_array.py +22 -0
  430. scitex/resource/_get_processor_usages.py +4 -4
  431. scitex/resource/_get_specs.py +2 -2
  432. scitex/resource/_log_processor_usages.py +2 -2
  433. scitex/rng/_RandomStateManager.py +590 -0
  434. scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
  435. scitex/rng/__init__.py +39 -0
  436. scitex/scholar/__init__.py +309 -19
  437. scitex/scholar/__main__.py +319 -0
  438. scitex/scholar/auth/ScholarAuthManager.py +308 -0
  439. scitex/scholar/auth/__init__.py +12 -0
  440. scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
  441. scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
  442. scitex/scholar/auth/core/StrategyResolver.py +309 -0
  443. scitex/scholar/auth/core/__init__.py +16 -0
  444. scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
  445. scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
  446. scitex/scholar/auth/gateway/__init__.py +38 -0
  447. scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
  448. scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
  449. scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
  450. scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
  451. scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
  452. scitex/scholar/auth/providers/__init__.py +18 -0
  453. scitex/scholar/auth/session/AuthCacheManager.py +189 -0
  454. scitex/scholar/auth/session/SessionManager.py +159 -0
  455. scitex/scholar/auth/session/__init__.py +11 -0
  456. scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
  457. scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
  458. scitex/scholar/auth/sso/SSOAutomator.py +180 -0
  459. scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
  460. scitex/scholar/auth/sso/__init__.py +15 -0
  461. scitex/scholar/browser/ScholarBrowserManager.py +705 -0
  462. scitex/scholar/browser/__init__.py +38 -0
  463. scitex/scholar/browser/utils/__init__.py +13 -0
  464. scitex/scholar/browser/utils/click_and_wait.py +205 -0
  465. scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
  466. scitex/scholar/browser/utils/wait_redirects.py +732 -0
  467. scitex/scholar/config/PublisherRules.py +132 -0
  468. scitex/scholar/config/ScholarConfig.py +126 -0
  469. scitex/scholar/config/__init__.py +17 -0
  470. scitex/scholar/core/Paper.py +627 -0
  471. scitex/scholar/core/Papers.py +722 -0
  472. scitex/scholar/core/Scholar.py +1975 -0
  473. scitex/scholar/core/__init__.py +9 -0
  474. scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
  475. scitex/scholar/impact_factor/__init__.py +20 -0
  476. scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
  477. scitex/scholar/impact_factor/estimation/__init__.py +40 -0
  478. scitex/scholar/impact_factor/estimation/build_database.py +0 -0
  479. scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
  480. scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
  481. scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
  482. scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
  483. scitex/scholar/integration/__init__.py +59 -0
  484. scitex/scholar/integration/base.py +502 -0
  485. scitex/scholar/integration/mendeley/__init__.py +22 -0
  486. scitex/scholar/integration/mendeley/exporter.py +166 -0
  487. scitex/scholar/integration/mendeley/importer.py +236 -0
  488. scitex/scholar/integration/mendeley/linker.py +79 -0
  489. scitex/scholar/integration/mendeley/mapper.py +212 -0
  490. scitex/scholar/integration/zotero/__init__.py +27 -0
  491. scitex/scholar/integration/zotero/__main__.py +264 -0
  492. scitex/scholar/integration/zotero/exporter.py +351 -0
  493. scitex/scholar/integration/zotero/importer.py +372 -0
  494. scitex/scholar/integration/zotero/linker.py +415 -0
  495. scitex/scholar/integration/zotero/mapper.py +286 -0
  496. scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
  497. scitex/scholar/metadata_engines/__init__.py +21 -0
  498. scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
  499. scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
  500. scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
  501. scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
  502. scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
  503. scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
  504. scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
  505. scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
  506. scitex/scholar/metadata_engines/individual/__init__.py +7 -0
  507. scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
  508. scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
  509. scitex/scholar/metadata_engines/utils/__init__.py +30 -0
  510. scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
  511. scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
  512. scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
  513. scitex/scholar/pdf_download/__init__.py +5 -0
  514. scitex/scholar/pdf_download/strategies/__init__.py +38 -0
  515. scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
  516. scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
  517. scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
  518. scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
  519. scitex/scholar/pdf_download/strategies/response_body.py +207 -0
  520. scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
  521. scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
  522. scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
  523. scitex/scholar/pipelines/__init__.py +49 -0
  524. scitex/scholar/storage/BibTeXHandler.py +1018 -0
  525. scitex/scholar/storage/PaperIO.py +468 -0
  526. scitex/scholar/storage/ScholarLibrary.py +182 -0
  527. scitex/scholar/storage/_DeduplicationManager.py +548 -0
  528. scitex/scholar/storage/_LibraryCacheManager.py +724 -0
  529. scitex/scholar/storage/_LibraryManager.py +1835 -0
  530. scitex/scholar/storage/__init__.py +28 -0
  531. scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
  532. scitex/scholar/url_finder/__init__.py +7 -0
  533. scitex/scholar/url_finder/strategies/__init__.py +33 -0
  534. scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
  535. scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
  536. scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
  537. scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
  538. scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
  539. scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
  540. scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
  541. scitex/scholar/utils/__init__.py +22 -0
  542. scitex/scholar/utils/bibtex/__init__.py +9 -0
  543. scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
  544. scitex/scholar/utils/cleanup/__init__.py +8 -0
  545. scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
  546. scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
  547. scitex/scholar/utils/text/_TextNormalizer.py +407 -0
  548. scitex/scholar/utils/text/__init__.py +9 -0
  549. scitex/scholar/zotero/__init__.py +38 -0
  550. scitex/session/__init__.py +51 -0
  551. scitex/session/_lifecycle.py +736 -0
  552. scitex/session/_manager.py +102 -0
  553. scitex/session/template.py +122 -0
  554. scitex/stats/__init__.py +30 -26
  555. scitex/stats/correct/__init__.py +21 -0
  556. scitex/stats/correct/_correct_bonferroni.py +551 -0
  557. scitex/stats/correct/_correct_fdr.py +634 -0
  558. scitex/stats/correct/_correct_holm.py +548 -0
  559. scitex/stats/correct/_correct_sidak.py +499 -0
  560. scitex/stats/descriptive/__init__.py +85 -0
  561. scitex/stats/descriptive/_circular.py +540 -0
  562. scitex/stats/descriptive/_describe.py +219 -0
  563. scitex/stats/descriptive/_nan.py +518 -0
  564. scitex/stats/descriptive/_real.py +189 -0
  565. scitex/stats/effect_sizes/__init__.py +41 -0
  566. scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
  567. scitex/stats/effect_sizes/_cohens_d.py +342 -0
  568. scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
  569. scitex/stats/effect_sizes/_eta_squared.py +302 -0
  570. scitex/stats/effect_sizes/_prob_superiority.py +296 -0
  571. scitex/stats/posthoc/__init__.py +19 -0
  572. scitex/stats/posthoc/_dunnett.py +463 -0
  573. scitex/stats/posthoc/_games_howell.py +383 -0
  574. scitex/stats/posthoc/_tukey_hsd.py +367 -0
  575. scitex/stats/power/__init__.py +19 -0
  576. scitex/stats/power/_power.py +433 -0
  577. scitex/stats/template.py +119 -0
  578. scitex/stats/utils/__init__.py +62 -0
  579. scitex/stats/utils/_effect_size.py +985 -0
  580. scitex/stats/utils/_formatters.py +270 -0
  581. scitex/stats/utils/_normalizers.py +927 -0
  582. scitex/stats/utils/_power.py +433 -0
  583. scitex/stats_v01/_EffectSizeCalculator.py +488 -0
  584. scitex/stats_v01/_StatisticalValidator.py +411 -0
  585. scitex/stats_v01/__init__.py +60 -0
  586. scitex/stats_v01/_additional_tests.py +415 -0
  587. scitex/{stats → stats_v01}/_p2stars.py +19 -5
  588. scitex/stats_v01/_two_sample_tests.py +141 -0
  589. scitex/stats_v01/desc/__init__.py +83 -0
  590. scitex/stats_v01/desc/_circular.py +540 -0
  591. scitex/stats_v01/desc/_describe.py +219 -0
  592. scitex/stats_v01/desc/_nan.py +518 -0
  593. scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
  594. scitex/stats_v01/desc/_real.py +189 -0
  595. scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
  596. scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
  597. scitex/str/__init__.py +1 -3
  598. scitex/str/_clean_path.py +6 -2
  599. scitex/str/_latex_fallback.py +267 -160
  600. scitex/str/_parse.py +44 -36
  601. scitex/str/_printc.py +1 -3
  602. scitex/template/__init__.py +87 -0
  603. scitex/template/_create_project.py +267 -0
  604. scitex/template/create_pip_project.py +80 -0
  605. scitex/template/create_research.py +80 -0
  606. scitex/template/create_singularity.py +80 -0
  607. scitex/units.py +291 -0
  608. scitex/utils/_compress_hdf5.py +14 -3
  609. scitex/utils/_email.py +21 -2
  610. scitex/utils/_grid.py +6 -4
  611. scitex/utils/_notify.py +13 -10
  612. scitex/utils/_verify_scitex_format.py +589 -0
  613. scitex/utils/_verify_scitex_format_v01.py +370 -0
  614. scitex/utils/template.py +122 -0
  615. scitex/web/_search_pubmed.py +62 -16
  616. scitex-2.1.0.dist-info/LICENSE +21 -0
  617. scitex-2.1.0.dist-info/METADATA +677 -0
  618. scitex-2.1.0.dist-info/RECORD +919 -0
  619. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
  620. scitex-2.1.0.dist-info/entry_points.txt +3 -0
  621. scitex/ai/__Classifiers.py +0 -101
  622. scitex/ai/classification/classification_reporter.py +0 -1137
  623. scitex/ai/classification/classifiers.py +0 -101
  624. scitex/ai/classification_reporter.py +0 -1161
  625. scitex/ai/genai/__init__.py +0 -277
  626. scitex/ai/genai/anthropic_provider.py +0 -320
  627. scitex/ai/genai/anthropic_refactored.py +0 -109
  628. scitex/ai/genai/auth_manager.py +0 -200
  629. scitex/ai/genai/base_provider.py +0 -291
  630. scitex/ai/genai/chat_history.py +0 -307
  631. scitex/ai/genai/cost_tracker.py +0 -276
  632. scitex/ai/genai/deepseek_provider.py +0 -251
  633. scitex/ai/genai/google_provider.py +0 -228
  634. scitex/ai/genai/groq_provider.py +0 -248
  635. scitex/ai/genai/image_processor.py +0 -250
  636. scitex/ai/genai/llama_provider.py +0 -214
  637. scitex/ai/genai/mock_provider.py +0 -127
  638. scitex/ai/genai/model_registry.py +0 -304
  639. scitex/ai/genai/openai_provider.py +0 -293
  640. scitex/ai/genai/perplexity_provider.py +0 -205
  641. scitex/ai/genai/provider_base.py +0 -302
  642. scitex/ai/genai/provider_factory.py +0 -370
  643. scitex/ai/genai/response_handler.py +0 -235
  644. scitex/ai/layer/_Pass.py +0 -21
  645. scitex/ai/layer/__init__.py +0 -10
  646. scitex/ai/layer/_switch.py +0 -8
  647. scitex/ai/metrics/_bACC.py +0 -51
  648. scitex/ai/plt/_learning_curve.py +0 -194
  649. scitex/ai/plt/_optuna_study.py +0 -111
  650. scitex/ai/plt/aucs/__init__.py +0 -2
  651. scitex/ai/plt/aucs/example.py +0 -60
  652. scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
  653. scitex/ai/plt/aucs/roc_auc.py +0 -246
  654. scitex/ai/sampling/undersample.py +0 -29
  655. scitex/db/_SQLite3.py +0 -2136
  656. scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
  657. scitex/gen/_close.py +0 -222
  658. scitex/gen/_start.py +0 -451
  659. scitex/general/__init__.py +0 -5
  660. scitex/io/_load_modules/_db.py +0 -24
  661. scitex/life/__init__.py +0 -10
  662. scitex/life/_monitor_rain.py +0 -49
  663. scitex/reproduce/_fix_seeds.py +0 -45
  664. scitex/res/__init__.py +0 -5
  665. scitex/scholar/_local_search.py +0 -454
  666. scitex/scholar/_paper.py +0 -244
  667. scitex/scholar/_pdf_downloader.py +0 -325
  668. scitex/scholar/_search.py +0 -393
  669. scitex/scholar/_vector_search.py +0 -370
  670. scitex/scholar/_web_sources.py +0 -457
  671. scitex/stats/desc/__init__.py +0 -40
  672. scitex-2.0.0.dist-info/METADATA +0 -307
  673. scitex-2.0.0.dist-info/RECORD +0 -572
  674. scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
  675. /scitex/ai/{act → activation}/__init__.py +0 -0
  676. /scitex/ai/{act → activation}/_define.py +0 -0
  677. /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
  678. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
  679. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
  680. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
  681. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
  682. /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
  683. /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
  684. /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
  685. /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
  686. /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
  687. /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
  688. /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
  689. /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
  690. /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
  691. /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
  692. /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
  693. /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
  694. /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
  695. /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
  696. /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
  697. /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
  698. /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
  699. /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
  700. /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
  701. /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
  702. /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
  703. /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
  704. {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,686 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ # Time-stamp: "2025-08-01 13:00:00"
4
+ # Author: Yusuke Watanabe
5
+ # File: _ShibbolethAuthenticator.py
6
+
7
+ """
8
+ Shibboleth authentication for institutional access to academic papers.
9
+
10
+ This module provides authentication through Shibboleth single sign-on
11
+ to enable legal PDF downloads via institutional subscriptions.
12
+ """
13
+
14
+ import asyncio
15
+ import json
16
+ import re
17
+ from datetime import datetime, timedelta
18
+ from pathlib import Path
19
+ from typing import Any, Dict, List, Optional, Tuple
20
+ from urllib.parse import urlparse, urljoin, parse_qs
21
+
22
+ from scitex import logging
23
+
24
+ try:
25
+ from playwright.async_api import async_playwright, Page, Browser
26
+ except ImportError:
27
+ async_playwright = None
28
+ Page = None
29
+ Browser = None
30
+
31
+ from scitex.errors import ScholarError
32
+ from .BaseAuthenticator import BaseAuthenticator
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
class ShibbolethError(ScholarError):
    """Signal that a Shibboleth authentication attempt did not succeed."""
40
+
41
+
42
+ class ShibbolethAuthenticator(BaseAuthenticator):
43
+ """
44
+ Handles Shibboleth authentication for institutional access.
45
+
46
+ Shibboleth is a single sign-on (SSO) system that provides federated
47
+ identity management and access control for academic resources.
48
+
49
+ This authenticator:
50
+ 1. Authenticates via institutional Identity Provider (IdP)
51
+ 2. Handles SAML assertions and attribute exchange
52
+ 3. Maintains authenticated sessions
53
+ 4. Returns session cookies for use by download strategies
54
+ """
55
+
56
def __init__(
    self,
    institution: Optional[str] = None,
    idp_url: Optional[str] = None,
    username: Optional[str] = None,
    password: Optional[str] = None,
    entity_id: Optional[str] = None,
    cache_dir: Optional[Path] = None,
    timeout: int = 120,
    debug_mode: bool = False,
    **kwargs,
):
    """
    Initialize Shibboleth authenticator.

    Args:
        institution: Institution name (e.g., 'University of Example')
        idp_url: Identity Provider URL
        username: Username for authentication
        password: Password for authentication
        entity_id: Entity ID for the institution
        cache_dir: Directory for session cache. A ``str`` is accepted as
            well as a ``Path``; when omitted (or empty),
            ``~/.scitex/scholar/shibboleth_sessions`` is used.
        timeout: Authentication timeout in seconds (stored on the instance)
        debug_mode: Enable debug logging
        **kwargs: Accepted for call compatibility; not used by this method.
    """
    # The password is deliberately left out of the config handed to the
    # base class; it is only kept on the instance.
    super().__init__(
        config={
            "institution": institution,
            "idp_url": idp_url,
            "username": username,
            "entity_id": entity_id,
            "debug_mode": debug_mode,
        }
    )

    self.institution = institution
    self.idp_url = idp_url
    self.username = username
    self.password = password
    self.entity_id = entity_id
    self.timeout = timeout
    self.debug_mode = debug_mode

    # Session cache directory. Coerce to Path so that a plain string
    # argument does not fail on the .mkdir() call below.
    self.cache_dir = (
        Path(cache_dir)
        if cache_dir
        else Path.home() / ".scitex" / "scholar" / "shibboleth_sessions"
    )
    self.cache_dir.mkdir(parents=True, exist_ok=True)

    # Session file path (one file per institution/username combination)
    self.session_file = self.cache_dir / f"session_{self._get_session_async_key()}.json"

    # Session management
    self._cookies: Dict[str, str] = {}
    self._full_cookies: List[Dict[str, Any]] = []
    self._session_expiry: Optional[datetime] = None
    self._saml_attributes: Dict[str, Any] = {}

    # Common Shibboleth endpoints and patterns
    self.wayf_urls = [
        "https://wayf.surfnet.nl",  # Dutch federation
        "https://discovery.eduserv.org.uk",  # UK federation
        "https://wayf.incommonfederation.org",  # InCommon (US)
        "https://ds.aai.switch.ch",  # Swiss federation
        "https://discovery.shibboleth.net",  # Generic discovery
    ]

    # Common IdP login patterns; each list is tried in order, so the most
    # specific selectors come first and the generic ones last.
    self.idp_patterns = {
        "username_field": [
            "input[name='j_username']",
            "input[name='username']",
            "input[name='user']",
            "input[id*='username']",
            "input[type='text']",
        ],
        "password_field": [
            "input[name='j_password']",
            "input[name='password']",
            "input[name='pass']",
            "input[id*='password']",
            "input[type='password']",
        ],
        "submit_button": [
            "button[type='submit']",
            "input[type='submit']",
            "button[name='_eventId_proceed']",
            "button:has-text('Login')",
            "button:has-text('Sign in')",
        ],
    }

    # Load existing session
    self._load_session()
150
+
151
+ def _get_session_async_key(self) -> str:
152
+ """Generate unique session key for this configuration."""
153
+ key_parts = []
154
+ if self.institution:
155
+ key_parts.append(self.institution.replace(" ", "_"))
156
+ if self.username:
157
+ key_parts.append(self.username)
158
+ return "_".join(key_parts) or "default"
159
+
160
+ def _load_session(self) -> None:
161
+ """Load existing session from cache."""
162
+ if self.session_file.exists():
163
+ try:
164
+ with open(self.session_file, "r") as f:
165
+ data = json.load(f)
166
+
167
+ # Check if session is expired
168
+ expiry_str = data.get("expiry")
169
+ if expiry_str:
170
+ expiry = datetime.fromisoformat(expiry_str)
171
+ if expiry > datetime.now():
172
+ self._cookies = data.get("cookies", {})
173
+ self._full_cookies = data.get("full_cookies", [])
174
+ self._session_expiry = expiry
175
+ self._saml_attributes = data.get("saml_attributes", {})
176
+ logger.info(f"{self.name}: Loaded existing Shibboleth session")
177
+ else:
178
+ logger.info(f"{self.name}: Existing Shibboleth session expired")
179
+ self.session_file.unlink()
180
+ except Exception as e:
181
+ logger.warning(f"Failed to load session: {e}")
182
+
183
+ def _save_session_async(self) -> None:
184
+ """Save current session to cache."""
185
+ if self._cookies and self._session_expiry:
186
+ try:
187
+ data = {
188
+ "cookies": self._cookies,
189
+ "full_cookies": self._full_cookies,
190
+ "expiry": self._session_expiry.isoformat(),
191
+ "institution": self.institution,
192
+ "username": self.username,
193
+ "saml_attributes": self._saml_attributes,
194
+ }
195
+ with open(self.session_file, "w") as f:
196
+ json.dump(data, f, indent=2)
197
+ logger.info(f"{self.name}: Saved Shibboleth session")
198
+ except Exception as e:
199
+ logger.warning(f"Failed to save session: {e}")
200
+
201
async def authenticate_async(self, force: bool = False, **kwargs) -> dict:
    """
    Authenticate with Shibboleth and return session data.

    The Shibboleth authentication flow typically involves:
    1. Accessing a protected resource
    2. Redirect to WAYF (Where Are You From) service
    3. Selecting institution
    4. Redirect to institution's IdP
    5. Authentication at IdP
    6. SAML assertion sent back to Service Provider
    7. Access granted to resource

    Args:
        force: Force re-authentication even if session exists
        **kwargs: Additional parameters. Only ``resource_url`` is read
            here; it defaults to ``https://www.nature.com/siteindex``.

    Returns:
        Dictionary with the keys ``cookies``, ``full_cookies`` and
        ``saml_attributes``.

    Raises:
        ShibbolethError: If Playwright is not installed or any step of
            the flow fails (the original exception is chained as the cause).
    """
    if async_playwright is None:
        raise ShibbolethError(
            "Playwright is required for Shibboleth authentication. "
            "Install with: pip install playwright && playwright install chromium"
        )

    # Reuse the cached session unless the caller insists on a fresh login.
    if not force and await self.is_authenticate_async():
        logger.info(f"{self.name}: Using existing Shibboleth session")
        return {
            "cookies": self._cookies,
            "full_cookies": self._full_cookies,
            "saml_attributes": self._saml_attributes,
        }

    # Get resource URL to access (triggers Shibboleth flow)
    resource_url = kwargs.get("resource_url", "https://www.nature.com/siteindex")

    logger.info(f"Authenticating with Shibboleth for {self.institution or 'institution'}")

    async with async_playwright() as p:
        browser = await p.chromium.launch(
            headless=not self.debug_mode,
            args=['--disable-blink-features=AutomationControlled']
        )

        try:
            context = await browser.new_context(
                user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            )
            page = await context.new_page()

            # Step 1: Access protected resource
            await page.goto(resource_url, wait_until="networkidle")

            # Step 2: Look for institutional login option
            login_found = await self._find_institutional_login_async(page)

            if login_found:
                # Click institutional login
                await login_found.click()
                await page.wait_for_load_state("networkidle")

            # Step 3: Handle WAYF/Discovery Service
            wayf_handled = await self._handle_wayf_selection_async(page)

            if not wayf_handled and not self.idp_url:
                raise ShibbolethError("Could not find institution selection page")

            # Step 4: Handle IdP login. The login handler detects the
            # form fields itself, so it is invoked whether or not the
            # current URL matches the configured idp_url.
            await self._handle_idp_login_async(page)

            # Step 5: Wait for redirect back to resource
            # NOTE(review): this wait uses a fixed 30 s; self.timeout is
            # not consulted anywhere in this method - confirm intent.
            try:
                await page.wait_for_function(
                    """() => {
                        return !window.location.href.includes('idp') &&
                            !window.location.href.includes('wayf') &&
                            !window.location.href.includes('discovery');
                    }""",
                    timeout=30000
                )
            except Exception:
                # Continue anyway - might still be authenticated.
                # `Exception` (not a bare except) so that task
                # cancellation and KeyboardInterrupt still propagate.
                pass

            # Extract cookies and SAML attributes
            cookies = await context.cookies()

            # Try to extract SAML attributes from page or headers
            self._saml_attributes = await self._extract_saml_attributes_async(page)

            # Convert cookies
            self._cookies = {c["name"]: c["value"] for c in cookies}
            self._full_cookies = cookies

            # Set session expiry (typically 8-12 hours for Shibboleth)
            self._session_expiry = datetime.now() + timedelta(hours=8)

            # Save session
            self._save_session_async()

            logger.info(f"{self.name}: Shibboleth authentication successful")
            return {
                "cookies": self._cookies,
                "full_cookies": self._full_cookies,
                "saml_attributes": self._saml_attributes,
            }

        except Exception as e:
            logger.error(f"Shibboleth authentication failed: {e}")
            raise ShibbolethError(f"Authentication failed: {str(e)}") from e
        finally:
            await browser.close()
322
+
323
async def _find_institutional_login_async(self, page: Page) -> Optional[Any]:
    """Return the first visible institutional-login link or button on the page.

    The candidate selectors are probed in order; ``None`` is returned when
    none of them resolves to a visible element.
    """
    candidate_selectors = (
        "a:has-text('Institutional')",
        "a:has-text('Institution')",
        "a:has-text('Shibboleth')",
        "a:has-text('Federation')",
        "a:has-text('Access through your institution')",
        "button:has-text('Institutional')",
        "a[href*='shibboleth']",
        "a[href*='wayf']",
        "a[href*='idp']",
    )

    for css in candidate_selectors:
        handle = await page.query_selector(css)
        if not handle:
            continue
        if not await handle.is_visible():
            continue
        logger.debug(f"Found institutional login: {css}")
        return handle

    return None
344
+
345
async def _handle_wayf_selection_async(self, page: Page) -> bool:
    """Handle WAYF/Discovery Service institution selection.

    The page counts as a WAYF page when its URL or its HTML contains one
    of a few indicator substrings. If ``self.institution`` is set, the
    first matching ``<option>``, link or list item is selected or clicked.

    Returns:
        True if an institution entry was found and activated. False if
        the page is not a WAYF page or no entry could be selected; in
        debug mode the method first sleeps 30 seconds so the user can
        pick one manually, and still returns False afterwards.
    """
    # Check if we're on a WAYF page
    wayf_indicators = ["wayf", "discovery", "ds.", "where are you from"]
    current_url = page.url.lower()
    # Lower-case the HTML once instead of once per indicator.
    page_content = (await page.content()).lower()

    is_wayf = any(
        ind in current_url or ind in page_content for ind in wayf_indicators
    )

    if not is_wayf:
        return False

    logger.info(f"{self.name}: Detected WAYF/Discovery Service page")

    # Try to find institution selector
    if self.institution:
        # The name is embedded in a single-quoted selector string, so
        # backslashes and single quotes have to be escaped; otherwise a
        # name such as "King's College" yields an invalid selector.
        quoted = self.institution.replace("\\", "\\\\").replace("'", "\\'")

        # Search for institution in dropdown/list
        selectors = [
            f"option:has-text('{quoted}')",
            f"a:has-text('{quoted}')",
            f"li:has-text('{quoted}')",
        ]

        for selector in selectors:
            element = await page.query_selector(selector)
            if not element:
                continue

            # If it's an option, select it
            if await element.evaluate("el => el.tagName") == "OPTION":
                # closest('select') also covers options nested in an
                # <optgroup>, where parentElement is not the <select>.
                select = await element.evaluate_handle(
                    "el => el.closest('select') || el.parentElement"
                )
                await select.select_option(value=await element.get_attribute("value"))
                # Find and click submit button
                submit = await page.query_selector("button[type='submit'], input[type='submit']")
                if submit:
                    await submit.click()
            else:
                # Direct click
                await element.click()

            await page.wait_for_load_state("networkidle")
            return True

    # If automated selection fails, might need manual intervention
    if self.debug_mode:
        logger.info(f"{self.name}: Please select your institution manually")
        await asyncio.sleep(30)  # Give time for manual selection

    return False
393
+
394
async def _handle_idp_login_async(self, page: Page) -> None:
    """Handle login at the Identity Provider.

    Missing credentials are requested interactively. The username and
    password fields are located through ``self.idp_patterns`` (first
    visible match wins), filled in, and the form is submitted.

    Raises:
        ShibbolethError: If no visible username or password field is found.
    """
    logger.info(f"{self.name}: Handling IdP login page")

    # Get credentials. The prompts block on stdin, so they run in a
    # worker thread to keep the event loop responsive while the user types.
    if not self.username:
        self.username = await asyncio.to_thread(input, "Shibboleth username: ")
    if not self.password:
        import getpass
        self.password = await asyncio.to_thread(
            getpass.getpass, "Shibboleth password: "
        )

    # Try each username field pattern
    username_filled = False
    for selector in self.idp_patterns["username_field"]:
        field = await page.query_selector(selector)
        if field and await field.is_visible():
            await field.fill(self.username)
            username_filled = True
            break

    if not username_filled:
        raise ShibbolethError("Could not find username field")

    # Try each password field pattern
    password_field = None
    for selector in self.idp_patterns["password_field"]:
        field = await page.query_selector(selector)
        if field and await field.is_visible():
            await field.fill(self.password)
            password_field = field
            break

    if password_field is None:
        raise ShibbolethError("Could not find password field")

    # Try each submit button pattern
    submitted = False
    for selector in self.idp_patterns["submit_button"]:
        button = await page.query_selector(selector)
        if button and await button.is_visible():
            await button.click()
            submitted = True
            break

    if not submitted:
        # No known submit control was visible. Rather than silently
        # leaving the form unsent, press Enter in the password field.
        await password_field.press("Enter")

    # Wait for authentication to complete
    await page.wait_for_load_state("networkidle")
438
+
439
async def _extract_saml_attributes_async(self, page: Page) -> Dict[str, Any]:
    """Try to extract SAML attributes from the page.

    First collects ``<meta name="shib-...">`` tags, keyed by the name
    without the ``shib-`` prefix. If none exist and a username is known,
    falls back to the first e-mail-like string in the page body and
    stores it as ``eppn`` when it contains the username.

    Returns:
        The attributes found. Extraction is best effort: errors are
        logged at debug level and an empty or partial dict is returned.
    """
    attributes = {}

    try:
        # Some SPs expose attributes in meta tags
        meta_tags = await page.query_selector_all("meta[name^='shib-']")
        for tag in meta_tags:
            name = await tag.get_attribute("name")
            content = await tag.get_attribute("content")
            if name and content:
                # Strip only the leading prefix, not later occurrences.
                attributes[name.removeprefix("shib-")] = content

        # Check for common attribute patterns in page. Without a
        # username there is nothing to match an address against.
        if not attributes and self.username:
            # Try to find email/eppn
            email_pattern = r'[\w._%+-]+@[\w.-]+\.[a-zA-Z]{2,}'
            # text_content() needs a selector; "body" covers the whole
            # rendered document.
            page_text = await page.text_content("body")
            if page_text:
                emails = re.findall(email_pattern, page_text)
                if emails and self.username in emails[0]:
                    attributes["eppn"] = emails[0]

    except Exception as e:
        logger.debug(f"Could not extract SAML attributes: {e}")

    return attributes
467
+
468
async def is_authenticate_async(self, verify_live: bool = False) -> bool:
    """
    Check if we have a valid authenticated session.

    Args:
        verify_live: If True, additionally open a headless browser with
            the stored cookies, load a test page and check that the
            final URL does not look like a login page.

    Returns:
        True if authenticated, False otherwise. A failed live check
        clears the in-memory session; an error during the live check is
        logged and reported as False.
    """
    # Check if we have session data
    if not self._cookies or not self._session_expiry:
        return False

    # Check if session is expired
    if datetime.now() > self._session_expiry:
        logger.info(f"{self.name}: Shibboleth session expired")
        return False

    if not verify_live:
        return True

    # If requested, verify session is still valid
    try:
        async with async_playwright() as p:
            browser = await p.chromium.launch(headless=True)
            try:
                context = await browser.new_context()

                # Add cookies
                await context.add_cookies(self._full_cookies)

                page = await context.new_page()

                # Try to access a protected resource
                test_url = "https://www.nature.com/nature"
                await page.goto(test_url, wait_until="networkidle")

                # Check if we're redirected to login
                landed_on = page.url.lower()
                is_valid = not any(
                    ind in landed_on
                    for ind in ["login", "wayf", "idp", "shibboleth"]
                )
            finally:
                # Close the browser even when navigation raises.
                await browser.close()

        if not is_valid:
            logger.info(f"{self.name}: Shibboleth session no longer valid")
            self._cookies = {}
            self._full_cookies = []
            self._session_expiry = None

        return is_valid

    except Exception as e:
        logger.warning(f"Failed to verify session: {e}")
        return False
524
+
525
async def get_auth_headers_async(self) -> Dict[str, str]:
    """
    Build the HTTP headers derived from the stored SAML attributes.

    Shibboleth sessions are normally carried by cookies; only the
    ``eppn`` and ``affiliation`` attributes, when present, are mapped to
    ``X-Shibboleth-*`` headers. An empty dict is returned otherwise.
    """
    saml = self._saml_attributes
    if not saml:
        return {}

    header_for_attribute = (
        ("eppn", "X-Shibboleth-eppn"),
        ("affiliation", "X-Shibboleth-affiliation"),
    )
    return {
        header: saml[attribute]
        for attribute, header in header_for_attribute
        if attribute in saml
    }
544
+
545
async def get_auth_cookies_async(self) -> List[Dict[str, Any]]:
    """Get authentication cookies.

    Returns:
        The full cookie records stored for the current session.

    Raises:
        ShibbolethError: If there is no valid session.
    """
    if not await self.is_authenticate_async():
        # The earlier message ("Not authenticate_async") was a
        # search-and-replace artifact.
        raise ShibbolethError("Not authenticated")
    return self._full_cookies
550
+
551
async def logout_async(self) -> None:
    """
    Log out and clear authentication state.

    Only local state is cleared: the in-memory cookies, expiry and SAML
    attributes are reset and the cached session file is deleted. No
    request is sent to the IdP or to any Service Provider.

    Note: A complete Shibboleth logout is complex as it involves:
    - Local application logout
    - IdP logout
    - Optional Single Logout (SLO) to all SPs
    """
    self._cookies = {}
    self._full_cookies = []
    self._session_expiry = None
    self._saml_attributes = {}

    # Remove session file. missing_ok avoids the race between an
    # exists() check and the unlink when the file vanishes in between.
    self.session_file.unlink(missing_ok=True)

    logger.info(f"{self.name}: Logged out from Shibboleth")
570
+
571
async def get_session_info_async(self) -> Dict[str, Any]:
    """Summarise the current session as a plain dictionary.

    The session status is queried first; the remaining entries are read
    from the instance afterwards. ``session_expiry`` is an ISO-8601
    string, or ``None`` when no expiry is set.
    """
    session_active = await self.is_authenticate_async()

    expiry_text = None
    if self._session_expiry:
        expiry_text = self._session_expiry.isoformat()

    info: Dict[str, Any] = {
        "authenticate_async": session_active,
        "provider": "Shibboleth",
    }
    info["institution"] = self.institution
    info["username"] = self.username
    info["idp_url"] = self.idp_url
    info["entity_id"] = self.entity_id
    info["saml_attributes"] = self._saml_attributes
    info["session_expiry"] = expiry_text
    info["cookies_count"] = len(self._cookies)
    return info
586
+
587
def detect_shibboleth_sp(self, url: str) -> Optional[Dict[str, str]]:
    """
    Detect if a URL belongs to a known Shibboleth-protected Service Provider.

    Detection is a lookup of the URL's host against a small built-in
    table of publisher domains. The match is a case-insensitive
    substring test, so sub-domains and proxied hosts that embed the
    publisher domain are recognised as well.

    Args:
        url: URL to check. A URL without a scheme (e.g.
            ``www.jstor.org/stable/1``) is accepted.

    Returns:
        Dictionary with ``sp_type`` and ``wayf`` if detected, None
        otherwise (including for an empty or missing URL).
    """
    if not url:
        return None

    parsed = urlparse(url)
    if not parsed.netloc and "://" not in url:
        # Without a scheme urlparse puts the host into `path`; a leading
        # "//" makes it parse as the network location.
        parsed = urlparse(f"//{url}")
    # Host names are case-insensitive; the table below is lower-case.
    domain = parsed.netloc.lower()

    # Check for common Shibboleth indicators
    indicators = {
        "jstor.org": {
            "sp_type": "jstor",
            "wayf": "https://www.jstor.org/wayf",
        },
        "projectmuse.org": {
            "sp_type": "muse",
            "wayf": "https://muse.jhu.edu/wayf",
        },
        "ebscohost.com": {
            "sp_type": "ebsco",
            "wayf": "https://search.ebscohost.com/wayf",
        },
        "ieee.org": {
            "sp_type": "ieee",
            "wayf": "https://ieeexplore.ieee.org/servlet/wayf",
        },
        "sciencedirect.com": {
            "sp_type": "elsevier",
            "wayf": "https://www.sciencedirect.com/wayf",
        },
    }

    for domain_pattern, info in indicators.items():
        if domain_pattern in domain:
            return info

    return None
639
+
640
+ def get_wayf_url(self, sp_url: str) -> Optional[str]:
641
+ """
642
+ Get the WAYF (Where Are You From) URL for a Service Provider.
643
+
644
+ Args:
645
+ sp_url: Service Provider URL
646
+
647
+ Returns:
648
+ WAYF URL if known, None otherwise
649
+ """
650
+ sp_info = self.detect_shibboleth_sp(sp_url)
651
+ if sp_info and "wayf" in sp_info:
652
+ return sp_info["wayf"]
653
+
654
+ # Return generic WAYF URL based on region
655
+ # This would need to be configured based on user's location
656
+ return self.wayf_urls[0] # Default to first WAYF
657
+
658
async def create_authenticate_async_browser(self) -> tuple[Browser, Any]:
    """
    Create a browser instance with Shibboleth authentication.

    Authenticates first when no valid session is held, then launches
    Chromium (headless unless ``debug_mode`` is set) and loads the stored
    session cookies into a new context.

    Returns:
        Tuple of (browser, context) with authentication cookies set.
        The caller owns both and must close them.

    Raises:
        ShibbolethError: If Playwright is not installed, or if the
            authentication triggered here fails.
    """
    if not await self.is_authenticate_async():
        await self.authenticate_async()

    if async_playwright is None:
        raise ShibbolethError("Playwright is required")

    # NOTE(review): the driver handle `p` is not returned, so callers
    # cannot stop it through this API once the call has succeeded.
    p = await async_playwright().start()
    browser = None
    try:
        browser = await p.chromium.launch(
            headless=not self.debug_mode,
            args=['--disable-blink-features=AutomationControlled']
        )

        context = await browser.new_context(
            user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        )

        # Add authentication cookies
        await context.add_cookies(self._full_cookies)
    except BaseException:
        # Nothing is handed to the caller on failure (cancellation
        # included), so release the browser and the driver here before
        # re-raising.
        if browser is not None:
            await browser.close()
        await p.stop()
        raise

    return browser, context
685
+
686
+ # EOF