scitex 2.0.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +73 -0
- scitex/__main__.py +89 -0
- scitex/__version__.py +14 -0
- scitex/_sh.py +59 -0
- scitex/ai/_LearningCurveLogger.py +583 -0
- scitex/ai/__Classifiers.py +101 -0
- scitex/ai/__init__.py +55 -0
- scitex/ai/_gen_ai/_Anthropic.py +173 -0
- scitex/ai/_gen_ai/_BaseGenAI.py +336 -0
- scitex/ai/_gen_ai/_DeepSeek.py +175 -0
- scitex/ai/_gen_ai/_Google.py +161 -0
- scitex/ai/_gen_ai/_Groq.py +97 -0
- scitex/ai/_gen_ai/_Llama.py +142 -0
- scitex/ai/_gen_ai/_OpenAI.py +230 -0
- scitex/ai/_gen_ai/_PARAMS.py +565 -0
- scitex/ai/_gen_ai/_Perplexity.py +191 -0
- scitex/ai/_gen_ai/__init__.py +32 -0
- scitex/ai/_gen_ai/_calc_cost.py +78 -0
- scitex/ai/_gen_ai/_format_output_func.py +183 -0
- scitex/ai/_gen_ai/_genai_factory.py +71 -0
- scitex/ai/act/__init__.py +8 -0
- scitex/ai/act/_define.py +11 -0
- scitex/ai/classification/__init__.py +7 -0
- scitex/ai/classification/classification_reporter.py +1137 -0
- scitex/ai/classification/classifier_server.py +131 -0
- scitex/ai/classification/classifiers.py +101 -0
- scitex/ai/classification_reporter.py +1161 -0
- scitex/ai/classifier_server.py +131 -0
- scitex/ai/clustering/__init__.py +11 -0
- scitex/ai/clustering/_pca.py +115 -0
- scitex/ai/clustering/_umap.py +376 -0
- scitex/ai/early_stopping.py +149 -0
- scitex/ai/feature_extraction/__init__.py +56 -0
- scitex/ai/feature_extraction/vit.py +148 -0
- scitex/ai/genai/__init__.py +277 -0
- scitex/ai/genai/anthropic.py +177 -0
- scitex/ai/genai/anthropic_provider.py +320 -0
- scitex/ai/genai/anthropic_refactored.py +109 -0
- scitex/ai/genai/auth_manager.py +200 -0
- scitex/ai/genai/base_genai.py +336 -0
- scitex/ai/genai/base_provider.py +291 -0
- scitex/ai/genai/calc_cost.py +78 -0
- scitex/ai/genai/chat_history.py +307 -0
- scitex/ai/genai/cost_tracker.py +276 -0
- scitex/ai/genai/deepseek.py +188 -0
- scitex/ai/genai/deepseek_provider.py +251 -0
- scitex/ai/genai/format_output_func.py +183 -0
- scitex/ai/genai/genai_factory.py +71 -0
- scitex/ai/genai/google.py +169 -0
- scitex/ai/genai/google_provider.py +228 -0
- scitex/ai/genai/groq.py +104 -0
- scitex/ai/genai/groq_provider.py +248 -0
- scitex/ai/genai/image_processor.py +250 -0
- scitex/ai/genai/llama.py +155 -0
- scitex/ai/genai/llama_provider.py +214 -0
- scitex/ai/genai/mock_provider.py +127 -0
- scitex/ai/genai/model_registry.py +304 -0
- scitex/ai/genai/openai.py +230 -0
- scitex/ai/genai/openai_provider.py +293 -0
- scitex/ai/genai/params.py +565 -0
- scitex/ai/genai/perplexity.py +202 -0
- scitex/ai/genai/perplexity_provider.py +205 -0
- scitex/ai/genai/provider_base.py +302 -0
- scitex/ai/genai/provider_factory.py +370 -0
- scitex/ai/genai/response_handler.py +235 -0
- scitex/ai/layer/_Pass.py +21 -0
- scitex/ai/layer/__init__.py +10 -0
- scitex/ai/layer/_switch.py +8 -0
- scitex/ai/loss/_L1L2Losses.py +34 -0
- scitex/ai/loss/__init__.py +12 -0
- scitex/ai/loss/multi_task_loss.py +47 -0
- scitex/ai/metrics/__init__.py +9 -0
- scitex/ai/metrics/_bACC.py +51 -0
- scitex/ai/metrics/silhoute_score_block.py +496 -0
- scitex/ai/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
- scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
- scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
- scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
- scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
- scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
- scitex/ai/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
- scitex/ai/optim/__init__.py +13 -0
- scitex/ai/optim/_get_set.py +31 -0
- scitex/ai/optim/_optimizers.py +71 -0
- scitex/ai/plt/__init__.py +21 -0
- scitex/ai/plt/_conf_mat.py +592 -0
- scitex/ai/plt/_learning_curve.py +194 -0
- scitex/ai/plt/_optuna_study.py +111 -0
- scitex/ai/plt/aucs/__init__.py +2 -0
- scitex/ai/plt/aucs/example.py +60 -0
- scitex/ai/plt/aucs/pre_rec_auc.py +223 -0
- scitex/ai/plt/aucs/roc_auc.py +246 -0
- scitex/ai/sampling/undersample.py +29 -0
- scitex/ai/sk/__init__.py +11 -0
- scitex/ai/sk/_clf.py +58 -0
- scitex/ai/sk/_to_sktime.py +100 -0
- scitex/ai/sklearn/__init__.py +26 -0
- scitex/ai/sklearn/clf.py +58 -0
- scitex/ai/sklearn/to_sktime.py +100 -0
- scitex/ai/training/__init__.py +7 -0
- scitex/ai/training/early_stopping.py +150 -0
- scitex/ai/training/learning_curve_logger.py +555 -0
- scitex/ai/utils/__init__.py +22 -0
- scitex/ai/utils/_check_params.py +50 -0
- scitex/ai/utils/_default_dataset.py +46 -0
- scitex/ai/utils/_format_samples_for_sktime.py +26 -0
- scitex/ai/utils/_label_encoder.py +134 -0
- scitex/ai/utils/_merge_labels.py +22 -0
- scitex/ai/utils/_sliding_window_data_augmentation.py +11 -0
- scitex/ai/utils/_under_sample.py +51 -0
- scitex/ai/utils/_verify_n_gpus.py +16 -0
- scitex/ai/utils/grid_search.py +148 -0
- scitex/context/__init__.py +9 -0
- scitex/context/_suppress_output.py +38 -0
- scitex/db/_BaseMixins/_BaseBackupMixin.py +30 -0
- scitex/db/_BaseMixins/_BaseBatchMixin.py +31 -0
- scitex/db/_BaseMixins/_BaseBlobMixin.py +81 -0
- scitex/db/_BaseMixins/_BaseConnectionMixin.py +43 -0
- scitex/db/_BaseMixins/_BaseImportExportMixin.py +39 -0
- scitex/db/_BaseMixins/_BaseIndexMixin.py +29 -0
- scitex/db/_BaseMixins/_BaseMaintenanceMixin.py +33 -0
- scitex/db/_BaseMixins/_BaseQueryMixin.py +52 -0
- scitex/db/_BaseMixins/_BaseRowMixin.py +32 -0
- scitex/db/_BaseMixins/_BaseSchemaMixin.py +44 -0
- scitex/db/_BaseMixins/_BaseTableMixin.py +66 -0
- scitex/db/_BaseMixins/_BaseTransactionMixin.py +52 -0
- scitex/db/_BaseMixins/__init__.py +30 -0
- scitex/db/_PostgreSQL.py +126 -0
- scitex/db/_PostgreSQLMixins/_BackupMixin.py +166 -0
- scitex/db/_PostgreSQLMixins/_BatchMixin.py +82 -0
- scitex/db/_PostgreSQLMixins/_BlobMixin.py +231 -0
- scitex/db/_PostgreSQLMixins/_ConnectionMixin.py +92 -0
- scitex/db/_PostgreSQLMixins/_ImportExportMixin.py +59 -0
- scitex/db/_PostgreSQLMixins/_IndexMixin.py +64 -0
- scitex/db/_PostgreSQLMixins/_MaintenanceMixin.py +175 -0
- scitex/db/_PostgreSQLMixins/_QueryMixin.py +108 -0
- scitex/db/_PostgreSQLMixins/_RowMixin.py +75 -0
- scitex/db/_PostgreSQLMixins/_SchemaMixin.py +126 -0
- scitex/db/_PostgreSQLMixins/_TableMixin.py +176 -0
- scitex/db/_PostgreSQLMixins/_TransactionMixin.py +57 -0
- scitex/db/_PostgreSQLMixins/__init__.py +34 -0
- scitex/db/_SQLite3.py +2136 -0
- scitex/db/_SQLite3Mixins/_BatchMixin.py +243 -0
- scitex/db/_SQLite3Mixins/_BlobMixin.py +229 -0
- scitex/db/_SQLite3Mixins/_ConnectionMixin.py +108 -0
- scitex/db/_SQLite3Mixins/_ImportExportMixin.py +80 -0
- scitex/db/_SQLite3Mixins/_IndexMixin.py +32 -0
- scitex/db/_SQLite3Mixins/_MaintenanceMixin.py +176 -0
- scitex/db/_SQLite3Mixins/_QueryMixin.py +83 -0
- scitex/db/_SQLite3Mixins/_RowMixin.py +75 -0
- scitex/db/_SQLite3Mixins/_TableMixin.py +183 -0
- scitex/db/_SQLite3Mixins/_TransactionMixin.py +71 -0
- scitex/db/_SQLite3Mixins/__init__.py +30 -0
- scitex/db/__init__.py +14 -0
- scitex/db/_delete_duplicates.py +397 -0
- scitex/db/_inspect.py +163 -0
- scitex/decorators/__init__.py +54 -0
- scitex/decorators/_auto_order.py +172 -0
- scitex/decorators/_batch_fn.py +127 -0
- scitex/decorators/_cache_disk.py +32 -0
- scitex/decorators/_cache_mem.py +12 -0
- scitex/decorators/_combined.py +98 -0
- scitex/decorators/_converters.py +282 -0
- scitex/decorators/_deprecated.py +26 -0
- scitex/decorators/_not_implemented.py +30 -0
- scitex/decorators/_numpy_fn.py +86 -0
- scitex/decorators/_pandas_fn.py +121 -0
- scitex/decorators/_preserve_doc.py +19 -0
- scitex/decorators/_signal_fn.py +95 -0
- scitex/decorators/_timeout.py +55 -0
- scitex/decorators/_torch_fn.py +136 -0
- scitex/decorators/_wrap.py +39 -0
- scitex/decorators/_xarray_fn.py +88 -0
- scitex/dev/__init__.py +15 -0
- scitex/dev/_analyze_code_flow.py +284 -0
- scitex/dev/_reload.py +59 -0
- scitex/dict/_DotDict.py +442 -0
- scitex/dict/__init__.py +18 -0
- scitex/dict/_listed_dict.py +42 -0
- scitex/dict/_pop_keys.py +36 -0
- scitex/dict/_replace.py +13 -0
- scitex/dict/_safe_merge.py +62 -0
- scitex/dict/_to_str.py +32 -0
- scitex/dsp/__init__.py +72 -0
- scitex/dsp/_crop.py +122 -0
- scitex/dsp/_demo_sig.py +331 -0
- scitex/dsp/_detect_ripples.py +212 -0
- scitex/dsp/_ensure_3d.py +18 -0
- scitex/dsp/_hilbert.py +78 -0
- scitex/dsp/_listen.py +702 -0
- scitex/dsp/_misc.py +30 -0
- scitex/dsp/_mne.py +32 -0
- scitex/dsp/_modulation_index.py +79 -0
- scitex/dsp/_pac.py +319 -0
- scitex/dsp/_psd.py +102 -0
- scitex/dsp/_resample.py +65 -0
- scitex/dsp/_time.py +36 -0
- scitex/dsp/_transform.py +68 -0
- scitex/dsp/_wavelet.py +212 -0
- scitex/dsp/add_noise.py +111 -0
- scitex/dsp/example.py +253 -0
- scitex/dsp/filt.py +155 -0
- scitex/dsp/norm.py +18 -0
- scitex/dsp/params.py +51 -0
- scitex/dsp/reference.py +43 -0
- scitex/dsp/template.py +25 -0
- scitex/dsp/utils/__init__.py +15 -0
- scitex/dsp/utils/_differential_bandpass_filters.py +120 -0
- scitex/dsp/utils/_ensure_3d.py +18 -0
- scitex/dsp/utils/_ensure_even_len.py +10 -0
- scitex/dsp/utils/_zero_pad.py +48 -0
- scitex/dsp/utils/filter.py +408 -0
- scitex/dsp/utils/pac.py +177 -0
- scitex/dt/__init__.py +8 -0
- scitex/dt/_linspace.py +130 -0
- scitex/etc/__init__.py +15 -0
- scitex/etc/wait_key.py +34 -0
- scitex/gen/_DimHandler.py +196 -0
- scitex/gen/_TimeStamper.py +244 -0
- scitex/gen/__init__.py +95 -0
- scitex/gen/_alternate_kwarg.py +13 -0
- scitex/gen/_cache.py +11 -0
- scitex/gen/_check_host.py +34 -0
- scitex/gen/_ci.py +12 -0
- scitex/gen/_close.py +222 -0
- scitex/gen/_embed.py +78 -0
- scitex/gen/_inspect_module.py +257 -0
- scitex/gen/_is_ipython.py +12 -0
- scitex/gen/_less.py +48 -0
- scitex/gen/_list_packages.py +139 -0
- scitex/gen/_mat2py.py +88 -0
- scitex/gen/_norm.py +170 -0
- scitex/gen/_paste.py +18 -0
- scitex/gen/_print_config.py +84 -0
- scitex/gen/_shell.py +48 -0
- scitex/gen/_src.py +111 -0
- scitex/gen/_start.py +451 -0
- scitex/gen/_symlink.py +55 -0
- scitex/gen/_symlog.py +27 -0
- scitex/gen/_tee.py +238 -0
- scitex/gen/_title2path.py +60 -0
- scitex/gen/_title_case.py +88 -0
- scitex/gen/_to_even.py +84 -0
- scitex/gen/_to_odd.py +34 -0
- scitex/gen/_to_rank.py +39 -0
- scitex/gen/_transpose.py +37 -0
- scitex/gen/_type.py +78 -0
- scitex/gen/_var_info.py +73 -0
- scitex/gen/_wrap.py +17 -0
- scitex/gen/_xml2dict.py +76 -0
- scitex/gen/misc.py +730 -0
- scitex/gen/path.py +0 -0
- scitex/general/__init__.py +5 -0
- scitex/gists/_SigMacro_processFigure_S.py +128 -0
- scitex/gists/_SigMacro_toBlue.py +172 -0
- scitex/gists/__init__.py +12 -0
- scitex/io/_H5Explorer.py +292 -0
- scitex/io/__init__.py +82 -0
- scitex/io/_cache.py +101 -0
- scitex/io/_flush.py +24 -0
- scitex/io/_glob.py +103 -0
- scitex/io/_json2md.py +113 -0
- scitex/io/_load.py +168 -0
- scitex/io/_load_configs.py +146 -0
- scitex/io/_load_modules/__init__.py +38 -0
- scitex/io/_load_modules/_catboost.py +66 -0
- scitex/io/_load_modules/_con.py +20 -0
- scitex/io/_load_modules/_db.py +24 -0
- scitex/io/_load_modules/_docx.py +42 -0
- scitex/io/_load_modules/_eeg.py +110 -0
- scitex/io/_load_modules/_hdf5.py +196 -0
- scitex/io/_load_modules/_image.py +19 -0
- scitex/io/_load_modules/_joblib.py +19 -0
- scitex/io/_load_modules/_json.py +18 -0
- scitex/io/_load_modules/_markdown.py +103 -0
- scitex/io/_load_modules/_matlab.py +37 -0
- scitex/io/_load_modules/_numpy.py +39 -0
- scitex/io/_load_modules/_optuna.py +155 -0
- scitex/io/_load_modules/_pandas.py +69 -0
- scitex/io/_load_modules/_pdf.py +31 -0
- scitex/io/_load_modules/_pickle.py +24 -0
- scitex/io/_load_modules/_torch.py +16 -0
- scitex/io/_load_modules/_txt.py +126 -0
- scitex/io/_load_modules/_xml.py +49 -0
- scitex/io/_load_modules/_yaml.py +23 -0
- scitex/io/_mv_to_tmp.py +19 -0
- scitex/io/_path.py +286 -0
- scitex/io/_reload.py +78 -0
- scitex/io/_save.py +539 -0
- scitex/io/_save_modules/__init__.py +66 -0
- scitex/io/_save_modules/_catboost.py +22 -0
- scitex/io/_save_modules/_csv.py +89 -0
- scitex/io/_save_modules/_excel.py +49 -0
- scitex/io/_save_modules/_hdf5.py +249 -0
- scitex/io/_save_modules/_html.py +48 -0
- scitex/io/_save_modules/_image.py +140 -0
- scitex/io/_save_modules/_joblib.py +25 -0
- scitex/io/_save_modules/_json.py +25 -0
- scitex/io/_save_modules/_listed_dfs_as_csv.py +57 -0
- scitex/io/_save_modules/_listed_scalars_as_csv.py +42 -0
- scitex/io/_save_modules/_matlab.py +24 -0
- scitex/io/_save_modules/_mp4.py +29 -0
- scitex/io/_save_modules/_numpy.py +57 -0
- scitex/io/_save_modules/_optuna_study_as_csv_and_pngs.py +38 -0
- scitex/io/_save_modules/_pickle.py +45 -0
- scitex/io/_save_modules/_plotly.py +27 -0
- scitex/io/_save_modules/_text.py +23 -0
- scitex/io/_save_modules/_torch.py +26 -0
- scitex/io/_save_modules/_yaml.py +29 -0
- scitex/life/__init__.py +10 -0
- scitex/life/_monitor_rain.py +49 -0
- scitex/linalg/__init__.py +17 -0
- scitex/linalg/_distance.py +63 -0
- scitex/linalg/_geometric_median.py +64 -0
- scitex/linalg/_misc.py +73 -0
- scitex/nn/_AxiswiseDropout.py +27 -0
- scitex/nn/_BNet.py +126 -0
- scitex/nn/_BNet_Res.py +164 -0
- scitex/nn/_ChannelGainChanger.py +44 -0
- scitex/nn/_DropoutChannels.py +50 -0
- scitex/nn/_Filters.py +489 -0
- scitex/nn/_FreqGainChanger.py +110 -0
- scitex/nn/_GaussianFilter.py +48 -0
- scitex/nn/_Hilbert.py +111 -0
- scitex/nn/_MNet_1000.py +157 -0
- scitex/nn/_ModulationIndex.py +221 -0
- scitex/nn/_PAC.py +414 -0
- scitex/nn/_PSD.py +40 -0
- scitex/nn/_ResNet1D.py +120 -0
- scitex/nn/_SpatialAttention.py +25 -0
- scitex/nn/_Spectrogram.py +161 -0
- scitex/nn/_SwapChannels.py +50 -0
- scitex/nn/_TransposeLayer.py +19 -0
- scitex/nn/_Wavelet.py +183 -0
- scitex/nn/__init__.py +63 -0
- scitex/os/__init__.py +8 -0
- scitex/os/_mv.py +50 -0
- scitex/parallel/__init__.py +8 -0
- scitex/parallel/_run.py +151 -0
- scitex/path/__init__.py +33 -0
- scitex/path/_clean.py +52 -0
- scitex/path/_find.py +108 -0
- scitex/path/_get_module_path.py +51 -0
- scitex/path/_get_spath.py +35 -0
- scitex/path/_getsize.py +18 -0
- scitex/path/_increment_version.py +87 -0
- scitex/path/_mk_spath.py +51 -0
- scitex/path/_path.py +19 -0
- scitex/path/_split.py +23 -0
- scitex/path/_this_path.py +19 -0
- scitex/path/_version.py +101 -0
- scitex/pd/__init__.py +41 -0
- scitex/pd/_find_indi.py +126 -0
- scitex/pd/_find_pval.py +113 -0
- scitex/pd/_force_df.py +154 -0
- scitex/pd/_from_xyz.py +71 -0
- scitex/pd/_ignore_SettingWithCopyWarning.py +34 -0
- scitex/pd/_melt_cols.py +81 -0
- scitex/pd/_merge_columns.py +221 -0
- scitex/pd/_mv.py +63 -0
- scitex/pd/_replace.py +62 -0
- scitex/pd/_round.py +93 -0
- scitex/pd/_slice.py +63 -0
- scitex/pd/_sort.py +91 -0
- scitex/pd/_to_numeric.py +53 -0
- scitex/pd/_to_xy.py +59 -0
- scitex/pd/_to_xyz.py +110 -0
- scitex/plt/__init__.py +36 -0
- scitex/plt/_subplots/_AxesWrapper.py +182 -0
- scitex/plt/_subplots/_AxisWrapper.py +249 -0
- scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +414 -0
- scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +896 -0
- scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +368 -0
- scitex/plt/_subplots/_AxisWrapperMixins/_TrackingMixin.py +185 -0
- scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +16 -0
- scitex/plt/_subplots/_FigWrapper.py +226 -0
- scitex/plt/_subplots/_SubplotsWrapper.py +171 -0
- scitex/plt/_subplots/__init__.py +111 -0
- scitex/plt/_subplots/_export_as_csv.py +232 -0
- scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +61 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +90 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +49 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +46 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +39 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +125 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +72 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +34 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +36 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +79 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +59 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +32 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +79 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +75 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +64 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +44 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +70 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +66 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +95 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +67 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +52 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +46 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +46 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +46 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +46 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +44 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +103 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +82 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +58 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +117 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +30 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +51 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +93 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +94 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +92 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +65 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +59 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +58 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +45 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +70 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +75 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +75 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +155 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +64 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +77 -0
- scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +210 -0
- scitex/plt/_subplots/_export_as_csv_formatters/verify_formatters.py +342 -0
- scitex/plt/_subplots/_export_as_csv_formatters.py +115 -0
- scitex/plt/_tpl.py +28 -0
- scitex/plt/ax/__init__.py +114 -0
- scitex/plt/ax/_plot/__init__.py +53 -0
- scitex/plt/ax/_plot/_plot_circular_hist.py +124 -0
- scitex/plt/ax/_plot/_plot_conf_mat.py +136 -0
- scitex/plt/ax/_plot/_plot_cube.py +57 -0
- scitex/plt/ax/_plot/_plot_ecdf.py +84 -0
- scitex/plt/ax/_plot/_plot_fillv.py +55 -0
- scitex/plt/ax/_plot/_plot_heatmap.py +266 -0
- scitex/plt/ax/_plot/_plot_image.py +94 -0
- scitex/plt/ax/_plot/_plot_joyplot.py +76 -0
- scitex/plt/ax/_plot/_plot_raster.py +172 -0
- scitex/plt/ax/_plot/_plot_rectangle.py +69 -0
- scitex/plt/ax/_plot/_plot_scatter_hist.py +133 -0
- scitex/plt/ax/_plot/_plot_shaded_line.py +142 -0
- scitex/plt/ax/_plot/_plot_statistical_shaded_line.py +221 -0
- scitex/plt/ax/_plot/_plot_violin.py +343 -0
- scitex/plt/ax/_style/__init__.py +38 -0
- scitex/plt/ax/_style/_add_marginal_ax.py +44 -0
- scitex/plt/ax/_style/_add_panel.py +92 -0
- scitex/plt/ax/_style/_extend.py +64 -0
- scitex/plt/ax/_style/_force_aspect.py +37 -0
- scitex/plt/ax/_style/_format_label.py +23 -0
- scitex/plt/ax/_style/_hide_spines.py +84 -0
- scitex/plt/ax/_style/_map_ticks.py +182 -0
- scitex/plt/ax/_style/_rotate_labels.py +215 -0
- scitex/plt/ax/_style/_sci_note.py +279 -0
- scitex/plt/ax/_style/_set_log_scale.py +299 -0
- scitex/plt/ax/_style/_set_meta.py +261 -0
- scitex/plt/ax/_style/_set_n_ticks.py +37 -0
- scitex/plt/ax/_style/_set_size.py +16 -0
- scitex/plt/ax/_style/_set_supxyt.py +116 -0
- scitex/plt/ax/_style/_set_ticks.py +276 -0
- scitex/plt/ax/_style/_set_xyt.py +121 -0
- scitex/plt/ax/_style/_share_axes.py +264 -0
- scitex/plt/ax/_style/_shift.py +139 -0
- scitex/plt/ax/_style/_show_spines.py +333 -0
- scitex/plt/color/_PARAMS.py +70 -0
- scitex/plt/color/__init__.py +52 -0
- scitex/plt/color/_add_hue_col.py +41 -0
- scitex/plt/color/_colors.py +205 -0
- scitex/plt/color/_get_colors_from_cmap.py +134 -0
- scitex/plt/color/_interpolate.py +29 -0
- scitex/plt/color/_vizualize_colors.py +54 -0
- scitex/plt/utils/__init__.py +44 -0
- scitex/plt/utils/_calc_bacc_from_conf_mat.py +46 -0
- scitex/plt/utils/_calc_nice_ticks.py +101 -0
- scitex/plt/utils/_close.py +68 -0
- scitex/plt/utils/_colorbar.py +96 -0
- scitex/plt/utils/_configure_mpl.py +295 -0
- scitex/plt/utils/_histogram_utils.py +132 -0
- scitex/plt/utils/_im2grid.py +70 -0
- scitex/plt/utils/_is_valid_axis.py +78 -0
- scitex/plt/utils/_mk_colorbar.py +65 -0
- scitex/plt/utils/_mk_patches.py +26 -0
- scitex/plt/utils/_scientific_captions.py +638 -0
- scitex/plt/utils/_scitex_config.py +223 -0
- scitex/reproduce/__init__.py +14 -0
- scitex/reproduce/_fix_seeds.py +45 -0
- scitex/reproduce/_gen_ID.py +55 -0
- scitex/reproduce/_gen_timestamp.py +35 -0
- scitex/res/__init__.py +5 -0
- scitex/resource/__init__.py +13 -0
- scitex/resource/_get_processor_usages.py +281 -0
- scitex/resource/_get_specs.py +280 -0
- scitex/resource/_log_processor_usages.py +190 -0
- scitex/resource/_utils/__init__.py +31 -0
- scitex/resource/_utils/_get_env_info.py +481 -0
- scitex/resource/limit_ram.py +33 -0
- scitex/scholar/__init__.py +24 -0
- scitex/scholar/_local_search.py +454 -0
- scitex/scholar/_paper.py +244 -0
- scitex/scholar/_pdf_downloader.py +325 -0
- scitex/scholar/_search.py +393 -0
- scitex/scholar/_vector_search.py +370 -0
- scitex/scholar/_web_sources.py +457 -0
- scitex/stats/__init__.py +31 -0
- scitex/stats/_calc_partial_corr.py +17 -0
- scitex/stats/_corr_test_multi.py +94 -0
- scitex/stats/_corr_test_wrapper.py +115 -0
- scitex/stats/_describe_wrapper.py +90 -0
- scitex/stats/_multiple_corrections.py +63 -0
- scitex/stats/_nan_stats.py +93 -0
- scitex/stats/_p2stars.py +116 -0
- scitex/stats/_p2stars_wrapper.py +56 -0
- scitex/stats/_statistical_tests.py +73 -0
- scitex/stats/desc/__init__.py +40 -0
- scitex/stats/desc/_describe.py +189 -0
- scitex/stats/desc/_nan.py +289 -0
- scitex/stats/desc/_real.py +94 -0
- scitex/stats/multiple/__init__.py +14 -0
- scitex/stats/multiple/_bonferroni_correction.py +72 -0
- scitex/stats/multiple/_fdr_correction.py +400 -0
- scitex/stats/multiple/_multicompair.py +28 -0
- scitex/stats/tests/__corr_test.py +277 -0
- scitex/stats/tests/__corr_test_multi.py +343 -0
- scitex/stats/tests/__corr_test_single.py +277 -0
- scitex/stats/tests/__init__.py +22 -0
- scitex/stats/tests/_brunner_munzel_test.py +192 -0
- scitex/stats/tests/_nocorrelation_test.py +28 -0
- scitex/stats/tests/_smirnov_grubbs.py +98 -0
- scitex/str/__init__.py +113 -0
- scitex/str/_clean_path.py +75 -0
- scitex/str/_color_text.py +52 -0
- scitex/str/_decapitalize.py +58 -0
- scitex/str/_factor_out_digits.py +281 -0
- scitex/str/_format_plot_text.py +498 -0
- scitex/str/_grep.py +48 -0
- scitex/str/_latex.py +155 -0
- scitex/str/_latex_fallback.py +471 -0
- scitex/str/_mask_api.py +39 -0
- scitex/str/_mask_api_key.py +8 -0
- scitex/str/_parse.py +158 -0
- scitex/str/_print_block.py +47 -0
- scitex/str/_print_debug.py +68 -0
- scitex/str/_printc.py +62 -0
- scitex/str/_readable_bytes.py +38 -0
- scitex/str/_remove_ansi.py +23 -0
- scitex/str/_replace.py +134 -0
- scitex/str/_search.py +125 -0
- scitex/str/_squeeze_space.py +36 -0
- scitex/tex/__init__.py +10 -0
- scitex/tex/_preview.py +103 -0
- scitex/tex/_to_vec.py +116 -0
- scitex/torch/__init__.py +18 -0
- scitex/torch/_apply_to.py +34 -0
- scitex/torch/_nan_funcs.py +77 -0
- scitex/types/_ArrayLike.py +44 -0
- scitex/types/_ColorLike.py +21 -0
- scitex/types/__init__.py +14 -0
- scitex/types/_is_listed_X.py +70 -0
- scitex/utils/__init__.py +22 -0
- scitex/utils/_compress_hdf5.py +116 -0
- scitex/utils/_email.py +120 -0
- scitex/utils/_grid.py +148 -0
- scitex/utils/_notify.py +247 -0
- scitex/utils/_search.py +121 -0
- scitex/web/__init__.py +38 -0
- scitex/web/_search_pubmed.py +438 -0
- scitex/web/_summarize_url.py +158 -0
- scitex-2.0.0.dist-info/METADATA +307 -0
- scitex-2.0.0.dist-info/RECORD +572 -0
- scitex-2.0.0.dist-info/WHEEL +6 -0
- scitex-2.0.0.dist-info/licenses/LICENSE +7 -0
- scitex-2.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,457 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Time-stamp: "2024-12-06 10:10:00"
|
|
4
|
+
# Author: Claude
|
|
5
|
+
# Filename: _web_sources.py
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
Web source search functions for scientific papers.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
import aiohttp
|
|
13
|
+
from typing import List, Optional, Dict, Any
|
|
14
|
+
import xml.etree.ElementTree as ET
|
|
15
|
+
from datetime import datetime
|
|
16
|
+
import re
|
|
17
|
+
import logging
|
|
18
|
+
from urllib.parse import quote
|
|
19
|
+
|
|
20
|
+
from ._paper import Paper
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
async def search_pubmed(
    query: str,
    max_results: int = 10,
    session: Optional[aiohttp.ClientSession] = None,
) -> List[Paper]:
    """Search PubMed for papers.

    The query is first resolved to a list of PMIDs via the NCBI E-utilities
    ``esearch`` endpoint; the matching records are then downloaded as XML
    via ``efetch`` and converted to ``Paper`` objects.

    Parameters
    ----------
    query : str
        Search query
    max_results : int
        Maximum number of results
    session : aiohttp.ClientSession, optional
        Aiohttp session for connection pooling. When omitted, a temporary
        session is created and closed before returning.

    Returns
    -------
    List[Paper]
        List of papers from PubMed. The search is best-effort: network,
        HTTP and parsing errors are logged rather than raised, and whatever
        was collected up to that point (possibly nothing) is returned.
    """
    papers: List[Paper] = []

    # Only close the session if it was created here; a caller-supplied
    # session is left open so the caller can keep reusing it.
    close_session = session is None
    if close_session:
        session = aiohttp.ClientSession()

    try:
        # Step 1: search for PMIDs
        search_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
        search_params = {
            "db": "pubmed",
            "term": query,
            "retmax": max_results,
            "retmode": "json",
        }

        async with session.get(search_url, params=search_params) as response:
            # Surface HTTP failures (e.g. rate limiting) as an explicit
            # error instead of trying to decode an error page as JSON.
            response.raise_for_status()
            data = await response.json()

        pmids = data.get("esearchresult", {}).get("idlist", [])
        if not pmids:
            logger.info("No results found for query: %s", query)
            return papers

        # Step 2: fetch details for PMIDs
        fetch_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
        fetch_params = {
            "db": "pubmed",
            "id": ",".join(pmids),
            "retmode": "xml",
        }

        async with session.get(fetch_url, params=fetch_params) as response:
            response.raise_for_status()
            xml_data = await response.text()

        root = ET.fromstring(xml_data)

        for article in root.findall(".//PubmedArticle"):
            # One malformed record must not discard the other results.
            try:
                paper = _parse_pubmed_article(article)
            except Exception as e:
                logger.error("Error parsing PubMed article: %s", e)
                continue
            if paper is not None:
                papers.append(paper)

    except Exception as e:
        # Deliberate best-effort boundary: log and return what we have.
        logger.error("Error searching PubMed: %s", e)
    finally:
        if close_session:
            await session.close()

    return papers


def _pubmed_element_text(elem: Optional[ET.Element]) -> str:
    """Return the full, stripped text of *elem*, or "" if it is missing.

    ``itertext()`` is used instead of ``.text`` because PubMed titles,
    abstracts and keywords may contain inline markup (``<i>``, ``<sub>``,
    ...); ``.text`` would stop at the first child element or be ``None``.
    """
    if elem is None:
        return ""
    return "".join(elem.itertext()).strip()


def _pubmed_year(pub_date: Optional[ET.Element]) -> Optional[int]:
    """Extract a four-digit publication year from a ``PubDate`` element.

    Falls back to ``MedlineDate`` (free text such as "2019 Nov-Dec") when
    no usable ``Year`` child exists. Returns ``None`` if no year is found.
    """
    if pub_date is None:
        return None
    for tag in ("Year", "MedlineDate"):
        match = re.search(r"\d{4}", _pubmed_element_text(pub_date.find(tag)))
        if match:
            return int(match.group())
    return None


def _pubmed_doi(article: ET.Element, medline: ET.Element) -> Optional[str]:
    """Return the DOI of the article itself, or ``None``.

    The lookup is restricted to the article's own ``ArticleIdList``: a
    blanket ``.//ArticleId`` search would also match the identifiers of
    cited papers inside ``ReferenceList`` and could return a wrong DOI.
    ``ELocationID`` is used as a fallback.
    """
    for id_elem in article.findall("./PubmedData/ArticleIdList/ArticleId"):
        if id_elem.get("IdType") == "doi":
            doi = _pubmed_element_text(id_elem)
            if doi:
                return doi
    for loc_elem in medline.findall(".//ELocationID"):
        if loc_elem.get("EIdType") == "doi":
            doi = _pubmed_element_text(loc_elem)
            if doi:
                return doi
    return None


def _parse_pubmed_article(article: ET.Element) -> Optional[Paper]:
    """Convert one ``PubmedArticle`` element into a ``Paper``.

    Returns ``None`` when the record has no ``MedlineCitation`` section.
    """
    medline = article.find(".//MedlineCitation")
    if medline is None:
        return None

    title = _pubmed_element_text(medline.find(".//ArticleTitle")) or "No title"

    # Personal authors are "ForeName LastName"; group authors only carry a
    # CollectiveName and would otherwise be silently dropped.
    authors = []
    for author in medline.findall(".//Author"):
        last_name = _pubmed_element_text(author.find("LastName"))
        if last_name:
            fore_name = _pubmed_element_text(author.find("ForeName"))
            authors.append(f"{fore_name} {last_name}" if fore_name else last_name)
            continue
        collective = _pubmed_element_text(author.find("CollectiveName"))
        if collective:
            authors.append(collective)

    # Structured abstracts are split over several AbstractText elements.
    abstract_parts = [
        text
        for text in map(_pubmed_element_text, medline.findall(".//AbstractText"))
        if text
    ]
    abstract = " ".join(abstract_parts) if abstract_parts else "No abstract available"

    year = _pubmed_year(medline.find(".//PubDate"))
    journal = _pubmed_element_text(medline.find(".//Journal/Title")) or None
    pmid = _pubmed_element_text(medline.find(".//PMID")) or None
    doi = _pubmed_doi(article, medline)

    keywords = [
        text
        for text in map(_pubmed_element_text, medline.findall(".//Keyword"))
        if text
    ]

    return Paper(
        title=title,
        authors=authors,
        abstract=abstract,
        source="pubmed",
        year=year,
        doi=doi,
        pmid=pmid,
        journal=journal,
        keywords=keywords,
    )
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
async def search_arxiv(
    query: str,
    max_results: int = 10,
    session: Optional[aiohttp.ClientSession] = None,
) -> List[Paper]:
    """Search arXiv for papers.

    Queries the arXiv export API, parses the returned Atom feed and converts
    each entry into a ``Paper``. Errors are logged, never raised: a failed
    request yields an empty list and a malformed entry is skipped.

    Parameters
    ----------
    query : str
        Search query (sent as ``all:<query>``)
    max_results : int
        Maximum number of results
    session : aiohttp.ClientSession, optional
        Aiohttp session for connection pooling. When omitted, a temporary
        session is created and closed before returning.

    Returns
    -------
    List[Paper]
        List of papers from arXiv
    """
    papers = []
    close_session = False

    if session is None:
        session = aiohttp.ClientSession()
        close_session = True

    def _text(elem):
        """Return the stripped text of an XML element, or None if missing/empty."""
        if elem is None or elem.text is None:
            return None
        return elem.text.strip()

    try:
        # arXiv API URL
        url = "http://export.arxiv.org/api/query"
        params = {
            "search_query": f"all:{query}",
            "start": 0,
            "max_results": max_results,
            "sortBy": "relevance",
            "sortOrder": "descending",
        }

        async with session.get(url, params=params) as response:
            # Do not try to parse error responses as search results.
            if response.status != 200:
                logger.warning(f"arXiv API returned status {response.status}")
                return papers
            xml_data = await response.text()

        # Parse XML with namespace
        root = ET.fromstring(xml_data)
        namespace = {"atom": "http://www.w3.org/2005/Atom"}

        for entry in root.findall("atom:entry", namespace):
            try:
                # Title / abstract fall back to placeholders when absent or empty
                title = _text(entry.find("atom:title", namespace)) or "No title"
                abstract = (
                    _text(entry.find("atom:summary", namespace)) or "No abstract"
                )

                # Authors
                authors = []
                for author_elem in entry.findall("atom:author", namespace):
                    name = _text(author_elem.find("atom:name", namespace))
                    if name:
                        authors.append(name)

                # arXiv ID: extracted from the entry URL (".../abs/<id>")
                arxiv_id = None
                id_text = _text(entry.find("atom:id", namespace))
                if id_text:
                    match = re.search(r"arxiv\.org/abs/(.+)", id_text)
                    if match:
                        arxiv_id = match.group(1)

                # Published date (year): first four characters of the timestamp
                year = None
                published_text = _text(entry.find("atom:published", namespace))
                if published_text:
                    try:
                        year = int(published_text[:4])
                    except ValueError:
                        # Unparseable date: leave the year unknown.
                        pass

                # DOI and PDF link: both come from <link> elements, one pass
                doi = None
                pdf_url = None
                for link_elem in entry.findall("atom:link", namespace):
                    if doi is None and link_elem.get("title") == "doi":
                        # Keep only the bare DOI, whatever resolver prefix is used
                        doi = re.sub(
                            r"^https?://(dx\.)?doi\.org/",
                            "",
                            link_elem.get("href", ""),
                        )
                    if pdf_url is None and link_elem.get("type") == "application/pdf":
                        pdf_url = link_elem.get("href")

                # Categories as keywords
                keywords = [
                    term
                    for term in (
                        cat_elem.get("term")
                        for cat_elem in entry.findall("atom:category", namespace)
                    )
                    if term
                ]

                # Create Paper object
                paper = Paper(
                    title=title,
                    authors=authors,
                    abstract=abstract,
                    source="arxiv",
                    year=year,
                    doi=doi,
                    arxiv_id=arxiv_id,
                    keywords=keywords,
                    metadata={"pdf_url": pdf_url} if pdf_url else {},
                )

                papers.append(paper)

            except Exception as e:
                # Best effort: one bad entry must not discard the rest.
                logger.error(f"Error parsing arXiv entry: {e}")
                continue

    except Exception as e:
        logger.error(f"Error searching arXiv: {e}")
    finally:
        # Only close sessions this function created itself.
        if close_session:
            await session.close()

    return papers
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
async def search_semantic_scholar(
    query: str,
    max_results: int = 10,
    session: Optional[aiohttp.ClientSession] = None,
) -> List[Paper]:
    """Search Semantic Scholar for papers.

    Calls the Semantic Scholar Graph API paper search endpoint and converts
    each returned item into a ``Paper``. Errors are logged, never raised: a
    failed request yields an empty list and a malformed item is skipped.

    Parameters
    ----------
    query : str
        Search query
    max_results : int
        Maximum number of results
    session : aiohttp.ClientSession, optional
        Aiohttp session for connection pooling. When omitted, a temporary
        session is created and closed before returning.

    Returns
    -------
    List[Paper]
        List of papers from Semantic Scholar
    """
    papers = []
    close_session = False

    if session is None:
        session = aiohttp.ClientSession()
        close_session = True

    try:
        # Semantic Scholar API
        url = "https://api.semanticscholar.org/graph/v1/paper/search"
        params = {
            "query": query,
            "limit": max_results,
            # DOI and arXiv identifiers are exposed through `externalIds`
            # in the Graph API rather than as top-level fields.
            "fields": "paperId,title,abstract,authors,year,externalIds,publicationTypes,journal",
        }

        headers = {
            "User-Agent": "SciTeX Scholar Library",
        }

        async with session.get(url, params=params, headers=headers) as response:
            if response.status == 200:
                data = await response.json()

                # The API uses explicit nulls for missing values, so every
                # lookup below uses `or <default>`: dict.get's default only
                # applies when the key is absent, not when it is null.
                for item in data.get("data") or []:
                    try:
                        # Extract authors
                        authors = [
                            author_data.get("name")
                            for author_data in item.get("authors") or []
                            if author_data.get("name")
                        ]

                        external_ids = item.get("externalIds") or {}
                        journal_info = item.get("journal") or {}

                        # Create Paper object
                        paper = Paper(
                            title=item.get("title") or "No title",
                            authors=authors,
                            abstract=item.get("abstract") or "No abstract available",
                            source="semantic_scholar",
                            year=item.get("year"),
                            # Fall back to the legacy top-level keys if present
                            doi=external_ids.get("DOI") or item.get("doi"),
                            arxiv_id=external_ids.get("ArXiv") or item.get("arxivId"),
                            journal=journal_info.get("name"),
                            keywords=item.get("publicationTypes") or [],
                            metadata={"ss_id": item.get("paperId")},
                        )

                        papers.append(paper)

                    except Exception as e:
                        # Best effort: one bad item must not discard the rest.
                        logger.error(f"Error parsing Semantic Scholar paper: {e}")
                        continue
            else:
                logger.warning(f"Semantic Scholar API returned status {response.status}")

    except Exception as e:
        logger.error(f"Error searching Semantic Scholar: {e}")
    finally:
        # Only close sessions this function created itself.
        if close_session:
            await session.close()

    return papers
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
async def search_all_sources(
    query: str,
    max_results_per_source: int = 5,
    sources: Optional[List[str]] = None,
) -> Dict[str, List[Paper]]:
    """Search multiple sources concurrently.

    All requested searches share one aiohttp session and are run together
    with ``asyncio.gather``. A failure in one source is logged and reported
    as an empty list; it does not affect the other sources.

    Parameters
    ----------
    query : str
        Search query
    max_results_per_source : int
        Maximum results per source
    sources : List[str], optional
        Sources to search (default: all available). Recognized names are
        "pubmed", "arxiv" and "semantic_scholar"; other names are ignored.

    Returns
    -------
    Dict[str, List[Paper]]
        Dictionary mapping source names to paper lists
    """
    # Imported locally so this function does not depend on a module-level
    # `import asyncio` being present.
    import asyncio

    if sources is None:
        sources = ["pubmed", "arxiv", "semantic_scholar"]

    # Dispatch table; its order fixes the order of keys in the result.
    searchers = {
        "pubmed": search_pubmed,
        "arxiv": search_arxiv,
        "semantic_scholar": search_semantic_scholar,
    }
    selected = [name for name in searchers if name in sources]

    # Create session for connection pooling
    async with aiohttp.ClientSession() as session:
        # Run searches concurrently; exceptions are returned, not raised,
        # so one failing source cannot cancel the others.
        outcomes = await asyncio.gather(
            *(
                searchers[name](query, max_results_per_source, session)
                for name in selected
            ),
            return_exceptions=True,
        )

    results = {}
    for source_name, outcome in zip(selected, outcomes):
        if isinstance(outcome, BaseException):
            logger.error(f"Error searching {source_name}: {outcome}")
            results[source_name] = []
        else:
            results[source_name] = outcome
            logger.info(f"Found {len(outcome)} papers from {source_name}")

    return results
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
# Example usage
|
|
439
|
+
if __name__ == "__main__":
    # Imported here so the demo does not rely on a module-level import.
    import asyncio

    async def main():
        """Demo: query each source individually, then all sources at once."""
        # Search individual sources
        print("Searching PubMed...")
        pubmed_papers = await search_pubmed("machine learning cancer", max_results=3)
        for paper in pubmed_papers:
            print(f"- {paper.title}")

        print("\nSearching arXiv...")
        arxiv_papers = await search_arxiv("neural networks", max_results=3)
        for paper in arxiv_papers:
            print(f"- {paper.title}")

        print("\nSearching all sources...")
        all_results = await search_all_sources("deep learning", max_results_per_source=2)
        for source, papers in all_results.items():
            print(f"\n{source}: {len(papers)} papers")
            for paper in papers:
                print(f"  - {paper.title}")

    # Without this call the coroutine function is only defined, never run.
    asyncio.run(main())
|
scitex/stats/__init__.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Scitex stats module."""
|
|
3
|
+
|
|
4
|
+
from ._calc_partial_corr import calc_partial_corr
|
|
5
|
+
from ._corr_test_multi import corr_test_multi, nocorrelation_test
|
|
6
|
+
from ._corr_test_wrapper import corr_test, corr_test_pearson, corr_test_spearman
|
|
7
|
+
from ._describe_wrapper import describe
|
|
8
|
+
from ._multiple_corrections import bonferroni_correction, fdr_correction, multicompair
|
|
9
|
+
from ._nan_stats import nan, real
|
|
10
|
+
from ._p2stars import p2stars
|
|
11
|
+
from ._p2stars_wrapper import p2stars
|
|
12
|
+
from ._statistical_tests import brunner_munzel_test, smirnov_grubbs
|
|
13
|
+
|
|
14
|
+
# Public API of the stats package, kept in alphabetical order.
# `p2stars` is imported twice above (from `._p2stars` and then from
# `._p2stars_wrapper`); the later import is the binding that is exported,
# so the name is listed only once here.
__all__ = [
    "bonferroni_correction",
    "brunner_munzel_test",
    "calc_partial_corr",
    "corr_test",
    "corr_test_multi",
    "corr_test_pearson",
    "corr_test_spearman",
    "describe",
    "fdr_correction",
    "multicompair",
    "nan",
    "nocorrelation_test",
    "p2stars",
    "real",
    "smirnov_grubbs",
]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def calc_partial_corr(x, y, z):
    """Return the partial correlation of x and y with z controlled for.

    Removes the linear influence of the variable ``z`` from the correlation
    between ``x`` and ``y`` using the first-order formula::

        r_xy.z = (r_xy - r_xz * r_yz) / (sqrt(1 - r_xz**2) * sqrt(1 - r_yz**2))

    Parameters
    ----------
    x, y, z : array-like
        Numeric sequences accepted by ``np.corrcoef`` as paired samples.

    Returns
    -------
    float
        The partial correlation coefficient. The denominator is zero when
        ``z`` is perfectly correlated with ``x`` or ``y``, in which case the
        result is not a finite number.
    """
    # np.longdouble is the platform's widest float and exists everywhere;
    # np.float128 is only defined on platforms where long double is 128-bit.
    x = np.asarray(x, dtype=np.longdouble)
    y = np.asarray(y, dtype=np.longdouble)
    z = np.asarray(z, dtype=np.longdouble)

    # Pairwise Pearson coefficients (off-diagonal entry of each 2x2 matrix)
    r_xy = np.corrcoef(x, y)[0, 1]
    r_xz = np.corrcoef(x, z)[0, 1]
    r_yz = np.corrcoef(y, z)[0, 1]

    r_xy_z = (r_xy - r_xz * r_yz) / (np.sqrt(1 - r_xz**2) * np.sqrt(1 - r_yz**2))
    return float(r_xy_z)
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Time-stamp: "2025-05-30 auto-created"
|
|
4
|
+
# File: ./src/scitex/stats/_corr_test_multi.py
|
|
5
|
+
|
|
6
|
+
"""
|
|
7
|
+
Multiple correlation tests for dataframes
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
import pandas as pd
|
|
12
|
+
from scipy import stats
|
|
13
|
+
from typing import Union
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def corr_test_multi(data: Union[pd.DataFrame, np.ndarray]) -> pd.DataFrame:
    """
    Compute pairwise Pearson correlations between all columns of a DataFrame.

    Parameters
    ----------
    data : pd.DataFrame or np.ndarray
        Data with multiple variables (columns). An array is wrapped in a
        DataFrame, so its columns are labelled 0, 1, 2, ...

    Returns
    -------
    pd.DataFrame
        Square matrix indexed by the column labels on both axes, holding the
        Pearson correlation coefficient of each pair. The diagonal is set to
        1.0 without being computed. p-values are not returned.
    """
    if isinstance(data, np.ndarray):
        # Convert to DataFrame if array
        data = pd.DataFrame(data)

    cols = data.columns

    # Initialize correlation matrix (all NaN until filled in below)
    corr_matrix = pd.DataFrame(index=cols, columns=cols, dtype=float)

    for i, col1 in enumerate(cols):
        # Diagonal is always 1
        corr_matrix.loc[col1, col1] = 1.0

        # Pearson's r is symmetric: compute each unordered pair once and
        # mirror it into both halves of the matrix.
        for col2 in cols[i + 1:]:
            corr_coef, _ = stats.pearsonr(data[col1], data[col2])
            corr_matrix.loc[col1, col2] = corr_coef
            corr_matrix.loc[col2, col1] = corr_coef

    return corr_matrix
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def nocorrelation_test(x: np.ndarray, y: np.ndarray) -> dict:
    """
    Test the null hypothesis that there is no correlation between x and y.

    Pairs in which either value is NaN are dropped before testing.

    Parameters
    ----------
    x : np.ndarray
        First variable
    y : np.ndarray
        Second variable

    Returns
    -------
    dict
        Dictionary containing:
        - statistic: The t statistic, r * sqrt((n - 2) / (1 - r**2));
          infinite when the correlation is exactly +1 or -1
        - p_value: The p-value reported by ``scipy.stats.pearsonr``
        - correlation: The Pearson correlation coefficient r
        - n: Number of pairs used after dropping NaNs
    """
    # Convert to numpy arrays
    x = np.asarray(x)
    y = np.asarray(y)

    # Remove NaN values (pairwise: drop a pair if either side is NaN)
    mask = ~(np.isnan(x) | np.isnan(y))
    x = x[mask]
    y = y[mask]

    # Calculate correlation and p-value
    corr_coef, p_value = stats.pearsonr(x, y)

    # Calculate test statistic (t-statistic for correlation)
    n = len(x)
    # A perfect correlation makes the denominator zero; the resulting
    # inf/nan is the intended value, so silence the floating-point warning.
    with np.errstate(divide="ignore", invalid="ignore"):
        t_stat = corr_coef * np.sqrt((n - 2) / (1 - corr_coef**2))

    return {
        "statistic": float(t_stat),
        "p_value": float(p_value),
        "correlation": float(corr_coef),
        "n": n,
    }
|