scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +53 -15
- scitex/__main__.py +72 -26
- scitex/__version__.py +1 -1
- scitex/_sh.py +145 -23
- scitex/ai/__init__.py +30 -16
- scitex/ai/_gen_ai/_Anthropic.py +5 -7
- scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
- scitex/ai/_gen_ai/_DeepSeek.py +10 -2
- scitex/ai/_gen_ai/_Google.py +2 -2
- scitex/ai/_gen_ai/_Llama.py +2 -2
- scitex/ai/_gen_ai/_OpenAI.py +2 -2
- scitex/ai/_gen_ai/_PARAMS.py +51 -65
- scitex/ai/_gen_ai/_Perplexity.py +2 -2
- scitex/ai/_gen_ai/__init__.py +25 -14
- scitex/ai/_gen_ai/_format_output_func.py +4 -4
- scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
- scitex/ai/classification/CrossValidationExperiment.py +374 -0
- scitex/ai/classification/__init__.py +43 -4
- scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ai/classification/reporters/__init__.py +11 -0
- scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ai/classification/timeseries/__init__.py +39 -0
- scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ai/clustering/_umap.py +2 -2
- scitex/ai/feature_extraction/vit.py +1 -0
- scitex/ai/feature_selection/__init__.py +30 -0
- scitex/ai/feature_selection/feature_selection.py +364 -0
- scitex/ai/loss/multi_task_loss.py +1 -1
- scitex/ai/metrics/__init__.py +51 -4
- scitex/ai/metrics/_calc_bacc.py +61 -0
- scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ai/metrics/_calc_clf_report.py +78 -0
- scitex/ai/metrics/_calc_conf_mat.py +93 -0
- scitex/ai/metrics/_calc_feature_importance.py +183 -0
- scitex/ai/metrics/_calc_mcc.py +61 -0
- scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ai/metrics/_calc_roc_auc.py +110 -0
- scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
- scitex/ai/metrics/_normalize_labels.py +83 -0
- scitex/ai/plt/__init__.py +47 -8
- scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
- scitex/ai/plt/_plot_feature_importance.py +323 -0
- scitex/ai/plt/_plot_learning_curve.py +345 -0
- scitex/ai/plt/_plot_optuna_study.py +225 -0
- scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ai/plt/_plot_roc_curve.py +255 -0
- scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
- scitex/ai/training/__init__.py +2 -2
- scitex/ai/utils/grid_search.py +3 -3
- scitex/benchmark/__init__.py +52 -0
- scitex/benchmark/benchmark.py +400 -0
- scitex/benchmark/monitor.py +370 -0
- scitex/benchmark/profiler.py +297 -0
- scitex/browser/__init__.py +48 -0
- scitex/browser/automation/CookieHandler.py +216 -0
- scitex/browser/automation/__init__.py +7 -0
- scitex/browser/collaboration/__init__.py +55 -0
- scitex/browser/collaboration/auth_helpers.py +94 -0
- scitex/browser/collaboration/collaborative_agent.py +136 -0
- scitex/browser/collaboration/credential_manager.py +188 -0
- scitex/browser/collaboration/interactive_panel.py +400 -0
- scitex/browser/collaboration/persistent_browser.py +170 -0
- scitex/browser/collaboration/shared_session.py +383 -0
- scitex/browser/collaboration/standard_interactions.py +246 -0
- scitex/browser/collaboration/visual_feedback.py +181 -0
- scitex/browser/core/BrowserMixin.py +326 -0
- scitex/browser/core/ChromeProfileManager.py +446 -0
- scitex/browser/core/__init__.py +9 -0
- scitex/browser/debugging/__init__.py +18 -0
- scitex/browser/debugging/_browser_logger.py +657 -0
- scitex/browser/debugging/_highlight_element.py +143 -0
- scitex/browser/debugging/_show_grid.py +154 -0
- scitex/browser/interaction/__init__.py +24 -0
- scitex/browser/interaction/click_center.py +149 -0
- scitex/browser/interaction/click_with_fallbacks.py +206 -0
- scitex/browser/interaction/close_popups.py +498 -0
- scitex/browser/interaction/fill_with_fallbacks.py +209 -0
- scitex/browser/pdf/__init__.py +14 -0
- scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
- scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
- scitex/browser/remote/CaptchaHandler.py +434 -0
- scitex/browser/remote/ZenRowsAPIClient.py +347 -0
- scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
- scitex/browser/remote/__init__.py +11 -0
- scitex/browser/stealth/HumanBehavior.py +344 -0
- scitex/browser/stealth/StealthManager.py +1008 -0
- scitex/browser/stealth/__init__.py +9 -0
- scitex/browser/template.py +122 -0
- scitex/capture/__init__.py +110 -0
- scitex/capture/__main__.py +25 -0
- scitex/capture/capture.py +848 -0
- scitex/capture/cli.py +233 -0
- scitex/capture/gif.py +344 -0
- scitex/capture/mcp_server.py +961 -0
- scitex/capture/session.py +70 -0
- scitex/capture/utils.py +705 -0
- scitex/cli/__init__.py +17 -0
- scitex/cli/cloud.py +447 -0
- scitex/cli/main.py +42 -0
- scitex/cli/scholar.py +280 -0
- scitex/context/_suppress_output.py +5 -3
- scitex/db/__init__.py +30 -3
- scitex/db/__main__.py +75 -0
- scitex/db/_check_health.py +381 -0
- scitex/db/_delete_duplicates.py +25 -386
- scitex/db/_inspect.py +335 -114
- scitex/db/_inspect_optimized.py +301 -0
- scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
- scitex/db/_postgresql/__init__.py +6 -0
- scitex/db/_sqlite3/_SQLite3.py +210 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
- scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
- scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
- scitex/db/_sqlite3/__init__.py +7 -0
- scitex/db/_sqlite3/_delete_duplicates.py +274 -0
- scitex/decorators/__init__.py +2 -0
- scitex/decorators/_cache_disk.py +13 -5
- scitex/decorators/_cache_disk_async.py +49 -0
- scitex/decorators/_deprecated.py +175 -10
- scitex/decorators/_timeout.py +1 -1
- scitex/dev/_analyze_code_flow.py +2 -2
- scitex/dict/_DotDict.py +73 -15
- scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
- scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
- scitex/dict/__init__.py +2 -0
- scitex/dict/_flatten.py +27 -0
- scitex/dsp/_crop.py +2 -2
- scitex/dsp/_demo_sig.py +2 -2
- scitex/dsp/_detect_ripples.py +2 -2
- scitex/dsp/_hilbert.py +2 -2
- scitex/dsp/_listen.py +6 -6
- scitex/dsp/_modulation_index.py +2 -2
- scitex/dsp/_pac.py +1 -1
- scitex/dsp/_psd.py +2 -2
- scitex/dsp/_resample.py +2 -1
- scitex/dsp/_time.py +3 -2
- scitex/dsp/_wavelet.py +3 -2
- scitex/dsp/add_noise.py +2 -2
- scitex/dsp/example.py +1 -0
- scitex/dsp/filt.py +10 -9
- scitex/dsp/template.py +3 -2
- scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
- scitex/dsp/utils/pac.py +2 -2
- scitex/dt/_normalize_timestamp.py +432 -0
- scitex/errors.py +572 -0
- scitex/gen/_DimHandler.py +2 -2
- scitex/gen/__init__.py +37 -7
- scitex/gen/_deprecated_close.py +80 -0
- scitex/gen/_deprecated_start.py +26 -0
- scitex/gen/_detect_environment.py +152 -0
- scitex/gen/_detect_notebook_path.py +169 -0
- scitex/gen/_embed.py +6 -2
- scitex/gen/_get_notebook_path.py +257 -0
- scitex/gen/_less.py +1 -1
- scitex/gen/_list_packages.py +2 -2
- scitex/gen/_norm.py +44 -9
- scitex/gen/_norm_cache.py +269 -0
- scitex/gen/_src.py +3 -5
- scitex/gen/_title_case.py +3 -3
- scitex/io/__init__.py +28 -6
- scitex/io/_glob.py +13 -7
- scitex/io/_load.py +108 -21
- scitex/io/_load_cache.py +303 -0
- scitex/io/_load_configs.py +40 -15
- scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
- scitex/io/_load_modules/_ZarrExplorer.py +114 -0
- scitex/io/_load_modules/_bibtex.py +207 -0
- scitex/io/_load_modules/_hdf5.py +53 -178
- scitex/io/_load_modules/_json.py +5 -3
- scitex/io/_load_modules/_pdf.py +871 -16
- scitex/io/_load_modules/_sqlite3.py +15 -0
- scitex/io/_load_modules/_txt.py +41 -12
- scitex/io/_load_modules/_yaml.py +4 -3
- scitex/io/_load_modules/_zarr.py +126 -0
- scitex/io/_save.py +429 -171
- scitex/io/_save_modules/__init__.py +6 -0
- scitex/io/_save_modules/_bibtex.py +194 -0
- scitex/io/_save_modules/_csv.py +8 -4
- scitex/io/_save_modules/_excel.py +174 -15
- scitex/io/_save_modules/_hdf5.py +251 -226
- scitex/io/_save_modules/_image.py +1 -3
- scitex/io/_save_modules/_json.py +49 -4
- scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
- scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
- scitex/io/_save_modules/_tex.py +277 -0
- scitex/io/_save_modules/_yaml.py +42 -3
- scitex/io/_save_modules/_zarr.py +160 -0
- scitex/io/utils/__init__.py +20 -0
- scitex/io/utils/h5_to_zarr.py +616 -0
- scitex/linalg/_geometric_median.py +6 -2
- scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
- scitex/logging/__init__.py +122 -0
- scitex/logging/_config.py +158 -0
- scitex/logging/_context.py +103 -0
- scitex/logging/_formatters.py +128 -0
- scitex/logging/_handlers.py +64 -0
- scitex/logging/_levels.py +35 -0
- scitex/logging/_logger.py +163 -0
- scitex/logging/_print_capture.py +95 -0
- scitex/ml/__init__.py +69 -0
- scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
- scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
- scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
- scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
- scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
- scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
- scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
- scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
- scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
- scitex/ml/_gen_ai/__init__.py +43 -0
- scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
- scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
- scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
- scitex/ml/activation/__init__.py +8 -0
- scitex/ml/activation/_define.py +11 -0
- scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
- scitex/ml/classification/CrossValidationExperiment.py +374 -0
- scitex/ml/classification/__init__.py +46 -0
- scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ml/classification/reporters/__init__.py +11 -0
- scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ml/classification/timeseries/__init__.py +39 -0
- scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ml/clustering/__init__.py +11 -0
- scitex/ml/clustering/_pca.py +115 -0
- scitex/ml/clustering/_umap.py +376 -0
- scitex/ml/feature_extraction/__init__.py +56 -0
- scitex/ml/feature_extraction/vit.py +149 -0
- scitex/ml/feature_selection/__init__.py +30 -0
- scitex/ml/feature_selection/feature_selection.py +364 -0
- scitex/ml/loss/_L1L2Losses.py +34 -0
- scitex/ml/loss/__init__.py +12 -0
- scitex/ml/loss/multi_task_loss.py +47 -0
- scitex/ml/metrics/__init__.py +56 -0
- scitex/ml/metrics/_calc_bacc.py +61 -0
- scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ml/metrics/_calc_clf_report.py +78 -0
- scitex/ml/metrics/_calc_conf_mat.py +93 -0
- scitex/ml/metrics/_calc_feature_importance.py +183 -0
- scitex/ml/metrics/_calc_mcc.py +61 -0
- scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ml/metrics/_calc_roc_auc.py +110 -0
- scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ml/metrics/_calc_silhouette_score.py +503 -0
- scitex/ml/metrics/_normalize_labels.py +83 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
- scitex/ml/optim/__init__.py +13 -0
- scitex/ml/optim/_get_set.py +31 -0
- scitex/ml/optim/_optimizers.py +71 -0
- scitex/ml/plt/__init__.py +60 -0
- scitex/ml/plt/_plot_conf_mat.py +663 -0
- scitex/ml/plt/_plot_feature_importance.py +323 -0
- scitex/ml/plt/_plot_learning_curve.py +345 -0
- scitex/ml/plt/_plot_optuna_study.py +225 -0
- scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ml/plt/_plot_roc_curve.py +255 -0
- scitex/ml/sk/__init__.py +11 -0
- scitex/ml/sk/_clf.py +58 -0
- scitex/ml/sk/_to_sktime.py +100 -0
- scitex/ml/sklearn/__init__.py +26 -0
- scitex/ml/sklearn/clf.py +58 -0
- scitex/ml/sklearn/to_sktime.py +100 -0
- scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
- scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
- scitex/ml/training/__init__.py +7 -0
- scitex/ml/utils/__init__.py +22 -0
- scitex/ml/utils/_check_params.py +50 -0
- scitex/ml/utils/_default_dataset.py +46 -0
- scitex/ml/utils/_format_samples_for_sktime.py +26 -0
- scitex/ml/utils/_label_encoder.py +134 -0
- scitex/ml/utils/_merge_labels.py +22 -0
- scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
- scitex/ml/utils/_under_sample.py +51 -0
- scitex/ml/utils/_verify_n_gpus.py +16 -0
- scitex/ml/utils/grid_search.py +148 -0
- scitex/nn/_BNet.py +15 -9
- scitex/nn/_Filters.py +2 -2
- scitex/nn/_ModulationIndex.py +2 -2
- scitex/nn/_PAC.py +1 -1
- scitex/nn/_Spectrogram.py +12 -3
- scitex/nn/__init__.py +9 -10
- scitex/path/__init__.py +18 -0
- scitex/path/_clean.py +4 -0
- scitex/path/_find.py +9 -4
- scitex/path/_symlink.py +348 -0
- scitex/path/_version.py +4 -3
- scitex/pd/__init__.py +2 -0
- scitex/pd/_get_unique.py +99 -0
- scitex/plt/__init__.py +114 -5
- scitex/plt/_subplots/_AxesWrapper.py +1 -3
- scitex/plt/_subplots/_AxisWrapper.py +7 -3
- scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
- scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
- scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
- scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
- scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
- scitex/plt/_subplots/_FigWrapper.py +62 -6
- scitex/plt/_subplots/_export_as_csv.py +43 -27
- scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
- scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
- scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
- scitex/plt/ax/_style/_hide_spines.py +1 -3
- scitex/plt/ax/_style/_rotate_labels.py +180 -76
- scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
- scitex/plt/ax/_style/_set_meta.py +11 -4
- scitex/plt/ax/_style/_set_supxyt.py +3 -3
- scitex/plt/ax/_style/_set_xyt.py +3 -3
- scitex/plt/ax/_style/_share_axes.py +2 -2
- scitex/plt/color/__init__.py +4 -4
- scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
- scitex/plt/utils/_configure_mpl.py +99 -86
- scitex/plt/utils/_histogram_utils.py +1 -3
- scitex/plt/utils/_is_valid_axis.py +1 -3
- scitex/plt/utils/_scitex_config.py +1 -0
- scitex/repro/__init__.py +75 -0
- scitex/{reproduce → repro}/_gen_ID.py +1 -1
- scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
- scitex/repro_rng/_RandomStateManager.py +590 -0
- scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/repro_rng/__init__.py +39 -0
- scitex/reproduce/__init__.py +25 -13
- scitex/reproduce/_hash_array.py +22 -0
- scitex/resource/_get_processor_usages.py +4 -4
- scitex/resource/_get_specs.py +2 -2
- scitex/resource/_log_processor_usages.py +2 -2
- scitex/rng/_RandomStateManager.py +590 -0
- scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/rng/__init__.py +39 -0
- scitex/scholar/__init__.py +309 -19
- scitex/scholar/__main__.py +319 -0
- scitex/scholar/auth/ScholarAuthManager.py +308 -0
- scitex/scholar/auth/__init__.py +12 -0
- scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
- scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
- scitex/scholar/auth/core/StrategyResolver.py +309 -0
- scitex/scholar/auth/core/__init__.py +16 -0
- scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
- scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
- scitex/scholar/auth/gateway/__init__.py +38 -0
- scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
- scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
- scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
- scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
- scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
- scitex/scholar/auth/providers/__init__.py +18 -0
- scitex/scholar/auth/session/AuthCacheManager.py +189 -0
- scitex/scholar/auth/session/SessionManager.py +159 -0
- scitex/scholar/auth/session/__init__.py +11 -0
- scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
- scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
- scitex/scholar/auth/sso/SSOAutomator.py +180 -0
- scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
- scitex/scholar/auth/sso/__init__.py +15 -0
- scitex/scholar/browser/ScholarBrowserManager.py +705 -0
- scitex/scholar/browser/__init__.py +38 -0
- scitex/scholar/browser/utils/__init__.py +13 -0
- scitex/scholar/browser/utils/click_and_wait.py +205 -0
- scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
- scitex/scholar/browser/utils/wait_redirects.py +732 -0
- scitex/scholar/config/PublisherRules.py +132 -0
- scitex/scholar/config/ScholarConfig.py +126 -0
- scitex/scholar/config/__init__.py +17 -0
- scitex/scholar/core/Paper.py +627 -0
- scitex/scholar/core/Papers.py +722 -0
- scitex/scholar/core/Scholar.py +1975 -0
- scitex/scholar/core/__init__.py +9 -0
- scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
- scitex/scholar/impact_factor/__init__.py +20 -0
- scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
- scitex/scholar/impact_factor/estimation/__init__.py +40 -0
- scitex/scholar/impact_factor/estimation/build_database.py +0 -0
- scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
- scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
- scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
- scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
- scitex/scholar/integration/__init__.py +59 -0
- scitex/scholar/integration/base.py +502 -0
- scitex/scholar/integration/mendeley/__init__.py +22 -0
- scitex/scholar/integration/mendeley/exporter.py +166 -0
- scitex/scholar/integration/mendeley/importer.py +236 -0
- scitex/scholar/integration/mendeley/linker.py +79 -0
- scitex/scholar/integration/mendeley/mapper.py +212 -0
- scitex/scholar/integration/zotero/__init__.py +27 -0
- scitex/scholar/integration/zotero/__main__.py +264 -0
- scitex/scholar/integration/zotero/exporter.py +351 -0
- scitex/scholar/integration/zotero/importer.py +372 -0
- scitex/scholar/integration/zotero/linker.py +415 -0
- scitex/scholar/integration/zotero/mapper.py +286 -0
- scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
- scitex/scholar/metadata_engines/__init__.py +21 -0
- scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
- scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
- scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
- scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
- scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
- scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
- scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
- scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
- scitex/scholar/metadata_engines/individual/__init__.py +7 -0
- scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
- scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
- scitex/scholar/metadata_engines/utils/__init__.py +30 -0
- scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
- scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
- scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
- scitex/scholar/pdf_download/__init__.py +5 -0
- scitex/scholar/pdf_download/strategies/__init__.py +38 -0
- scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
- scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
- scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
- scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
- scitex/scholar/pdf_download/strategies/response_body.py +207 -0
- scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
- scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
- scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
- scitex/scholar/pipelines/__init__.py +49 -0
- scitex/scholar/storage/BibTeXHandler.py +1018 -0
- scitex/scholar/storage/PaperIO.py +468 -0
- scitex/scholar/storage/ScholarLibrary.py +182 -0
- scitex/scholar/storage/_DeduplicationManager.py +548 -0
- scitex/scholar/storage/_LibraryCacheManager.py +724 -0
- scitex/scholar/storage/_LibraryManager.py +1835 -0
- scitex/scholar/storage/__init__.py +28 -0
- scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
- scitex/scholar/url_finder/__init__.py +7 -0
- scitex/scholar/url_finder/strategies/__init__.py +33 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
- scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
- scitex/scholar/utils/__init__.py +22 -0
- scitex/scholar/utils/bibtex/__init__.py +9 -0
- scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
- scitex/scholar/utils/cleanup/__init__.py +8 -0
- scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
- scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
- scitex/scholar/utils/text/_TextNormalizer.py +407 -0
- scitex/scholar/utils/text/__init__.py +9 -0
- scitex/scholar/zotero/__init__.py +38 -0
- scitex/session/__init__.py +51 -0
- scitex/session/_lifecycle.py +736 -0
- scitex/session/_manager.py +102 -0
- scitex/session/template.py +122 -0
- scitex/stats/__init__.py +30 -26
- scitex/stats/correct/__init__.py +21 -0
- scitex/stats/correct/_correct_bonferroni.py +551 -0
- scitex/stats/correct/_correct_fdr.py +634 -0
- scitex/stats/correct/_correct_holm.py +548 -0
- scitex/stats/correct/_correct_sidak.py +499 -0
- scitex/stats/descriptive/__init__.py +85 -0
- scitex/stats/descriptive/_circular.py +540 -0
- scitex/stats/descriptive/_describe.py +219 -0
- scitex/stats/descriptive/_nan.py +518 -0
- scitex/stats/descriptive/_real.py +189 -0
- scitex/stats/effect_sizes/__init__.py +41 -0
- scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
- scitex/stats/effect_sizes/_cohens_d.py +342 -0
- scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
- scitex/stats/effect_sizes/_eta_squared.py +302 -0
- scitex/stats/effect_sizes/_prob_superiority.py +296 -0
- scitex/stats/posthoc/__init__.py +19 -0
- scitex/stats/posthoc/_dunnett.py +463 -0
- scitex/stats/posthoc/_games_howell.py +383 -0
- scitex/stats/posthoc/_tukey_hsd.py +367 -0
- scitex/stats/power/__init__.py +19 -0
- scitex/stats/power/_power.py +433 -0
- scitex/stats/template.py +119 -0
- scitex/stats/utils/__init__.py +62 -0
- scitex/stats/utils/_effect_size.py +985 -0
- scitex/stats/utils/_formatters.py +270 -0
- scitex/stats/utils/_normalizers.py +927 -0
- scitex/stats/utils/_power.py +433 -0
- scitex/stats_v01/_EffectSizeCalculator.py +488 -0
- scitex/stats_v01/_StatisticalValidator.py +411 -0
- scitex/stats_v01/__init__.py +60 -0
- scitex/stats_v01/_additional_tests.py +415 -0
- scitex/{stats → stats_v01}/_p2stars.py +19 -5
- scitex/stats_v01/_two_sample_tests.py +141 -0
- scitex/stats_v01/desc/__init__.py +83 -0
- scitex/stats_v01/desc/_circular.py +540 -0
- scitex/stats_v01/desc/_describe.py +219 -0
- scitex/stats_v01/desc/_nan.py +518 -0
- scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
- scitex/stats_v01/desc/_real.py +189 -0
- scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
- scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
- scitex/str/__init__.py +1 -3
- scitex/str/_clean_path.py +6 -2
- scitex/str/_latex_fallback.py +267 -160
- scitex/str/_parse.py +44 -36
- scitex/str/_printc.py +1 -3
- scitex/template/__init__.py +87 -0
- scitex/template/_create_project.py +267 -0
- scitex/template/create_pip_project.py +80 -0
- scitex/template/create_research.py +80 -0
- scitex/template/create_singularity.py +80 -0
- scitex/units.py +291 -0
- scitex/utils/_compress_hdf5.py +14 -3
- scitex/utils/_email.py +21 -2
- scitex/utils/_grid.py +6 -4
- scitex/utils/_notify.py +13 -10
- scitex/utils/_verify_scitex_format.py +589 -0
- scitex/utils/_verify_scitex_format_v01.py +370 -0
- scitex/utils/template.py +122 -0
- scitex/web/_search_pubmed.py +62 -16
- scitex-2.1.0.dist-info/LICENSE +21 -0
- scitex-2.1.0.dist-info/METADATA +677 -0
- scitex-2.1.0.dist-info/RECORD +919 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
- scitex-2.1.0.dist-info/entry_points.txt +3 -0
- scitex/ai/__Classifiers.py +0 -101
- scitex/ai/classification/classification_reporter.py +0 -1137
- scitex/ai/classification/classifiers.py +0 -101
- scitex/ai/classification_reporter.py +0 -1161
- scitex/ai/genai/__init__.py +0 -277
- scitex/ai/genai/anthropic_provider.py +0 -320
- scitex/ai/genai/anthropic_refactored.py +0 -109
- scitex/ai/genai/auth_manager.py +0 -200
- scitex/ai/genai/base_provider.py +0 -291
- scitex/ai/genai/chat_history.py +0 -307
- scitex/ai/genai/cost_tracker.py +0 -276
- scitex/ai/genai/deepseek_provider.py +0 -251
- scitex/ai/genai/google_provider.py +0 -228
- scitex/ai/genai/groq_provider.py +0 -248
- scitex/ai/genai/image_processor.py +0 -250
- scitex/ai/genai/llama_provider.py +0 -214
- scitex/ai/genai/mock_provider.py +0 -127
- scitex/ai/genai/model_registry.py +0 -304
- scitex/ai/genai/openai_provider.py +0 -293
- scitex/ai/genai/perplexity_provider.py +0 -205
- scitex/ai/genai/provider_base.py +0 -302
- scitex/ai/genai/provider_factory.py +0 -370
- scitex/ai/genai/response_handler.py +0 -235
- scitex/ai/layer/_Pass.py +0 -21
- scitex/ai/layer/__init__.py +0 -10
- scitex/ai/layer/_switch.py +0 -8
- scitex/ai/metrics/_bACC.py +0 -51
- scitex/ai/plt/_learning_curve.py +0 -194
- scitex/ai/plt/_optuna_study.py +0 -111
- scitex/ai/plt/aucs/__init__.py +0 -2
- scitex/ai/plt/aucs/example.py +0 -60
- scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
- scitex/ai/plt/aucs/roc_auc.py +0 -246
- scitex/ai/sampling/undersample.py +0 -29
- scitex/db/_SQLite3.py +0 -2136
- scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
- scitex/gen/_close.py +0 -222
- scitex/gen/_start.py +0 -451
- scitex/general/__init__.py +0 -5
- scitex/io/_load_modules/_db.py +0 -24
- scitex/life/__init__.py +0 -10
- scitex/life/_monitor_rain.py +0 -49
- scitex/reproduce/_fix_seeds.py +0 -45
- scitex/res/__init__.py +0 -5
- scitex/scholar/_local_search.py +0 -454
- scitex/scholar/_paper.py +0 -244
- scitex/scholar/_pdf_downloader.py +0 -325
- scitex/scholar/_search.py +0 -393
- scitex/scholar/_vector_search.py +0 -370
- scitex/scholar/_web_sources.py +0 -457
- scitex/stats/desc/__init__.py +0 -40
- scitex-2.0.0.dist-info/METADATA +0 -307
- scitex-2.0.0.dist-info/RECORD +0 -572
- scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
- /scitex/ai/{act → activation}/__init__.py +0 -0
- /scitex/ai/{act → activation}/_define.py +0 -0
- /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
- /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
- /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
- /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
- /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
- /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
- /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
- /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
- /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,722 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-09-30 22:24:29 (ywatanabe)"
|
|
4
|
+
# File: /home/ywatanabe/proj/SciTeX-Code/src/scitex/scholar/core/Papers.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
import os
|
|
8
|
+
__FILE__ = __file__
|
|
9
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
10
|
+
# ----------------------------------------
|
|
11
|
+
|
|
12
|
+
"""Papers class for SciTeX Scholar module.
|
|
13
|
+
|
|
14
|
+
Papers is a simple collection of Paper objects.
|
|
15
|
+
All business logic is handled by Scholar or utility functions.
|
|
16
|
+
|
|
17
|
+
This is a simplified version - reduced from 39 methods to ~15 methods.
|
|
18
|
+
Business logic has been moved to Scholar and utility functions.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Any, Callable, Dict, Iterator, List, Optional, Union
|
|
23
|
+
|
|
24
|
+
from scitex import logging
|
|
25
|
+
from scitex.scholar.config import ScholarConfig
|
|
26
|
+
from scitex.scholar.core.Paper import Paper
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class Papers:
    """A lightweight, ordered collection of Paper objects.

    The class stores papers and offers list-like access plus thin
    convenience wrappers. Criteria-based filtering, multi-key sorting and
    export are delegated to ``scitex.scholar.utils.papers_utils``. The
    BibTeX loaders and ``save`` are kept for backward compatibility and
    log a deprecation warning when called.
    """

    # Output format implied by the file suffix when ``save`` auto-detects.
    _FORMAT_BY_SUFFIX = {
        ".bib": "bibtex",
        ".bibtex": "bibtex",
        ".json": "json",
        ".csv": "csv",
    }
    # Formats ``save`` knows how to write.
    _SUPPORTED_FORMATS = ("bibtex", "json", "csv")

    def __init__(
        self,
        papers: Optional[Union[List[Paper], List[Dict]]] = None,
        project: Optional[str] = None,
        config: Optional[ScholarConfig] = None,
    ):
        """Initialize Papers collection.

        Args:
            papers: Paper objects and/or dicts. Dicts are converted with
                ``Paper.from_dict``; items of any other type are skipped
                with a warning.
            project: Project name; falls back to ``"default"`` when falsy.
            config: Scholar configuration; a new ``ScholarConfig`` is
                created when falsy.
        """
        self.project = project or "default"
        self.config = config or ScholarConfig()
        self._papers: List[Paper] = []

        for item in papers or ():
            if isinstance(item, Paper):
                self._papers.append(item)
            elif isinstance(item, dict):
                # Validation of the dict content is left to Paper.from_dict.
                self._papers.append(Paper.from_dict(item))
            else:
                logger.warning(f"Skipping invalid item type: {type(item)}")

    # =========================================================================
    # BASIC COLLECTION METHODS
    # =========================================================================

    def __len__(self) -> int:
        """Number of papers in collection."""
        return len(self._papers)

    def __iter__(self) -> Iterator[Paper]:
        """Iterate over papers."""
        return iter(self._papers)

    def __getitem__(self, index: Union[int, slice]) -> Union[Paper, "Papers"]:
        """Get paper(s) by index or slice.

        Args:
            index: Integer index or slice

        Returns:
            Single Paper if integer index; a new Papers collection with the
            same project and config if slice.
        """
        if isinstance(index, slice):
            return Papers(
                self._papers[index], project=self.project, config=self.config
            )
        return self._papers[index]

    def __repr__(self) -> str:
        """String representation."""
        return f"Papers(count={len(self)}, project={self.project})"

    def __str__(self) -> str:
        """Human-readable string."""
        count = len(self)
        if count == 0:
            return "Empty Papers collection"
        if count == 1:
            return "Papers collection with 1 paper"
        return f"Papers collection with {count} papers"

    def __dir__(self) -> List[str]:
        """Custom dir for better discoverability."""
        public_api = [
            "papers",
            "filter",
            "sort_by",
            "append",
            "extend",
            "to_list",
            "summary",
            "to_dict",
            "to_dataframe",
            "from_bibtex",
            "save",
        ]
        return sorted({*object.__dir__(self), *public_api})

    # =========================================================================
    # SIMPLE COLLECTION OPERATIONS
    # =========================================================================

    @property
    def papers(self) -> List[Paper]:
        """Get the underlying papers list (the live list, not a copy)."""
        return self._papers

    def append(self, paper: Paper) -> None:
        """Add a paper to the collection.

        Non-Paper objects are rejected with a warning instead of raising.

        Args:
            paper: Paper to add
        """
        if isinstance(paper, Paper):
            self._papers.append(paper)
        else:
            logger.warning(f"Cannot append non-Paper object: {type(paper)}")

    def extend(self, papers: Union[List[Paper], "Papers"]) -> None:
        """Add multiple papers to the collection.

        Args:
            papers: List of papers or another Papers collection. List items
                that are not Paper objects are skipped with a warning.
        """
        if isinstance(papers, Papers):
            self._papers.extend(papers._papers)
        elif isinstance(papers, list):
            for paper in papers:
                if isinstance(paper, Paper):
                    self._papers.append(paper)
                else:
                    # Warn like __init__ does rather than dropping silently.
                    logger.warning(
                        f"Skipping invalid item type: {type(paper)}"
                    )
        else:
            logger.warning(f"Cannot extend with type: {type(papers)}")

    def to_list(self) -> List[Paper]:
        """Get papers as a list.

        Returns:
            A new list holding the same Paper objects.
        """
        return list(self._papers)

    def filter(
        self,
        condition: Optional[Callable[[Paper], bool]] = None,
        year_min: Optional[int] = None,
        year_max: Optional[int] = None,
        has_doi: Optional[bool] = None,
        has_abstract: Optional[bool] = None,
        has_pdf: Optional[bool] = None,
        min_citations: Optional[int] = None,
        max_citations: Optional[int] = None,
        min_impact_factor: Optional[float] = None,
        max_impact_factor: Optional[float] = None,
        journal: Optional[str] = None,
        author: Optional[str] = None,
        keyword: Optional[str] = None,
        publisher: Optional[str] = None,
        **kwargs,
    ) -> "Papers":
        """Filter papers by condition or criteria.

        When a callable ``condition`` is given it is the only filter
        applied; the keyword criteria are ignored in that case. Otherwise
        the criteria are forwarded to
        ``papers_utils.filter_papers_advanced``.

        Args:
            condition: Function that takes a Paper and returns bool
            year_min: Minimum year
            year_max: Maximum year
            has_doi: Filter papers with/without DOI
            has_abstract: Filter papers with/without abstract
            has_pdf: Filter papers with/without PDF URL
            min_citations: Minimum citation count
            max_citations: Maximum citation count
            min_impact_factor: Minimum journal impact factor
            max_impact_factor: Maximum journal impact factor
            journal: Journal name (partial match)
            author: Author name (partial match)
            keyword: Keyword (searches in keywords, title, abstract)
            publisher: Publisher name (partial match)
            **kwargs: Accepted for backward compatibility and ignored

        Returns:
            New Papers collection with filtered papers, carrying this
            collection's project and config.

        Examples:
            # Callable condition
            recent = papers.filter(
                lambda p: p.metadata.basic.year
                and p.metadata.basic.year >= 2020
            )

            # Built-in criteria (combined with AND by chaining or together)
            recent_v2 = papers.filter(year_min=2020)
            mid_cited = papers.filter(min_citations=100, max_citations=1000)
            uncited = papers.filter(max_citations=0)
            nature = papers.filter(journal="Nature", has_doi=True)
            elite_recent = papers.filter(min_impact_factor=10).filter(
                year_min=2020
            )
        """
        if condition is not None and callable(condition):
            filtered = [p for p in self._papers if condition(p)]
            logger.info(
                f"Lambda filter: {len(self._papers)} -> {len(filtered)} papers"
            )
            return Papers(filtered, project=self.project, config=self.config)

        from scitex.scholar.utils.papers_utils import filter_papers_advanced

        # Bounds are forwarded unchanged: an ``x or fallback`` expression
        # would turn a legitimate 0 / 0.0 bound into None (no filtering).
        result = filter_papers_advanced(
            self,
            year_min=year_min,
            year_max=year_max,
            has_doi=has_doi,
            has_abstract=has_abstract,
            has_pdf=has_pdf,
            min_citations=min_citations,
            max_citations=max_citations,
            min_impact_factor=min_impact_factor,
            max_impact_factor=max_impact_factor,
            journal=journal,
            author=author,
            keyword=keyword,
            publisher=publisher,
        )

        # Preserve project and config
        result.project = self.project
        result.config = self.config

        logger.info(f"Filtered: {len(self._papers)} -> {len(result)} papers")
        return result

    def sort_by(self, *criteria, reverse: bool = False, **kwargs) -> "Papers":
        """Sort papers by criteria.

        A single callable is used directly as the sort key. Anything else
        (one or more field names) is delegated to
        ``papers_utils.sort_papers_multi``.

        Args:
            *criteria: Field names (as strings) or a key function to sort by
            reverse: Sort in descending order (default: False)
            **kwargs: Accepted for backward compatibility and ignored

        Returns:
            New sorted Papers collection carrying this collection's project
            and config. With no criteria, a copy in the current order.

        Field names listed by the original documentation (resolved by
        ``sort_papers_multi``): 'title', 'year', 'citation_count',
        'journal_impact_factor', 'journal', 'publisher', 'doi',
        'created_at', 'updated_at'.

        Examples:
            by_year = papers.sort_by('year')
            by_citations_desc = papers.sort_by('citation_count', reverse=True)
            by_year_then_citations = papers.sort_by('year', 'citation_count')

            # Key function with explicit handling of missing values
            by_year_safe = papers.sort_by(
                lambda p: p.metadata.basic.year or 9999
            )

            for p in papers.sort_by('year', reverse=True)[:10]:
                print(p.metadata.basic.title)
        """
        if not criteria:
            return Papers(
                self._papers, project=self.project, config=self.config
            )

        if len(criteria) == 1 and callable(criteria[0]):
            ordered = sorted(self._papers, key=criteria[0], reverse=reverse)
            return Papers(ordered, project=self.project, config=self.config)

        from scitex.scholar.utils.papers_utils import sort_papers_multi

        result = sort_papers_multi(self, list(criteria), reverse=reverse)
        # Keep project/config on the result, as filter() does.
        if isinstance(result, Papers):
            result.project = self.project
            result.config = self.config
        return result

    # =========================================================================
    # BACKWARD COMPATIBILITY METHODS
    # These delegate to utilities or Scholar for the actual implementation
    # =========================================================================

    @staticmethod
    def _is_existing_file(candidate: Union[str, Path]) -> bool:
        """Return True only if ``candidate`` names an existing regular file.

        BibTeX text passed as a string can be far longer than a legal file
        name; probing it may raise OSError (e.g. ENAMETOOLONG) or
        ValueError, which simply means "not a file" here.
        """
        try:
            return Path(candidate).is_file()
        except (OSError, ValueError):
            return False

    @staticmethod
    def _split_bibtex_authors(author_field: str) -> List[str]:
        """Split a BibTeX ``author`` value on the ``and`` separator.

        Any whitespace (including line breaks) around ``and`` is accepted,
        inner whitespace is collapsed, and empty names are dropped.
        """
        import re

        pieces = re.split(r"\s+and\s+", author_field.strip())
        names = (" ".join(piece.split()) for piece in pieces)
        return [name for name in names if name]

    @staticmethod
    def _parse_bibtex_year(raw: Any) -> Optional[int]:
        """Return the year as int, or None if ``raw`` is not a plain number.

        Surrounding whitespace and braces are tolerated.
        """
        text = str(raw).strip().strip("{}").strip()
        if text.isascii() and text.isdigit():
            return int(text)
        return None

    @classmethod
    def _resolve_save_format(
        cls, output_path: Path, format: Optional[str]
    ) -> str:
        """Normalize the ``format`` argument of ``save``.

        ``None`` and ``"auto"`` (any case) pick the format from the file
        suffix, defaulting to BibTeX for unknown suffixes.

        Raises:
            ValueError: If an explicit format is not supported.
        """
        requested = (format or "auto").lower()
        if requested == "auto":
            return cls._FORMAT_BY_SUFFIX.get(
                output_path.suffix.lower(), "bibtex"
            )
        if requested not in cls._SUPPORTED_FORMATS:
            raise ValueError(f"Unsupported format: {format}")
        return requested

    @classmethod
    def _entries_to_papers(cls, entries: List[Dict[str, Any]]) -> List[Paper]:
        """Convert parsed BibTeX entries to Paper objects, dropping falsy results."""
        converted = (cls._bibtex_entry_to_paper(entry) for entry in entries)
        return [paper for paper in converted if paper]

    @classmethod
    def from_bibtex(cls, bibtex_input: Union[str, Path]) -> "Papers":
        """Load papers from BibTeX.

        DEPRECATED: Use Scholar.from_bibtex() instead.
        This method is kept for backward compatibility.

        Args:
            bibtex_input: Path to BibTeX file or BibTeX string. The input is
                read as a file only if it names an existing regular file;
                otherwise it is parsed as BibTeX text.

        Returns:
            Papers collection
        """
        logger.warning(
            "Papers.from_bibtex() is deprecated. Use Scholar.from_bibtex() instead."
        )

        if isinstance(bibtex_input, (str, Path)) and cls._is_existing_file(
            bibtex_input
        ):
            return cls._from_bibtex_file(Path(bibtex_input))

        return cls._from_bibtex_text(str(bibtex_input))

    @classmethod
    def _from_bibtex_file(cls, file_path: Union[str, Path]) -> "Papers":
        """Load papers from BibTeX file.

        Args:
            file_path: Path to BibTeX file

        Returns:
            Papers collection

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
        """
        import bibtexparser

        file_path = Path(file_path)
        if not file_path.exists():
            raise FileNotFoundError(f"BibTeX file not found: {file_path}")

        logger.info(f"Loading BibTeX from {file_path}")

        with open(file_path, "r", encoding="utf-8") as f:
            bib_db = bibtexparser.load(f)

        logger.info(
            f"Loaded {len(bib_db.entries)} BibTeX entries from {file_path}"
        )

        papers = cls._entries_to_papers(bib_db.entries)
        logger.success(f"Created {len(papers)} papers from BibTeX file")
        return cls(papers)

    @classmethod
    def _from_bibtex_text(cls, bibtex_content: str) -> "Papers":
        """Load papers from BibTeX text.

        Args:
            bibtex_content: BibTeX content as string

        Returns:
            Papers collection
        """
        import bibtexparser

        bib_db = bibtexparser.loads(bibtex_content)
        logger.info(f"Parsed {len(bib_db.entries)} BibTeX entries from text")

        papers = cls._entries_to_papers(bib_db.entries)
        logger.success(f"Created {len(papers)} papers from BibTeX text")
        return cls(papers)

    @staticmethod
    def _bibtex_entry_to_paper(entry: Dict[str, Any]) -> Paper:
        """Convert BibTeX entry to Paper object.

        Args:
            entry: BibTeX entry dictionary as produced by bibtexparser

        Returns:
            Paper object. The lower-cased original fields, the entry type
            and the citation key are attached as private attributes for
            later reconstruction.
        """
        # Field names are matched case-insensitively.
        fields = {k.lower(): v for k, v in entry.items()}

        authors = (
            Papers._split_bibtex_authors(fields["author"])
            if "author" in fields
            else []
        )
        year = (
            Papers._parse_bibtex_year(fields["year"])
            if "year" in fields
            else None
        )
        keywords = []
        if "keywords" in fields:
            keywords = [
                kw.strip() for kw in fields["keywords"].split(",") if kw.strip()
            ]

        paper = Paper()

        # Basic metadata
        paper.metadata.basic.title = fields.get("title", "").strip("{}")
        paper.metadata.basic.authors = authors
        paper.metadata.basic.abstract = fields.get("abstract", "")
        paper.metadata.basic.year = year
        paper.metadata.basic.keywords = keywords

        # Identifiers
        doi = fields.get("doi")
        if doi:
            paper.metadata.set_doi(doi)
        paper.metadata.id.pmid = fields.get("pmid")
        paper.metadata.id.arxiv_id = fields.get("arxiv")

        # Publication metadata
        paper.metadata.publication.journal = fields.get("journal")

        # The BibTeX ``url`` field is recorded as a PDF candidate.
        url = fields.get("url")
        if url:
            paper.metadata.url.pdfs.append({"url": url, "source": "bibtex"})

        # Store original BibTeX fields for later reconstruction.
        # bibtexparser's entry dicts carry the type and citation key under
        # "ENTRYTYPE" and "ID" ("entrytype"/"id" after lower-casing); the
        # "entry_type"/"key" spellings are kept as a fallback.
        paper._original_bibtex_fields = fields.copy()
        paper._bibtex_entry_type = (
            fields.get("entrytype") or fields.get("entry_type") or "misc"
        )
        paper._bibtex_key = fields.get("id") or fields.get("key") or ""

        return paper

    def save(
        self,
        output_path: Union[str, Path],
        format: Optional[str] = "auto",
        **kwargs,
    ) -> None:
        """Save papers to file.

        DEPRECATED: Use Scholar.save_papers() or Scholar.export_bibtex() instead.
        This method is kept for backward compatibility.

        Args:
            output_path: Path to save file; parent directories are created
            format: Output format (auto, bibtex, json, csv), case-insensitive.
                ``None`` is treated like ``"auto"``, which picks the format
                from the file suffix and defaults to BibTeX.
            **kwargs: Accepted for backward compatibility and ignored

        Raises:
            ValueError: If ``format`` is not supported.
        """
        logger.warning(
            "Papers.save() is deprecated. Use Scholar.export_bibtex() instead."
        )

        output_path = Path(output_path)
        resolved = self._resolve_save_format(output_path, format)

        output_path.parent.mkdir(parents=True, exist_ok=True)

        if resolved == "bibtex":
            from scitex.scholar.utils.papers_utils import papers_to_bibtex

            bibtex_content = papers_to_bibtex(self, output_path=None)
            # Explicit UTF-8 so non-ASCII names survive on any platform,
            # matching the JSON branch.
            output_path.write_text(bibtex_content, encoding="utf-8")

        elif resolved == "json":
            import json

            from scitex.scholar.utils.papers_utils import papers_to_dict

            with open(output_path, "w", encoding="utf-8") as f:
                json.dump(
                    papers_to_dict(self), f, indent=2, ensure_ascii=False
                )

        else:  # "csv" - the only remaining supported format
            from scitex.scholar.utils.papers_utils import papers_to_dataframe

            df = papers_to_dataframe(self)
            df.to_csv(output_path, index=False)

        logger.success(f"Saved {len(self)} papers to {output_path}")

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary.

        DEPRECATED: Use papers_utils.papers_to_dict() for new code.

        Returns:
            Dictionary representation
        """
        from scitex.scholar.utils.papers_utils import papers_to_dict

        return papers_to_dict(self)

    def to_dataframe(self) -> Any:
        """Convert to pandas DataFrame.

        DEPRECATED: Use papers_utils.papers_to_dataframe() for new code.

        Returns:
            DataFrame with papers data, or None if an ImportError occurs
            while loading or running the conversion.
        """
        try:
            from scitex.scholar.utils.papers_utils import papers_to_dataframe

            return papers_to_dataframe(self)
        except ImportError:
            logger.error("pandas is required for to_dataframe()")
            return None

    def summary(self) -> Dict[str, Any]:
        """Get summary statistics.

        DEPRECATED: Use papers_utils.papers_statistics() for new code.

        Returns:
            Dictionary with statistics
        """
        from scitex.scholar.utils.papers_utils import papers_statistics

        return papers_statistics(self)
|
|
653
|
+
|
|
654
|
+
# =========================================================================
|
|
655
|
+
# METHODS REMOVED (use Scholar or utilities instead):
|
|
656
|
+
# =========================================================================
|
|
657
|
+
# The following methods have been removed to simplify the class:
|
|
658
|
+
# - sync_with_library() -> Use Scholar internally
|
|
659
|
+
# - create_project_symlinks() -> Use Scholar internally
|
|
660
|
+
# - get_project_statistics() -> Use Scholar.get_library_statistics()
|
|
661
|
+
# - download_pdfs() -> Use Scholar.download_pdfs()
|
|
662
|
+
# - enrich() -> Use Scholar.enrich()
|
|
663
|
+
# - merge_papers() -> Use papers_utils.merge_papers()
|
|
664
|
+
# - deduplicate() -> Use papers_utils.deduplicate_papers()
|
|
665
|
+
#
|
|
666
|
+
# This reduces complexity from 39 methods to ~15 methods.
|
|
667
|
+
# All business logic is now in Scholar or utility functions.
|
|
668
|
+
|
|
669
|
+
|
|
670
|
+
# For backward compatibility
|
|
671
|
+
__all__ = ["Papers"]
|
|
672
|
+
|
|
673
|
+
|
|
674
|
+
if __name__ == "__main__":

    def main():
        """Run a small console demo of the Papers collection.

        Builds three sample papers, then prints the collection, a
        lambda-filtered subset and the papers sorted by year.
        """
        banner = "=" * 60
        print(banner)
        print("Papers Class - Simplified Collection")
        print(banner)

        # Sample papers, created in the same order as they are listed.
        sample_specs = (
            ("Paper 1", 2023, "Nature"),
            ("Paper 2", 2024, "Science"),
            ("Paper 3", 2022, "Cell"),
        )
        samples = []
        for title, year, journal in sample_specs:
            sample = Paper()
            sample.metadata.basic.title = title
            sample.metadata.basic.year = year
            sample.metadata.publication.journal = journal
            samples.append(sample)

        papers = Papers(samples)

        print(f"\n1. Collection: {papers}")
        print(f"   Count: {len(papers)}")
        print(f"   First: {papers[0].metadata.basic.title}")

        # Filtering with a callable condition
        def is_recent(p):
            return p.metadata.basic.year and p.metadata.basic.year >= 2023

        recent = papers.filter(is_recent)
        print(f"\n2. Filtered (year >= 2023): {len(recent)} papers")

        # Sorting with a key function
        def year_key(p):
            return p.metadata.basic.year or 0

        sorted_papers = papers.sort_by(year_key)
        print("\n3. Sorted by year:")
        for entry in sorted_papers:
            print(f"   {entry.metadata.basic.year}: {entry.metadata.basic.title}")

        print("\n✅ Papers class simplified!")
        print("   - Reduced from 39 to ~15 methods")
        print("   - Business logic moved to Scholar")
        print("   - Clean collection interface")

    main()
|
|
721
|
+
|
|
722
|
+
# EOF
|