scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +53 -15
- scitex/__main__.py +72 -26
- scitex/__version__.py +1 -1
- scitex/_sh.py +145 -23
- scitex/ai/__init__.py +30 -16
- scitex/ai/_gen_ai/_Anthropic.py +5 -7
- scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
- scitex/ai/_gen_ai/_DeepSeek.py +10 -2
- scitex/ai/_gen_ai/_Google.py +2 -2
- scitex/ai/_gen_ai/_Llama.py +2 -2
- scitex/ai/_gen_ai/_OpenAI.py +2 -2
- scitex/ai/_gen_ai/_PARAMS.py +51 -65
- scitex/ai/_gen_ai/_Perplexity.py +2 -2
- scitex/ai/_gen_ai/__init__.py +25 -14
- scitex/ai/_gen_ai/_format_output_func.py +4 -4
- scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
- scitex/ai/classification/CrossValidationExperiment.py +374 -0
- scitex/ai/classification/__init__.py +43 -4
- scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ai/classification/reporters/__init__.py +11 -0
- scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ai/classification/timeseries/__init__.py +39 -0
- scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ai/clustering/_umap.py +2 -2
- scitex/ai/feature_extraction/vit.py +1 -0
- scitex/ai/feature_selection/__init__.py +30 -0
- scitex/ai/feature_selection/feature_selection.py +364 -0
- scitex/ai/loss/multi_task_loss.py +1 -1
- scitex/ai/metrics/__init__.py +51 -4
- scitex/ai/metrics/_calc_bacc.py +61 -0
- scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ai/metrics/_calc_clf_report.py +78 -0
- scitex/ai/metrics/_calc_conf_mat.py +93 -0
- scitex/ai/metrics/_calc_feature_importance.py +183 -0
- scitex/ai/metrics/_calc_mcc.py +61 -0
- scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ai/metrics/_calc_roc_auc.py +110 -0
- scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
- scitex/ai/metrics/_normalize_labels.py +83 -0
- scitex/ai/plt/__init__.py +47 -8
- scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
- scitex/ai/plt/_plot_feature_importance.py +323 -0
- scitex/ai/plt/_plot_learning_curve.py +345 -0
- scitex/ai/plt/_plot_optuna_study.py +225 -0
- scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ai/plt/_plot_roc_curve.py +255 -0
- scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
- scitex/ai/training/__init__.py +2 -2
- scitex/ai/utils/grid_search.py +3 -3
- scitex/benchmark/__init__.py +52 -0
- scitex/benchmark/benchmark.py +400 -0
- scitex/benchmark/monitor.py +370 -0
- scitex/benchmark/profiler.py +297 -0
- scitex/browser/__init__.py +48 -0
- scitex/browser/automation/CookieHandler.py +216 -0
- scitex/browser/automation/__init__.py +7 -0
- scitex/browser/collaboration/__init__.py +55 -0
- scitex/browser/collaboration/auth_helpers.py +94 -0
- scitex/browser/collaboration/collaborative_agent.py +136 -0
- scitex/browser/collaboration/credential_manager.py +188 -0
- scitex/browser/collaboration/interactive_panel.py +400 -0
- scitex/browser/collaboration/persistent_browser.py +170 -0
- scitex/browser/collaboration/shared_session.py +383 -0
- scitex/browser/collaboration/standard_interactions.py +246 -0
- scitex/browser/collaboration/visual_feedback.py +181 -0
- scitex/browser/core/BrowserMixin.py +326 -0
- scitex/browser/core/ChromeProfileManager.py +446 -0
- scitex/browser/core/__init__.py +9 -0
- scitex/browser/debugging/__init__.py +18 -0
- scitex/browser/debugging/_browser_logger.py +657 -0
- scitex/browser/debugging/_highlight_element.py +143 -0
- scitex/browser/debugging/_show_grid.py +154 -0
- scitex/browser/interaction/__init__.py +24 -0
- scitex/browser/interaction/click_center.py +149 -0
- scitex/browser/interaction/click_with_fallbacks.py +206 -0
- scitex/browser/interaction/close_popups.py +498 -0
- scitex/browser/interaction/fill_with_fallbacks.py +209 -0
- scitex/browser/pdf/__init__.py +14 -0
- scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
- scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
- scitex/browser/remote/CaptchaHandler.py +434 -0
- scitex/browser/remote/ZenRowsAPIClient.py +347 -0
- scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
- scitex/browser/remote/__init__.py +11 -0
- scitex/browser/stealth/HumanBehavior.py +344 -0
- scitex/browser/stealth/StealthManager.py +1008 -0
- scitex/browser/stealth/__init__.py +9 -0
- scitex/browser/template.py +122 -0
- scitex/capture/__init__.py +110 -0
- scitex/capture/__main__.py +25 -0
- scitex/capture/capture.py +848 -0
- scitex/capture/cli.py +233 -0
- scitex/capture/gif.py +344 -0
- scitex/capture/mcp_server.py +961 -0
- scitex/capture/session.py +70 -0
- scitex/capture/utils.py +705 -0
- scitex/cli/__init__.py +17 -0
- scitex/cli/cloud.py +447 -0
- scitex/cli/main.py +42 -0
- scitex/cli/scholar.py +280 -0
- scitex/context/_suppress_output.py +5 -3
- scitex/db/__init__.py +30 -3
- scitex/db/__main__.py +75 -0
- scitex/db/_check_health.py +381 -0
- scitex/db/_delete_duplicates.py +25 -386
- scitex/db/_inspect.py +335 -114
- scitex/db/_inspect_optimized.py +301 -0
- scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
- scitex/db/_postgresql/__init__.py +6 -0
- scitex/db/_sqlite3/_SQLite3.py +210 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
- scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
- scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
- scitex/db/_sqlite3/__init__.py +7 -0
- scitex/db/_sqlite3/_delete_duplicates.py +274 -0
- scitex/decorators/__init__.py +2 -0
- scitex/decorators/_cache_disk.py +13 -5
- scitex/decorators/_cache_disk_async.py +49 -0
- scitex/decorators/_deprecated.py +175 -10
- scitex/decorators/_timeout.py +1 -1
- scitex/dev/_analyze_code_flow.py +2 -2
- scitex/dict/_DotDict.py +73 -15
- scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
- scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
- scitex/dict/__init__.py +2 -0
- scitex/dict/_flatten.py +27 -0
- scitex/dsp/_crop.py +2 -2
- scitex/dsp/_demo_sig.py +2 -2
- scitex/dsp/_detect_ripples.py +2 -2
- scitex/dsp/_hilbert.py +2 -2
- scitex/dsp/_listen.py +6 -6
- scitex/dsp/_modulation_index.py +2 -2
- scitex/dsp/_pac.py +1 -1
- scitex/dsp/_psd.py +2 -2
- scitex/dsp/_resample.py +2 -1
- scitex/dsp/_time.py +3 -2
- scitex/dsp/_wavelet.py +3 -2
- scitex/dsp/add_noise.py +2 -2
- scitex/dsp/example.py +1 -0
- scitex/dsp/filt.py +10 -9
- scitex/dsp/template.py +3 -2
- scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
- scitex/dsp/utils/pac.py +2 -2
- scitex/dt/_normalize_timestamp.py +432 -0
- scitex/errors.py +572 -0
- scitex/gen/_DimHandler.py +2 -2
- scitex/gen/__init__.py +37 -7
- scitex/gen/_deprecated_close.py +80 -0
- scitex/gen/_deprecated_start.py +26 -0
- scitex/gen/_detect_environment.py +152 -0
- scitex/gen/_detect_notebook_path.py +169 -0
- scitex/gen/_embed.py +6 -2
- scitex/gen/_get_notebook_path.py +257 -0
- scitex/gen/_less.py +1 -1
- scitex/gen/_list_packages.py +2 -2
- scitex/gen/_norm.py +44 -9
- scitex/gen/_norm_cache.py +269 -0
- scitex/gen/_src.py +3 -5
- scitex/gen/_title_case.py +3 -3
- scitex/io/__init__.py +28 -6
- scitex/io/_glob.py +13 -7
- scitex/io/_load.py +108 -21
- scitex/io/_load_cache.py +303 -0
- scitex/io/_load_configs.py +40 -15
- scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
- scitex/io/_load_modules/_ZarrExplorer.py +114 -0
- scitex/io/_load_modules/_bibtex.py +207 -0
- scitex/io/_load_modules/_hdf5.py +53 -178
- scitex/io/_load_modules/_json.py +5 -3
- scitex/io/_load_modules/_pdf.py +871 -16
- scitex/io/_load_modules/_sqlite3.py +15 -0
- scitex/io/_load_modules/_txt.py +41 -12
- scitex/io/_load_modules/_yaml.py +4 -3
- scitex/io/_load_modules/_zarr.py +126 -0
- scitex/io/_save.py +429 -171
- scitex/io/_save_modules/__init__.py +6 -0
- scitex/io/_save_modules/_bibtex.py +194 -0
- scitex/io/_save_modules/_csv.py +8 -4
- scitex/io/_save_modules/_excel.py +174 -15
- scitex/io/_save_modules/_hdf5.py +251 -226
- scitex/io/_save_modules/_image.py +1 -3
- scitex/io/_save_modules/_json.py +49 -4
- scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
- scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
- scitex/io/_save_modules/_tex.py +277 -0
- scitex/io/_save_modules/_yaml.py +42 -3
- scitex/io/_save_modules/_zarr.py +160 -0
- scitex/io/utils/__init__.py +20 -0
- scitex/io/utils/h5_to_zarr.py +616 -0
- scitex/linalg/_geometric_median.py +6 -2
- scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
- scitex/logging/__init__.py +122 -0
- scitex/logging/_config.py +158 -0
- scitex/logging/_context.py +103 -0
- scitex/logging/_formatters.py +128 -0
- scitex/logging/_handlers.py +64 -0
- scitex/logging/_levels.py +35 -0
- scitex/logging/_logger.py +163 -0
- scitex/logging/_print_capture.py +95 -0
- scitex/ml/__init__.py +69 -0
- scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
- scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
- scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
- scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
- scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
- scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
- scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
- scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
- scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
- scitex/ml/_gen_ai/__init__.py +43 -0
- scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
- scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
- scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
- scitex/ml/activation/__init__.py +8 -0
- scitex/ml/activation/_define.py +11 -0
- scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
- scitex/ml/classification/CrossValidationExperiment.py +374 -0
- scitex/ml/classification/__init__.py +46 -0
- scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ml/classification/reporters/__init__.py +11 -0
- scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ml/classification/timeseries/__init__.py +39 -0
- scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ml/clustering/__init__.py +11 -0
- scitex/ml/clustering/_pca.py +115 -0
- scitex/ml/clustering/_umap.py +376 -0
- scitex/ml/feature_extraction/__init__.py +56 -0
- scitex/ml/feature_extraction/vit.py +149 -0
- scitex/ml/feature_selection/__init__.py +30 -0
- scitex/ml/feature_selection/feature_selection.py +364 -0
- scitex/ml/loss/_L1L2Losses.py +34 -0
- scitex/ml/loss/__init__.py +12 -0
- scitex/ml/loss/multi_task_loss.py +47 -0
- scitex/ml/metrics/__init__.py +56 -0
- scitex/ml/metrics/_calc_bacc.py +61 -0
- scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ml/metrics/_calc_clf_report.py +78 -0
- scitex/ml/metrics/_calc_conf_mat.py +93 -0
- scitex/ml/metrics/_calc_feature_importance.py +183 -0
- scitex/ml/metrics/_calc_mcc.py +61 -0
- scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ml/metrics/_calc_roc_auc.py +110 -0
- scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ml/metrics/_calc_silhouette_score.py +503 -0
- scitex/ml/metrics/_normalize_labels.py +83 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
- scitex/ml/optim/__init__.py +13 -0
- scitex/ml/optim/_get_set.py +31 -0
- scitex/ml/optim/_optimizers.py +71 -0
- scitex/ml/plt/__init__.py +60 -0
- scitex/ml/plt/_plot_conf_mat.py +663 -0
- scitex/ml/plt/_plot_feature_importance.py +323 -0
- scitex/ml/plt/_plot_learning_curve.py +345 -0
- scitex/ml/plt/_plot_optuna_study.py +225 -0
- scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ml/plt/_plot_roc_curve.py +255 -0
- scitex/ml/sk/__init__.py +11 -0
- scitex/ml/sk/_clf.py +58 -0
- scitex/ml/sk/_to_sktime.py +100 -0
- scitex/ml/sklearn/__init__.py +26 -0
- scitex/ml/sklearn/clf.py +58 -0
- scitex/ml/sklearn/to_sktime.py +100 -0
- scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
- scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
- scitex/ml/training/__init__.py +7 -0
- scitex/ml/utils/__init__.py +22 -0
- scitex/ml/utils/_check_params.py +50 -0
- scitex/ml/utils/_default_dataset.py +46 -0
- scitex/ml/utils/_format_samples_for_sktime.py +26 -0
- scitex/ml/utils/_label_encoder.py +134 -0
- scitex/ml/utils/_merge_labels.py +22 -0
- scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
- scitex/ml/utils/_under_sample.py +51 -0
- scitex/ml/utils/_verify_n_gpus.py +16 -0
- scitex/ml/utils/grid_search.py +148 -0
- scitex/nn/_BNet.py +15 -9
- scitex/nn/_Filters.py +2 -2
- scitex/nn/_ModulationIndex.py +2 -2
- scitex/nn/_PAC.py +1 -1
- scitex/nn/_Spectrogram.py +12 -3
- scitex/nn/__init__.py +9 -10
- scitex/path/__init__.py +18 -0
- scitex/path/_clean.py +4 -0
- scitex/path/_find.py +9 -4
- scitex/path/_symlink.py +348 -0
- scitex/path/_version.py +4 -3
- scitex/pd/__init__.py +2 -0
- scitex/pd/_get_unique.py +99 -0
- scitex/plt/__init__.py +114 -5
- scitex/plt/_subplots/_AxesWrapper.py +1 -3
- scitex/plt/_subplots/_AxisWrapper.py +7 -3
- scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
- scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
- scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
- scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
- scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
- scitex/plt/_subplots/_FigWrapper.py +62 -6
- scitex/plt/_subplots/_export_as_csv.py +43 -27
- scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
- scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
- scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
- scitex/plt/ax/_style/_hide_spines.py +1 -3
- scitex/plt/ax/_style/_rotate_labels.py +180 -76
- scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
- scitex/plt/ax/_style/_set_meta.py +11 -4
- scitex/plt/ax/_style/_set_supxyt.py +3 -3
- scitex/plt/ax/_style/_set_xyt.py +3 -3
- scitex/plt/ax/_style/_share_axes.py +2 -2
- scitex/plt/color/__init__.py +4 -4
- scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
- scitex/plt/utils/_configure_mpl.py +99 -86
- scitex/plt/utils/_histogram_utils.py +1 -3
- scitex/plt/utils/_is_valid_axis.py +1 -3
- scitex/plt/utils/_scitex_config.py +1 -0
- scitex/repro/__init__.py +75 -0
- scitex/{reproduce → repro}/_gen_ID.py +1 -1
- scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
- scitex/repro_rng/_RandomStateManager.py +590 -0
- scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/repro_rng/__init__.py +39 -0
- scitex/reproduce/__init__.py +25 -13
- scitex/reproduce/_hash_array.py +22 -0
- scitex/resource/_get_processor_usages.py +4 -4
- scitex/resource/_get_specs.py +2 -2
- scitex/resource/_log_processor_usages.py +2 -2
- scitex/rng/_RandomStateManager.py +590 -0
- scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/rng/__init__.py +39 -0
- scitex/scholar/__init__.py +309 -19
- scitex/scholar/__main__.py +319 -0
- scitex/scholar/auth/ScholarAuthManager.py +308 -0
- scitex/scholar/auth/__init__.py +12 -0
- scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
- scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
- scitex/scholar/auth/core/StrategyResolver.py +309 -0
- scitex/scholar/auth/core/__init__.py +16 -0
- scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
- scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
- scitex/scholar/auth/gateway/__init__.py +38 -0
- scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
- scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
- scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
- scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
- scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
- scitex/scholar/auth/providers/__init__.py +18 -0
- scitex/scholar/auth/session/AuthCacheManager.py +189 -0
- scitex/scholar/auth/session/SessionManager.py +159 -0
- scitex/scholar/auth/session/__init__.py +11 -0
- scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
- scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
- scitex/scholar/auth/sso/SSOAutomator.py +180 -0
- scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
- scitex/scholar/auth/sso/__init__.py +15 -0
- scitex/scholar/browser/ScholarBrowserManager.py +705 -0
- scitex/scholar/browser/__init__.py +38 -0
- scitex/scholar/browser/utils/__init__.py +13 -0
- scitex/scholar/browser/utils/click_and_wait.py +205 -0
- scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
- scitex/scholar/browser/utils/wait_redirects.py +732 -0
- scitex/scholar/config/PublisherRules.py +132 -0
- scitex/scholar/config/ScholarConfig.py +126 -0
- scitex/scholar/config/__init__.py +17 -0
- scitex/scholar/core/Paper.py +627 -0
- scitex/scholar/core/Papers.py +722 -0
- scitex/scholar/core/Scholar.py +1975 -0
- scitex/scholar/core/__init__.py +9 -0
- scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
- scitex/scholar/impact_factor/__init__.py +20 -0
- scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
- scitex/scholar/impact_factor/estimation/__init__.py +40 -0
- scitex/scholar/impact_factor/estimation/build_database.py +0 -0
- scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
- scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
- scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
- scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
- scitex/scholar/integration/__init__.py +59 -0
- scitex/scholar/integration/base.py +502 -0
- scitex/scholar/integration/mendeley/__init__.py +22 -0
- scitex/scholar/integration/mendeley/exporter.py +166 -0
- scitex/scholar/integration/mendeley/importer.py +236 -0
- scitex/scholar/integration/mendeley/linker.py +79 -0
- scitex/scholar/integration/mendeley/mapper.py +212 -0
- scitex/scholar/integration/zotero/__init__.py +27 -0
- scitex/scholar/integration/zotero/__main__.py +264 -0
- scitex/scholar/integration/zotero/exporter.py +351 -0
- scitex/scholar/integration/zotero/importer.py +372 -0
- scitex/scholar/integration/zotero/linker.py +415 -0
- scitex/scholar/integration/zotero/mapper.py +286 -0
- scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
- scitex/scholar/metadata_engines/__init__.py +21 -0
- scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
- scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
- scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
- scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
- scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
- scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
- scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
- scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
- scitex/scholar/metadata_engines/individual/__init__.py +7 -0
- scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
- scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
- scitex/scholar/metadata_engines/utils/__init__.py +30 -0
- scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
- scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
- scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
- scitex/scholar/pdf_download/__init__.py +5 -0
- scitex/scholar/pdf_download/strategies/__init__.py +38 -0
- scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
- scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
- scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
- scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
- scitex/scholar/pdf_download/strategies/response_body.py +207 -0
- scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
- scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
- scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
- scitex/scholar/pipelines/__init__.py +49 -0
- scitex/scholar/storage/BibTeXHandler.py +1018 -0
- scitex/scholar/storage/PaperIO.py +468 -0
- scitex/scholar/storage/ScholarLibrary.py +182 -0
- scitex/scholar/storage/_DeduplicationManager.py +548 -0
- scitex/scholar/storage/_LibraryCacheManager.py +724 -0
- scitex/scholar/storage/_LibraryManager.py +1835 -0
- scitex/scholar/storage/__init__.py +28 -0
- scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
- scitex/scholar/url_finder/__init__.py +7 -0
- scitex/scholar/url_finder/strategies/__init__.py +33 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
- scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
- scitex/scholar/utils/__init__.py +22 -0
- scitex/scholar/utils/bibtex/__init__.py +9 -0
- scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
- scitex/scholar/utils/cleanup/__init__.py +8 -0
- scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
- scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
- scitex/scholar/utils/text/_TextNormalizer.py +407 -0
- scitex/scholar/utils/text/__init__.py +9 -0
- scitex/scholar/zotero/__init__.py +38 -0
- scitex/session/__init__.py +51 -0
- scitex/session/_lifecycle.py +736 -0
- scitex/session/_manager.py +102 -0
- scitex/session/template.py +122 -0
- scitex/stats/__init__.py +30 -26
- scitex/stats/correct/__init__.py +21 -0
- scitex/stats/correct/_correct_bonferroni.py +551 -0
- scitex/stats/correct/_correct_fdr.py +634 -0
- scitex/stats/correct/_correct_holm.py +548 -0
- scitex/stats/correct/_correct_sidak.py +499 -0
- scitex/stats/descriptive/__init__.py +85 -0
- scitex/stats/descriptive/_circular.py +540 -0
- scitex/stats/descriptive/_describe.py +219 -0
- scitex/stats/descriptive/_nan.py +518 -0
- scitex/stats/descriptive/_real.py +189 -0
- scitex/stats/effect_sizes/__init__.py +41 -0
- scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
- scitex/stats/effect_sizes/_cohens_d.py +342 -0
- scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
- scitex/stats/effect_sizes/_eta_squared.py +302 -0
- scitex/stats/effect_sizes/_prob_superiority.py +296 -0
- scitex/stats/posthoc/__init__.py +19 -0
- scitex/stats/posthoc/_dunnett.py +463 -0
- scitex/stats/posthoc/_games_howell.py +383 -0
- scitex/stats/posthoc/_tukey_hsd.py +367 -0
- scitex/stats/power/__init__.py +19 -0
- scitex/stats/power/_power.py +433 -0
- scitex/stats/template.py +119 -0
- scitex/stats/utils/__init__.py +62 -0
- scitex/stats/utils/_effect_size.py +985 -0
- scitex/stats/utils/_formatters.py +270 -0
- scitex/stats/utils/_normalizers.py +927 -0
- scitex/stats/utils/_power.py +433 -0
- scitex/stats_v01/_EffectSizeCalculator.py +488 -0
- scitex/stats_v01/_StatisticalValidator.py +411 -0
- scitex/stats_v01/__init__.py +60 -0
- scitex/stats_v01/_additional_tests.py +415 -0
- scitex/{stats → stats_v01}/_p2stars.py +19 -5
- scitex/stats_v01/_two_sample_tests.py +141 -0
- scitex/stats_v01/desc/__init__.py +83 -0
- scitex/stats_v01/desc/_circular.py +540 -0
- scitex/stats_v01/desc/_describe.py +219 -0
- scitex/stats_v01/desc/_nan.py +518 -0
- scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
- scitex/stats_v01/desc/_real.py +189 -0
- scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
- scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
- scitex/str/__init__.py +1 -3
- scitex/str/_clean_path.py +6 -2
- scitex/str/_latex_fallback.py +267 -160
- scitex/str/_parse.py +44 -36
- scitex/str/_printc.py +1 -3
- scitex/template/__init__.py +87 -0
- scitex/template/_create_project.py +267 -0
- scitex/template/create_pip_project.py +80 -0
- scitex/template/create_research.py +80 -0
- scitex/template/create_singularity.py +80 -0
- scitex/units.py +291 -0
- scitex/utils/_compress_hdf5.py +14 -3
- scitex/utils/_email.py +21 -2
- scitex/utils/_grid.py +6 -4
- scitex/utils/_notify.py +13 -10
- scitex/utils/_verify_scitex_format.py +589 -0
- scitex/utils/_verify_scitex_format_v01.py +370 -0
- scitex/utils/template.py +122 -0
- scitex/web/_search_pubmed.py +62 -16
- scitex-2.1.0.dist-info/LICENSE +21 -0
- scitex-2.1.0.dist-info/METADATA +677 -0
- scitex-2.1.0.dist-info/RECORD +919 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
- scitex-2.1.0.dist-info/entry_points.txt +3 -0
- scitex/ai/__Classifiers.py +0 -101
- scitex/ai/classification/classification_reporter.py +0 -1137
- scitex/ai/classification/classifiers.py +0 -101
- scitex/ai/classification_reporter.py +0 -1161
- scitex/ai/genai/__init__.py +0 -277
- scitex/ai/genai/anthropic_provider.py +0 -320
- scitex/ai/genai/anthropic_refactored.py +0 -109
- scitex/ai/genai/auth_manager.py +0 -200
- scitex/ai/genai/base_provider.py +0 -291
- scitex/ai/genai/chat_history.py +0 -307
- scitex/ai/genai/cost_tracker.py +0 -276
- scitex/ai/genai/deepseek_provider.py +0 -251
- scitex/ai/genai/google_provider.py +0 -228
- scitex/ai/genai/groq_provider.py +0 -248
- scitex/ai/genai/image_processor.py +0 -250
- scitex/ai/genai/llama_provider.py +0 -214
- scitex/ai/genai/mock_provider.py +0 -127
- scitex/ai/genai/model_registry.py +0 -304
- scitex/ai/genai/openai_provider.py +0 -293
- scitex/ai/genai/perplexity_provider.py +0 -205
- scitex/ai/genai/provider_base.py +0 -302
- scitex/ai/genai/provider_factory.py +0 -370
- scitex/ai/genai/response_handler.py +0 -235
- scitex/ai/layer/_Pass.py +0 -21
- scitex/ai/layer/__init__.py +0 -10
- scitex/ai/layer/_switch.py +0 -8
- scitex/ai/metrics/_bACC.py +0 -51
- scitex/ai/plt/_learning_curve.py +0 -194
- scitex/ai/plt/_optuna_study.py +0 -111
- scitex/ai/plt/aucs/__init__.py +0 -2
- scitex/ai/plt/aucs/example.py +0 -60
- scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
- scitex/ai/plt/aucs/roc_auc.py +0 -246
- scitex/ai/sampling/undersample.py +0 -29
- scitex/db/_SQLite3.py +0 -2136
- scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
- scitex/gen/_close.py +0 -222
- scitex/gen/_start.py +0 -451
- scitex/general/__init__.py +0 -5
- scitex/io/_load_modules/_db.py +0 -24
- scitex/life/__init__.py +0 -10
- scitex/life/_monitor_rain.py +0 -49
- scitex/reproduce/_fix_seeds.py +0 -45
- scitex/res/__init__.py +0 -5
- scitex/scholar/_local_search.py +0 -454
- scitex/scholar/_paper.py +0 -244
- scitex/scholar/_pdf_downloader.py +0 -325
- scitex/scholar/_search.py +0 -393
- scitex/scholar/_vector_search.py +0 -370
- scitex/scholar/_web_sources.py +0 -457
- scitex/stats/desc/__init__.py +0 -40
- scitex-2.0.0.dist-info/METADATA +0 -307
- scitex-2.0.0.dist-info/RECORD +0 -572
- scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
- /scitex/ai/{act → activation}/__init__.py +0 -0
- /scitex/ai/{act → activation}/_define.py +0 -0
- /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
- /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
- /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
- /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
- /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
- /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
- /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
- /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
- /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1018 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-08-22 23:01:42 (ywatanabe)"
|
|
4
|
+
# File: /home/ywatanabe/proj/SciTeX-Code/src/scitex/scholar/storage/_BibTeXHandler.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
import os
|
|
8
|
+
__FILE__ = __file__
|
|
9
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
10
|
+
# ----------------------------------------
|
|
11
|
+
import tempfile
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any, Dict, List, Optional, Union
|
|
14
|
+
|
|
15
|
+
from scitex import logging
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class BibTeXHandler:
|
|
21
|
+
"""Handles BibTeX parsing and conversion to Paper objects."""
|
|
22
|
+
|
|
23
|
+
def __init__(self, project: str = None, config=None):
|
|
24
|
+
self.name = self.__class__.__name__
|
|
25
|
+
self.project = project
|
|
26
|
+
self.config = config
|
|
27
|
+
|
|
28
|
+
def _extract_primitive(self, value):
    """Return ``value`` unchanged, except that a DotDict becomes a plain dict.

    None, plain dicts and every other object are handed back as-is; only a
    top-level DotDict is converted, via ``dict(value)``.
    """
    from scitex.dict import DotDict

    return dict(value) if isinstance(value, DotDict) else value
|
|
42
|
+
|
|
43
|
+
def papers_from_bibtex(
    self, bibtex_input: Union[str, Path]
) -> List["Paper"]:
    """Create Papers from a BibTeX file path or from raw BibTeX text.

    Args:
        bibtex_input: Either a path to a BibTeX file or the BibTeX content
            itself. A ``pathlib.Path`` is always treated as a file path.
            A string is classified heuristically (see below).

    Returns:
        Whatever ``_papers_from_bibtex_file`` or ``_papers_from_bibtex_text``
        returns for the input.
    """
    input_str = str(bibtex_input)

    # A Path object is unambiguous. Running it through the string heuristics
    # would parse e.g. Path("refs") as BibTeX *text* and silently yield no
    # papers instead of reporting the missing file.
    if isinstance(bibtex_input, Path):
        return self._papers_from_bibtex_file(input_str)

    # Anything that contains an "@entry" marker is content, whatever else
    # it looks like.
    looks_like_content = (
        "\n@" in input_str or input_str.strip().startswith("@")
    )

    # Only short strings are considered as paths; long inputs are content.
    looks_like_path = (
        not looks_like_content
        and len(input_str) < 500
        and (
            input_str.endswith((".bib", ".bibtex"))
            or "/" in input_str
            or "\\" in input_str
            or input_str.startswith(("~", "."))
            or os.path.exists(os.path.expanduser(input_str))
        )
    )

    if looks_like_path:
        return self._papers_from_bibtex_file(input_str)
    return self._papers_from_bibtex_text(input_str)
|
|
69
|
+
|
|
70
|
+
def _papers_from_bibtex_file(
|
|
71
|
+
self, file_path: Union[str, Path]
|
|
72
|
+
) -> List["Paper"]:
|
|
73
|
+
"""Create Papers from a BibTeX file."""
|
|
74
|
+
bibtex_path = Path(os.path.expanduser(str(file_path)))
|
|
75
|
+
if not bibtex_path.exists():
|
|
76
|
+
raise ValueError(f"BibTeX file not found: {bibtex_path}")
|
|
77
|
+
|
|
78
|
+
from scitex.io import load
|
|
79
|
+
|
|
80
|
+
entries = load(str(bibtex_path))
|
|
81
|
+
|
|
82
|
+
papers = []
|
|
83
|
+
for entry in entries:
|
|
84
|
+
paper = self.paper_from_bibtex_entry(entry)
|
|
85
|
+
if paper:
|
|
86
|
+
papers.append(paper)
|
|
87
|
+
|
|
88
|
+
logger.info(f"Created {len(papers)} papers from BibTeX file")
|
|
89
|
+
return papers
|
|
90
|
+
|
|
91
|
+
def _papers_from_bibtex_text(self, bibtex_content: str) -> List["Paper"]:
    """Parse a BibTeX content string into Papers.

    The text is written to a temporary ``.bib`` file so that
    ``scitex.io.load`` can read it; the file is removed again whether or
    not loading succeeds.

    Args:
        bibtex_content: Raw BibTeX text.

    Returns:
        The Papers built from the parsed entries. Entries for which
        ``paper_from_bibtex_entry`` returns a falsy value are skipped.
    """
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".bib", delete=False
    ) as scratch:
        temp_path = scratch.name
        scratch.write(bibtex_content)

    try:
        from scitex.io import load

        entries = load(temp_path)
    finally:
        # delete=False above, so cleanup is our responsibility.
        os.unlink(temp_path)

    papers = list(filter(None, map(self.paper_from_bibtex_entry, entries)))

    logger.info(f"Created {len(papers)} papers from BibTeX text")
    return papers
|
|
114
|
+
|
|
115
|
+
def paper_from_bibtex_entry(
    self, entry: Dict[str, Any]
) -> Optional["Paper"]:
    """Convert one parsed BibTeX entry into a Paper.

    Args:
        entry: Parsed entry dict. ``entry["fields"]`` holds the BibTeX
            fields; ``entry["entry_type"]`` and ``entry["key"]`` are stored
            on the Paper so they can be written back out later.

    Returns:
        The populated Paper, or None when the entry has no title.
    """
    import json

    from ..core.Paper import Paper

    fields = entry.get("fields", {})
    title = fields.get("title", "")
    if not title:
        return None

    author_str = fields.get("author", "")
    authors = (
        [a.strip() for a in author_str.split(" and ")] if author_str else []
    )

    raw_keywords = fields.get("keywords")
    keywords = raw_keywords.split(", ") if raw_keywords else []

    # A malformed year ("in press", "2020a", ...) must not abort the import
    # of the whole file; drop the year and keep the entry.
    year = None
    raw_year = fields.get("year")
    if raw_year:
        try:
            year = int(raw_year)
        except (ValueError, TypeError):
            logger.warning(
                f"Ignoring non-numeric year {raw_year!r} "
                f"in BibTeX entry {entry.get('key', '')!r}"
            )

    # Citation count: either a JSON object (enriched BibTeX files) whose
    # "total" is used, or a plain integer, optionally with thousands
    # separators ("1,234").
    citation_total = None
    if "citation_count" in fields:
        cc_raw = fields["citation_count"]
        try:
            if isinstance(cc_raw, str) and cc_raw.strip().startswith("{"):
                citation_total = json.loads(cc_raw).get("total")
            else:
                if isinstance(cc_raw, str):
                    cc_raw = cc_raw.replace(",", "")
                citation_total = int(cc_raw)
        except (ValueError, TypeError):
            # Best effort: an unparsable count is left unset.
            # (json.JSONDecodeError is a ValueError subclass.)
            pass

    # Create Paper with Pydantic structure
    paper = Paper()

    # Basic metadata
    basic = paper.metadata.basic
    basic.title = title
    basic.authors = authors
    basic.abstract = fields.get("abstract", "")
    basic.year = year
    basic.keywords = keywords

    # Identifiers
    doi = fields.get("doi")
    if doi:
        paper.metadata.set_doi(doi)
    paper.metadata.id.pmid = fields.get("pmid")
    paper.metadata.id.arxiv_id = fields.get("eprint")

    # Publication metadata. Volume, issue and publisher were previously
    # looked up in a dict that only ever contained the journal, so they
    # were always None. "number" is the standard BibTeX field for the
    # issue; "issue" is accepted as a fallback.
    publication = paper.metadata.publication
    publication.journal = fields.get("journal")
    publication.volume = fields.get("volume")
    publication.issue = fields.get("number") or fields.get("issue")
    publication.publisher = fields.get("publisher")

    if citation_total is not None:
        paper.metadata.citation_count.total = citation_total

    # Impact factor: digits with at most one decimal point. Removing only
    # the first "." rejects values such as "1.2.3" that float() cannot parse.
    if "journal_impact_factor" in fields:
        impact_str = str(fields["journal_impact_factor"])
        if impact_str.replace(".", "", 1).isdigit():
            try:
                publication.impact_factor = float(impact_str)
            except ValueError:
                # isdigit() also accepts characters float() rejects
                # (e.g. superscript digits); leave the value unset.
                pass

    # The BibTeX "url" field is recorded as a PDF URL.
    if fields.get("url"):
        paper.metadata.url.pdfs.append(
            {"url": fields["url"], "source": "bibtex"}
        )

    # Container metadata
    paper.container.projects = [self.project] if self.project else []

    # Keep the original entry so it can be round-tripped on export.
    paper._original_bibtex_fields = fields.copy()
    paper._bibtex_entry_type = entry.get("entry_type", "misc")
    paper._bibtex_key = entry.get("key", "")

    self._handle_enriched_metadata(paper, fields)

    return paper
|
|
233
|
+
|
|
234
|
+
def _handle_enriched_metadata(
|
|
235
|
+
self, paper: "Paper", fields: Dict[str, Any]
|
|
236
|
+
) -> None:
|
|
237
|
+
"""Handle enriched metadata from BibTeX fields."""
|
|
238
|
+
if "citation_count" in fields:
|
|
239
|
+
try:
|
|
240
|
+
citation_str = str(fields["citation_count"]).replace(",", "")
|
|
241
|
+
paper.citation_count.total = int(citation_str)
|
|
242
|
+
paper.citation_count.total_engines = fields.get(
|
|
243
|
+
"citation_count_source", "bibtex"
|
|
244
|
+
)
|
|
245
|
+
except (ValueError, AttributeError):
|
|
246
|
+
pass
|
|
247
|
+
|
|
248
|
+
for field_name in fields:
|
|
249
|
+
if "impact_factor" in field_name and "JCR" in field_name:
|
|
250
|
+
try:
|
|
251
|
+
paper.publication.impact_factor = float(fields[field_name])
|
|
252
|
+
paper.publication.impact_factor_engines = fields.get(
|
|
253
|
+
"impact_factor_source", "bibtex"
|
|
254
|
+
)
|
|
255
|
+
break
|
|
256
|
+
except (ValueError, AttributeError):
|
|
257
|
+
pass
|
|
258
|
+
|
|
259
|
+
for field_name in fields:
|
|
260
|
+
if "quartile" in field_name and "JCR" in field_name:
|
|
261
|
+
try:
|
|
262
|
+
# Store in system or publication section
|
|
263
|
+
paper.publication["journal_quartile"] = fields[field_name]
|
|
264
|
+
break
|
|
265
|
+
except AttributeError:
|
|
266
|
+
pass
|
|
267
|
+
|
|
268
|
+
if "volume" in fields:
|
|
269
|
+
try:
|
|
270
|
+
paper.publication.volume = fields["volume"]
|
|
271
|
+
except AttributeError:
|
|
272
|
+
pass
|
|
273
|
+
if "pages" in fields:
|
|
274
|
+
try:
|
|
275
|
+
# Split pages into first_page and last_page
|
|
276
|
+
pages = fields["pages"]
|
|
277
|
+
if pages and "-" in str(pages):
|
|
278
|
+
first, last = str(pages).split("-", 1)
|
|
279
|
+
paper.publication.first_page = first.strip()
|
|
280
|
+
paper.publication.last_page = last.strip()
|
|
281
|
+
else:
|
|
282
|
+
paper.publication.first_page = pages
|
|
283
|
+
except AttributeError:
|
|
284
|
+
pass
|
|
285
|
+
|
|
286
|
+
def paper_to_bibtex_entry(self, paper: "Paper") -> Dict[str, Any]:
    """Convert a Paper object to a BibTeX entry dictionary.

    Args:
        paper: Paper to export.

    Returns:
        Dict with "entry_type", "key" and "fields". Fields derived from the
        Paper's metadata take precedence; original BibTeX fields stored on
        the Paper are added only where no such field was produced.
    """
    meta = paper.metadata
    basic = meta.basic
    publication = meta.publication

    # Entry type: a journal forces "article", a booktitle "inproceedings";
    # otherwise the stored type (default "misc") is kept.
    entry_type = getattr(paper, "_bibtex_entry_type", "misc")
    if publication.journal:
        entry_type = "article"
    elif getattr(paper, "booktitle", None):
        entry_type = "inproceedings"

    # Fallback key "<last token of first author>-<year>". A blank first
    # author would make split() return [] and [-1] raise IndexError.
    authors = basic.authors
    name_parts = authors[0].split() if authors else []
    first_author = name_parts[-1] if name_parts else "Unknown"
    year = basic.year or "NoYear"
    # The stored key may be an empty string (entry parsed without a key);
    # an empty citation key is not usable, so fall back in that case too.
    key = getattr(paper, "_bibtex_key", None) or f"{first_author}-{year}"

    fields = {}

    # Basic fields
    if basic.title:
        fields["title"] = basic.title
    if basic.authors:
        fields["author"] = " and ".join(basic.authors)
    if basic.year:
        fields["year"] = str(basic.year)
    if basic.abstract:
        fields["abstract"] = basic.abstract
    if basic.keywords:
        fields["keywords"] = ", ".join(basic.keywords)

    # Identifiers
    if meta.id.doi:
        fields["doi"] = meta.id.doi
    if meta.id.pmid:
        fields["pmid"] = meta.id.pmid
    if meta.id.arxiv_id:
        fields["eprint"] = meta.id.arxiv_id

    # Publication info
    if publication.journal:
        fields["journal"] = publication.journal
    if publication.volume:
        fields["volume"] = publication.volume
    if publication.pages:
        fields["pages"] = publication.pages

    # Metrics: a citation count of 0 is treated like "unknown" and omitted.
    citation_count_val = meta.citation_count.total
    if citation_count_val is not None and citation_count_val != 0:
        fields["citation_count"] = str(int(citation_count_val))

    impact_factor_val = publication.impact_factor
    if impact_factor_val is not None:
        fields["journal_impact_factor"] = str(impact_factor_val)

    # URLs: only the first PDF URL is exported.
    if meta.url.pdfs and len(meta.url.pdfs) > 0:
        pdf_url = meta.url.pdfs[0].get("url")
        if pdf_url:
            fields["url"] = pdf_url if isinstance(pdf_url, str) else str(pdf_url)

    # Original BibTeX fields never override the values produced above.
    if hasattr(paper, "_original_bibtex_fields"):
        for k, v in paper._original_bibtex_fields.items():
            fields.setdefault(k, v)

    return {
        "entry_type": entry_type,
        "key": key,
        "fields": fields,
    }
|
|
359
|
+
|
|
360
|
+
def papers_to_bibtex(
    self, papers: Union[List["Paper"], "Papers"], output_path: Optional[Union[str, Path]] = None
) -> str:
    """Convert Papers collection to BibTeX format.

    Args:
        papers: Papers object (anything with a ``papers`` attribute) or a
            list of Paper objects.
        output_path: Optional path to save the BibTeX file. Missing parent
            directories are created. The file is written as UTF-8.

    Returns:
        BibTeX content as string
    """
    # Handle Papers object
    paper_list = papers.papers if hasattr(papers, "papers") else papers

    bibtex_lines = []
    for paper in paper_list:
        entry = self.paper_to_bibtex_entry(paper)
        bibtex_lines.append(f"@{entry['entry_type']}{{{entry['key']},")
        for field, value in entry["fields"].items():
            # Escape braces so the value cannot close the field early.
            value = str(value).replace("{", "\\{").replace("}", "\\}")
            bibtex_lines.append(f" {field} = {{{value}}},")
        # The embedded newline leaves a blank line between entries.
        bibtex_lines.append("}\n")

    bibtex_content = "\n".join(bibtex_lines)

    # Save to file if path provided
    if output_path:
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        # Explicit UTF-8: author names and titles are frequently non-ASCII,
        # and the locale default encoding may be unable to represent them.
        output_path.write_text(bibtex_content, encoding="utf-8")
        logger.success(f"Saved BibTeX to {output_path}")

    return bibtex_content
|
|
408
|
+
|
|
409
|
+
def merge_bibtex_files(
    self,
    file_paths: List[Union[str, Path]],
    output_path: Optional[Union[str, Path]] = None,
    dedup_strategy: str = "smart",
    return_details: bool = False
) -> Union["Papers", Dict[str, Any]]:
    """Merge multiple BibTeX files intelligently handling duplicates.

    Files that fail to load are skipped with a warning; the merge continues
    with the remaining files.

    Args:
        file_paths: List of BibTeX files to merge
        output_path: Optional path to save merged BibTeX
        dedup_strategy: 'smart' (merge metadata), 'keep_first', 'keep_all'
        return_details: If True, return dict with papers and metadata

    Returns:
        Merged Papers collection, or dict with 'papers', 'file_papers', 'stats'
    """
    from ..core.Papers import Papers

    all_papers = []
    # Papers per source file, keyed by file stem.
    # NOTE(review): two inputs sharing a stem (a/refs.bib, b/refs.bib)
    # overwrite each other here — confirm whether callers rely on the key.
    file_papers = {}
    duplicate_stats = {
        'total_input': 0,
        'duplicates_found': 0,
        'duplicates_merged': 0,
        'unique_papers': 0,
        'files_processed': []
    }

    # Load all papers from files
    for file_path in file_paths:
        file_path = Path(file_path)
        try:
            papers = self.papers_from_bibtex(file_path)
        except Exception as e:
            # Deliberately broad: one unreadable file must not abort the merge.
            logger.warning(f"Failed to load {file_path}: {e}")
            continue
        all_papers.extend(papers)
        file_papers[file_path.stem] = papers
        duplicate_stats['total_input'] += len(papers)
        duplicate_stats['files_processed'].append(file_path)
        logger.info(f"Loaded {len(papers)} papers from {file_path}")

    if dedup_strategy == "keep_all":
        merged_papers = Papers(all_papers)
        # Nothing is removed with keep_all, so every loaded paper is in the
        # result. Without this the statistics reported "0 unique papers".
        duplicate_stats['unique_papers'] = len(all_papers)
    else:
        # Deduplicate papers; this also fills in the duplicate statistics.
        unique_papers = self._deduplicate_papers(
            all_papers,
            strategy=dedup_strategy,
            stats=duplicate_stats
        )
        merged_papers = Papers(unique_papers)

    # Save if output path provided
    if output_path:
        self.papers_to_bibtex_with_sources(
            merged_papers,
            output_path,
            source_files=duplicate_stats['files_processed'],
            file_papers=file_papers,
            stats=duplicate_stats
        )

    # Log statistics
    logger.info(f"Merge complete: {duplicate_stats['unique_papers']} unique papers "
                f"from {duplicate_stats['total_input']} total "
                f"({duplicate_stats['duplicates_found']} duplicates)")

    if return_details:
        return {
            "papers": merged_papers,
            "file_papers": file_papers,
            "stats": duplicate_stats
        }
    return merged_papers
|
|
486
|
+
|
|
487
|
+
def _deduplicate_papers(
|
|
488
|
+
self,
|
|
489
|
+
papers: List["Paper"],
|
|
490
|
+
strategy: str = "smart",
|
|
491
|
+
stats: Optional[Dict] = None
|
|
492
|
+
) -> List["Paper"]:
|
|
493
|
+
"""Deduplicate a list of papers based on strategy.
|
|
494
|
+
|
|
495
|
+
Args:
|
|
496
|
+
papers: List of Paper objects
|
|
497
|
+
strategy: 'smart' or 'keep_first'
|
|
498
|
+
stats: Optional dict to track statistics
|
|
499
|
+
|
|
500
|
+
Returns:
|
|
501
|
+
List of unique papers
|
|
502
|
+
"""
|
|
503
|
+
if not stats:
|
|
504
|
+
stats = {'duplicates_found': 0, 'duplicates_merged': 0}
|
|
505
|
+
|
|
506
|
+
unique_papers = []
|
|
507
|
+
paper_index = {} # Track papers by DOI and title
|
|
508
|
+
|
|
509
|
+
for paper in papers:
|
|
510
|
+
# Create keys for indexing
|
|
511
|
+
doi = paper.metadata.id.doi
|
|
512
|
+
doi_key = doi.lower() if doi else None
|
|
513
|
+
title = paper.metadata.basic.title
|
|
514
|
+
title_key = self._normalize_title(title) if title else None
|
|
515
|
+
|
|
516
|
+
is_duplicate = False
|
|
517
|
+
merge_with = None
|
|
518
|
+
|
|
519
|
+
# Check by DOI first (most reliable)
|
|
520
|
+
if doi_key and doi_key in paper_index:
|
|
521
|
+
is_duplicate = True
|
|
522
|
+
merge_with = paper_index[doi_key]
|
|
523
|
+
|
|
524
|
+
# Check by title if no DOI match
|
|
525
|
+
elif title_key and title_key in paper_index:
|
|
526
|
+
existing = paper_index[title_key]
|
|
527
|
+
if self._are_same_paper(existing, paper):
|
|
528
|
+
is_duplicate = True
|
|
529
|
+
merge_with = existing
|
|
530
|
+
|
|
531
|
+
if is_duplicate and merge_with:
|
|
532
|
+
stats['duplicates_found'] += 1
|
|
533
|
+
|
|
534
|
+
if strategy == "smart":
|
|
535
|
+
# Merge metadata from both papers
|
|
536
|
+
merged = self._merge_paper_metadata(merge_with, paper)
|
|
537
|
+
# Update the paper in our list
|
|
538
|
+
idx = unique_papers.index(merge_with)
|
|
539
|
+
unique_papers[idx] = merged
|
|
540
|
+
# Update index
|
|
541
|
+
if doi_key:
|
|
542
|
+
paper_index[doi_key] = merged
|
|
543
|
+
if title_key:
|
|
544
|
+
paper_index[title_key] = merged
|
|
545
|
+
stats['duplicates_merged'] += 1
|
|
546
|
+
# else: keep_first - do nothing
|
|
547
|
+
|
|
548
|
+
else:
|
|
549
|
+
# New unique paper
|
|
550
|
+
unique_papers.append(paper)
|
|
551
|
+
if doi_key:
|
|
552
|
+
paper_index[doi_key] = paper
|
|
553
|
+
if title_key:
|
|
554
|
+
paper_index[title_key] = paper
|
|
555
|
+
|
|
556
|
+
stats['unique_papers'] = len(unique_papers)
|
|
557
|
+
return unique_papers
|
|
558
|
+
|
|
559
|
+
def _normalize_title(self, title: str) -> str:
|
|
560
|
+
"""Normalize title for comparison."""
|
|
561
|
+
if not title:
|
|
562
|
+
return ""
|
|
563
|
+
# Remove punctuation, lowercase, collapse whitespace
|
|
564
|
+
import re
|
|
565
|
+
normalized = re.sub(r'[^\w\s]', '', title.lower())
|
|
566
|
+
normalized = ' '.join(normalized.split())
|
|
567
|
+
return normalized
|
|
568
|
+
|
|
569
|
+
def _are_same_paper(self, paper1: "Paper", paper2: "Paper") -> bool:
|
|
570
|
+
"""Determine if two papers are the same based on metadata."""
|
|
571
|
+
# If both have DOIs and they match
|
|
572
|
+
doi1 = paper1.metadata.id.doi
|
|
573
|
+
doi2 = paper2.metadata.id.doi
|
|
574
|
+
if doi1 and doi2:
|
|
575
|
+
return doi1.lower() == doi2.lower()
|
|
576
|
+
|
|
577
|
+
# Check title similarity
|
|
578
|
+
title1_raw = paper1.metadata.basic.title
|
|
579
|
+
title2_raw = paper2.metadata.basic.title
|
|
580
|
+
if title1_raw and title2_raw:
|
|
581
|
+
title1 = self._normalize_title(title1_raw)
|
|
582
|
+
title2 = self._normalize_title(title2_raw)
|
|
583
|
+
|
|
584
|
+
if title1 == title2:
|
|
585
|
+
# Check year (allow 1 year difference for online vs print)
|
|
586
|
+
year1 = paper1.metadata.basic.year
|
|
587
|
+
year2 = paper2.metadata.basic.year
|
|
588
|
+
if year1 and year2:
|
|
589
|
+
if abs(year1 - year2) <= 1:
|
|
590
|
+
return True
|
|
591
|
+
else:
|
|
592
|
+
# No year to compare, assume same if title matches
|
|
593
|
+
return True
|
|
594
|
+
|
|
595
|
+
return False
|
|
596
|
+
|
|
597
|
+
def _merge_paper_metadata(self, paper1: "Paper", paper2: "Paper") -> "Paper":
|
|
598
|
+
"""Merge metadata from two papers, keeping the most complete information."""
|
|
599
|
+
from copy import deepcopy
|
|
600
|
+
|
|
601
|
+
# Calculate completeness score for each paper
|
|
602
|
+
score1 = sum([
|
|
603
|
+
1 for field in [
|
|
604
|
+
paper1.metadata.id.doi, paper1.metadata.basic.abstract,
|
|
605
|
+
paper1.metadata.publication.journal,
|
|
606
|
+
paper1.metadata.citation_count.total,
|
|
607
|
+
paper1.metadata.url.pdfs, paper1.metadata.basic.authors
|
|
608
|
+
] if field
|
|
609
|
+
])
|
|
610
|
+
score2 = sum([
|
|
611
|
+
1 for field in [
|
|
612
|
+
paper2.metadata.id.doi, paper2.metadata.basic.abstract,
|
|
613
|
+
paper2.metadata.publication.journal,
|
|
614
|
+
paper2.metadata.citation_count.total,
|
|
615
|
+
paper2.metadata.url.pdfs, paper2.metadata.basic.authors
|
|
616
|
+
] if field
|
|
617
|
+
])
|
|
618
|
+
|
|
619
|
+
# Start with the more complete paper
|
|
620
|
+
if score1 >= score2:
|
|
621
|
+
merged = deepcopy(paper1)
|
|
622
|
+
donor = paper2
|
|
623
|
+
else:
|
|
624
|
+
merged = deepcopy(paper2)
|
|
625
|
+
donor = paper1
|
|
626
|
+
|
|
627
|
+
# Fill in missing fields from donor
|
|
628
|
+
if not merged.metadata.id.doi and donor.metadata.id.doi:
|
|
629
|
+
merged.metadata.set_doi(donor.metadata.id.doi)
|
|
630
|
+
if not merged.metadata.basic.abstract and donor.metadata.basic.abstract:
|
|
631
|
+
merged.metadata.basic.abstract = donor.metadata.basic.abstract
|
|
632
|
+
if not merged.metadata.publication.journal and donor.metadata.publication.journal:
|
|
633
|
+
merged.metadata.publication.journal = donor.metadata.publication.journal
|
|
634
|
+
if not merged.metadata.publication.publisher and donor.metadata.publication.publisher:
|
|
635
|
+
merged.metadata.publication.publisher = donor.metadata.publication.publisher
|
|
636
|
+
if not merged.metadata.publication.volume and donor.metadata.publication.volume:
|
|
637
|
+
merged.metadata.publication.volume = donor.metadata.publication.volume
|
|
638
|
+
if not merged.metadata.publication.issue and donor.metadata.publication.issue:
|
|
639
|
+
merged.metadata.publication.issue = donor.metadata.publication.issue
|
|
640
|
+
if not merged.metadata.publication.pages and donor.metadata.publication.pages:
|
|
641
|
+
merged.metadata.publication.pages = donor.metadata.publication.pages
|
|
642
|
+
# Merge PDF URLs (union)
|
|
643
|
+
for donor_pdf in donor.metadata.url.pdfs:
|
|
644
|
+
if not any(p.get("url") == donor_pdf.get("url") for p in merged.metadata.url.pdfs):
|
|
645
|
+
merged.metadata.url.pdfs.append(donor_pdf)
|
|
646
|
+
if not merged.metadata.url.publisher and donor.metadata.url.publisher:
|
|
647
|
+
merged.metadata.url.publisher = donor.metadata.url.publisher
|
|
648
|
+
|
|
649
|
+
# Take maximum citation count
|
|
650
|
+
donor_cc = donor.metadata.citation_count.total or 0
|
|
651
|
+
merged_cc = merged.metadata.citation_count.total or 0
|
|
652
|
+
|
|
653
|
+
if donor_cc > merged_cc:
|
|
654
|
+
merged.metadata.citation_count.total = donor_cc
|
|
655
|
+
|
|
656
|
+
# Merge authors (union, preserving order)
|
|
657
|
+
if donor.metadata.basic.authors and not merged.metadata.basic.authors:
|
|
658
|
+
merged.metadata.basic.authors = donor.metadata.basic.authors
|
|
659
|
+
elif donor.metadata.basic.authors and merged.metadata.basic.authors:
|
|
660
|
+
# Add unique authors from donor
|
|
661
|
+
for author in donor.metadata.basic.authors:
|
|
662
|
+
if author not in merged.metadata.basic.authors:
|
|
663
|
+
merged.metadata.basic.authors.append(author)
|
|
664
|
+
|
|
665
|
+
# Merge keywords (union)
|
|
666
|
+
donor_keywords = donor.metadata.basic.keywords
|
|
667
|
+
merged_keywords = merged.metadata.basic.keywords
|
|
668
|
+
if donor_keywords:
|
|
669
|
+
if merged_keywords:
|
|
670
|
+
all_keywords = list(set(merged_keywords + donor_keywords))
|
|
671
|
+
merged.metadata.basic.keywords = sorted(all_keywords)
|
|
672
|
+
else:
|
|
673
|
+
merged.metadata.basic.keywords = donor_keywords
|
|
674
|
+
|
|
675
|
+
return merged
|
|
676
|
+
|
|
677
|
+
def papers_to_bibtex_with_sources(
    self,
    papers: Union[List["Paper"], "Papers"],
    output_path: Union[str, Path],
    source_files: Optional[List[Path]] = None,
    file_papers: Optional[Dict[str, List["Paper"]]] = None,
    stats: Optional[Dict] = None,
) -> str:
    """Save papers to BibTeX with source file comments and SciTeX header.

    The output starts with a comment header (generation time, optional
    source-file list, optional merge statistics). When ``file_papers`` is
    given, entries are grouped into one commented section per source: a
    paper goes into the first section whose source holds a paper with the
    same title, and every paper is written exactly once. Papers without a
    title, or whose title matches no source, are written to a trailing
    "Merged/Unassigned Entries" section.

    Args:
        papers: Papers collection to save (a list, or any object exposing
            a ``papers`` attribute)
        output_path: Path to save the BibTeX file; parent directories are
            created if missing
        source_files: List of source file paths (only their file names are
            written to the header)
        file_papers: Dict mapping source file names to their papers
        stats: Merge statistics; the keys read are ``total_input``,
            ``unique_papers``, ``duplicates_found`` and ``duplicates_merged``

    Returns:
        BibTeX content as string (the same text that is written to disk)
    """
    from datetime import datetime

    # Handle Papers object
    paper_list = papers.papers if hasattr(papers, "papers") else papers
    output_path = Path(output_path)

    rule = "% ============================================================"

    def _append_entry(paper):
        entry = self.paper_to_bibtex_entry(paper)
        bibtex_lines.append(self._format_bibtex_entry(entry))

    # Generate header
    bibtex_lines = [
        rule,
        "% SciTeX Scholar - Merged BibTeX File",
        f"% Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        "% Author: Yusuke Watanabe (ywatanabe@scitex.ai)",
        rule,
    ]

    if source_files:
        bibtex_lines.append("%")
        bibtex_lines.append("% Source Files:")
        for i, source_file in enumerate(source_files, 1):
            # Path() keeps this working when callers pass plain strings.
            bibtex_lines.append(f"% {i}. {Path(source_file).name}")

    if stats:
        bibtex_lines.append("%")
        bibtex_lines.append("% Merge Statistics:")
        bibtex_lines.append(f"% Total entries loaded: {stats.get('total_input', 0)}")
        bibtex_lines.append(f"% Unique entries: {stats.get('unique_papers', len(paper_list))}")
        bibtex_lines.append(f"% Duplicates found: {stats.get('duplicates_found', 0)}")
        if stats.get("duplicates_merged"):
            bibtex_lines.append(f"% Duplicates merged: {stats['duplicates_merged']}")

    bibtex_lines.append(rule)
    bibtex_lines.append("")

    # Group papers by source file if available
    if file_papers:
        # Positions in paper_list that were already written. Tracking by
        # position (not by title) guarantees each paper is emitted exactly
        # once: a title shared by several sources is not repeated, and two
        # distinct papers with the same title are both kept.
        written = set()

        for source_name, source_papers in file_papers.items():
            # Add section comment
            bibtex_lines.append("")
            bibtex_lines.append(rule)
            bibtex_lines.append(f"% Source: {source_name}.bib")
            bibtex_lines.append(f"% Entries: {len(source_papers)}")
            bibtex_lines.append(rule)
            bibtex_lines.append("")

            # Add papers from this source
            source_titles = {
                p.metadata.basic.title
                for p in source_papers
                if p.metadata.basic.title
            }
            for idx, paper in enumerate(paper_list):
                if idx in written:
                    continue
                title = paper.metadata.basic.title
                if title and title in source_titles:
                    _append_entry(paper)
                    written.add(idx)

        # Add any papers not assigned to a source (e.g., merged duplicates)
        unassigned = [
            paper for idx, paper in enumerate(paper_list) if idx not in written
        ]
        if unassigned:
            bibtex_lines.append("")
            bibtex_lines.append(rule)
            bibtex_lines.append("% Merged/Unassigned Entries")
            bibtex_lines.append(f"% Entries: {len(unassigned)}")
            bibtex_lines.append(rule)
            bibtex_lines.append("")
            for paper in unassigned:
                _append_entry(paper)
    else:
        # No source tracking, just convert all papers
        for paper in paper_list:
            _append_entry(paper)

    bibtex_content = "\n".join(bibtex_lines)

    # Save to file. BibTeX data routinely contains non-ASCII author names,
    # so do not depend on the platform's default encoding.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(bibtex_content, encoding="utf-8")
    logger.success(f"Saved merged BibTeX to {output_path}")

    return bibtex_content
|
|
788
|
+
|
|
789
|
+
def _format_bibtex_entry(self, entry: Dict) -> str:
|
|
790
|
+
"""Format a single BibTeX entry."""
|
|
791
|
+
lines = []
|
|
792
|
+
entry_type = entry["entry_type"]
|
|
793
|
+
key = entry["key"]
|
|
794
|
+
fields = entry["fields"]
|
|
795
|
+
|
|
796
|
+
lines.append(f"@{entry_type}{{{key},")
|
|
797
|
+
for field, value in fields.items():
|
|
798
|
+
# Escape special characters in BibTeX
|
|
799
|
+
value = str(value).replace("{", "\\{").replace("}", "\\}")
|
|
800
|
+
lines.append(f" {field} = {{{value}}},")
|
|
801
|
+
lines.append("}\n")
|
|
802
|
+
|
|
803
|
+
return "\n".join(lines)
|
|
804
|
+
|
|
805
|
+
# =========================================================================
|
|
806
|
+
# Bibliography Directory Management
|
|
807
|
+
# =========================================================================
|
|
808
|
+
|
|
809
|
+
def setup_project_bibliography(
    self,
    project: str,
    bibtex_files: Optional[List[Union[str, Path]]] = None,
) -> Optional[Path]:
    """Setup info/bibliography directory structure for a project.

    Creates:
    - info/bibliography/
    - info/bibliography/*.bib (symlinks to source files)
    - info/bibliography/combined.bib (merged unique entries)
    - info/{project}.bib -> bibliography/combined.bib

    Args:
        project: Project name
        bibtex_files: Optional list of BibTeX files to include

    Returns:
        Whatever ``update_combined_bibliography`` returns: the path to
        combined.bib, or None when there was nothing to merge

    Raises:
        ValueError: If no config is set on this instance
    """
    if not self.config:
        raise ValueError("Config required for project bibliography management")

    # Get project directory
    project_dir = self.config.path_manager.get_library_project_dir(project)
    bib_dir = project_dir / "info" / "bibliography"
    bib_dir.mkdir(parents=True, exist_ok=True)

    logger.info(f"Setting up bibliography for project: {project}")

    # Link provided BibTeX files
    for bib_file in bibtex_files or []:
        bib_file = Path(bib_file)
        if not bib_file.exists():
            # Report skipped inputs instead of ignoring them silently.
            logger.warning(f"BibTeX file not found, skipping: {bib_file}")
            continue

        link_name = bib_dir / f"{bib_file.stem}.bib"
        # exists() follows symlinks, so a dangling link reports False and
        # symlink_to() would then fail with FileExistsError. Remove the
        # stale link first so it can be re-created.
        if link_name.is_symlink() and not link_name.exists():
            link_name.unlink()
        if not link_name.exists():
            link_name.symlink_to(bib_file.absolute())
            logger.info(f"Linked: {link_name.name} -> {bib_file}")

    # Merge all BibTeX files in bibliography directory
    combined_path = self.update_combined_bibliography(project)

    # Create convenience symlink at project root. is_symlink() is checked
    # as well because exists() is False for a dangling link.
    project_bib_link = project_dir / "info" / f"{project}.bib"
    if project_bib_link.exists() or project_bib_link.is_symlink():
        project_bib_link.unlink()
    project_bib_link.symlink_to("bibliography/combined.bib")
    logger.success(f"Created {project}.bib -> bibliography/combined.bib")

    return combined_path
|
|
860
|
+
|
|
861
|
+
def update_combined_bibliography(self, project: str) -> Optional[Path]:
    """Update combined.bib with all BibTeX files in bibliography directory.

    Every ``*.bib`` file in ``info/bibliography`` except ``combined.bib``
    and ``merged.bib`` is passed to ``merge_bibtex_files`` with the
    "smart" dedup strategy, writing the result to ``combined.bib``.

    Args:
        project: Project name

    Returns:
        Path to updated combined.bib, or None when the bibliography
        directory does not exist or holds no usable BibTeX files

    Raises:
        ValueError: If no config is set on this instance
    """
    if not self.config:
        raise ValueError("Config required for project bibliography management")

    project_dir = self.config.path_manager.get_library_project_dir(project)
    bib_dir = project_dir / "info" / "bibliography"

    if not bib_dir.exists():
        logger.warning(f"Bibliography directory not found: {bib_dir}")
        return None

    # Find all BibTeX files (excluding combined.bib itself). glob() yields
    # entries in arbitrary order, so sort to keep the merge reproducible.
    excluded_names = {"combined.bib", "merged.bib"}
    bib_files = []
    for candidate in sorted(bib_dir.glob("*.bib")):
        if candidate.name in excluded_names:
            continue
        if not candidate.exists():
            # Dangling symlink: the linked source file no longer exists.
            logger.warning(f"Skipping broken BibTeX link: {candidate.name}")
            continue
        bib_files.append(candidate)

    if not bib_files:
        logger.warning("No BibTeX files found in bibliography directory")
        return None

    logger.info(f"Merging {len(bib_files)} BibTeX files...")

    # Merge files
    combined_path = bib_dir / "combined.bib"
    merged_papers = self.merge_bibtex_files(
        bib_files,
        output_path=combined_path,
        dedup_strategy="smart",
    )

    logger.success(
        f"Updated combined.bib: {len(merged_papers)} unique papers "
        f"from {len(bib_files)} files"
    )

    return combined_path
|
|
906
|
+
|
|
907
|
+
def export_project_bibliography(
    self,
    project: str,
    output_path: Optional[Union[str, Path]] = None,
    include_all_entries: bool = True,
) -> Optional[Path]:
    """Export all papers from project library to BibTeX file.

    This creates a BibTeX file from ALL papers in the project library,
    not just from existing BibTeX files. Useful for exporting the complete
    project bibliography after downloads and enrichment.

    Papers are discovered by following the symlinks found directly in the
    project directory and loading ``metadata.json`` from each link target.
    Entries that cannot be loaded are logged and skipped. After writing,
    ``update_combined_bibliography`` is called for the project.

    Args:
        project: Project name
        output_path: Optional output path (default: info/bibliography/library_export.bib)
        include_all_entries: If True, export all papers; if False, only papers with PDFs

    Returns:
        Path to exported BibTeX file, or None when no papers were found

    Raises:
        ValueError: If no config is set on this instance
    """
    if not self.config:
        raise ValueError("Config required for project bibliography export")

    project_dir = self.config.path_manager.get_library_project_dir(project)

    # Default output path
    if output_path is None:
        bib_dir = project_dir / "info" / "bibliography"
        bib_dir.mkdir(parents=True, exist_ok=True)
        output_path = bib_dir / "library_export.bib"
    else:
        output_path = Path(output_path)

    logger.info(f"Exporting project bibliography: {project}")

    # Collect all papers from project symlinks
    from datetime import datetime

    from ..core.Paper import Paper

    papers = []

    # iterdir() yields entries in arbitrary order; sort so repeated exports
    # of an unchanged library produce the same file.
    for item in sorted(project_dir.iterdir()):
        if not item.is_symlink():
            continue

        # Best-effort per entry: one unreadable paper must not abort the
        # whole export, so failures are logged and the entry is skipped.
        try:
            # Resolve symlink to master directory
            master_path = item.resolve()
            if not master_path.exists():
                logger.warning(f"Broken symlink: {item.name}")
                continue

            # Load metadata.json
            metadata_file = master_path / "metadata.json"
            if not metadata_file.exists():
                logger.warning(f"No metadata: {master_path.name}")
                continue

            # Check for PDF if filtering
            if not include_all_entries and not any(master_path.glob("*.pdf")):
                continue

            # Load paper
            paper = Paper.from_file(metadata_file)
            if paper:
                papers.append(paper)

        except Exception as e:
            logger.warning(f"Error loading {item.name}: {e}")
            continue

    logger.info(f"Found {len(papers)} papers in project library")

    if not papers:
        logger.warning("No papers found to export")
        return None

    # Save with project info header
    rule = "% ============================================================"
    bibtex_content = [
        rule,
        "% SciTeX Scholar - Project Library Export",
        f"% Project: {project}",
        f"% Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"% Entries: {len(papers)}",
        f"% Filter: {'All papers' if include_all_entries else 'Papers with PDFs only'}",
        rule,
        "",
    ]

    # Add papers
    for paper in papers:
        entry = self.paper_to_bibtex_entry(paper)
        bibtex_content.append(self._format_bibtex_entry(entry))

    # Write to file with an explicit encoding; author names and titles
    # are frequently non-ASCII.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text("\n".join(bibtex_content), encoding="utf-8")

    logger.success(f"Exported {len(papers)} papers to: {output_path}")

    # Update combined.bib to include this export
    self.update_combined_bibliography(project)

    return output_path
|
|
1017
|
+
|
|
1018
|
+
# EOF
|