scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +53 -15
- scitex/__main__.py +72 -26
- scitex/__version__.py +1 -1
- scitex/_sh.py +145 -23
- scitex/ai/__init__.py +30 -16
- scitex/ai/_gen_ai/_Anthropic.py +5 -7
- scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
- scitex/ai/_gen_ai/_DeepSeek.py +10 -2
- scitex/ai/_gen_ai/_Google.py +2 -2
- scitex/ai/_gen_ai/_Llama.py +2 -2
- scitex/ai/_gen_ai/_OpenAI.py +2 -2
- scitex/ai/_gen_ai/_PARAMS.py +51 -65
- scitex/ai/_gen_ai/_Perplexity.py +2 -2
- scitex/ai/_gen_ai/__init__.py +25 -14
- scitex/ai/_gen_ai/_format_output_func.py +4 -4
- scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
- scitex/ai/classification/CrossValidationExperiment.py +374 -0
- scitex/ai/classification/__init__.py +43 -4
- scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ai/classification/reporters/__init__.py +11 -0
- scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ai/classification/timeseries/__init__.py +39 -0
- scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ai/clustering/_umap.py +2 -2
- scitex/ai/feature_extraction/vit.py +1 -0
- scitex/ai/feature_selection/__init__.py +30 -0
- scitex/ai/feature_selection/feature_selection.py +364 -0
- scitex/ai/loss/multi_task_loss.py +1 -1
- scitex/ai/metrics/__init__.py +51 -4
- scitex/ai/metrics/_calc_bacc.py +61 -0
- scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ai/metrics/_calc_clf_report.py +78 -0
- scitex/ai/metrics/_calc_conf_mat.py +93 -0
- scitex/ai/metrics/_calc_feature_importance.py +183 -0
- scitex/ai/metrics/_calc_mcc.py +61 -0
- scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ai/metrics/_calc_roc_auc.py +110 -0
- scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
- scitex/ai/metrics/_normalize_labels.py +83 -0
- scitex/ai/plt/__init__.py +47 -8
- scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
- scitex/ai/plt/_plot_feature_importance.py +323 -0
- scitex/ai/plt/_plot_learning_curve.py +345 -0
- scitex/ai/plt/_plot_optuna_study.py +225 -0
- scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ai/plt/_plot_roc_curve.py +255 -0
- scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
- scitex/ai/training/__init__.py +2 -2
- scitex/ai/utils/grid_search.py +3 -3
- scitex/benchmark/__init__.py +52 -0
- scitex/benchmark/benchmark.py +400 -0
- scitex/benchmark/monitor.py +370 -0
- scitex/benchmark/profiler.py +297 -0
- scitex/browser/__init__.py +48 -0
- scitex/browser/automation/CookieHandler.py +216 -0
- scitex/browser/automation/__init__.py +7 -0
- scitex/browser/collaboration/__init__.py +55 -0
- scitex/browser/collaboration/auth_helpers.py +94 -0
- scitex/browser/collaboration/collaborative_agent.py +136 -0
- scitex/browser/collaboration/credential_manager.py +188 -0
- scitex/browser/collaboration/interactive_panel.py +400 -0
- scitex/browser/collaboration/persistent_browser.py +170 -0
- scitex/browser/collaboration/shared_session.py +383 -0
- scitex/browser/collaboration/standard_interactions.py +246 -0
- scitex/browser/collaboration/visual_feedback.py +181 -0
- scitex/browser/core/BrowserMixin.py +326 -0
- scitex/browser/core/ChromeProfileManager.py +446 -0
- scitex/browser/core/__init__.py +9 -0
- scitex/browser/debugging/__init__.py +18 -0
- scitex/browser/debugging/_browser_logger.py +657 -0
- scitex/browser/debugging/_highlight_element.py +143 -0
- scitex/browser/debugging/_show_grid.py +154 -0
- scitex/browser/interaction/__init__.py +24 -0
- scitex/browser/interaction/click_center.py +149 -0
- scitex/browser/interaction/click_with_fallbacks.py +206 -0
- scitex/browser/interaction/close_popups.py +498 -0
- scitex/browser/interaction/fill_with_fallbacks.py +209 -0
- scitex/browser/pdf/__init__.py +14 -0
- scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
- scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
- scitex/browser/remote/CaptchaHandler.py +434 -0
- scitex/browser/remote/ZenRowsAPIClient.py +347 -0
- scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
- scitex/browser/remote/__init__.py +11 -0
- scitex/browser/stealth/HumanBehavior.py +344 -0
- scitex/browser/stealth/StealthManager.py +1008 -0
- scitex/browser/stealth/__init__.py +9 -0
- scitex/browser/template.py +122 -0
- scitex/capture/__init__.py +110 -0
- scitex/capture/__main__.py +25 -0
- scitex/capture/capture.py +848 -0
- scitex/capture/cli.py +233 -0
- scitex/capture/gif.py +344 -0
- scitex/capture/mcp_server.py +961 -0
- scitex/capture/session.py +70 -0
- scitex/capture/utils.py +705 -0
- scitex/cli/__init__.py +17 -0
- scitex/cli/cloud.py +447 -0
- scitex/cli/main.py +42 -0
- scitex/cli/scholar.py +280 -0
- scitex/context/_suppress_output.py +5 -3
- scitex/db/__init__.py +30 -3
- scitex/db/__main__.py +75 -0
- scitex/db/_check_health.py +381 -0
- scitex/db/_delete_duplicates.py +25 -386
- scitex/db/_inspect.py +335 -114
- scitex/db/_inspect_optimized.py +301 -0
- scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
- scitex/db/_postgresql/__init__.py +6 -0
- scitex/db/_sqlite3/_SQLite3.py +210 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
- scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
- scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
- scitex/db/_sqlite3/__init__.py +7 -0
- scitex/db/_sqlite3/_delete_duplicates.py +274 -0
- scitex/decorators/__init__.py +2 -0
- scitex/decorators/_cache_disk.py +13 -5
- scitex/decorators/_cache_disk_async.py +49 -0
- scitex/decorators/_deprecated.py +175 -10
- scitex/decorators/_timeout.py +1 -1
- scitex/dev/_analyze_code_flow.py +2 -2
- scitex/dict/_DotDict.py +73 -15
- scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
- scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
- scitex/dict/__init__.py +2 -0
- scitex/dict/_flatten.py +27 -0
- scitex/dsp/_crop.py +2 -2
- scitex/dsp/_demo_sig.py +2 -2
- scitex/dsp/_detect_ripples.py +2 -2
- scitex/dsp/_hilbert.py +2 -2
- scitex/dsp/_listen.py +6 -6
- scitex/dsp/_modulation_index.py +2 -2
- scitex/dsp/_pac.py +1 -1
- scitex/dsp/_psd.py +2 -2
- scitex/dsp/_resample.py +2 -1
- scitex/dsp/_time.py +3 -2
- scitex/dsp/_wavelet.py +3 -2
- scitex/dsp/add_noise.py +2 -2
- scitex/dsp/example.py +1 -0
- scitex/dsp/filt.py +10 -9
- scitex/dsp/template.py +3 -2
- scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
- scitex/dsp/utils/pac.py +2 -2
- scitex/dt/_normalize_timestamp.py +432 -0
- scitex/errors.py +572 -0
- scitex/gen/_DimHandler.py +2 -2
- scitex/gen/__init__.py +37 -7
- scitex/gen/_deprecated_close.py +80 -0
- scitex/gen/_deprecated_start.py +26 -0
- scitex/gen/_detect_environment.py +152 -0
- scitex/gen/_detect_notebook_path.py +169 -0
- scitex/gen/_embed.py +6 -2
- scitex/gen/_get_notebook_path.py +257 -0
- scitex/gen/_less.py +1 -1
- scitex/gen/_list_packages.py +2 -2
- scitex/gen/_norm.py +44 -9
- scitex/gen/_norm_cache.py +269 -0
- scitex/gen/_src.py +3 -5
- scitex/gen/_title_case.py +3 -3
- scitex/io/__init__.py +28 -6
- scitex/io/_glob.py +13 -7
- scitex/io/_load.py +108 -21
- scitex/io/_load_cache.py +303 -0
- scitex/io/_load_configs.py +40 -15
- scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
- scitex/io/_load_modules/_ZarrExplorer.py +114 -0
- scitex/io/_load_modules/_bibtex.py +207 -0
- scitex/io/_load_modules/_hdf5.py +53 -178
- scitex/io/_load_modules/_json.py +5 -3
- scitex/io/_load_modules/_pdf.py +871 -16
- scitex/io/_load_modules/_sqlite3.py +15 -0
- scitex/io/_load_modules/_txt.py +41 -12
- scitex/io/_load_modules/_yaml.py +4 -3
- scitex/io/_load_modules/_zarr.py +126 -0
- scitex/io/_save.py +429 -171
- scitex/io/_save_modules/__init__.py +6 -0
- scitex/io/_save_modules/_bibtex.py +194 -0
- scitex/io/_save_modules/_csv.py +8 -4
- scitex/io/_save_modules/_excel.py +174 -15
- scitex/io/_save_modules/_hdf5.py +251 -226
- scitex/io/_save_modules/_image.py +1 -3
- scitex/io/_save_modules/_json.py +49 -4
- scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
- scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
- scitex/io/_save_modules/_tex.py +277 -0
- scitex/io/_save_modules/_yaml.py +42 -3
- scitex/io/_save_modules/_zarr.py +160 -0
- scitex/io/utils/__init__.py +20 -0
- scitex/io/utils/h5_to_zarr.py +616 -0
- scitex/linalg/_geometric_median.py +6 -2
- scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
- scitex/logging/__init__.py +122 -0
- scitex/logging/_config.py +158 -0
- scitex/logging/_context.py +103 -0
- scitex/logging/_formatters.py +128 -0
- scitex/logging/_handlers.py +64 -0
- scitex/logging/_levels.py +35 -0
- scitex/logging/_logger.py +163 -0
- scitex/logging/_print_capture.py +95 -0
- scitex/ml/__init__.py +69 -0
- scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
- scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
- scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
- scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
- scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
- scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
- scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
- scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
- scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
- scitex/ml/_gen_ai/__init__.py +43 -0
- scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
- scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
- scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
- scitex/ml/activation/__init__.py +8 -0
- scitex/ml/activation/_define.py +11 -0
- scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
- scitex/ml/classification/CrossValidationExperiment.py +374 -0
- scitex/ml/classification/__init__.py +46 -0
- scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ml/classification/reporters/__init__.py +11 -0
- scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ml/classification/timeseries/__init__.py +39 -0
- scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ml/clustering/__init__.py +11 -0
- scitex/ml/clustering/_pca.py +115 -0
- scitex/ml/clustering/_umap.py +376 -0
- scitex/ml/feature_extraction/__init__.py +56 -0
- scitex/ml/feature_extraction/vit.py +149 -0
- scitex/ml/feature_selection/__init__.py +30 -0
- scitex/ml/feature_selection/feature_selection.py +364 -0
- scitex/ml/loss/_L1L2Losses.py +34 -0
- scitex/ml/loss/__init__.py +12 -0
- scitex/ml/loss/multi_task_loss.py +47 -0
- scitex/ml/metrics/__init__.py +56 -0
- scitex/ml/metrics/_calc_bacc.py +61 -0
- scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ml/metrics/_calc_clf_report.py +78 -0
- scitex/ml/metrics/_calc_conf_mat.py +93 -0
- scitex/ml/metrics/_calc_feature_importance.py +183 -0
- scitex/ml/metrics/_calc_mcc.py +61 -0
- scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ml/metrics/_calc_roc_auc.py +110 -0
- scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ml/metrics/_calc_silhouette_score.py +503 -0
- scitex/ml/metrics/_normalize_labels.py +83 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
- scitex/ml/optim/__init__.py +13 -0
- scitex/ml/optim/_get_set.py +31 -0
- scitex/ml/optim/_optimizers.py +71 -0
- scitex/ml/plt/__init__.py +60 -0
- scitex/ml/plt/_plot_conf_mat.py +663 -0
- scitex/ml/plt/_plot_feature_importance.py +323 -0
- scitex/ml/plt/_plot_learning_curve.py +345 -0
- scitex/ml/plt/_plot_optuna_study.py +225 -0
- scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ml/plt/_plot_roc_curve.py +255 -0
- scitex/ml/sk/__init__.py +11 -0
- scitex/ml/sk/_clf.py +58 -0
- scitex/ml/sk/_to_sktime.py +100 -0
- scitex/ml/sklearn/__init__.py +26 -0
- scitex/ml/sklearn/clf.py +58 -0
- scitex/ml/sklearn/to_sktime.py +100 -0
- scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
- scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
- scitex/ml/training/__init__.py +7 -0
- scitex/ml/utils/__init__.py +22 -0
- scitex/ml/utils/_check_params.py +50 -0
- scitex/ml/utils/_default_dataset.py +46 -0
- scitex/ml/utils/_format_samples_for_sktime.py +26 -0
- scitex/ml/utils/_label_encoder.py +134 -0
- scitex/ml/utils/_merge_labels.py +22 -0
- scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
- scitex/ml/utils/_under_sample.py +51 -0
- scitex/ml/utils/_verify_n_gpus.py +16 -0
- scitex/ml/utils/grid_search.py +148 -0
- scitex/nn/_BNet.py +15 -9
- scitex/nn/_Filters.py +2 -2
- scitex/nn/_ModulationIndex.py +2 -2
- scitex/nn/_PAC.py +1 -1
- scitex/nn/_Spectrogram.py +12 -3
- scitex/nn/__init__.py +9 -10
- scitex/path/__init__.py +18 -0
- scitex/path/_clean.py +4 -0
- scitex/path/_find.py +9 -4
- scitex/path/_symlink.py +348 -0
- scitex/path/_version.py +4 -3
- scitex/pd/__init__.py +2 -0
- scitex/pd/_get_unique.py +99 -0
- scitex/plt/__init__.py +114 -5
- scitex/plt/_subplots/_AxesWrapper.py +1 -3
- scitex/plt/_subplots/_AxisWrapper.py +7 -3
- scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
- scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
- scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
- scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
- scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
- scitex/plt/_subplots/_FigWrapper.py +62 -6
- scitex/plt/_subplots/_export_as_csv.py +43 -27
- scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
- scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
- scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
- scitex/plt/ax/_style/_hide_spines.py +1 -3
- scitex/plt/ax/_style/_rotate_labels.py +180 -76
- scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
- scitex/plt/ax/_style/_set_meta.py +11 -4
- scitex/plt/ax/_style/_set_supxyt.py +3 -3
- scitex/plt/ax/_style/_set_xyt.py +3 -3
- scitex/plt/ax/_style/_share_axes.py +2 -2
- scitex/plt/color/__init__.py +4 -4
- scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
- scitex/plt/utils/_configure_mpl.py +99 -86
- scitex/plt/utils/_histogram_utils.py +1 -3
- scitex/plt/utils/_is_valid_axis.py +1 -3
- scitex/plt/utils/_scitex_config.py +1 -0
- scitex/repro/__init__.py +75 -0
- scitex/{reproduce → repro}/_gen_ID.py +1 -1
- scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
- scitex/repro_rng/_RandomStateManager.py +590 -0
- scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/repro_rng/__init__.py +39 -0
- scitex/reproduce/__init__.py +25 -13
- scitex/reproduce/_hash_array.py +22 -0
- scitex/resource/_get_processor_usages.py +4 -4
- scitex/resource/_get_specs.py +2 -2
- scitex/resource/_log_processor_usages.py +2 -2
- scitex/rng/_RandomStateManager.py +590 -0
- scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/rng/__init__.py +39 -0
- scitex/scholar/__init__.py +309 -19
- scitex/scholar/__main__.py +319 -0
- scitex/scholar/auth/ScholarAuthManager.py +308 -0
- scitex/scholar/auth/__init__.py +12 -0
- scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
- scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
- scitex/scholar/auth/core/StrategyResolver.py +309 -0
- scitex/scholar/auth/core/__init__.py +16 -0
- scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
- scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
- scitex/scholar/auth/gateway/__init__.py +38 -0
- scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
- scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
- scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
- scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
- scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
- scitex/scholar/auth/providers/__init__.py +18 -0
- scitex/scholar/auth/session/AuthCacheManager.py +189 -0
- scitex/scholar/auth/session/SessionManager.py +159 -0
- scitex/scholar/auth/session/__init__.py +11 -0
- scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
- scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
- scitex/scholar/auth/sso/SSOAutomator.py +180 -0
- scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
- scitex/scholar/auth/sso/__init__.py +15 -0
- scitex/scholar/browser/ScholarBrowserManager.py +705 -0
- scitex/scholar/browser/__init__.py +38 -0
- scitex/scholar/browser/utils/__init__.py +13 -0
- scitex/scholar/browser/utils/click_and_wait.py +205 -0
- scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
- scitex/scholar/browser/utils/wait_redirects.py +732 -0
- scitex/scholar/config/PublisherRules.py +132 -0
- scitex/scholar/config/ScholarConfig.py +126 -0
- scitex/scholar/config/__init__.py +17 -0
- scitex/scholar/core/Paper.py +627 -0
- scitex/scholar/core/Papers.py +722 -0
- scitex/scholar/core/Scholar.py +1975 -0
- scitex/scholar/core/__init__.py +9 -0
- scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
- scitex/scholar/impact_factor/__init__.py +20 -0
- scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
- scitex/scholar/impact_factor/estimation/__init__.py +40 -0
- scitex/scholar/impact_factor/estimation/build_database.py +0 -0
- scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
- scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
- scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
- scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
- scitex/scholar/integration/__init__.py +59 -0
- scitex/scholar/integration/base.py +502 -0
- scitex/scholar/integration/mendeley/__init__.py +22 -0
- scitex/scholar/integration/mendeley/exporter.py +166 -0
- scitex/scholar/integration/mendeley/importer.py +236 -0
- scitex/scholar/integration/mendeley/linker.py +79 -0
- scitex/scholar/integration/mendeley/mapper.py +212 -0
- scitex/scholar/integration/zotero/__init__.py +27 -0
- scitex/scholar/integration/zotero/__main__.py +264 -0
- scitex/scholar/integration/zotero/exporter.py +351 -0
- scitex/scholar/integration/zotero/importer.py +372 -0
- scitex/scholar/integration/zotero/linker.py +415 -0
- scitex/scholar/integration/zotero/mapper.py +286 -0
- scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
- scitex/scholar/metadata_engines/__init__.py +21 -0
- scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
- scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
- scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
- scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
- scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
- scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
- scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
- scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
- scitex/scholar/metadata_engines/individual/__init__.py +7 -0
- scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
- scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
- scitex/scholar/metadata_engines/utils/__init__.py +30 -0
- scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
- scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
- scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
- scitex/scholar/pdf_download/__init__.py +5 -0
- scitex/scholar/pdf_download/strategies/__init__.py +38 -0
- scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
- scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
- scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
- scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
- scitex/scholar/pdf_download/strategies/response_body.py +207 -0
- scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
- scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
- scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
- scitex/scholar/pipelines/__init__.py +49 -0
- scitex/scholar/storage/BibTeXHandler.py +1018 -0
- scitex/scholar/storage/PaperIO.py +468 -0
- scitex/scholar/storage/ScholarLibrary.py +182 -0
- scitex/scholar/storage/_DeduplicationManager.py +548 -0
- scitex/scholar/storage/_LibraryCacheManager.py +724 -0
- scitex/scholar/storage/_LibraryManager.py +1835 -0
- scitex/scholar/storage/__init__.py +28 -0
- scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
- scitex/scholar/url_finder/__init__.py +7 -0
- scitex/scholar/url_finder/strategies/__init__.py +33 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
- scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
- scitex/scholar/utils/__init__.py +22 -0
- scitex/scholar/utils/bibtex/__init__.py +9 -0
- scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
- scitex/scholar/utils/cleanup/__init__.py +8 -0
- scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
- scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
- scitex/scholar/utils/text/_TextNormalizer.py +407 -0
- scitex/scholar/utils/text/__init__.py +9 -0
- scitex/scholar/zotero/__init__.py +38 -0
- scitex/session/__init__.py +51 -0
- scitex/session/_lifecycle.py +736 -0
- scitex/session/_manager.py +102 -0
- scitex/session/template.py +122 -0
- scitex/stats/__init__.py +30 -26
- scitex/stats/correct/__init__.py +21 -0
- scitex/stats/correct/_correct_bonferroni.py +551 -0
- scitex/stats/correct/_correct_fdr.py +634 -0
- scitex/stats/correct/_correct_holm.py +548 -0
- scitex/stats/correct/_correct_sidak.py +499 -0
- scitex/stats/descriptive/__init__.py +85 -0
- scitex/stats/descriptive/_circular.py +540 -0
- scitex/stats/descriptive/_describe.py +219 -0
- scitex/stats/descriptive/_nan.py +518 -0
- scitex/stats/descriptive/_real.py +189 -0
- scitex/stats/effect_sizes/__init__.py +41 -0
- scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
- scitex/stats/effect_sizes/_cohens_d.py +342 -0
- scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
- scitex/stats/effect_sizes/_eta_squared.py +302 -0
- scitex/stats/effect_sizes/_prob_superiority.py +296 -0
- scitex/stats/posthoc/__init__.py +19 -0
- scitex/stats/posthoc/_dunnett.py +463 -0
- scitex/stats/posthoc/_games_howell.py +383 -0
- scitex/stats/posthoc/_tukey_hsd.py +367 -0
- scitex/stats/power/__init__.py +19 -0
- scitex/stats/power/_power.py +433 -0
- scitex/stats/template.py +119 -0
- scitex/stats/utils/__init__.py +62 -0
- scitex/stats/utils/_effect_size.py +985 -0
- scitex/stats/utils/_formatters.py +270 -0
- scitex/stats/utils/_normalizers.py +927 -0
- scitex/stats/utils/_power.py +433 -0
- scitex/stats_v01/_EffectSizeCalculator.py +488 -0
- scitex/stats_v01/_StatisticalValidator.py +411 -0
- scitex/stats_v01/__init__.py +60 -0
- scitex/stats_v01/_additional_tests.py +415 -0
- scitex/{stats → stats_v01}/_p2stars.py +19 -5
- scitex/stats_v01/_two_sample_tests.py +141 -0
- scitex/stats_v01/desc/__init__.py +83 -0
- scitex/stats_v01/desc/_circular.py +540 -0
- scitex/stats_v01/desc/_describe.py +219 -0
- scitex/stats_v01/desc/_nan.py +518 -0
- scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
- scitex/stats_v01/desc/_real.py +189 -0
- scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
- scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
- scitex/str/__init__.py +1 -3
- scitex/str/_clean_path.py +6 -2
- scitex/str/_latex_fallback.py +267 -160
- scitex/str/_parse.py +44 -36
- scitex/str/_printc.py +1 -3
- scitex/template/__init__.py +87 -0
- scitex/template/_create_project.py +267 -0
- scitex/template/create_pip_project.py +80 -0
- scitex/template/create_research.py +80 -0
- scitex/template/create_singularity.py +80 -0
- scitex/units.py +291 -0
- scitex/utils/_compress_hdf5.py +14 -3
- scitex/utils/_email.py +21 -2
- scitex/utils/_grid.py +6 -4
- scitex/utils/_notify.py +13 -10
- scitex/utils/_verify_scitex_format.py +589 -0
- scitex/utils/_verify_scitex_format_v01.py +370 -0
- scitex/utils/template.py +122 -0
- scitex/web/_search_pubmed.py +62 -16
- scitex-2.1.0.dist-info/LICENSE +21 -0
- scitex-2.1.0.dist-info/METADATA +677 -0
- scitex-2.1.0.dist-info/RECORD +919 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
- scitex-2.1.0.dist-info/entry_points.txt +3 -0
- scitex/ai/__Classifiers.py +0 -101
- scitex/ai/classification/classification_reporter.py +0 -1137
- scitex/ai/classification/classifiers.py +0 -101
- scitex/ai/classification_reporter.py +0 -1161
- scitex/ai/genai/__init__.py +0 -277
- scitex/ai/genai/anthropic_provider.py +0 -320
- scitex/ai/genai/anthropic_refactored.py +0 -109
- scitex/ai/genai/auth_manager.py +0 -200
- scitex/ai/genai/base_provider.py +0 -291
- scitex/ai/genai/chat_history.py +0 -307
- scitex/ai/genai/cost_tracker.py +0 -276
- scitex/ai/genai/deepseek_provider.py +0 -251
- scitex/ai/genai/google_provider.py +0 -228
- scitex/ai/genai/groq_provider.py +0 -248
- scitex/ai/genai/image_processor.py +0 -250
- scitex/ai/genai/llama_provider.py +0 -214
- scitex/ai/genai/mock_provider.py +0 -127
- scitex/ai/genai/model_registry.py +0 -304
- scitex/ai/genai/openai_provider.py +0 -293
- scitex/ai/genai/perplexity_provider.py +0 -205
- scitex/ai/genai/provider_base.py +0 -302
- scitex/ai/genai/provider_factory.py +0 -370
- scitex/ai/genai/response_handler.py +0 -235
- scitex/ai/layer/_Pass.py +0 -21
- scitex/ai/layer/__init__.py +0 -10
- scitex/ai/layer/_switch.py +0 -8
- scitex/ai/metrics/_bACC.py +0 -51
- scitex/ai/plt/_learning_curve.py +0 -194
- scitex/ai/plt/_optuna_study.py +0 -111
- scitex/ai/plt/aucs/__init__.py +0 -2
- scitex/ai/plt/aucs/example.py +0 -60
- scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
- scitex/ai/plt/aucs/roc_auc.py +0 -246
- scitex/ai/sampling/undersample.py +0 -29
- scitex/db/_SQLite3.py +0 -2136
- scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
- scitex/gen/_close.py +0 -222
- scitex/gen/_start.py +0 -451
- scitex/general/__init__.py +0 -5
- scitex/io/_load_modules/_db.py +0 -24
- scitex/life/__init__.py +0 -10
- scitex/life/_monitor_rain.py +0 -49
- scitex/reproduce/_fix_seeds.py +0 -45
- scitex/res/__init__.py +0 -5
- scitex/scholar/_local_search.py +0 -454
- scitex/scholar/_paper.py +0 -244
- scitex/scholar/_pdf_downloader.py +0 -325
- scitex/scholar/_search.py +0 -393
- scitex/scholar/_vector_search.py +0 -370
- scitex/scholar/_web_sources.py +0 -457
- scitex/stats/desc/__init__.py +0 -40
- scitex-2.0.0.dist-info/METADATA +0 -307
- scitex-2.0.0.dist-info/RECORD +0 -572
- scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
- /scitex/ai/{act → activation}/__init__.py +0 -0
- /scitex/ai/{act → activation}/_define.py +0 -0
- /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
- /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
- /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
- /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
- /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
- /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
- /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
- /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
- /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
|
@@ -1,15 +1,18 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
|
-
#
|
|
4
|
-
# File:
|
|
5
|
-
|
|
6
|
-
|
|
3
|
+
# Timestamp: "2025-09-11 05:49:14 (ywatanabe)"
|
|
4
|
+
# File: /ssh:sp:/home/ywatanabe/proj/scitex_repo/src/scitex/db/_sqlite3/_SQLite3Mixins/_QueryMixin.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
import os
|
|
8
|
+
__FILE__ = __file__
|
|
9
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
10
|
+
# ----------------------------------------
|
|
7
11
|
|
|
8
12
|
import sqlite3
|
|
9
13
|
from typing import List, Tuple
|
|
10
14
|
|
|
11
15
|
import pandas as pd
|
|
12
|
-
from .._BaseMixins._BaseQueryMixin import _BaseQueryMixin
|
|
13
16
|
|
|
14
17
|
|
|
15
18
|
class _QueryMixin:
|
|
@@ -18,34 +21,58 @@ class _QueryMixin:
|
|
|
18
21
|
def _sanitize_parameters(self, parameters):
|
|
19
22
|
"""Convert pandas Timestamp objects to strings"""
|
|
20
23
|
if isinstance(parameters, (list, tuple)):
|
|
21
|
-
return [
|
|
24
|
+
return [
|
|
25
|
+
str(p) if isinstance(p, pd.Timestamp) else p
|
|
26
|
+
for p in parameters
|
|
27
|
+
]
|
|
22
28
|
return parameters
|
|
23
29
|
|
|
24
30
|
def execute(self, query: str, parameters: Tuple = ()) -> None:
|
|
31
|
+
self.ensure_connection()
|
|
32
|
+
self._check_context_manager()
|
|
33
|
+
|
|
25
34
|
if not self.cursor:
|
|
26
35
|
raise ConnectionError("Database not connected")
|
|
27
36
|
|
|
28
37
|
if any(
|
|
29
38
|
keyword in query.upper()
|
|
30
|
-
for keyword in [
|
|
39
|
+
for keyword in [
|
|
40
|
+
"INSERT",
|
|
41
|
+
"UPDATE",
|
|
42
|
+
"DELETE",
|
|
43
|
+
"DROP",
|
|
44
|
+
"CREATE",
|
|
45
|
+
"ALTER",
|
|
46
|
+
]
|
|
31
47
|
):
|
|
32
48
|
self._check_writable()
|
|
33
49
|
|
|
34
50
|
try:
|
|
35
51
|
parameters = self._sanitize_parameters(parameters)
|
|
36
52
|
self.cursor.execute(query, parameters)
|
|
37
|
-
self.
|
|
53
|
+
if self.autocommit:
|
|
54
|
+
self.conn.commit()
|
|
55
|
+
self.cursor.execute("PRAGMA wal_checkpoint(PASSIVE)")
|
|
56
|
+
# self.cursor.execute("PRAGMA wal_checkpoint(FULL)")
|
|
38
57
|
return self.cursor
|
|
39
58
|
except sqlite3.Error as err:
|
|
40
59
|
raise sqlite3.Error(f"Query execution failed: {err}")
|
|
41
60
|
|
|
42
61
|
def executemany(self, query: str, parameters: List[Tuple]) -> None:
|
|
62
|
+
self.ensure_connection()
|
|
43
63
|
if not self.cursor:
|
|
44
64
|
raise ConnectionError("Database not connected")
|
|
45
65
|
|
|
46
66
|
if any(
|
|
47
67
|
keyword in query.upper()
|
|
48
|
-
for keyword in [
|
|
68
|
+
for keyword in [
|
|
69
|
+
"INSERT",
|
|
70
|
+
"UPDATE",
|
|
71
|
+
"DELETE",
|
|
72
|
+
"DROP",
|
|
73
|
+
"CREATE",
|
|
74
|
+
"ALTER",
|
|
75
|
+
]
|
|
49
76
|
):
|
|
50
77
|
self._check_writable()
|
|
51
78
|
|
|
@@ -57,6 +84,7 @@ class _QueryMixin:
|
|
|
57
84
|
raise sqlite3.Error(f"Batch query execution failed: {err}")
|
|
58
85
|
|
|
59
86
|
def executescript(self, script: str) -> None:
|
|
87
|
+
self.ensure_connection()
|
|
60
88
|
if not self.cursor:
|
|
61
89
|
raise ConnectionError("Database not connected")
|
|
62
90
|
|
|
@@ -79,5 +107,4 @@ class _QueryMixin:
|
|
|
79
107
|
except sqlite3.Error as err:
|
|
80
108
|
raise sqlite3.Error(f"Script execution failed: {err}")
|
|
81
109
|
|
|
82
|
-
|
|
83
110
|
# EOF
|
|
@@ -1,15 +1,26 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-09-08 12:00:38 (ywatanabe)"
|
|
4
|
+
# File: /ssh:sp:/home/ywatanabe/proj/scitex_repo/src/scitex/db/_sqlite3/_SQLite3Mixins/_RowMixin.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
import os
|
|
8
|
+
__FILE__ = __file__
|
|
9
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
10
|
+
# ----------------------------------------
|
|
11
|
+
|
|
3
12
|
# Time-stamp: "2024-11-25 01:38:17 (ywatanabe)"
|
|
4
|
-
|
|
13
|
+
|
|
14
|
+
from typing import Union
|
|
5
15
|
|
|
6
16
|
THIS_FILE = "/home/ywatanabe/proj/scitex_repo/src/scitex/db/_SQLite3Mixins/_RowMixin.py"
|
|
7
17
|
|
|
8
18
|
import sqlite3
|
|
9
|
-
from typing import List
|
|
10
|
-
|
|
19
|
+
from typing import List, Optional
|
|
20
|
+
|
|
11
21
|
import pandas as pd
|
|
12
|
-
|
|
22
|
+
|
|
23
|
+
from ..._BaseMixins._BaseRowMixin import _BaseRowMixin
|
|
13
24
|
|
|
14
25
|
|
|
15
26
|
class _RowMixin:
|
|
@@ -19,12 +30,27 @@ class _RowMixin:
|
|
|
19
30
|
self,
|
|
20
31
|
table_name: str,
|
|
21
32
|
columns: List[str] = None,
|
|
33
|
+
ids: Union[int, List[int], str] = "all",
|
|
22
34
|
where: str = None,
|
|
23
35
|
order_by: str = None,
|
|
24
36
|
limit: Optional[int] = None,
|
|
25
37
|
offset: Optional[int] = None,
|
|
26
38
|
return_as: str = "dataframe",
|
|
39
|
+
include_blobs: bool = True,
|
|
27
40
|
):
|
|
41
|
+
if not include_blobs:
|
|
42
|
+
if columns is None:
|
|
43
|
+
schema = self.get_table_schema(table_name)
|
|
44
|
+
columns = schema[
|
|
45
|
+
~schema["type"].str.contains("BLOB", case=False)
|
|
46
|
+
]["name"].tolist()
|
|
47
|
+
else:
|
|
48
|
+
schema = self.get_table_schema(table_name)
|
|
49
|
+
blob_columns = schema[
|
|
50
|
+
schema["type"].str.contains("BLOB", case=False)
|
|
51
|
+
]["name"].tolist()
|
|
52
|
+
columns = [col for col in columns if col not in blob_columns]
|
|
53
|
+
|
|
28
54
|
if columns is None:
|
|
29
55
|
columns_str = "*"
|
|
30
56
|
elif isinstance(columns, str):
|
|
@@ -32,6 +58,19 @@ class _RowMixin:
|
|
|
32
58
|
else:
|
|
33
59
|
columns_str = ", ".join(f'"{col}"' for col in columns)
|
|
34
60
|
|
|
61
|
+
# Handle ids parameter
|
|
62
|
+
if ids != "all":
|
|
63
|
+
if isinstance(ids, int):
|
|
64
|
+
id_where = f"id = {ids}"
|
|
65
|
+
else:
|
|
66
|
+
id_list = ",".join(map(str, ids))
|
|
67
|
+
id_where = f"id IN ({id_list})"
|
|
68
|
+
|
|
69
|
+
if where:
|
|
70
|
+
where = f"({where}) AND ({id_where})"
|
|
71
|
+
else:
|
|
72
|
+
where = id_where
|
|
73
|
+
|
|
35
74
|
try:
|
|
36
75
|
query_parts = [f"SELECT {columns_str} FROM {table_name}"]
|
|
37
76
|
|
|
@@ -47,7 +86,9 @@ class _RowMixin:
|
|
|
47
86
|
query = " ".join(query_parts)
|
|
48
87
|
self.cursor.execute(query)
|
|
49
88
|
|
|
50
|
-
column_names = [
|
|
89
|
+
column_names = [
|
|
90
|
+
description[0] for description in self.cursor.description
|
|
91
|
+
]
|
|
51
92
|
data = self.cursor.fetchall()
|
|
52
93
|
|
|
53
94
|
if return_as == "list":
|
|
@@ -71,5 +112,4 @@ class _RowMixin:
|
|
|
71
112
|
self.cursor.execute(query)
|
|
72
113
|
return self.cursor.fetchone()[0]
|
|
73
114
|
|
|
74
|
-
|
|
75
115
|
# EOF
|
|
@@ -1,19 +1,26 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-09-11 05:47:57 (ywatanabe)"
|
|
4
|
+
# File: /ssh:sp:/home/ywatanabe/proj/scitex_repo/src/scitex/db/_sqlite3/_SQLite3Mixins/_TableMixin.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
import os
|
|
8
|
+
__FILE__ = __file__
|
|
9
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
10
|
+
# ----------------------------------------
|
|
11
|
+
|
|
3
12
|
# Time-stamp: "2024-11-25 01:38:47 (ywatanabe)"
|
|
4
|
-
# File: ./scitex_repo/src/scitex/db/_SQLite3Mixins/_TableMixin.py
|
|
5
13
|
|
|
6
14
|
THIS_FILE = "/home/ywatanabe/proj/scitex_repo/src/scitex/db/_SQLite3Mixins/_TableMixin.py"
|
|
7
15
|
|
|
8
|
-
#!/usr/bin/env python3
|
|
9
|
-
# -*- coding: utf-8 -*-
|
|
10
16
|
# Time-stamp: "2024-11-11 19:13:19 (ywatanabe)"
|
|
11
|
-
# File: ./scitex_repo/src/scitex/db/_BaseSQLiteDB_modules/_TableMixin.py
|
|
12
17
|
|
|
13
18
|
import sqlite3
|
|
14
19
|
from typing import Any, Dict, List, Union
|
|
20
|
+
|
|
15
21
|
import pandas as pd
|
|
16
|
-
|
|
22
|
+
|
|
23
|
+
from ..._BaseMixins._BaseTableMixin import _BaseTableMixin
|
|
17
24
|
|
|
18
25
|
|
|
19
26
|
class _TableMixin:
|
|
@@ -38,6 +45,8 @@ class _TableMixin:
|
|
|
38
45
|
[
|
|
39
46
|
f"{col_name}_dtype TEXT DEFAULT 'unknown'",
|
|
40
47
|
f"{col_name}_shape TEXT DEFAULT 'unknown'",
|
|
48
|
+
f"{col_name}_is_compressed BOOLEAN DEFAULT FALSE",
|
|
49
|
+
f"{col_name}_hash TEXT DEFAULT NULL",
|
|
41
50
|
]
|
|
42
51
|
)
|
|
43
52
|
|
|
@@ -101,9 +110,7 @@ class _TableMixin:
|
|
|
101
110
|
return
|
|
102
111
|
|
|
103
112
|
try:
|
|
104
|
-
query =
|
|
105
|
-
f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_type}"
|
|
106
|
-
)
|
|
113
|
+
query = f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_type}"
|
|
107
114
|
if default_value is not None:
|
|
108
115
|
query += f" DEFAULT {default_value}"
|
|
109
116
|
self.execute(query)
|
|
@@ -121,6 +128,18 @@ class _TableMixin:
|
|
|
121
128
|
"TEXT",
|
|
122
129
|
default_value="'unknown'",
|
|
123
130
|
)
|
|
131
|
+
self.add_column(
|
|
132
|
+
table_name,
|
|
133
|
+
f"{column_name}_is_compressed",
|
|
134
|
+
"BOOLEAN",
|
|
135
|
+
default_value="FALSE",
|
|
136
|
+
)
|
|
137
|
+
self.add_column(
|
|
138
|
+
table_name,
|
|
139
|
+
f"{column_name}_hash",
|
|
140
|
+
"TEXT",
|
|
141
|
+
default_value="NULL",
|
|
142
|
+
)
|
|
124
143
|
|
|
125
144
|
except sqlite3.OperationalError as err:
|
|
126
145
|
raise ValueError(f"Failed to add column: {err}")
|
|
@@ -131,6 +150,29 @@ class _TableMixin:
|
|
|
131
150
|
columns: Union[str, List[str]],
|
|
132
151
|
if_exists: bool = True,
|
|
133
152
|
) -> None:
|
|
153
|
+
"""
|
|
154
|
+
DEPRECATED: Use the new drop_columns method from _DropMixin for better compatibility.
|
|
155
|
+
This method will be removed in a future version.
|
|
156
|
+
"""
|
|
157
|
+
import warnings
|
|
158
|
+
warnings.warn(
|
|
159
|
+
"TableMixin.drop_columns is deprecated. Use the enhanced drop_columns method "
|
|
160
|
+
"from DropMixin which handles SQLite version compatibility automatically.",
|
|
161
|
+
DeprecationWarning,
|
|
162
|
+
stacklevel=2
|
|
163
|
+
)
|
|
164
|
+
|
|
165
|
+
# Delegate to the new implementation if available
|
|
166
|
+
# Check all classes in MRO for the enhanced drop_columns method
|
|
167
|
+
for cls in self.__class__.__mro__:
|
|
168
|
+
if (hasattr(cls, 'drop_columns') and
|
|
169
|
+
hasattr(cls, '_supports_native_drop_column') and
|
|
170
|
+
cls.__name__ == '_DropMixin'):
|
|
171
|
+
# Call DropMixin's drop_columns directly
|
|
172
|
+
cls.drop_columns(self, table_name, columns, if_exists)
|
|
173
|
+
return
|
|
174
|
+
|
|
175
|
+
# Fallback to original implementation for compatibility
|
|
134
176
|
with self.transaction():
|
|
135
177
|
if isinstance(columns, str):
|
|
136
178
|
columns = [columns]
|
|
@@ -146,15 +188,19 @@ class _TableMixin:
|
|
|
146
188
|
return
|
|
147
189
|
|
|
148
190
|
# Drop multiple columns in a single ALTER TABLE statement
|
|
149
|
-
drop_clause = ", ".join(
|
|
191
|
+
drop_clause = ", ".join(
|
|
192
|
+
f"DROP COLUMN {col}" for col in columns_to_drop
|
|
193
|
+
)
|
|
150
194
|
self.execute(f"ALTER TABLE {table_name} {drop_clause}")
|
|
151
195
|
|
|
152
196
|
def get_table_names(self) -> List[str]:
|
|
197
|
+
self.ensure_connection()
|
|
153
198
|
query = "SELECT name FROM sqlite_master WHERE type='table'"
|
|
154
199
|
self.cursor.execute(query)
|
|
155
200
|
return [table[0] for table in self.cursor.fetchall()]
|
|
156
201
|
|
|
157
202
|
def get_table_schema(self, table_name: str) -> pd.DataFrame:
|
|
203
|
+
self.ensure_connection()
|
|
158
204
|
query = f"PRAGMA table_info({table_name})"
|
|
159
205
|
self.cursor.execute(query)
|
|
160
206
|
columns = ["cid", "name", "type", "notnull", "dflt_value", "pk"]
|
|
@@ -166,6 +212,7 @@ class _TableMixin:
|
|
|
166
212
|
return pk_col[0] if len(pk_col) > 0 else None
|
|
167
213
|
|
|
168
214
|
def get_table_stats(self, table_name: str) -> Dict[str, int]:
|
|
215
|
+
self.ensure_connection()
|
|
169
216
|
try:
|
|
170
217
|
pages = self.cursor.execute(f"PRAGMA page_count").fetchone()[0]
|
|
171
218
|
page_size = self.cursor.execute(f"PRAGMA page_size").fetchone()[0]
|
|
@@ -179,5 +226,4 @@ class _TableMixin:
|
|
|
179
226
|
except sqlite3.Error as err:
|
|
180
227
|
raise ValueError(f"Failed to get table size: {err}")
|
|
181
228
|
|
|
182
|
-
|
|
183
229
|
# EOF
|
|
@@ -1,11 +1,19 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-07-16 09:46:33 (ywatanabe)"
|
|
4
|
+
# File: /ssh:sp:/home/ywatanabe/proj/scitex_repo/src/scitex/db/_sqlite3/_SQLite3Mixins/__init__.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
import os
|
|
7
|
+
__FILE__ = __file__
|
|
8
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
9
|
+
# ----------------------------------------
|
|
3
10
|
# Time-stamp: "2024-11-12 09:29:50 (ywatanabe)"
|
|
4
|
-
# File: ./scitex_repo/src/scitex/db/_SQLite3Mixins/__init__.py
|
|
5
11
|
|
|
12
|
+
from ._ArrayMixin import _ArrayMixin
|
|
6
13
|
from ._BatchMixin import _BatchMixin
|
|
7
14
|
from ._BlobMixin import _BlobMixin
|
|
8
15
|
from ._ConnectionMixin import _ConnectionMixin
|
|
16
|
+
from ._ColumnMixin import _ColumnMixin
|
|
9
17
|
from ._ImportExportMixin import _ImportExportMixin
|
|
10
18
|
from ._IndexMixin import _IndexMixin
|
|
11
19
|
from ._MaintenanceMixin import _MaintenanceMixin
|
|
@@ -13,11 +21,14 @@ from ._QueryMixin import _QueryMixin
|
|
|
13
21
|
from ._RowMixin import _RowMixin
|
|
14
22
|
from ._TableMixin import _TableMixin
|
|
15
23
|
from ._TransactionMixin import _TransactionMixin
|
|
24
|
+
from ._GitMixin import _GitMixin
|
|
16
25
|
|
|
17
26
|
__all__ = [
|
|
27
|
+
"_ArrayMixin",
|
|
18
28
|
"_BatchMixin",
|
|
19
29
|
"_BlobMixin",
|
|
20
30
|
"_ConnectionMixin",
|
|
31
|
+
"_ColumnMixin",
|
|
21
32
|
"_ImportExportMixin",
|
|
22
33
|
"_IndexMixin",
|
|
23
34
|
"_MaintenanceMixin",
|
|
@@ -25,6 +36,7 @@ __all__ = [
|
|
|
25
36
|
"_RowMixin",
|
|
26
37
|
"_TableMixin",
|
|
27
38
|
"_TransactionMixin",
|
|
39
|
+
"_GitMixin",
|
|
28
40
|
]
|
|
29
41
|
|
|
30
|
-
# EOF
|
|
42
|
+
# EOF
|
|
@@ -0,0 +1,274 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Timestamp: "2025-07-16 14:00:04 (ywatanabe)"
|
|
4
|
+
# File: /ssh:sp:/home/ywatanabe/proj/scitex_repo/src/scitex/db/_sqlite3/_delete_duplicates.py
|
|
5
|
+
# ----------------------------------------
|
|
6
|
+
import os
|
|
7
|
+
__FILE__ = __file__
|
|
8
|
+
__DIR__ = os.path.dirname(__FILE__)
|
|
9
|
+
# ----------------------------------------
|
|
10
|
+
# Time-stamp: "2024-11-11 14:16:58 (ywatanabe)"
|
|
11
|
+
|
|
12
|
+
import sqlite3
|
|
13
|
+
from typing import List
|
|
14
|
+
from typing import Optional
|
|
15
|
+
from typing import Tuple
|
|
16
|
+
from typing import Union
|
|
17
|
+
|
|
18
|
+
import pandas as pd
|
|
19
|
+
|
|
20
|
+
"""
|
|
21
|
+
Functionality:
|
|
22
|
+
- Deletes duplicate entries from an SQLite database table
|
|
23
|
+
Input:
|
|
24
|
+
- SQLite database file path, table name, columns to consider for duplicates
|
|
25
|
+
Output:
|
|
26
|
+
- Updated SQLite database with duplicates removed
|
|
27
|
+
Prerequisites:
|
|
28
|
+
- sqlite3, pandas, tqdm, scitex
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _sort_db(
    cursor: sqlite3.Cursor, table_name: str, columns: List[str]
) -> None:
    """
    Rewrite a table so that its rows are physically stored in sorted order.

    The table is copied into ``<table_name>_temp`` with an ``ORDER BY`` clause,
    the original table is dropped, and the copy is renamed back.  The caller
    is responsible for committing the transaction.

    Parameters
    ----------
    cursor : sqlite3.Cursor
        The cursor object for executing SQL commands.
    table_name : str
        The name of the table to be sorted.
    columns : List[str]
        Column names to sort by, in order of priority.  A single column name
        given as a plain string is accepted as well.  When empty, the table
        is left untouched.

    Notes
    -----
    The copy is made with ``CREATE TABLE ... AS SELECT``, so the rebuilt table
    carries only column names and declared types; primary keys, indexes,
    defaults and other constraints of the original table are not reproduced.

    Example
    -------
    >>> conn = sqlite3.connect('example.db')
    >>> cursor = conn.cursor()
    >>> _sort_db(cursor, 'my_table', ['column1', 'column2'])
    >>> conn.commit()
    >>> conn.close()
    """
    # A bare string would otherwise be joined character by character.
    if isinstance(columns, str):
        columns = [columns]

    # Nothing to sort by: an empty ORDER BY clause is a syntax error.
    if not columns:
        return

    columns_str = ", ".join(columns)
    temp_table = f"{table_name}_temp"

    # A leftover temp table from an interrupted run would make CREATE fail.
    cursor.execute(f"DROP TABLE IF EXISTS {temp_table}")
    cursor.execute(
        f"CREATE TABLE {temp_table} AS SELECT * FROM {table_name} ORDER BY {columns_str}"
    )
    cursor.execute(f"DROP TABLE {table_name}")
    cursor.execute(f"ALTER TABLE {temp_table} RENAME TO {table_name}")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
# def _determine_columns(
|
|
66
|
+
# cursor: sqlite3.Cursor,
|
|
67
|
+
# table_name: str,
|
|
68
|
+
# columns: Union[str, List[str]],
|
|
69
|
+
# include_blob: bool,
|
|
70
|
+
# ) -> List[str]:
|
|
71
|
+
# cursor.execute(f"PRAGMA table_info({table_name})")
|
|
72
|
+
# table_info = cursor.fetchall()
|
|
73
|
+
# all_columns = [col[1] for col in table_info]
|
|
74
|
+
# column_types = {col[1]: col[2] for col in table_info}
|
|
75
|
+
|
|
76
|
+
# if columns == "all":
|
|
77
|
+
# columns = (
|
|
78
|
+
# all_columns
|
|
79
|
+
# if include_blob
|
|
80
|
+
# else [
|
|
81
|
+
# col
|
|
82
|
+
# for col in all_columns
|
|
83
|
+
# if column_types[col].lower() != "blob"
|
|
84
|
+
# ]
|
|
85
|
+
# )
|
|
86
|
+
# elif isinstance(columns, str):
|
|
87
|
+
# columns = [columns]
|
|
88
|
+
|
|
89
|
+
# columns_str = ", ".join(columns)
|
|
90
|
+
# print(f"Columns considered for duplicates: {columns_str}")
|
|
91
|
+
|
|
92
|
+
# return columns
|
|
93
|
+
|
|
94
|
+
def _determine_columns(
    cursor: sqlite3.Cursor,
    table_name: str,
    columns: Union[str, List[str]],
    include_blob: bool,
) -> List[str]:
    """
    Resolve the list of columns used to decide whether two rows are duplicates.

    Parameters
    ----------
    cursor : sqlite3.Cursor
        Cursor on the database that holds ``table_name``.
    table_name : str
        Table whose schema is inspected.
    columns : Union[str, List[str]]
        ``"all"`` selects every column of the table (minus the exclusions
        below); any other string is treated as a single column name; a list
        is returned unchanged.
    include_blob : bool
        Only consulted when ``columns == "all"``.  When False, columns whose
        declared type contains ``BLOB`` are left out.

    Returns
    -------
    List[str]
        Column names to compare.  With ``columns == "all"``, columns whose
        name ends in ``_at`` are always excluded.
    """
    cursor.execute(f"PRAGMA table_info({table_name})")
    # Each row is (cid, name, type, notnull, dflt_value, pk).
    column_types = {info[1]: info[2] for info in cursor.fetchall()}

    if columns == "all":
        columns = list(column_types)
        if not include_blob:
            # Substring match, so declarations such as LONGBLOB or BLOB(16)
            # are recognised as blobs too.
            columns = [
                col
                for col in columns
                if "blob" not in column_types[col].lower()
            ]
        # Exclude timestamp columns
        columns = [col for col in columns if not col.endswith("_at")]
    elif isinstance(columns, str):
        columns = [columns]

    columns_str = ", ".join(columns)
    print(f"Columns considered for duplicates: {columns_str}")

    return columns
|
|
119
|
+
|
|
120
|
+
def _fetch_as_df(
    cursor: sqlite3.Cursor, columns: List[str], table_name: str
) -> pd.DataFrame:
    """
    Load the requested columns of a table into a DataFrame.

    Parameters
    ----------
    cursor : sqlite3.Cursor
        Cursor used to run the SELECT statement.
    columns : List[str]
        Column names to read; they also become the DataFrame column labels.
    table_name : str
        Table to read from.

    Returns
    -------
    pd.DataFrame
        One row per table row, one column per requested column.
    """
    print("\nFetching all database entries...")
    select_list = ", ".join(columns)
    cursor.execute(f"SELECT {select_list} FROM {table_name}")
    rows = cursor.fetchall()
    return pd.DataFrame(rows, columns=columns)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _find_duplicated(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return the rows of ``df`` that repeat an earlier row.

    The first occurrence of each distinct row is treated as the original and
    is not included in the result.

    Parameters
    ----------
    df : pd.DataFrame
        Table contents to scan; all columns take part in the comparison.

    Returns
    -------
    pd.DataFrame
        Copy of the duplicated rows, keeping their original index labels.
    """
    df_duplicated = df[df.duplicated(keep="first")].copy()

    # Share of all rows that are redundant copies.  Guarded so that an empty
    # table reports 0% instead of raising ZeroDivisionError.
    n_total = len(df)
    duplication_rate = len(df_duplicated) / n_total if n_total else 0.0

    print(
        f"\n{100*duplication_rate:.2f}% of data was duplicated. Cleaning up..."
    )
    print(f"\nOriginal entries:\n{df.head()}")
    print(f"\nDuplicated entries:\n{df_duplicated.head()}")
    return df_duplicated
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _null_safe_match(columns: List[str]) -> str:
    """Build a WHERE clause that matches every column, with NULL equal to NULL."""
    # `IS` behaves like `=` but also matches when both sides are NULL, which
    # `=` never does.
    return " AND ".join(f"{col} IS ?" for col in columns)


def verify_duplicated_index(
    cursor: sqlite3.Cursor,
    duplicated_row: pd.Series,
    table_name: str,
    dry_run: bool,
) -> Tuple[str, bool]:
    """
    Check that a row flagged as duplicate is actually present in the table.

    Parameters
    ----------
    cursor : sqlite3.Cursor
        Cursor used to query the table.
    duplicated_row : pd.Series
        Row to look for; the index holds the column names and the values are
        bound as query parameters.
    table_name : str
        Table to search.
    dry_run : bool
        When True, the expected row, the rows found and the outcome are
        printed.

    Returns
    -------
    Tuple[str, bool]
        The parameterized SELECT statement that was executed, and whether at
        least one matching row was found.
    """
    columns = list(duplicated_row.index)
    columns_str = ", ".join(columns)

    where_conditions = _null_safe_match(columns)
    select_query = f"""
        SELECT {columns_str}
        FROM {table_name}
        WHERE {where_conditions}
    """
    cursor.execute(select_query, tuple(duplicated_row))
    entries = cursor.fetchall()

    is_verified = len(entries) >= 1

    if dry_run:
        print(f"Expected duplicate entry: {tuple(duplicated_row)}")
        print(f"Found entries: {entries}")
        print(f"Verification {'succeeded' if is_verified else 'failed'}")

    return select_query, is_verified


def _delete_entry(
    cursor: sqlite3.Cursor,
    duplicated_row: pd.Series,
    table_name: str,
    dry_run: bool = True,
) -> None:
    """
    Remove the redundant copies of one duplicated row.

    Every row matching ``duplicated_row`` is deleted except the one with the
    lowest ``rowid``, so exactly one copy survives and repeated calls for the
    same row are harmless.  The caller is responsible for committing.

    Parameters
    ----------
    cursor : sqlite3.Cursor
        Cursor used to query and modify the table.
    duplicated_row : pd.Series
        Row whose extra copies should be removed; the index holds the column
        names.
    table_name : str
        Table to clean.  It must expose ``rowid``.
    dry_run : bool
        When True (the default), only report what would be deleted.
    """
    _, is_verified = verify_duplicated_index(
        cursor, duplicated_row, table_name, dry_run
    )

    if not is_verified:
        print(
            f"Skipping entry (not found or already deleted):\n{duplicated_row}"
        )
        return

    if dry_run:
        print(f"[DRY RUN] Would delete entry:\n{duplicated_row}")
        return

    where_conditions = _null_safe_match(list(duplicated_row.index))
    # Built as a real DELETE statement rather than by rewriting the SELECT
    # text.  `LIMIT -1 OFFSET 1` selects every match after the first one.
    delete_query = f"""
        DELETE FROM {table_name}
        WHERE rowid IN (
            SELECT rowid
            FROM {table_name}
            WHERE {where_conditions}
            ORDER BY rowid
            LIMIT -1 OFFSET 1
        )
    """
    cursor.execute(delete_query, tuple(duplicated_row))
    print(f"Deleted entry:\n{duplicated_row}")
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def delete_sqlite3_duplicates(
    lpath_db: str,
    table_name: str,
    columns: Union[str, List[str]] = "all",
    include_blob: bool = False,
    chunk_size: int = 10_000,
    dry_run: bool = True,
) -> Tuple[Optional[int], Optional[int]]:
    """
    Remove duplicate rows from a table of an SQLite database file.

    Rows are grouped by ``columns``; within each group the row with the lowest
    ``rowid`` is kept and the others are deleted in place, so the table's
    schema (keys, indexes, defaults) is left intact.

    Parameters
    ----------
    lpath_db : str
        Path to the SQLite database file.
    table_name : str
        Table to deduplicate.  It must expose ``rowid``.
    columns : Union[str, List[str]]
        Columns that define a duplicate; see ``_determine_columns`` for how
        ``"all"`` is resolved.
    include_blob : bool
        Forwarded to ``_determine_columns``.
    chunk_size : int
        Accepted for backward compatibility; not used.
    dry_run : bool
        When True (the default), only count and report; the database is not
        modified.

    Returns
    -------
    Tuple[Optional[int], Optional[int]]
        ``(total_rows, total_duplicates)``, or ``(None, None)`` when any error
        occurred (the error is printed, not raised).
    """
    # Bound before the try block so that `finally` is safe even when
    # connecting itself fails.
    conn = None
    try:
        conn = sqlite3.connect(lpath_db)
        cursor = conn.cursor()

        columns = _determine_columns(cursor, table_name, columns, include_blob)
        if not columns:
            raise ValueError(
                f"No columns left to compare for table '{table_name}'"
            )
        columns_str = ", ".join(columns)

        # Get total row count
        total_rows = cursor.execute(
            f"SELECT COUNT(*) FROM {table_name}"
        ).fetchone()[0]
        print(f"Total rows in table: {total_rows}")

        # Count distinct groups with a read-only query so that the dry-run
        # numbers are real and nothing is written in dry-run mode.
        total_unique = cursor.execute(
            f"SELECT COUNT(*) FROM "
            f"(SELECT 1 FROM {table_name} GROUP BY {columns_str})"
        ).fetchone()[0]
        total_duplicates = total_rows - total_unique

        if dry_run:
            print(f"[DRY RUN] Would execute deduplication based on: {columns_str}")
        else:
            # Keep the earliest row of every group, delete the rest.
            cursor.execute(
                f"DELETE FROM {table_name} WHERE rowid NOT IN "
                f"(SELECT MIN(rowid) FROM {table_name} GROUP BY {columns_str})"
            )
            # VACUUM cannot run inside an open transaction.
            conn.commit()
            cursor.execute("VACUUM")

        print(f"Total rows processed: {total_rows}")
        print(f"Total unique rows: {total_unique}")
        print(f"Total duplicates removed: {total_duplicates}")

        return total_rows, total_duplicates

    except Exception as error:
        # Deliberate catch-all: callers rely on (None, None) instead of an
        # exception.  Uncommitted changes are discarded when the connection
        # is closed.
        print(f"An error occurred: {error}")
        return None, None

    finally:
        if conn is not None:
            conn.close()
|
|
273
|
+
|
|
274
|
+
# EOF
|
scitex/decorators/__init__.py
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
from ._auto_order import AutoOrderDecorator, batch_fn, disable_auto_order, enable_auto_order, numpy_fn, pandas_fn, torch_fn
|
|
5
5
|
from ._batch_fn import batch_fn
|
|
6
6
|
from ._cache_disk import cache_disk
|
|
7
|
+
from ._cache_disk_async import cache_disk_async
|
|
7
8
|
from ._cache_mem import cache_mem
|
|
8
9
|
from ._combined import batch_numpy_fn, batch_pandas_fn, batch_torch_fn, numpy_batch_fn, pandas_batch_fn, torch_batch_fn
|
|
9
10
|
from ._converters import ConversionWarning, is_cuda, is_nested_decorator, is_torch, to_numpy, to_torch
|
|
@@ -27,6 +28,7 @@ __all__ = [
|
|
|
27
28
|
"batch_pandas_fn",
|
|
28
29
|
"batch_torch_fn",
|
|
29
30
|
"cache_disk",
|
|
31
|
+
"cache_disk_async",
|
|
30
32
|
"cache_mem",
|
|
31
33
|
"deprecated",
|
|
32
34
|
"disable_auto_order",
|