scitex 2.0.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +73 -0
- scitex/__main__.py +89 -0
- scitex/__version__.py +14 -0
- scitex/_sh.py +59 -0
- scitex/ai/_LearningCurveLogger.py +583 -0
- scitex/ai/__Classifiers.py +101 -0
- scitex/ai/__init__.py +55 -0
- scitex/ai/_gen_ai/_Anthropic.py +173 -0
- scitex/ai/_gen_ai/_BaseGenAI.py +336 -0
- scitex/ai/_gen_ai/_DeepSeek.py +175 -0
- scitex/ai/_gen_ai/_Google.py +161 -0
- scitex/ai/_gen_ai/_Groq.py +97 -0
- scitex/ai/_gen_ai/_Llama.py +142 -0
- scitex/ai/_gen_ai/_OpenAI.py +230 -0
- scitex/ai/_gen_ai/_PARAMS.py +565 -0
- scitex/ai/_gen_ai/_Perplexity.py +191 -0
- scitex/ai/_gen_ai/__init__.py +32 -0
- scitex/ai/_gen_ai/_calc_cost.py +78 -0
- scitex/ai/_gen_ai/_format_output_func.py +183 -0
- scitex/ai/_gen_ai/_genai_factory.py +71 -0
- scitex/ai/act/__init__.py +8 -0
- scitex/ai/act/_define.py +11 -0
- scitex/ai/classification/__init__.py +7 -0
- scitex/ai/classification/classification_reporter.py +1137 -0
- scitex/ai/classification/classifier_server.py +131 -0
- scitex/ai/classification/classifiers.py +101 -0
- scitex/ai/classification_reporter.py +1161 -0
- scitex/ai/classifier_server.py +131 -0
- scitex/ai/clustering/__init__.py +11 -0
- scitex/ai/clustering/_pca.py +115 -0
- scitex/ai/clustering/_umap.py +376 -0
- scitex/ai/early_stopping.py +149 -0
- scitex/ai/feature_extraction/__init__.py +56 -0
- scitex/ai/feature_extraction/vit.py +148 -0
- scitex/ai/genai/__init__.py +277 -0
- scitex/ai/genai/anthropic.py +177 -0
- scitex/ai/genai/anthropic_provider.py +320 -0
- scitex/ai/genai/anthropic_refactored.py +109 -0
- scitex/ai/genai/auth_manager.py +200 -0
- scitex/ai/genai/base_genai.py +336 -0
- scitex/ai/genai/base_provider.py +291 -0
- scitex/ai/genai/calc_cost.py +78 -0
- scitex/ai/genai/chat_history.py +307 -0
- scitex/ai/genai/cost_tracker.py +276 -0
- scitex/ai/genai/deepseek.py +188 -0
- scitex/ai/genai/deepseek_provider.py +251 -0
- scitex/ai/genai/format_output_func.py +183 -0
- scitex/ai/genai/genai_factory.py +71 -0
- scitex/ai/genai/google.py +169 -0
- scitex/ai/genai/google_provider.py +228 -0
- scitex/ai/genai/groq.py +104 -0
- scitex/ai/genai/groq_provider.py +248 -0
- scitex/ai/genai/image_processor.py +250 -0
- scitex/ai/genai/llama.py +155 -0
- scitex/ai/genai/llama_provider.py +214 -0
- scitex/ai/genai/mock_provider.py +127 -0
- scitex/ai/genai/model_registry.py +304 -0
- scitex/ai/genai/openai.py +230 -0
- scitex/ai/genai/openai_provider.py +293 -0
- scitex/ai/genai/params.py +565 -0
- scitex/ai/genai/perplexity.py +202 -0
- scitex/ai/genai/perplexity_provider.py +205 -0
- scitex/ai/genai/provider_base.py +302 -0
- scitex/ai/genai/provider_factory.py +370 -0
- scitex/ai/genai/response_handler.py +235 -0
- scitex/ai/layer/_Pass.py +21 -0
- scitex/ai/layer/__init__.py +10 -0
- scitex/ai/layer/_switch.py +8 -0
- scitex/ai/loss/_L1L2Losses.py +34 -0
- scitex/ai/loss/__init__.py +12 -0
- scitex/ai/loss/multi_task_loss.py +47 -0
- scitex/ai/metrics/__init__.py +9 -0
- scitex/ai/metrics/_bACC.py +51 -0
- scitex/ai/metrics/silhoute_score_block.py +496 -0
- scitex/ai/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
- scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
- scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
- scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
- scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
- scitex/ai/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
- scitex/ai/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
- scitex/ai/optim/__init__.py +13 -0
- scitex/ai/optim/_get_set.py +31 -0
- scitex/ai/optim/_optimizers.py +71 -0
- scitex/ai/plt/__init__.py +21 -0
- scitex/ai/plt/_conf_mat.py +592 -0
- scitex/ai/plt/_learning_curve.py +194 -0
- scitex/ai/plt/_optuna_study.py +111 -0
- scitex/ai/plt/aucs/__init__.py +2 -0
- scitex/ai/plt/aucs/example.py +60 -0
- scitex/ai/plt/aucs/pre_rec_auc.py +223 -0
- scitex/ai/plt/aucs/roc_auc.py +246 -0
- scitex/ai/sampling/undersample.py +29 -0
- scitex/ai/sk/__init__.py +11 -0
- scitex/ai/sk/_clf.py +58 -0
- scitex/ai/sk/_to_sktime.py +100 -0
- scitex/ai/sklearn/__init__.py +26 -0
- scitex/ai/sklearn/clf.py +58 -0
- scitex/ai/sklearn/to_sktime.py +100 -0
- scitex/ai/training/__init__.py +7 -0
- scitex/ai/training/early_stopping.py +150 -0
- scitex/ai/training/learning_curve_logger.py +555 -0
- scitex/ai/utils/__init__.py +22 -0
- scitex/ai/utils/_check_params.py +50 -0
- scitex/ai/utils/_default_dataset.py +46 -0
- scitex/ai/utils/_format_samples_for_sktime.py +26 -0
- scitex/ai/utils/_label_encoder.py +134 -0
- scitex/ai/utils/_merge_labels.py +22 -0
- scitex/ai/utils/_sliding_window_data_augmentation.py +11 -0
- scitex/ai/utils/_under_sample.py +51 -0
- scitex/ai/utils/_verify_n_gpus.py +16 -0
- scitex/ai/utils/grid_search.py +148 -0
- scitex/context/__init__.py +9 -0
- scitex/context/_suppress_output.py +38 -0
- scitex/db/_BaseMixins/_BaseBackupMixin.py +30 -0
- scitex/db/_BaseMixins/_BaseBatchMixin.py +31 -0
- scitex/db/_BaseMixins/_BaseBlobMixin.py +81 -0
- scitex/db/_BaseMixins/_BaseConnectionMixin.py +43 -0
- scitex/db/_BaseMixins/_BaseImportExportMixin.py +39 -0
- scitex/db/_BaseMixins/_BaseIndexMixin.py +29 -0
- scitex/db/_BaseMixins/_BaseMaintenanceMixin.py +33 -0
- scitex/db/_BaseMixins/_BaseQueryMixin.py +52 -0
- scitex/db/_BaseMixins/_BaseRowMixin.py +32 -0
- scitex/db/_BaseMixins/_BaseSchemaMixin.py +44 -0
- scitex/db/_BaseMixins/_BaseTableMixin.py +66 -0
- scitex/db/_BaseMixins/_BaseTransactionMixin.py +52 -0
- scitex/db/_BaseMixins/__init__.py +30 -0
- scitex/db/_PostgreSQL.py +126 -0
- scitex/db/_PostgreSQLMixins/_BackupMixin.py +166 -0
- scitex/db/_PostgreSQLMixins/_BatchMixin.py +82 -0
- scitex/db/_PostgreSQLMixins/_BlobMixin.py +231 -0
- scitex/db/_PostgreSQLMixins/_ConnectionMixin.py +92 -0
- scitex/db/_PostgreSQLMixins/_ImportExportMixin.py +59 -0
- scitex/db/_PostgreSQLMixins/_IndexMixin.py +64 -0
- scitex/db/_PostgreSQLMixins/_MaintenanceMixin.py +175 -0
- scitex/db/_PostgreSQLMixins/_QueryMixin.py +108 -0
- scitex/db/_PostgreSQLMixins/_RowMixin.py +75 -0
- scitex/db/_PostgreSQLMixins/_SchemaMixin.py +126 -0
- scitex/db/_PostgreSQLMixins/_TableMixin.py +176 -0
- scitex/db/_PostgreSQLMixins/_TransactionMixin.py +57 -0
- scitex/db/_PostgreSQLMixins/__init__.py +34 -0
- scitex/db/_SQLite3.py +2136 -0
- scitex/db/_SQLite3Mixins/_BatchMixin.py +243 -0
- scitex/db/_SQLite3Mixins/_BlobMixin.py +229 -0
- scitex/db/_SQLite3Mixins/_ConnectionMixin.py +108 -0
- scitex/db/_SQLite3Mixins/_ImportExportMixin.py +80 -0
- scitex/db/_SQLite3Mixins/_IndexMixin.py +32 -0
- scitex/db/_SQLite3Mixins/_MaintenanceMixin.py +176 -0
- scitex/db/_SQLite3Mixins/_QueryMixin.py +83 -0
- scitex/db/_SQLite3Mixins/_RowMixin.py +75 -0
- scitex/db/_SQLite3Mixins/_TableMixin.py +183 -0
- scitex/db/_SQLite3Mixins/_TransactionMixin.py +71 -0
- scitex/db/_SQLite3Mixins/__init__.py +30 -0
- scitex/db/__init__.py +14 -0
- scitex/db/_delete_duplicates.py +397 -0
- scitex/db/_inspect.py +163 -0
- scitex/decorators/__init__.py +54 -0
- scitex/decorators/_auto_order.py +172 -0
- scitex/decorators/_batch_fn.py +127 -0
- scitex/decorators/_cache_disk.py +32 -0
- scitex/decorators/_cache_mem.py +12 -0
- scitex/decorators/_combined.py +98 -0
- scitex/decorators/_converters.py +282 -0
- scitex/decorators/_deprecated.py +26 -0
- scitex/decorators/_not_implemented.py +30 -0
- scitex/decorators/_numpy_fn.py +86 -0
- scitex/decorators/_pandas_fn.py +121 -0
- scitex/decorators/_preserve_doc.py +19 -0
- scitex/decorators/_signal_fn.py +95 -0
- scitex/decorators/_timeout.py +55 -0
- scitex/decorators/_torch_fn.py +136 -0
- scitex/decorators/_wrap.py +39 -0
- scitex/decorators/_xarray_fn.py +88 -0
- scitex/dev/__init__.py +15 -0
- scitex/dev/_analyze_code_flow.py +284 -0
- scitex/dev/_reload.py +59 -0
- scitex/dict/_DotDict.py +442 -0
- scitex/dict/__init__.py +18 -0
- scitex/dict/_listed_dict.py +42 -0
- scitex/dict/_pop_keys.py +36 -0
- scitex/dict/_replace.py +13 -0
- scitex/dict/_safe_merge.py +62 -0
- scitex/dict/_to_str.py +32 -0
- scitex/dsp/__init__.py +72 -0
- scitex/dsp/_crop.py +122 -0
- scitex/dsp/_demo_sig.py +331 -0
- scitex/dsp/_detect_ripples.py +212 -0
- scitex/dsp/_ensure_3d.py +18 -0
- scitex/dsp/_hilbert.py +78 -0
- scitex/dsp/_listen.py +702 -0
- scitex/dsp/_misc.py +30 -0
- scitex/dsp/_mne.py +32 -0
- scitex/dsp/_modulation_index.py +79 -0
- scitex/dsp/_pac.py +319 -0
- scitex/dsp/_psd.py +102 -0
- scitex/dsp/_resample.py +65 -0
- scitex/dsp/_time.py +36 -0
- scitex/dsp/_transform.py +68 -0
- scitex/dsp/_wavelet.py +212 -0
- scitex/dsp/add_noise.py +111 -0
- scitex/dsp/example.py +253 -0
- scitex/dsp/filt.py +155 -0
- scitex/dsp/norm.py +18 -0
- scitex/dsp/params.py +51 -0
- scitex/dsp/reference.py +43 -0
- scitex/dsp/template.py +25 -0
- scitex/dsp/utils/__init__.py +15 -0
- scitex/dsp/utils/_differential_bandpass_filters.py +120 -0
- scitex/dsp/utils/_ensure_3d.py +18 -0
- scitex/dsp/utils/_ensure_even_len.py +10 -0
- scitex/dsp/utils/_zero_pad.py +48 -0
- scitex/dsp/utils/filter.py +408 -0
- scitex/dsp/utils/pac.py +177 -0
- scitex/dt/__init__.py +8 -0
- scitex/dt/_linspace.py +130 -0
- scitex/etc/__init__.py +15 -0
- scitex/etc/wait_key.py +34 -0
- scitex/gen/_DimHandler.py +196 -0
- scitex/gen/_TimeStamper.py +244 -0
- scitex/gen/__init__.py +95 -0
- scitex/gen/_alternate_kwarg.py +13 -0
- scitex/gen/_cache.py +11 -0
- scitex/gen/_check_host.py +34 -0
- scitex/gen/_ci.py +12 -0
- scitex/gen/_close.py +222 -0
- scitex/gen/_embed.py +78 -0
- scitex/gen/_inspect_module.py +257 -0
- scitex/gen/_is_ipython.py +12 -0
- scitex/gen/_less.py +48 -0
- scitex/gen/_list_packages.py +139 -0
- scitex/gen/_mat2py.py +88 -0
- scitex/gen/_norm.py +170 -0
- scitex/gen/_paste.py +18 -0
- scitex/gen/_print_config.py +84 -0
- scitex/gen/_shell.py +48 -0
- scitex/gen/_src.py +111 -0
- scitex/gen/_start.py +451 -0
- scitex/gen/_symlink.py +55 -0
- scitex/gen/_symlog.py +27 -0
- scitex/gen/_tee.py +238 -0
- scitex/gen/_title2path.py +60 -0
- scitex/gen/_title_case.py +88 -0
- scitex/gen/_to_even.py +84 -0
- scitex/gen/_to_odd.py +34 -0
- scitex/gen/_to_rank.py +39 -0
- scitex/gen/_transpose.py +37 -0
- scitex/gen/_type.py +78 -0
- scitex/gen/_var_info.py +73 -0
- scitex/gen/_wrap.py +17 -0
- scitex/gen/_xml2dict.py +76 -0
- scitex/gen/misc.py +730 -0
- scitex/gen/path.py +0 -0
- scitex/general/__init__.py +5 -0
- scitex/gists/_SigMacro_processFigure_S.py +128 -0
- scitex/gists/_SigMacro_toBlue.py +172 -0
- scitex/gists/__init__.py +12 -0
- scitex/io/_H5Explorer.py +292 -0
- scitex/io/__init__.py +82 -0
- scitex/io/_cache.py +101 -0
- scitex/io/_flush.py +24 -0
- scitex/io/_glob.py +103 -0
- scitex/io/_json2md.py +113 -0
- scitex/io/_load.py +168 -0
- scitex/io/_load_configs.py +146 -0
- scitex/io/_load_modules/__init__.py +38 -0
- scitex/io/_load_modules/_catboost.py +66 -0
- scitex/io/_load_modules/_con.py +20 -0
- scitex/io/_load_modules/_db.py +24 -0
- scitex/io/_load_modules/_docx.py +42 -0
- scitex/io/_load_modules/_eeg.py +110 -0
- scitex/io/_load_modules/_hdf5.py +196 -0
- scitex/io/_load_modules/_image.py +19 -0
- scitex/io/_load_modules/_joblib.py +19 -0
- scitex/io/_load_modules/_json.py +18 -0
- scitex/io/_load_modules/_markdown.py +103 -0
- scitex/io/_load_modules/_matlab.py +37 -0
- scitex/io/_load_modules/_numpy.py +39 -0
- scitex/io/_load_modules/_optuna.py +155 -0
- scitex/io/_load_modules/_pandas.py +69 -0
- scitex/io/_load_modules/_pdf.py +31 -0
- scitex/io/_load_modules/_pickle.py +24 -0
- scitex/io/_load_modules/_torch.py +16 -0
- scitex/io/_load_modules/_txt.py +126 -0
- scitex/io/_load_modules/_xml.py +49 -0
- scitex/io/_load_modules/_yaml.py +23 -0
- scitex/io/_mv_to_tmp.py +19 -0
- scitex/io/_path.py +286 -0
- scitex/io/_reload.py +78 -0
- scitex/io/_save.py +539 -0
- scitex/io/_save_modules/__init__.py +66 -0
- scitex/io/_save_modules/_catboost.py +22 -0
- scitex/io/_save_modules/_csv.py +89 -0
- scitex/io/_save_modules/_excel.py +49 -0
- scitex/io/_save_modules/_hdf5.py +249 -0
- scitex/io/_save_modules/_html.py +48 -0
- scitex/io/_save_modules/_image.py +140 -0
- scitex/io/_save_modules/_joblib.py +25 -0
- scitex/io/_save_modules/_json.py +25 -0
- scitex/io/_save_modules/_listed_dfs_as_csv.py +57 -0
- scitex/io/_save_modules/_listed_scalars_as_csv.py +42 -0
- scitex/io/_save_modules/_matlab.py +24 -0
- scitex/io/_save_modules/_mp4.py +29 -0
- scitex/io/_save_modules/_numpy.py +57 -0
- scitex/io/_save_modules/_optuna_study_as_csv_and_pngs.py +38 -0
- scitex/io/_save_modules/_pickle.py +45 -0
- scitex/io/_save_modules/_plotly.py +27 -0
- scitex/io/_save_modules/_text.py +23 -0
- scitex/io/_save_modules/_torch.py +26 -0
- scitex/io/_save_modules/_yaml.py +29 -0
- scitex/life/__init__.py +10 -0
- scitex/life/_monitor_rain.py +49 -0
- scitex/linalg/__init__.py +17 -0
- scitex/linalg/_distance.py +63 -0
- scitex/linalg/_geometric_median.py +64 -0
- scitex/linalg/_misc.py +73 -0
- scitex/nn/_AxiswiseDropout.py +27 -0
- scitex/nn/_BNet.py +126 -0
- scitex/nn/_BNet_Res.py +164 -0
- scitex/nn/_ChannelGainChanger.py +44 -0
- scitex/nn/_DropoutChannels.py +50 -0
- scitex/nn/_Filters.py +489 -0
- scitex/nn/_FreqGainChanger.py +110 -0
- scitex/nn/_GaussianFilter.py +48 -0
- scitex/nn/_Hilbert.py +111 -0
- scitex/nn/_MNet_1000.py +157 -0
- scitex/nn/_ModulationIndex.py +221 -0
- scitex/nn/_PAC.py +414 -0
- scitex/nn/_PSD.py +40 -0
- scitex/nn/_ResNet1D.py +120 -0
- scitex/nn/_SpatialAttention.py +25 -0
- scitex/nn/_Spectrogram.py +161 -0
- scitex/nn/_SwapChannels.py +50 -0
- scitex/nn/_TransposeLayer.py +19 -0
- scitex/nn/_Wavelet.py +183 -0
- scitex/nn/__init__.py +63 -0
- scitex/os/__init__.py +8 -0
- scitex/os/_mv.py +50 -0
- scitex/parallel/__init__.py +8 -0
- scitex/parallel/_run.py +151 -0
- scitex/path/__init__.py +33 -0
- scitex/path/_clean.py +52 -0
- scitex/path/_find.py +108 -0
- scitex/path/_get_module_path.py +51 -0
- scitex/path/_get_spath.py +35 -0
- scitex/path/_getsize.py +18 -0
- scitex/path/_increment_version.py +87 -0
- scitex/path/_mk_spath.py +51 -0
- scitex/path/_path.py +19 -0
- scitex/path/_split.py +23 -0
- scitex/path/_this_path.py +19 -0
- scitex/path/_version.py +101 -0
- scitex/pd/__init__.py +41 -0
- scitex/pd/_find_indi.py +126 -0
- scitex/pd/_find_pval.py +113 -0
- scitex/pd/_force_df.py +154 -0
- scitex/pd/_from_xyz.py +71 -0
- scitex/pd/_ignore_SettingWithCopyWarning.py +34 -0
- scitex/pd/_melt_cols.py +81 -0
- scitex/pd/_merge_columns.py +221 -0
- scitex/pd/_mv.py +63 -0
- scitex/pd/_replace.py +62 -0
- scitex/pd/_round.py +93 -0
- scitex/pd/_slice.py +63 -0
- scitex/pd/_sort.py +91 -0
- scitex/pd/_to_numeric.py +53 -0
- scitex/pd/_to_xy.py +59 -0
- scitex/pd/_to_xyz.py +110 -0
- scitex/plt/__init__.py +36 -0
- scitex/plt/_subplots/_AxesWrapper.py +182 -0
- scitex/plt/_subplots/_AxisWrapper.py +249 -0
- scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +414 -0
- scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +896 -0
- scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +368 -0
- scitex/plt/_subplots/_AxisWrapperMixins/_TrackingMixin.py +185 -0
- scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +16 -0
- scitex/plt/_subplots/_FigWrapper.py +226 -0
- scitex/plt/_subplots/_SubplotsWrapper.py +171 -0
- scitex/plt/_subplots/__init__.py +111 -0
- scitex/plt/_subplots/_export_as_csv.py +232 -0
- scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +61 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +90 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +49 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +46 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +39 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +125 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +72 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +34 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +36 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +79 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +59 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +32 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +79 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +75 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +64 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +44 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +70 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +66 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +95 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +67 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +52 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +46 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +46 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +46 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +46 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +44 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +103 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +82 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +58 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +117 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +30 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +51 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +93 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +94 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +92 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +65 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +59 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +58 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +45 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +70 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +75 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +75 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +155 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +64 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +77 -0
- scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +210 -0
- scitex/plt/_subplots/_export_as_csv_formatters/verify_formatters.py +342 -0
- scitex/plt/_subplots/_export_as_csv_formatters.py +115 -0
- scitex/plt/_tpl.py +28 -0
- scitex/plt/ax/__init__.py +114 -0
- scitex/plt/ax/_plot/__init__.py +53 -0
- scitex/plt/ax/_plot/_plot_circular_hist.py +124 -0
- scitex/plt/ax/_plot/_plot_conf_mat.py +136 -0
- scitex/plt/ax/_plot/_plot_cube.py +57 -0
- scitex/plt/ax/_plot/_plot_ecdf.py +84 -0
- scitex/plt/ax/_plot/_plot_fillv.py +55 -0
- scitex/plt/ax/_plot/_plot_heatmap.py +266 -0
- scitex/plt/ax/_plot/_plot_image.py +94 -0
- scitex/plt/ax/_plot/_plot_joyplot.py +76 -0
- scitex/plt/ax/_plot/_plot_raster.py +172 -0
- scitex/plt/ax/_plot/_plot_rectangle.py +69 -0
- scitex/plt/ax/_plot/_plot_scatter_hist.py +133 -0
- scitex/plt/ax/_plot/_plot_shaded_line.py +142 -0
- scitex/plt/ax/_plot/_plot_statistical_shaded_line.py +221 -0
- scitex/plt/ax/_plot/_plot_violin.py +343 -0
- scitex/plt/ax/_style/__init__.py +38 -0
- scitex/plt/ax/_style/_add_marginal_ax.py +44 -0
- scitex/plt/ax/_style/_add_panel.py +92 -0
- scitex/plt/ax/_style/_extend.py +64 -0
- scitex/plt/ax/_style/_force_aspect.py +37 -0
- scitex/plt/ax/_style/_format_label.py +23 -0
- scitex/plt/ax/_style/_hide_spines.py +84 -0
- scitex/plt/ax/_style/_map_ticks.py +182 -0
- scitex/plt/ax/_style/_rotate_labels.py +215 -0
- scitex/plt/ax/_style/_sci_note.py +279 -0
- scitex/plt/ax/_style/_set_log_scale.py +299 -0
- scitex/plt/ax/_style/_set_meta.py +261 -0
- scitex/plt/ax/_style/_set_n_ticks.py +37 -0
- scitex/plt/ax/_style/_set_size.py +16 -0
- scitex/plt/ax/_style/_set_supxyt.py +116 -0
- scitex/plt/ax/_style/_set_ticks.py +276 -0
- scitex/plt/ax/_style/_set_xyt.py +121 -0
- scitex/plt/ax/_style/_share_axes.py +264 -0
- scitex/plt/ax/_style/_shift.py +139 -0
- scitex/plt/ax/_style/_show_spines.py +333 -0
- scitex/plt/color/_PARAMS.py +70 -0
- scitex/plt/color/__init__.py +52 -0
- scitex/plt/color/_add_hue_col.py +41 -0
- scitex/plt/color/_colors.py +205 -0
- scitex/plt/color/_get_colors_from_cmap.py +134 -0
- scitex/plt/color/_interpolate.py +29 -0
- scitex/plt/color/_vizualize_colors.py +54 -0
- scitex/plt/utils/__init__.py +44 -0
- scitex/plt/utils/_calc_bacc_from_conf_mat.py +46 -0
- scitex/plt/utils/_calc_nice_ticks.py +101 -0
- scitex/plt/utils/_close.py +68 -0
- scitex/plt/utils/_colorbar.py +96 -0
- scitex/plt/utils/_configure_mpl.py +295 -0
- scitex/plt/utils/_histogram_utils.py +132 -0
- scitex/plt/utils/_im2grid.py +70 -0
- scitex/plt/utils/_is_valid_axis.py +78 -0
- scitex/plt/utils/_mk_colorbar.py +65 -0
- scitex/plt/utils/_mk_patches.py +26 -0
- scitex/plt/utils/_scientific_captions.py +638 -0
- scitex/plt/utils/_scitex_config.py +223 -0
- scitex/reproduce/__init__.py +14 -0
- scitex/reproduce/_fix_seeds.py +45 -0
- scitex/reproduce/_gen_ID.py +55 -0
- scitex/reproduce/_gen_timestamp.py +35 -0
- scitex/res/__init__.py +5 -0
- scitex/resource/__init__.py +13 -0
- scitex/resource/_get_processor_usages.py +281 -0
- scitex/resource/_get_specs.py +280 -0
- scitex/resource/_log_processor_usages.py +190 -0
- scitex/resource/_utils/__init__.py +31 -0
- scitex/resource/_utils/_get_env_info.py +481 -0
- scitex/resource/limit_ram.py +33 -0
- scitex/scholar/__init__.py +24 -0
- scitex/scholar/_local_search.py +454 -0
- scitex/scholar/_paper.py +244 -0
- scitex/scholar/_pdf_downloader.py +325 -0
- scitex/scholar/_search.py +393 -0
- scitex/scholar/_vector_search.py +370 -0
- scitex/scholar/_web_sources.py +457 -0
- scitex/stats/__init__.py +31 -0
- scitex/stats/_calc_partial_corr.py +17 -0
- scitex/stats/_corr_test_multi.py +94 -0
- scitex/stats/_corr_test_wrapper.py +115 -0
- scitex/stats/_describe_wrapper.py +90 -0
- scitex/stats/_multiple_corrections.py +63 -0
- scitex/stats/_nan_stats.py +93 -0
- scitex/stats/_p2stars.py +116 -0
- scitex/stats/_p2stars_wrapper.py +56 -0
- scitex/stats/_statistical_tests.py +73 -0
- scitex/stats/desc/__init__.py +40 -0
- scitex/stats/desc/_describe.py +189 -0
- scitex/stats/desc/_nan.py +289 -0
- scitex/stats/desc/_real.py +94 -0
- scitex/stats/multiple/__init__.py +14 -0
- scitex/stats/multiple/_bonferroni_correction.py +72 -0
- scitex/stats/multiple/_fdr_correction.py +400 -0
- scitex/stats/multiple/_multicompair.py +28 -0
- scitex/stats/tests/__corr_test.py +277 -0
- scitex/stats/tests/__corr_test_multi.py +343 -0
- scitex/stats/tests/__corr_test_single.py +277 -0
- scitex/stats/tests/__init__.py +22 -0
- scitex/stats/tests/_brunner_munzel_test.py +192 -0
- scitex/stats/tests/_nocorrelation_test.py +28 -0
- scitex/stats/tests/_smirnov_grubbs.py +98 -0
- scitex/str/__init__.py +113 -0
- scitex/str/_clean_path.py +75 -0
- scitex/str/_color_text.py +52 -0
- scitex/str/_decapitalize.py +58 -0
- scitex/str/_factor_out_digits.py +281 -0
- scitex/str/_format_plot_text.py +498 -0
- scitex/str/_grep.py +48 -0
- scitex/str/_latex.py +155 -0
- scitex/str/_latex_fallback.py +471 -0
- scitex/str/_mask_api.py +39 -0
- scitex/str/_mask_api_key.py +8 -0
- scitex/str/_parse.py +158 -0
- scitex/str/_print_block.py +47 -0
- scitex/str/_print_debug.py +68 -0
- scitex/str/_printc.py +62 -0
- scitex/str/_readable_bytes.py +38 -0
- scitex/str/_remove_ansi.py +23 -0
- scitex/str/_replace.py +134 -0
- scitex/str/_search.py +125 -0
- scitex/str/_squeeze_space.py +36 -0
- scitex/tex/__init__.py +10 -0
- scitex/tex/_preview.py +103 -0
- scitex/tex/_to_vec.py +116 -0
- scitex/torch/__init__.py +18 -0
- scitex/torch/_apply_to.py +34 -0
- scitex/torch/_nan_funcs.py +77 -0
- scitex/types/_ArrayLike.py +44 -0
- scitex/types/_ColorLike.py +21 -0
- scitex/types/__init__.py +14 -0
- scitex/types/_is_listed_X.py +70 -0
- scitex/utils/__init__.py +22 -0
- scitex/utils/_compress_hdf5.py +116 -0
- scitex/utils/_email.py +120 -0
- scitex/utils/_grid.py +148 -0
- scitex/utils/_notify.py +247 -0
- scitex/utils/_search.py +121 -0
- scitex/web/__init__.py +38 -0
- scitex/web/_search_pubmed.py +438 -0
- scitex/web/_summarize_url.py +158 -0
- scitex-2.0.0.dist-info/METADATA +307 -0
- scitex-2.0.0.dist-info/RECORD +572 -0
- scitex-2.0.0.dist-info/WHEEL +6 -0
- scitex-2.0.0.dist-info/licenses/LICENSE +7 -0
- scitex-2.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Time-stamp: "2024-11-11 14:16:58 (ywatanabe)"
|
|
4
|
+
# File: ./scitex_repo/src/scitex/db/_delete_duplicates.py
|
|
5
|
+
|
|
6
|
+
import sqlite3
|
|
7
|
+
from typing import List, Optional, Tuple, Union
|
|
8
|
+
import pandas as pd
|
|
9
|
+
|
|
10
|
+
#!/usr/bin/env python3
|
|
11
|
+
# -*- coding: utf-8 -*-
|
|
12
|
+
# Time-stamp: "2024-10-20 02:17:10 (ywatanabe)"
|
|
13
|
+
# /data/gpfs/projects/punim2354/ywatanabe/scitex_repo/src/scitex/db/_delete_duplicates_clean.py
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
"""
|
|
17
|
+
Functionality:
|
|
18
|
+
- Deletes duplicate entries from an SQLite database table
|
|
19
|
+
Input:
|
|
20
|
+
- SQLite database file path, table name, columns to consider for duplicates
|
|
21
|
+
Output:
|
|
22
|
+
- Updated SQLite database with duplicates removed
|
|
23
|
+
Prerequisites:
|
|
24
|
+
- sqlite3, pandas, tqdm, scitex
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _sort_db(cursor: sqlite3.Cursor, table_name: str, columns: List[str]) -> None:
    """
    Physically re-orders a table so its rows are stored sorted by ``columns``.

    The table is rebuilt: a sorted copy is created with ``CREATE TABLE ... AS
    SELECT ... ORDER BY``, the original table is dropped, and the copy is
    renamed to the original name.

    Parameters
    ----------
    cursor : sqlite3.Cursor
        The cursor object for executing SQL commands.
    table_name : str
        The name of the table to be sorted.
    columns : List[str]
        The column names to sort by, in order of priority. A single column
        name given as a plain string is also accepted. An empty list leaves
        the table untouched.

    Notes
    -----
    - ``table_name`` and ``columns`` are interpolated into the SQL text as-is,
      so they must come from a trusted source.
    - A table created with ``CREATE TABLE ... AS SELECT`` has no PRIMARY KEY
      and no constraints, and dropping the original table also drops its
      indexes and triggers; these are NOT restored by this function.
    - The caller is responsible for committing the transaction.

    Example
    -------
    >>> conn = sqlite3.connect('example.db')
    >>> cursor = conn.cursor()
    >>> _sort_db(cursor, 'my_table', ['column1', 'column2'])
    >>> conn.commit()
    >>> conn.close()
    """
    # A bare string would otherwise be joined character by character
    # ("abc" -> "a, b, c"), producing a wrong ORDER BY clause.
    if isinstance(columns, str):
        columns = [columns]

    # Nothing to sort by: an empty "ORDER BY" is a syntax error, and the
    # rebuild would be pointless anyway.
    if not columns:
        return

    columns_str = ", ".join(columns)
    temp_table = f"{table_name}_temp"

    cursor.execute(
        f"CREATE TABLE {temp_table} AS SELECT * FROM {table_name} ORDER BY {columns_str}"
    )
    cursor.execute(f"DROP TABLE {table_name}")
    cursor.execute(f"ALTER TABLE {temp_table} RENAME TO {table_name}")
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _determine_columns(
    cursor: sqlite3.Cursor,
    table_name: str,
    columns: Union[str, List[str]],
    include_blob: bool,
) -> List[str]:
    """
    Resolves the ``columns`` argument into the list of columns to compare.

    Parameters
    ----------
    cursor : sqlite3.Cursor
        The cursor used to query the table schema.
    table_name : str
        The name of the table whose schema is inspected.
    columns : Union[str, List[str]]
        ``"all"`` selects every column of the table (minus BLOB columns when
        ``include_blob`` is False); any other string is treated as a single
        column name; a list is returned unchanged.
    include_blob : bool
        Only consulted when ``columns == "all"``: whether columns declared
        with type ``blob`` (case-insensitive) are kept.

    Returns
    -------
    List[str]
        The column names considered when looking for duplicates.
    """
    cursor.execute(f"PRAGMA table_info({table_name})")
    # Fields 1 and 2 of each PRAGMA table_info row are the column name and
    # its declared type.
    schema = [(info[1], info[2]) for info in cursor.fetchall()]

    if columns == "all":
        if include_blob:
            columns = [name for name, _ in schema]
        else:
            columns = [
                name for name, declared in schema if declared.lower() != "blob"
            ]
    elif isinstance(columns, str):
        columns = [columns]

    joined = ", ".join(columns)
    print(f"Columns considered for duplicates: {joined}")

    return columns
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _fetch_as_df(
    cursor: sqlite3.Cursor, columns: List[str], table_name: str
) -> pd.DataFrame:
    """
    Loads the given columns of every row of a table into a DataFrame.

    Parameters
    ----------
    cursor : sqlite3.Cursor
        The cursor used to run the SELECT.
    columns : List[str]
        The column names to fetch; also used as the DataFrame column labels.
    table_name : str
        The name of the table to read.

    Returns
    -------
    pd.DataFrame
        One row per table row, one column per entry of ``columns``.
    """
    print("\nFetching all database entries...")
    selected = ", ".join(columns)
    rows = cursor.execute(f"SELECT {selected} FROM {table_name}").fetchall()
    return pd.DataFrame(rows, columns=columns)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _find_duplicated(df: pd.DataFrame) -> pd.DataFrame:
    """
    Returns the rows of ``df`` that repeat an earlier row.

    The first occurrence of each distinct row is kept out of the result
    (``keep="first"``); every later occurrence is returned. A short summary
    (duplication percentage and the head of both frames) is printed.

    Parameters
    ----------
    df : pd.DataFrame
        The entries to inspect.

    Returns
    -------
    pd.DataFrame
        A copy of the duplicated rows, with their original index labels.
    """
    df_duplicated = df[df.duplicated(keep="first")].copy()

    # Share of all rows that are redundant copies. Dividing by the total row
    # count keeps the figure within 0-100 %, and the guard avoids 0/0 on an
    # empty frame.
    n_total = len(df)
    duplication_rate = len(df_duplicated) / n_total if n_total else 0.0

    print(f"\n{100*duplication_rate:.2f}% of data was duplicated. Cleaning up...")
    print(f"\nOriginal entries:\n{df.head()}")
    print(f"\nDuplicated entries:\n{df_duplicated.head()}")
    return df_duplicated
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def verify_duplicated_index(
|
|
106
|
+
cursor: sqlite3.Cursor, duplicated_row: pd.Series, table_name: str, dry_run: bool
|
|
107
|
+
) -> Tuple[str, bool]:
|
|
108
|
+
"""Check if entry to delete is the one intended"""
|
|
109
|
+
columns = list(duplicated_row.index)
|
|
110
|
+
columns_str = ", ".join(columns)
|
|
111
|
+
|
|
112
|
+
where_conditions = " AND ".join([f"{col} = ?" for col in columns])
|
|
113
|
+
select_query = f"""
|
|
114
|
+
SELECT {columns_str}
|
|
115
|
+
FROM {table_name}
|
|
116
|
+
WHERE {where_conditions}
|
|
117
|
+
"""
|
|
118
|
+
cursor.execute(select_query, tuple(duplicated_row))
|
|
119
|
+
entries = cursor.fetchall()
|
|
120
|
+
|
|
121
|
+
is_verified = len(entries) >= 1
|
|
122
|
+
|
|
123
|
+
if dry_run:
|
|
124
|
+
print(f"Expected duplicate entry: {tuple(duplicated_row)}")
|
|
125
|
+
print(f"Found entries: {entries}")
|
|
126
|
+
print(f"Verification {'succeeded' if is_verified else 'failed'}")
|
|
127
|
+
|
|
128
|
+
return select_query, is_verified
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _delete_entry(
    cursor: sqlite3.Cursor,
    duplicated_row: pd.Series,
    table_name: str,
    dry_run: bool = True,
) -> None:
    """Delete the rows matching ``duplicated_row`` once their presence is verified.

    Parameters
    ----------
    cursor : sqlite3.Cursor
        Cursor used for both the verification lookup and the deletion.
        Committing is left to the caller.
    duplicated_row : pd.Series
        Row to delete; its index supplies the column names and its values are
        bound as query parameters.
    table_name : str
        Table to delete from.
    dry_run : bool, optional
        When True (default) only report what would be deleted.
    """
    _, is_verified = verify_duplicated_index(
        cursor, duplicated_row, table_name, dry_run
    )
    if not is_verified:
        print(f"Skipping entry (not found or already deleted):\n{duplicated_row}")
        return

    if dry_run:
        print(f"[DRY RUN] Would delete entry:\n{duplicated_row}")
        return

    # Build the DELETE explicitly. Rewriting the verification query by swapping
    # the keyword produced "DELETE <columns> FROM ...", which SQLite rejects.
    # `IS` is used so that NULL values in the row are matched as well.
    where_conditions = " AND ".join(f"{col} IS ?" for col in duplicated_row.index)
    delete_query = f"DELETE FROM {table_name} WHERE {where_conditions}"

    # NOTE(review): this removes every row equal to `duplicated_row`, including
    # the first occurrence -- confirm that callers expect that.
    cursor.execute(delete_query, tuple(duplicated_row))
    print(f"Deleted entry:\n{duplicated_row}")
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
# def delete_duplicates(
|
|
152
|
+
# lpath_db: str,
|
|
153
|
+
# table_name: str,
|
|
154
|
+
# columns: Union[str, List[str]] = "all",
|
|
155
|
+
# include_blob: bool = False,
|
|
156
|
+
# batch_size: int = 1000,
|
|
157
|
+
# reindex: bool = False,
|
|
158
|
+
# sort: bool = False,
|
|
159
|
+
# dry_run: bool = True,
|
|
160
|
+
# ) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
|
|
161
|
+
# """
|
|
162
|
+
# Delete duplicate entries from an SQLite database table.
|
|
163
|
+
|
|
164
|
+
# Parameters
|
|
165
|
+
# ----------
|
|
166
|
+
# lpath_db : str
|
|
167
|
+
# Path to the SQLite database file.
|
|
168
|
+
# table_name : str
|
|
169
|
+
# Name of the table to remove duplicates from.
|
|
170
|
+
# columns : Union[str, List[str]], optional
|
|
171
|
+
# Columns to consider when identifying duplicates. Default is "all".
|
|
172
|
+
# include_blob : bool, optional
|
|
173
|
+
# Whether to include BLOB columns when considering duplicates. Default is False.
|
|
174
|
+
# batch_size : int, optional
|
|
175
|
+
# Number of rows to process in each batch. Default is 1000.
|
|
176
|
+
# reindex : bool, optional
|
|
177
|
+
# Whether to reindex the table after deletion. Default is False.
|
|
178
|
+
# dry_run : bool, optional
|
|
179
|
+
# If True, simulates the deletion without actually modifying the database. Default is True.
|
|
180
|
+
|
|
181
|
+
# Returns
|
|
182
|
+
# -------
|
|
183
|
+
# Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]
|
|
184
|
+
# A tuple containing:
|
|
185
|
+
# - DataFrame of all entries after deletion process
|
|
186
|
+
# - DataFrame of remaining duplicates if any, None otherwise
|
|
187
|
+
# """
|
|
188
|
+
# try:
|
|
189
|
+
# conn = sqlite3.connect(lpath_db)
|
|
190
|
+
# cursor = conn.cursor()
|
|
191
|
+
|
|
192
|
+
# columns = _determine_columns(cursor, table_name, columns, include_blob)
|
|
193
|
+
|
|
194
|
+
# if sort:
|
|
195
|
+
# _sort_db(cursor, table_name, columns)
|
|
196
|
+
|
|
197
|
+
# df_orig = _fetch_as_df(cursor, columns, table_name)
|
|
198
|
+
# duplicates = _find_duplicated(df_orig)
|
|
199
|
+
|
|
200
|
+
# if duplicates.empty:
|
|
201
|
+
# print("Congratulations. Database is clean.")
|
|
202
|
+
# return df_orig, None
|
|
203
|
+
|
|
204
|
+
# columns_str = ", ".join(columns)
|
|
205
|
+
# where_conditions = " AND ".join([f"{col} = ?" for col in columns])
|
|
206
|
+
# delete_query = f"""
|
|
207
|
+
# DELETE FROM {table_name}
|
|
208
|
+
# WHERE {where_conditions}
|
|
209
|
+
# """
|
|
210
|
+
|
|
211
|
+
# for start in tqdm(range(0, len(duplicates), batch_size)):
|
|
212
|
+
# batch = duplicates.iloc[start:start+batch_size]
|
|
213
|
+
# batch_values = batch.values.tolist()
|
|
214
|
+
|
|
215
|
+
# if dry_run:
|
|
216
|
+
# print(f"[DRY RUN] Would delete {len(batch)} entries")
|
|
217
|
+
# else:
|
|
218
|
+
# cursor.executemany(delete_query, batch_values)
|
|
219
|
+
# conn.commit()
|
|
220
|
+
|
|
221
|
+
# if not dry_run:
|
|
222
|
+
# conn.commit()
|
|
223
|
+
|
|
224
|
+
# if reindex:
|
|
225
|
+
# print("Reindexing the table...")
|
|
226
|
+
# cursor.execute(f"REINDEX {table_name}")
|
|
227
|
+
# conn.commit()
|
|
228
|
+
|
|
229
|
+
# df_after = _fetch_as_df(cursor, columns, table_name)
|
|
230
|
+
# remaining_duplicates = _find_duplicated(df_after)
|
|
231
|
+
|
|
232
|
+
# if remaining_duplicates.empty:
|
|
233
|
+
# print("All duplicates successfully removed.")
|
|
234
|
+
# return df_after, None
|
|
235
|
+
# else:
|
|
236
|
+
# print(f"Warning: {len(remaining_duplicates)} duplicates still remain.\n{remaining_duplicates}")
|
|
237
|
+
# return df_after, remaining_duplicates
|
|
238
|
+
|
|
239
|
+
# except Exception as error:
|
|
240
|
+
# print(f"An error occurred: {error}")
|
|
241
|
+
# return None, None
|
|
242
|
+
|
|
243
|
+
# finally:
|
|
244
|
+
# conn.close()
|
|
245
|
+
|
|
246
|
+
# def delete_duplicates(
|
|
247
|
+
# lpath_db: str,
|
|
248
|
+
# table_name: str,
|
|
249
|
+
# columns: Union[str, List[str]] = "all",
|
|
250
|
+
# include_blob: bool = False,
|
|
251
|
+
# batch_size: int = 1000,
|
|
252
|
+
# chunk_size: int = 100_000,
|
|
253
|
+
# reindex: bool = False,
|
|
254
|
+
# sort: bool = False,
|
|
255
|
+
# dry_run: bool = True,
|
|
256
|
+
# ) -> Tuple[Optional[int], Optional[int]]:
|
|
257
|
+
# try:
|
|
258
|
+
# conn = sqlite3.connect(lpath_db)
|
|
259
|
+
# cursor = conn.cursor()
|
|
260
|
+
|
|
261
|
+
# columns = _determine_columns(cursor, table_name, columns, include_blob)
|
|
262
|
+
|
|
263
|
+
# if sort:
|
|
264
|
+
# _sort_db(cursor, table_name, columns)
|
|
265
|
+
|
|
266
|
+
# columns_str = ", ".join(columns)
|
|
267
|
+
# where_conditions = " AND ".join([f"{col} = ?" for col in columns])
|
|
268
|
+
# delete_query = f"""
|
|
269
|
+
# DELETE FROM {table_name}
|
|
270
|
+
# WHERE {where_conditions}
|
|
271
|
+
# """
|
|
272
|
+
|
|
273
|
+
# total_rows = cursor.execute(f"SELECT COUNT(*) FROM {table_name}").fetchone()[0]
|
|
274
|
+
# total_deleted = 0
|
|
275
|
+
# total_duplicates = 0
|
|
276
|
+
|
|
277
|
+
# for offset in tqdm(range(0, total_rows, chunk_size)):
|
|
278
|
+
# chunk_query = f"""
|
|
279
|
+
# SELECT {columns_str}
|
|
280
|
+
# FROM {table_name}
|
|
281
|
+
# LIMIT {chunk_size} OFFSET {offset}
|
|
282
|
+
# """
|
|
283
|
+
# df_chunk = pd.read_sql_query(chunk_query, conn)
|
|
284
|
+
# duplicates = _find_duplicated(df_chunk)
|
|
285
|
+
# total_duplicates += len(duplicates)
|
|
286
|
+
|
|
287
|
+
# if duplicates.empty:
|
|
288
|
+
# continue
|
|
289
|
+
|
|
290
|
+
# for start in range(0, len(duplicates), batch_size):
|
|
291
|
+
# batch = duplicates.iloc[start:start+batch_size]
|
|
292
|
+
# batch_values = batch.values.tolist()
|
|
293
|
+
|
|
294
|
+
# if dry_run:
|
|
295
|
+
# print(f"[DRY RUN] Would delete {len(batch)} entries")
|
|
296
|
+
# else:
|
|
297
|
+
# cursor.executemany(delete_query, batch_values)
|
|
298
|
+
# conn.commit()
|
|
299
|
+
# total_deleted += len(batch)
|
|
300
|
+
|
|
301
|
+
# if not dry_run:
|
|
302
|
+
# if reindex:
|
|
303
|
+
# print("Reindexing the table...")
|
|
304
|
+
# cursor.execute(f"REINDEX {table_name}")
|
|
305
|
+
# conn.commit()
|
|
306
|
+
|
|
307
|
+
# print(f"Total duplicates found: {total_duplicates}")
|
|
308
|
+
# print(f"Total entries deleted: {total_deleted}")
|
|
309
|
+
|
|
310
|
+
# return total_duplicates, total_deleted
|
|
311
|
+
|
|
312
|
+
# except Exception as error:
|
|
313
|
+
# print(f"An error occurred: {error}")
|
|
314
|
+
# return None, None
|
|
315
|
+
|
|
316
|
+
# finally:
|
|
317
|
+
# conn.close()
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def delete_duplicates(
    lpath_db: str,
    table_name: str,
    columns: Union[str, List[str]] = "all",
    include_blob: bool = False,
    chunk_size: int = 10_000,
    dry_run: bool = True,
) -> Tuple[Optional[int], Optional[int]]:
    """Remove duplicated rows from an SQLite table by rebuilding it.

    The distinct rows are copied chunk by chunk into ``<table_name>_temp``,
    the original table is dropped and the temporary table is renamed to take
    its place.

    NOTE: the rebuilt table is created with ``CREATE TABLE ... AS SELECT``
    over the selected columns only, so columns that are not selected do not
    survive a real (non-dry) run.

    Parameters
    ----------
    lpath_db : str
        Path to the SQLite database file.
    table_name : str
        Table to deduplicate.
    columns : Union[str, List[str]], optional
        Columns that define a duplicate; forwarded to ``_determine_columns``.
        Default is "all".
    include_blob : bool, optional
        Forwarded to ``_determine_columns``. Default is False.
    chunk_size : int, optional
        Number of distinct rows copied per INSERT statement; must be >= 1.
        Default is 10_000.
    dry_run : bool, optional
        When True (default) the database is only read: the counts are
        computed and the statements that would run are printed.

    Returns
    -------
    Tuple[Optional[int], Optional[int]]
        ``(rows in the table before deduplication, duplicated rows)``, or
        ``(None, None)`` when an error occurred (the error is printed).
    """
    # Bound before the try block so that `finally` is safe even when
    # sqlite3.connect() itself fails.
    conn = None
    try:
        if chunk_size < 1:
            # A zero-sized chunk would copy nothing and then replace the
            # table with an empty one.
            raise ValueError(f"chunk_size must be >= 1, got {chunk_size}")

        conn = sqlite3.connect(lpath_db)
        cursor = conn.cursor()

        # Vacuum the database to free up space
        if not dry_run:
            cursor.execute("VACUUM")
            conn.commit()

        columns = _determine_columns(cursor, table_name, columns, include_blob)
        columns_str = ", ".join(columns)
        temp_table = f"{table_name}_temp"

        def build_chunk_query(offset: int) -> str:
            # DISTINCT is applied before LIMIT/OFFSET, so successive offsets
            # walk through the distinct rows of the whole table.
            return f"""
                INSERT OR IGNORE INTO {temp_table}
                SELECT DISTINCT {columns_str}
                FROM {table_name}
                LIMIT {chunk_size} OFFSET {offset}
            """

        # Count the real number of rows instead of adding chunk_size per
        # iteration, which overstated both the processed and duplicate counts.
        cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
        total_processed = cursor.fetchone()[0]

        if dry_run:
            # Read-only path: nothing is created or inserted. Previously the
            # temp table was created anyway and the copy loop never ended,
            # because cursor.rowcount is never 0 when no INSERT is executed.
            cursor.execute(
                f"SELECT COUNT(*) FROM (SELECT DISTINCT {columns_str} FROM {table_name})"
            )
            total_unique = cursor.fetchone()[0]
            for offset in range(0, total_unique, chunk_size):
                print(f"[DRY RUN] Would execute: {build_chunk_query(offset)}")
        else:
            # Empty table with the same column layout as the selection.
            cursor.execute(
                f"CREATE TABLE {temp_table} AS SELECT DISTINCT {columns_str} FROM {table_name} LIMIT 0"
            )

            total_unique = 0
            offset = 0
            while True:
                cursor.execute(build_chunk_query(offset))
                rows_inserted = cursor.rowcount
                conn.commit()

                # No more distinct rows to copy.
                if rows_inserted <= 0:
                    break

                total_unique += rows_inserted
                offset += chunk_size
                print(f"Copied {total_unique} unique rows so far")

            # Replace original table with the deduplicated one
            cursor.execute(f"DROP TABLE {table_name}")
            cursor.execute(f"ALTER TABLE {temp_table} RENAME TO {table_name}")
            conn.commit()

        total_duplicates = total_processed - total_unique

        print(f"Total rows processed: {total_processed}")
        print(f"Total unique rows: {total_unique}")
        print(f"Total duplicates removed: {total_duplicates}")

        return total_processed, total_duplicates

    except Exception as error:
        # Best-effort API: report the problem and signal failure via None.
        print(f"An error occurred: {error}")
        return None, None

    finally:
        if conn is not None:
            conn.close()
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
# EOF
|
scitex/db/_inspect.py
ADDED
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
# Time-stamp: "2024-11-11 14:17:00 (ywatanabe)"
|
|
4
|
+
# File: ./scitex_repo/src/scitex/db/_inspect.py
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import sqlite3
|
|
8
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
9
|
+
|
|
10
|
+
#!/usr/bin/env python3
|
|
11
|
+
# -*- coding: utf-8 -*-
|
|
12
|
+
# Time-stamp: "2024-10-24 13:13:33 (ywatanabe)"
|
|
13
|
+
# /mnt/ssd/scitex_repo/src/scitex/db/_inspect.py
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Inspector:
    """Read-only helper for looking at the tables of an SQLite database file.

    Every method opens its own connection and closes it before returning.
    """

    def __init__(self, db_path: str):
        """Remember the database path.

        Raises:
            FileNotFoundError: If ``db_path`` does not exist.
        """
        if not os.path.exists(db_path):
            raise FileNotFoundError(f"Database file not found: {db_path}")
        self.db_path = db_path

    def get_table_names(self) -> List[str]:
        """Retrieves all table names from the database.

        Returns:
            List[str]: List of table names
        """
        # `with sqlite3.connect(...)` only manages the transaction and leaves
        # the connection open, so it is closed explicitly here.
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
            return [table[0] for table in cursor.fetchall()]
        finally:
            conn.close()

    def get_table_info(
        self, table_name: str
    ) -> List[Tuple[int, str, str, int, Any, int, str]]:
        """Retrieves table structure information.

        Args:
            table_name (str): Name of the table

        Returns:
            List[Tuple[int, str, str, int, Any, int, str]]: One tuple per
            column: the ``PRAGMA table_info`` row
            ``(cid, name, type, notnull, dflt_value, pk)`` followed by a
            constraint summary ("PRIMARY KEY", "NOT NULL", both separated by a
            space, or an empty string).
        """
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute(f"PRAGMA table_info({table_name})")
            columns = cursor.fetchall()
        finally:
            conn.close()

        enhanced_columns = []
        for col in columns:
            constraints = []
            # col[5] is the column's 1-based position within the primary key
            # (0 when it is not part of it). The UNIQUE flag of
            # `PRAGMA index_list` is not a primary-key marker: it is set for
            # plain UNIQUE columns and absent for INTEGER PRIMARY KEY columns.
            if col[5] > 0:
                constraints.append("PRIMARY KEY")
            if col[3] == 1:
                constraints.append("NOT NULL")
            enhanced_columns.append(col + (" ".join(constraints),))

        return enhanced_columns

    def get_sample_data(
        self, table_name: str, limit: int = 5
    ) -> Tuple[List[str], List[Tuple], int]:
        """Retrieves sample data from the specified table.

        Args:
            table_name (str): Name of the table
            limit (int, optional): Number of rows to retrieve. Defaults to 5.

        Returns:
            Tuple[List[str], List[Tuple], int]: Column names, sample data rows, and total row count
        """
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute(f"SELECT * FROM {table_name} LIMIT {limit}")
            columns = [description[0] for description in cursor.description]
            sample_data = cursor.fetchall()

            cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
            total_rows = cursor.fetchone()[0]
        finally:
            conn.close()

        return columns, sample_data, total_rows

    def inspect(
        self,
        table_names: Optional[List[str]] = None,
        verbose=True,
    ) -> List[Any]:
        """Build one sample-data overview per table.

        Args:
            table_names (Optional[List[str]], optional): Tables to inspect.
                If None, all tables of the database are inspected.
            verbose: Accepted for interface compatibility; not used here.

        Returns:
            List[Any]: One pandas DataFrame per table, holding the sample rows
            with every value rendered as a string (bytes values are shown as
            "<BLOB>") and indexed by ``(table_name, n_total_rows)``.
        """
        # Imported lazily so that pandas is only needed when inspecting.
        import pandas as pd

        if table_names is None:
            table_names = self.get_table_names()

        data_tables = []
        for table_name in table_names:
            column_names, rows, total_rows = self.get_sample_data(table_name)

            meta = {}
            meta["table_name"] = table_name
            meta["n_total_rows"] = total_rows

            sample_data = pd.DataFrame(
                [
                    {
                        col: (str(value) if not isinstance(value, bytes) else "<BLOB>")
                        for col, value in zip(column_names, row)
                    }
                    for row in rows
                ]
            )

            # Attach the table-level metadata to every row, then move it into
            # the index so the printed frame is labelled with it.
            for k, v in meta.items():
                sample_data[k] = v

            sample_data = sample_data.set_index(["table_name", "n_total_rows"])

            data_tables.append(sample_data)

        # A list is always returned, even for a single table.
        return data_tables
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def inspect(
    lpath_db: str, table_names: Optional[List[str]] = None, verbose: bool = True
) -> List[Any]:
    """
    Inspects the specified SQLite database.

    Example:
    >>> inspect('path/to/database.db')
    >>> inspect('path/to/database.db', ['table1', 'table2'])

    Args:
        lpath_db (str): Path to the SQLite database file
        table_names (Optional[List[str]], optional): List of table names to inspect.
            If None, inspects all tables. Defaults to None.
        verbose (bool, optional): If True, print each table overview.
            Defaults to True.

    Returns:
        List[Any]: The per-table overviews produced by ``Inspector.inspect``,
        in the order the tables were inspected.
    """
    inspector = Inspector(lpath_db)
    overviews_tables = inspector.inspect(table_names, verbose=verbose)
    if verbose:
        for dd in overviews_tables:
            print(f"\n{dd}\n")
    # The overviews are returned as well as printed, so the previous
    # `-> None` annotation was wrong.
    return overviews_tables
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
# python -c "import scitex; scitex.db.inspect(\"./data/db_all/Patient_23_005.db\")"
|
|
158
|
+
# python -c "import scitex; scitex.db.inspect(\"./data/db_all/Patient_23_005.db\", table_names=[\"eeg_data_reindexed\"])"
|
|
159
|
+
# python -c "import scitex; scitex.db.inspect(\"./data/db_all/Patient_23_005.db\", table_names=[\"eeg_data\"])"
|
|
160
|
+
# python -c "import scitex; scitex.db.inspect(\"./data/db_all/Patient_23_005.db\", table_names=[\"sqlite_sequence\"])"
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
# EOF
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Scitex decorators module."""
|
|
3
|
+
|
|
4
|
+
from ._auto_order import AutoOrderDecorator, batch_fn, disable_auto_order, enable_auto_order, numpy_fn, pandas_fn, torch_fn
|
|
5
|
+
from ._batch_fn import batch_fn
|
|
6
|
+
from ._cache_disk import cache_disk
|
|
7
|
+
from ._cache_mem import cache_mem
|
|
8
|
+
from ._combined import batch_numpy_fn, batch_pandas_fn, batch_torch_fn, numpy_batch_fn, pandas_batch_fn, torch_batch_fn
|
|
9
|
+
from ._converters import ConversionWarning, is_cuda, is_nested_decorator, is_torch, to_numpy, to_torch
|
|
10
|
+
from ._deprecated import deprecated
|
|
11
|
+
from ._not_implemented import not_implemented
|
|
12
|
+
from ._numpy_fn import numpy_fn
|
|
13
|
+
from ._pandas_fn import pandas_fn
|
|
14
|
+
from ._preserve_doc import preserve_doc
|
|
15
|
+
from ._signal_fn import signal_fn
|
|
16
|
+
from ._timeout import timeout
|
|
17
|
+
from ._torch_fn import torch_fn
|
|
18
|
+
from ._wrap import wrap
|
|
19
|
+
from ._xarray_fn import xarray_fn
|
|
20
|
+
|
|
21
|
+
# Public API of the decorators package. Each name is listed exactly once;
# the list previously repeated batch_fn, numpy_fn, pandas_fn and torch_fn.
__all__ = [
    "AutoOrderDecorator",
    "ConversionWarning",
    "batch_fn",
    "batch_numpy_fn",
    "batch_pandas_fn",
    "batch_torch_fn",
    "cache_disk",
    "cache_mem",
    "deprecated",
    "disable_auto_order",
    "enable_auto_order",
    "is_cuda",
    "is_nested_decorator",
    "is_torch",
    "not_implemented",
    "numpy_batch_fn",
    "numpy_fn",
    "pandas_batch_fn",
    "pandas_fn",
    "preserve_doc",
    "signal_fn",
    "timeout",
    "to_numpy",
    "to_torch",
    "torch_batch_fn",
    "torch_fn",
    "wrap",
    "xarray_fn",
]
|