scitex 2.0.0__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scitex/__init__.py +53 -15
- scitex/__main__.py +72 -26
- scitex/__version__.py +1 -1
- scitex/_sh.py +145 -23
- scitex/ai/__init__.py +30 -16
- scitex/ai/_gen_ai/_Anthropic.py +5 -7
- scitex/ai/_gen_ai/_BaseGenAI.py +2 -2
- scitex/ai/_gen_ai/_DeepSeek.py +10 -2
- scitex/ai/_gen_ai/_Google.py +2 -2
- scitex/ai/_gen_ai/_Llama.py +2 -2
- scitex/ai/_gen_ai/_OpenAI.py +2 -2
- scitex/ai/_gen_ai/_PARAMS.py +51 -65
- scitex/ai/_gen_ai/_Perplexity.py +2 -2
- scitex/ai/_gen_ai/__init__.py +25 -14
- scitex/ai/_gen_ai/_format_output_func.py +4 -4
- scitex/ai/classification/{classifier_server.py → Classifier.py} +5 -5
- scitex/ai/classification/CrossValidationExperiment.py +374 -0
- scitex/ai/classification/__init__.py +43 -4
- scitex/ai/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ai/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ai/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ai/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ai/classification/reporters/__init__.py +11 -0
- scitex/ai/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ai/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ai/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ai/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ai/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ai/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ai/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ai/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ai/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ai/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ai/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ai/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ai/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ai/classification/timeseries/__init__.py +39 -0
- scitex/ai/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ai/clustering/_umap.py +2 -2
- scitex/ai/feature_extraction/vit.py +1 -0
- scitex/ai/feature_selection/__init__.py +30 -0
- scitex/ai/feature_selection/feature_selection.py +364 -0
- scitex/ai/loss/multi_task_loss.py +1 -1
- scitex/ai/metrics/__init__.py +51 -4
- scitex/ai/metrics/_calc_bacc.py +61 -0
- scitex/ai/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ai/metrics/_calc_clf_report.py +78 -0
- scitex/ai/metrics/_calc_conf_mat.py +93 -0
- scitex/ai/metrics/_calc_feature_importance.py +183 -0
- scitex/ai/metrics/_calc_mcc.py +61 -0
- scitex/ai/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ai/metrics/_calc_roc_auc.py +110 -0
- scitex/ai/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ai/metrics/{silhoute_score_block.py → _calc_silhouette_score.py} +15 -8
- scitex/ai/metrics/_normalize_labels.py +83 -0
- scitex/ai/plt/__init__.py +47 -8
- scitex/ai/plt/{_conf_mat.py → _plot_conf_mat.py} +158 -87
- scitex/ai/plt/_plot_feature_importance.py +323 -0
- scitex/ai/plt/_plot_learning_curve.py +345 -0
- scitex/ai/plt/_plot_optuna_study.py +225 -0
- scitex/ai/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ai/plt/_plot_roc_curve.py +255 -0
- scitex/ai/training/{learning_curve_logger.py → _LearningCurveLogger.py} +197 -213
- scitex/ai/training/__init__.py +2 -2
- scitex/ai/utils/grid_search.py +3 -3
- scitex/benchmark/__init__.py +52 -0
- scitex/benchmark/benchmark.py +400 -0
- scitex/benchmark/monitor.py +370 -0
- scitex/benchmark/profiler.py +297 -0
- scitex/browser/__init__.py +48 -0
- scitex/browser/automation/CookieHandler.py +216 -0
- scitex/browser/automation/__init__.py +7 -0
- scitex/browser/collaboration/__init__.py +55 -0
- scitex/browser/collaboration/auth_helpers.py +94 -0
- scitex/browser/collaboration/collaborative_agent.py +136 -0
- scitex/browser/collaboration/credential_manager.py +188 -0
- scitex/browser/collaboration/interactive_panel.py +400 -0
- scitex/browser/collaboration/persistent_browser.py +170 -0
- scitex/browser/collaboration/shared_session.py +383 -0
- scitex/browser/collaboration/standard_interactions.py +246 -0
- scitex/browser/collaboration/visual_feedback.py +181 -0
- scitex/browser/core/BrowserMixin.py +326 -0
- scitex/browser/core/ChromeProfileManager.py +446 -0
- scitex/browser/core/__init__.py +9 -0
- scitex/browser/debugging/__init__.py +18 -0
- scitex/browser/debugging/_browser_logger.py +657 -0
- scitex/browser/debugging/_highlight_element.py +143 -0
- scitex/browser/debugging/_show_grid.py +154 -0
- scitex/browser/interaction/__init__.py +24 -0
- scitex/browser/interaction/click_center.py +149 -0
- scitex/browser/interaction/click_with_fallbacks.py +206 -0
- scitex/browser/interaction/close_popups.py +498 -0
- scitex/browser/interaction/fill_with_fallbacks.py +209 -0
- scitex/browser/pdf/__init__.py +14 -0
- scitex/browser/pdf/click_download_for_chrome_pdf_viewer.py +200 -0
- scitex/browser/pdf/detect_chrome_pdf_viewer.py +198 -0
- scitex/browser/remote/CaptchaHandler.py +434 -0
- scitex/browser/remote/ZenRowsAPIClient.py +347 -0
- scitex/browser/remote/ZenRowsBrowserManager.py +570 -0
- scitex/browser/remote/__init__.py +11 -0
- scitex/browser/stealth/HumanBehavior.py +344 -0
- scitex/browser/stealth/StealthManager.py +1008 -0
- scitex/browser/stealth/__init__.py +9 -0
- scitex/browser/template.py +122 -0
- scitex/capture/__init__.py +110 -0
- scitex/capture/__main__.py +25 -0
- scitex/capture/capture.py +848 -0
- scitex/capture/cli.py +233 -0
- scitex/capture/gif.py +344 -0
- scitex/capture/mcp_server.py +961 -0
- scitex/capture/session.py +70 -0
- scitex/capture/utils.py +705 -0
- scitex/cli/__init__.py +17 -0
- scitex/cli/cloud.py +447 -0
- scitex/cli/main.py +42 -0
- scitex/cli/scholar.py +280 -0
- scitex/context/_suppress_output.py +5 -3
- scitex/db/__init__.py +30 -3
- scitex/db/__main__.py +75 -0
- scitex/db/_check_health.py +381 -0
- scitex/db/_delete_duplicates.py +25 -386
- scitex/db/_inspect.py +335 -114
- scitex/db/_inspect_optimized.py +301 -0
- scitex/db/{_PostgreSQL.py → _postgresql/_PostgreSQL.py} +3 -3
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BackupMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BatchMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_BlobMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ConnectionMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_MaintenanceMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_QueryMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_SchemaMixin.py +1 -1
- scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TransactionMixin.py +1 -1
- scitex/db/_postgresql/__init__.py +6 -0
- scitex/db/_sqlite3/_SQLite3.py +210 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin.py +581 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ArrayMixin_v01-need-_hash-col.py +517 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_BatchMixin.py +1 -1
- scitex/db/_sqlite3/_SQLite3Mixins/_BlobMixin.py +281 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin.py +548 -0
- scitex/db/_sqlite3/_SQLite3Mixins/_ColumnMixin_v01-indentation-issues.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ConnectionMixin.py +29 -13
- scitex/db/_sqlite3/_SQLite3Mixins/_GitMixin.py +583 -0
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_ImportExportMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_IndexMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_MaintenanceMixin.py +2 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_QueryMixin.py +37 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_RowMixin.py +46 -6
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TableMixin.py +56 -10
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/_TransactionMixin.py +1 -1
- scitex/db/{_SQLite3Mixins → _sqlite3/_SQLite3Mixins}/__init__.py +14 -2
- scitex/db/_sqlite3/__init__.py +7 -0
- scitex/db/_sqlite3/_delete_duplicates.py +274 -0
- scitex/decorators/__init__.py +2 -0
- scitex/decorators/_cache_disk.py +13 -5
- scitex/decorators/_cache_disk_async.py +49 -0
- scitex/decorators/_deprecated.py +175 -10
- scitex/decorators/_timeout.py +1 -1
- scitex/dev/_analyze_code_flow.py +2 -2
- scitex/dict/_DotDict.py +73 -15
- scitex/dict/_DotDict_v01-not-handling-recursive-instantiations.py +442 -0
- scitex/dict/_DotDict_v02-not-serializing-Path-object.py +446 -0
- scitex/dict/__init__.py +2 -0
- scitex/dict/_flatten.py +27 -0
- scitex/dsp/_crop.py +2 -2
- scitex/dsp/_demo_sig.py +2 -2
- scitex/dsp/_detect_ripples.py +2 -2
- scitex/dsp/_hilbert.py +2 -2
- scitex/dsp/_listen.py +6 -6
- scitex/dsp/_modulation_index.py +2 -2
- scitex/dsp/_pac.py +1 -1
- scitex/dsp/_psd.py +2 -2
- scitex/dsp/_resample.py +2 -1
- scitex/dsp/_time.py +3 -2
- scitex/dsp/_wavelet.py +3 -2
- scitex/dsp/add_noise.py +2 -2
- scitex/dsp/example.py +1 -0
- scitex/dsp/filt.py +10 -9
- scitex/dsp/template.py +3 -2
- scitex/dsp/utils/_differential_bandpass_filters.py +1 -1
- scitex/dsp/utils/pac.py +2 -2
- scitex/dt/_normalize_timestamp.py +432 -0
- scitex/errors.py +572 -0
- scitex/gen/_DimHandler.py +2 -2
- scitex/gen/__init__.py +37 -7
- scitex/gen/_deprecated_close.py +80 -0
- scitex/gen/_deprecated_start.py +26 -0
- scitex/gen/_detect_environment.py +152 -0
- scitex/gen/_detect_notebook_path.py +169 -0
- scitex/gen/_embed.py +6 -2
- scitex/gen/_get_notebook_path.py +257 -0
- scitex/gen/_less.py +1 -1
- scitex/gen/_list_packages.py +2 -2
- scitex/gen/_norm.py +44 -9
- scitex/gen/_norm_cache.py +269 -0
- scitex/gen/_src.py +3 -5
- scitex/gen/_title_case.py +3 -3
- scitex/io/__init__.py +28 -6
- scitex/io/_glob.py +13 -7
- scitex/io/_load.py +108 -21
- scitex/io/_load_cache.py +303 -0
- scitex/io/_load_configs.py +40 -15
- scitex/io/{_H5Explorer.py → _load_modules/_H5Explorer.py} +80 -17
- scitex/io/_load_modules/_ZarrExplorer.py +114 -0
- scitex/io/_load_modules/_bibtex.py +207 -0
- scitex/io/_load_modules/_hdf5.py +53 -178
- scitex/io/_load_modules/_json.py +5 -3
- scitex/io/_load_modules/_pdf.py +871 -16
- scitex/io/_load_modules/_sqlite3.py +15 -0
- scitex/io/_load_modules/_txt.py +41 -12
- scitex/io/_load_modules/_yaml.py +4 -3
- scitex/io/_load_modules/_zarr.py +126 -0
- scitex/io/_save.py +429 -171
- scitex/io/_save_modules/__init__.py +6 -0
- scitex/io/_save_modules/_bibtex.py +194 -0
- scitex/io/_save_modules/_csv.py +8 -4
- scitex/io/_save_modules/_excel.py +174 -15
- scitex/io/_save_modules/_hdf5.py +251 -226
- scitex/io/_save_modules/_image.py +1 -3
- scitex/io/_save_modules/_json.py +49 -4
- scitex/io/_save_modules/_listed_dfs_as_csv.py +1 -3
- scitex/io/_save_modules/_listed_scalars_as_csv.py +1 -3
- scitex/io/_save_modules/_tex.py +277 -0
- scitex/io/_save_modules/_yaml.py +42 -3
- scitex/io/_save_modules/_zarr.py +160 -0
- scitex/io/utils/__init__.py +20 -0
- scitex/io/utils/h5_to_zarr.py +616 -0
- scitex/linalg/_geometric_median.py +6 -2
- scitex/{gen/_tee.py → logging/_Tee.py} +43 -84
- scitex/logging/__init__.py +122 -0
- scitex/logging/_config.py +158 -0
- scitex/logging/_context.py +103 -0
- scitex/logging/_formatters.py +128 -0
- scitex/logging/_handlers.py +64 -0
- scitex/logging/_levels.py +35 -0
- scitex/logging/_logger.py +163 -0
- scitex/logging/_print_capture.py +95 -0
- scitex/ml/__init__.py +69 -0
- scitex/{ai/genai/anthropic.py → ml/_gen_ai/_Anthropic.py} +13 -19
- scitex/{ai/genai/base_genai.py → ml/_gen_ai/_BaseGenAI.py} +5 -5
- scitex/{ai/genai/deepseek.py → ml/_gen_ai/_DeepSeek.py} +11 -16
- scitex/{ai/genai/google.py → ml/_gen_ai/_Google.py} +7 -15
- scitex/{ai/genai/groq.py → ml/_gen_ai/_Groq.py} +1 -8
- scitex/{ai/genai/llama.py → ml/_gen_ai/_Llama.py} +3 -16
- scitex/{ai/genai/openai.py → ml/_gen_ai/_OpenAI.py} +3 -3
- scitex/{ai/genai/params.py → ml/_gen_ai/_PARAMS.py} +51 -65
- scitex/{ai/genai/perplexity.py → ml/_gen_ai/_Perplexity.py} +3 -14
- scitex/ml/_gen_ai/__init__.py +43 -0
- scitex/{ai/genai/calc_cost.py → ml/_gen_ai/_calc_cost.py} +1 -1
- scitex/{ai/genai/format_output_func.py → ml/_gen_ai/_format_output_func.py} +4 -4
- scitex/{ai/genai/genai_factory.py → ml/_gen_ai/_genai_factory.py} +8 -8
- scitex/ml/activation/__init__.py +8 -0
- scitex/ml/activation/_define.py +11 -0
- scitex/{ai/classifier_server.py → ml/classification/Classifier.py} +5 -5
- scitex/ml/classification/CrossValidationExperiment.py +374 -0
- scitex/ml/classification/__init__.py +46 -0
- scitex/ml/classification/reporters/_BaseClassificationReporter.py +281 -0
- scitex/ml/classification/reporters/_ClassificationReporter.py +773 -0
- scitex/ml/classification/reporters/_MultiClassificationReporter.py +406 -0
- scitex/ml/classification/reporters/_SingleClassificationReporter.py +1834 -0
- scitex/ml/classification/reporters/__init__.py +11 -0
- scitex/ml/classification/reporters/reporter_utils/_Plotter.py +1028 -0
- scitex/ml/classification/reporters/reporter_utils/__init__.py +80 -0
- scitex/ml/classification/reporters/reporter_utils/aggregation.py +457 -0
- scitex/ml/classification/reporters/reporter_utils/data_models.py +313 -0
- scitex/ml/classification/reporters/reporter_utils/reporting.py +1056 -0
- scitex/ml/classification/reporters/reporter_utils/storage.py +221 -0
- scitex/ml/classification/reporters/reporter_utils/validation.py +395 -0
- scitex/ml/classification/timeseries/_TimeSeriesBlockingSplit.py +568 -0
- scitex/ml/classification/timeseries/_TimeSeriesCalendarSplit.py +688 -0
- scitex/ml/classification/timeseries/_TimeSeriesMetadata.py +139 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit.py +1716 -0
- scitex/ml/classification/timeseries/_TimeSeriesSlidingWindowSplit_v01-not-using-n_splits.py +1685 -0
- scitex/ml/classification/timeseries/_TimeSeriesStrategy.py +84 -0
- scitex/ml/classification/timeseries/_TimeSeriesStratifiedSplit.py +610 -0
- scitex/ml/classification/timeseries/__init__.py +39 -0
- scitex/ml/classification/timeseries/_normalize_timestamp.py +436 -0
- scitex/ml/clustering/__init__.py +11 -0
- scitex/ml/clustering/_pca.py +115 -0
- scitex/ml/clustering/_umap.py +376 -0
- scitex/ml/feature_extraction/__init__.py +56 -0
- scitex/ml/feature_extraction/vit.py +149 -0
- scitex/ml/feature_selection/__init__.py +30 -0
- scitex/ml/feature_selection/feature_selection.py +364 -0
- scitex/ml/loss/_L1L2Losses.py +34 -0
- scitex/ml/loss/__init__.py +12 -0
- scitex/ml/loss/multi_task_loss.py +47 -0
- scitex/ml/metrics/__init__.py +56 -0
- scitex/ml/metrics/_calc_bacc.py +61 -0
- scitex/ml/metrics/_calc_bacc_from_conf_mat.py +38 -0
- scitex/ml/metrics/_calc_clf_report.py +78 -0
- scitex/ml/metrics/_calc_conf_mat.py +93 -0
- scitex/ml/metrics/_calc_feature_importance.py +183 -0
- scitex/ml/metrics/_calc_mcc.py +61 -0
- scitex/ml/metrics/_calc_pre_rec_auc.py +116 -0
- scitex/ml/metrics/_calc_roc_auc.py +110 -0
- scitex/ml/metrics/_calc_seizure_prediction_metrics.py +490 -0
- scitex/ml/metrics/_calc_silhouette_score.py +503 -0
- scitex/ml/metrics/_normalize_labels.py +83 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/__init__.py +0 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/__init__.py +3 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger.py +207 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger2020.py +238 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/ranger913A.py +215 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/ranger/rangerqh.py +184 -0
- scitex/ml/optim/Ranger_Deep_Learning_Optimizer/setup.py +24 -0
- scitex/ml/optim/__init__.py +13 -0
- scitex/ml/optim/_get_set.py +31 -0
- scitex/ml/optim/_optimizers.py +71 -0
- scitex/ml/plt/__init__.py +60 -0
- scitex/ml/plt/_plot_conf_mat.py +663 -0
- scitex/ml/plt/_plot_feature_importance.py +323 -0
- scitex/ml/plt/_plot_learning_curve.py +345 -0
- scitex/ml/plt/_plot_optuna_study.py +225 -0
- scitex/ml/plt/_plot_pre_rec_curve.py +290 -0
- scitex/ml/plt/_plot_roc_curve.py +255 -0
- scitex/ml/sk/__init__.py +11 -0
- scitex/ml/sk/_clf.py +58 -0
- scitex/ml/sk/_to_sktime.py +100 -0
- scitex/ml/sklearn/__init__.py +26 -0
- scitex/ml/sklearn/clf.py +58 -0
- scitex/ml/sklearn/to_sktime.py +100 -0
- scitex/{ai/training/early_stopping.py → ml/training/_EarlyStopping.py} +1 -2
- scitex/{ai → ml/training}/_LearningCurveLogger.py +198 -242
- scitex/ml/training/__init__.py +7 -0
- scitex/ml/utils/__init__.py +22 -0
- scitex/ml/utils/_check_params.py +50 -0
- scitex/ml/utils/_default_dataset.py +46 -0
- scitex/ml/utils/_format_samples_for_sktime.py +26 -0
- scitex/ml/utils/_label_encoder.py +134 -0
- scitex/ml/utils/_merge_labels.py +22 -0
- scitex/ml/utils/_sliding_window_data_augmentation.py +11 -0
- scitex/ml/utils/_under_sample.py +51 -0
- scitex/ml/utils/_verify_n_gpus.py +16 -0
- scitex/ml/utils/grid_search.py +148 -0
- scitex/nn/_BNet.py +15 -9
- scitex/nn/_Filters.py +2 -2
- scitex/nn/_ModulationIndex.py +2 -2
- scitex/nn/_PAC.py +1 -1
- scitex/nn/_Spectrogram.py +12 -3
- scitex/nn/__init__.py +9 -10
- scitex/path/__init__.py +18 -0
- scitex/path/_clean.py +4 -0
- scitex/path/_find.py +9 -4
- scitex/path/_symlink.py +348 -0
- scitex/path/_version.py +4 -3
- scitex/pd/__init__.py +2 -0
- scitex/pd/_get_unique.py +99 -0
- scitex/plt/__init__.py +114 -5
- scitex/plt/_subplots/_AxesWrapper.py +1 -3
- scitex/plt/_subplots/_AxisWrapper.py +7 -3
- scitex/plt/_subplots/_AxisWrapperMixins/_AdjustmentMixin.py +47 -13
- scitex/plt/_subplots/_AxisWrapperMixins/_MatplotlibPlotMixin.py +160 -2
- scitex/plt/_subplots/_AxisWrapperMixins/_SeabornMixin.py +26 -4
- scitex/plt/_subplots/_AxisWrapperMixins/_UnitAwareMixin.py +322 -0
- scitex/plt/_subplots/_AxisWrapperMixins/__init__.py +1 -0
- scitex/plt/_subplots/_FigWrapper.py +62 -6
- scitex/plt/_subplots/_export_as_csv.py +43 -27
- scitex/plt/_subplots/_export_as_csv_formatters/__init__.py +5 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_annotate.py +81 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_bar.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_barh.py +20 -5
- scitex/plt/_subplots/_export_as_csv_formatters/_format_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_contour.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_errorbar.py +35 -18
- scitex/plt/_subplots/_export_as_csv_formatters/_format_eventplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_fill_between.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_imshow2d.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot.py +15 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_box.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_conf_mat.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_ecdf.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_fillv.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_image.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_joyplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_kde.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_ci.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_mean_std.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_median_iqr.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_raster.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_rectangle.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter.py +35 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_scatter_hist.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_shaded_line.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_plot_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_scatter.py +6 -4
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_barplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_boxplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_heatmap.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_histplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_jointplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_kdeplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_lineplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_pairplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_scatterplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_stripplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_swarmplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_sns_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_text.py +60 -0
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violin.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/_format_violinplot.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters/test_formatters.py +1 -3
- scitex/plt/_subplots/_export_as_csv_formatters.py +56 -59
- scitex/plt/ax/_style/_hide_spines.py +1 -3
- scitex/plt/ax/_style/_rotate_labels.py +180 -76
- scitex/plt/ax/_style/_rotate_labels_v01.py +248 -0
- scitex/plt/ax/_style/_set_meta.py +11 -4
- scitex/plt/ax/_style/_set_supxyt.py +3 -3
- scitex/plt/ax/_style/_set_xyt.py +3 -3
- scitex/plt/ax/_style/_share_axes.py +2 -2
- scitex/plt/color/__init__.py +4 -4
- scitex/plt/color/{_get_colors_from_cmap.py → _get_colors_from_conf_matap.py} +7 -7
- scitex/plt/utils/_configure_mpl.py +99 -86
- scitex/plt/utils/_histogram_utils.py +1 -3
- scitex/plt/utils/_is_valid_axis.py +1 -3
- scitex/plt/utils/_scitex_config.py +1 -0
- scitex/repro/__init__.py +75 -0
- scitex/{reproduce → repro}/_gen_ID.py +1 -1
- scitex/{reproduce → repro}/_gen_timestamp.py +1 -1
- scitex/repro_rng/_RandomStateManager.py +590 -0
- scitex/repro_rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/repro_rng/__init__.py +39 -0
- scitex/reproduce/__init__.py +25 -13
- scitex/reproduce/_hash_array.py +22 -0
- scitex/resource/_get_processor_usages.py +4 -4
- scitex/resource/_get_specs.py +2 -2
- scitex/resource/_log_processor_usages.py +2 -2
- scitex/rng/_RandomStateManager.py +590 -0
- scitex/rng/_RandomStateManager_v01-no-verbose-options.py +414 -0
- scitex/rng/__init__.py +39 -0
- scitex/scholar/__init__.py +309 -19
- scitex/scholar/__main__.py +319 -0
- scitex/scholar/auth/ScholarAuthManager.py +308 -0
- scitex/scholar/auth/__init__.py +12 -0
- scitex/scholar/auth/core/AuthenticationGateway.py +473 -0
- scitex/scholar/auth/core/BrowserAuthenticator.py +386 -0
- scitex/scholar/auth/core/StrategyResolver.py +309 -0
- scitex/scholar/auth/core/__init__.py +16 -0
- scitex/scholar/auth/gateway/_OpenURLLinkFinder.py +120 -0
- scitex/scholar/auth/gateway/_OpenURLResolver.py +209 -0
- scitex/scholar/auth/gateway/__init__.py +38 -0
- scitex/scholar/auth/gateway/_resolve_functions.py +101 -0
- scitex/scholar/auth/providers/BaseAuthenticator.py +166 -0
- scitex/scholar/auth/providers/EZProxyAuthenticator.py +484 -0
- scitex/scholar/auth/providers/OpenAthensAuthenticator.py +619 -0
- scitex/scholar/auth/providers/ShibbolethAuthenticator.py +686 -0
- scitex/scholar/auth/providers/__init__.py +18 -0
- scitex/scholar/auth/session/AuthCacheManager.py +189 -0
- scitex/scholar/auth/session/SessionManager.py +159 -0
- scitex/scholar/auth/session/__init__.py +11 -0
- scitex/scholar/auth/sso/BaseSSOAutomator.py +373 -0
- scitex/scholar/auth/sso/OpenAthensSSOAutomator.py +378 -0
- scitex/scholar/auth/sso/SSOAutomator.py +180 -0
- scitex/scholar/auth/sso/UniversityOfMelbourneSSOAutomator.py +380 -0
- scitex/scholar/auth/sso/__init__.py +15 -0
- scitex/scholar/browser/ScholarBrowserManager.py +705 -0
- scitex/scholar/browser/__init__.py +38 -0
- scitex/scholar/browser/utils/__init__.py +13 -0
- scitex/scholar/browser/utils/click_and_wait.py +205 -0
- scitex/scholar/browser/utils/close_unwanted_pages.py +140 -0
- scitex/scholar/browser/utils/wait_redirects.py +732 -0
- scitex/scholar/config/PublisherRules.py +132 -0
- scitex/scholar/config/ScholarConfig.py +126 -0
- scitex/scholar/config/__init__.py +17 -0
- scitex/scholar/core/Paper.py +627 -0
- scitex/scholar/core/Papers.py +722 -0
- scitex/scholar/core/Scholar.py +1975 -0
- scitex/scholar/core/__init__.py +9 -0
- scitex/scholar/impact_factor/ImpactFactorEngine.py +204 -0
- scitex/scholar/impact_factor/__init__.py +20 -0
- scitex/scholar/impact_factor/estimation/ImpactFactorEstimationEngine.py +0 -0
- scitex/scholar/impact_factor/estimation/__init__.py +40 -0
- scitex/scholar/impact_factor/estimation/build_database.py +0 -0
- scitex/scholar/impact_factor/estimation/core/__init__.py +28 -0
- scitex/scholar/impact_factor/estimation/core/cache_manager.py +523 -0
- scitex/scholar/impact_factor/estimation/core/calculator.py +355 -0
- scitex/scholar/impact_factor/estimation/core/journal_matcher.py +428 -0
- scitex/scholar/integration/__init__.py +59 -0
- scitex/scholar/integration/base.py +502 -0
- scitex/scholar/integration/mendeley/__init__.py +22 -0
- scitex/scholar/integration/mendeley/exporter.py +166 -0
- scitex/scholar/integration/mendeley/importer.py +236 -0
- scitex/scholar/integration/mendeley/linker.py +79 -0
- scitex/scholar/integration/mendeley/mapper.py +212 -0
- scitex/scholar/integration/zotero/__init__.py +27 -0
- scitex/scholar/integration/zotero/__main__.py +264 -0
- scitex/scholar/integration/zotero/exporter.py +351 -0
- scitex/scholar/integration/zotero/importer.py +372 -0
- scitex/scholar/integration/zotero/linker.py +415 -0
- scitex/scholar/integration/zotero/mapper.py +286 -0
- scitex/scholar/metadata_engines/ScholarEngine.py +588 -0
- scitex/scholar/metadata_engines/__init__.py +21 -0
- scitex/scholar/metadata_engines/individual/ArXivEngine.py +397 -0
- scitex/scholar/metadata_engines/individual/CrossRefEngine.py +274 -0
- scitex/scholar/metadata_engines/individual/CrossRefLocalEngine.py +263 -0
- scitex/scholar/metadata_engines/individual/OpenAlexEngine.py +350 -0
- scitex/scholar/metadata_engines/individual/PubMedEngine.py +329 -0
- scitex/scholar/metadata_engines/individual/SemanticScholarEngine.py +438 -0
- scitex/scholar/metadata_engines/individual/URLDOIEngine.py +410 -0
- scitex/scholar/metadata_engines/individual/_BaseDOIEngine.py +487 -0
- scitex/scholar/metadata_engines/individual/__init__.py +7 -0
- scitex/scholar/metadata_engines/utils/_PubMedConverter.py +469 -0
- scitex/scholar/metadata_engines/utils/_URLDOIExtractor.py +283 -0
- scitex/scholar/metadata_engines/utils/__init__.py +30 -0
- scitex/scholar/metadata_engines/utils/_metadata2bibtex.py +103 -0
- scitex/scholar/metadata_engines/utils/_standardize_metadata.py +376 -0
- scitex/scholar/pdf_download/ScholarPDFDownloader.py +579 -0
- scitex/scholar/pdf_download/__init__.py +5 -0
- scitex/scholar/pdf_download/strategies/__init__.py +38 -0
- scitex/scholar/pdf_download/strategies/chrome_pdf_viewer.py +376 -0
- scitex/scholar/pdf_download/strategies/direct_download.py +131 -0
- scitex/scholar/pdf_download/strategies/manual_download_fallback.py +167 -0
- scitex/scholar/pdf_download/strategies/manual_download_utils.py +996 -0
- scitex/scholar/pdf_download/strategies/response_body.py +207 -0
- scitex/scholar/pipelines/ScholarPipelineBibTeX.py +364 -0
- scitex/scholar/pipelines/ScholarPipelineParallel.py +478 -0
- scitex/scholar/pipelines/ScholarPipelineSingle.py +767 -0
- scitex/scholar/pipelines/__init__.py +49 -0
- scitex/scholar/storage/BibTeXHandler.py +1018 -0
- scitex/scholar/storage/PaperIO.py +468 -0
- scitex/scholar/storage/ScholarLibrary.py +182 -0
- scitex/scholar/storage/_DeduplicationManager.py +548 -0
- scitex/scholar/storage/_LibraryCacheManager.py +724 -0
- scitex/scholar/storage/_LibraryManager.py +1835 -0
- scitex/scholar/storage/__init__.py +28 -0
- scitex/scholar/url_finder/ScholarURLFinder.py +379 -0
- scitex/scholar/url_finder/__init__.py +7 -0
- scitex/scholar/url_finder/strategies/__init__.py +33 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_direct_links.py +261 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_dropdown.py +67 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_href.py +204 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_navigation.py +256 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_publisher_patterns.py +165 -0
- scitex/scholar/url_finder/strategies/find_pdf_urls_by_zotero_translators.py +163 -0
- scitex/scholar/url_finder/strategies/find_supplementary_urls_by_href.py +70 -0
- scitex/scholar/utils/__init__.py +22 -0
- scitex/scholar/utils/bibtex/__init__.py +9 -0
- scitex/scholar/utils/bibtex/_parse_bibtex.py +71 -0
- scitex/scholar/utils/cleanup/__init__.py +8 -0
- scitex/scholar/utils/cleanup/_cleanup_scholar_processes.py +96 -0
- scitex/scholar/utils/cleanup/cleanup_old_extractions.py +117 -0
- scitex/scholar/utils/text/_TextNormalizer.py +407 -0
- scitex/scholar/utils/text/__init__.py +9 -0
- scitex/scholar/zotero/__init__.py +38 -0
- scitex/session/__init__.py +51 -0
- scitex/session/_lifecycle.py +736 -0
- scitex/session/_manager.py +102 -0
- scitex/session/template.py +122 -0
- scitex/stats/__init__.py +30 -26
- scitex/stats/correct/__init__.py +21 -0
- scitex/stats/correct/_correct_bonferroni.py +551 -0
- scitex/stats/correct/_correct_fdr.py +634 -0
- scitex/stats/correct/_correct_holm.py +548 -0
- scitex/stats/correct/_correct_sidak.py +499 -0
- scitex/stats/descriptive/__init__.py +85 -0
- scitex/stats/descriptive/_circular.py +540 -0
- scitex/stats/descriptive/_describe.py +219 -0
- scitex/stats/descriptive/_nan.py +518 -0
- scitex/stats/descriptive/_real.py +189 -0
- scitex/stats/effect_sizes/__init__.py +41 -0
- scitex/stats/effect_sizes/_cliffs_delta.py +325 -0
- scitex/stats/effect_sizes/_cohens_d.py +342 -0
- scitex/stats/effect_sizes/_epsilon_squared.py +315 -0
- scitex/stats/effect_sizes/_eta_squared.py +302 -0
- scitex/stats/effect_sizes/_prob_superiority.py +296 -0
- scitex/stats/posthoc/__init__.py +19 -0
- scitex/stats/posthoc/_dunnett.py +463 -0
- scitex/stats/posthoc/_games_howell.py +383 -0
- scitex/stats/posthoc/_tukey_hsd.py +367 -0
- scitex/stats/power/__init__.py +19 -0
- scitex/stats/power/_power.py +433 -0
- scitex/stats/template.py +119 -0
- scitex/stats/utils/__init__.py +62 -0
- scitex/stats/utils/_effect_size.py +985 -0
- scitex/stats/utils/_formatters.py +270 -0
- scitex/stats/utils/_normalizers.py +927 -0
- scitex/stats/utils/_power.py +433 -0
- scitex/stats_v01/_EffectSizeCalculator.py +488 -0
- scitex/stats_v01/_StatisticalValidator.py +411 -0
- scitex/stats_v01/__init__.py +60 -0
- scitex/stats_v01/_additional_tests.py +415 -0
- scitex/{stats → stats_v01}/_p2stars.py +19 -5
- scitex/stats_v01/_two_sample_tests.py +141 -0
- scitex/stats_v01/desc/__init__.py +83 -0
- scitex/stats_v01/desc/_circular.py +540 -0
- scitex/stats_v01/desc/_describe.py +219 -0
- scitex/stats_v01/desc/_nan.py +518 -0
- scitex/{stats/desc/_nan.py → stats_v01/desc/_nan_v01-20250920_145731.py} +23 -12
- scitex/stats_v01/desc/_real.py +189 -0
- scitex/stats_v01/tests/__corr_test_optimized.py +221 -0
- scitex/stats_v01/tests/_corr_test_optimized.py +179 -0
- scitex/str/__init__.py +1 -3
- scitex/str/_clean_path.py +6 -2
- scitex/str/_latex_fallback.py +267 -160
- scitex/str/_parse.py +44 -36
- scitex/str/_printc.py +1 -3
- scitex/template/__init__.py +87 -0
- scitex/template/_create_project.py +267 -0
- scitex/template/create_pip_project.py +80 -0
- scitex/template/create_research.py +80 -0
- scitex/template/create_singularity.py +80 -0
- scitex/units.py +291 -0
- scitex/utils/_compress_hdf5.py +14 -3
- scitex/utils/_email.py +21 -2
- scitex/utils/_grid.py +6 -4
- scitex/utils/_notify.py +13 -10
- scitex/utils/_verify_scitex_format.py +589 -0
- scitex/utils/_verify_scitex_format_v01.py +370 -0
- scitex/utils/template.py +122 -0
- scitex/web/_search_pubmed.py +62 -16
- scitex-2.1.0.dist-info/LICENSE +21 -0
- scitex-2.1.0.dist-info/METADATA +677 -0
- scitex-2.1.0.dist-info/RECORD +919 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/WHEEL +1 -1
- scitex-2.1.0.dist-info/entry_points.txt +3 -0
- scitex/ai/__Classifiers.py +0 -101
- scitex/ai/classification/classification_reporter.py +0 -1137
- scitex/ai/classification/classifiers.py +0 -101
- scitex/ai/classification_reporter.py +0 -1161
- scitex/ai/genai/__init__.py +0 -277
- scitex/ai/genai/anthropic_provider.py +0 -320
- scitex/ai/genai/anthropic_refactored.py +0 -109
- scitex/ai/genai/auth_manager.py +0 -200
- scitex/ai/genai/base_provider.py +0 -291
- scitex/ai/genai/chat_history.py +0 -307
- scitex/ai/genai/cost_tracker.py +0 -276
- scitex/ai/genai/deepseek_provider.py +0 -251
- scitex/ai/genai/google_provider.py +0 -228
- scitex/ai/genai/groq_provider.py +0 -248
- scitex/ai/genai/image_processor.py +0 -250
- scitex/ai/genai/llama_provider.py +0 -214
- scitex/ai/genai/mock_provider.py +0 -127
- scitex/ai/genai/model_registry.py +0 -304
- scitex/ai/genai/openai_provider.py +0 -293
- scitex/ai/genai/perplexity_provider.py +0 -205
- scitex/ai/genai/provider_base.py +0 -302
- scitex/ai/genai/provider_factory.py +0 -370
- scitex/ai/genai/response_handler.py +0 -235
- scitex/ai/layer/_Pass.py +0 -21
- scitex/ai/layer/__init__.py +0 -10
- scitex/ai/layer/_switch.py +0 -8
- scitex/ai/metrics/_bACC.py +0 -51
- scitex/ai/plt/_learning_curve.py +0 -194
- scitex/ai/plt/_optuna_study.py +0 -111
- scitex/ai/plt/aucs/__init__.py +0 -2
- scitex/ai/plt/aucs/example.py +0 -60
- scitex/ai/plt/aucs/pre_rec_auc.py +0 -223
- scitex/ai/plt/aucs/roc_auc.py +0 -246
- scitex/ai/sampling/undersample.py +0 -29
- scitex/db/_SQLite3.py +0 -2136
- scitex/db/_SQLite3Mixins/_BlobMixin.py +0 -229
- scitex/gen/_close.py +0 -222
- scitex/gen/_start.py +0 -451
- scitex/general/__init__.py +0 -5
- scitex/io/_load_modules/_db.py +0 -24
- scitex/life/__init__.py +0 -10
- scitex/life/_monitor_rain.py +0 -49
- scitex/reproduce/_fix_seeds.py +0 -45
- scitex/res/__init__.py +0 -5
- scitex/scholar/_local_search.py +0 -454
- scitex/scholar/_paper.py +0 -244
- scitex/scholar/_pdf_downloader.py +0 -325
- scitex/scholar/_search.py +0 -393
- scitex/scholar/_vector_search.py +0 -370
- scitex/scholar/_web_sources.py +0 -457
- scitex/stats/desc/__init__.py +0 -40
- scitex-2.0.0.dist-info/METADATA +0 -307
- scitex-2.0.0.dist-info/RECORD +0 -572
- scitex-2.0.0.dist-info/licenses/LICENSE +0 -7
- /scitex/ai/{act → activation}/__init__.py +0 -0
- /scitex/ai/{act → activation}/_define.py +0 -0
- /scitex/ai/{early_stopping.py → training/_EarlyStopping.py} +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_ImportExportMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_IndexMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_RowMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/_TableMixin.py +0 -0
- /scitex/db/{_PostgreSQLMixins → _postgresql/_PostgreSQLMixins}/__init__.py +0 -0
- /scitex/{stats → stats_v01}/_calc_partial_corr.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/_corr_test_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_describe_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_multiple_corrections.py +0 -0
- /scitex/{stats → stats_v01}/_nan_stats.py +0 -0
- /scitex/{stats → stats_v01}/_p2stars_wrapper.py +0 -0
- /scitex/{stats → stats_v01}/_statistical_tests.py +0 -0
- /scitex/{stats/desc/_describe.py → stats_v01/desc/_describe_v01-20250920_145731.py} +0 -0
- /scitex/{stats/desc/_real.py → stats_v01/desc/_real_v01-20250920_145731.py} +0 -0
- /scitex/{stats → stats_v01}/multiple/__init__.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_bonferroni_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_fdr_correction.py +0 -0
- /scitex/{stats → stats_v01}/multiple/_multicompair.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_multi.py +0 -0
- /scitex/{stats → stats_v01}/tests/__corr_test_single.py +0 -0
- /scitex/{stats → stats_v01}/tests/__init__.py +0 -0
- /scitex/{stats → stats_v01}/tests/_brunner_munzel_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_nocorrelation_test.py +0 -0
- /scitex/{stats → stats_v01}/tests/_smirnov_grubbs.py +0 -0
- {scitex-2.0.0.dist-info → scitex-2.1.0.dist-info}/top_level.txt +0 -0
scitex/ai/genai/groq_provider.py
DELETED
|
@@ -1,248 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
# Time-stamp: "2024-11-25 12:00:00"
|
|
4
|
-
# Author: Yusuke Watanabe (ywatanabe@alumni.u-tokyo.ac.jp)
|
|
5
|
-
# scitex/src/scitex/ai/genai/groq_provider.py
|
|
6
|
-
|
|
7
|
-
"""
|
|
8
|
-
Groq provider implementation for GenAI.
|
|
9
|
-
|
|
10
|
-
Provides access to Groq's API with models like Llama, Mixtral, etc.
|
|
11
|
-
"""
|
|
12
|
-
|
|
13
|
-
from typing import List, Dict, Any, Optional, Generator
|
|
14
|
-
import logging
|
|
15
|
-
import os
|
|
16
|
-
|
|
17
|
-
from .base_provider import BaseProvider, CompletionResponse, Provider
|
|
18
|
-
from .provider_factory import register_provider
|
|
19
|
-
|
|
20
|
-
# Module-level logger, keyed by this module's import name.
logger = logging.getLogger(__name__)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class GroqProvider(BaseProvider):
    """Groq provider implementation.

    Wraps the ``groq`` client's chat-completions call and exposes a blocking
    (:meth:`complete`) and a streaming (:meth:`stream`) entry point.
    Supports Llama 3, Mixtral, and other models available through Groq.
    """

    SUPPORTED_MODELS = [
        "llama3-8b-8192",
        "llama3-70b-8192",
        "llama2-70b-4096",
        "mixtral-8x7b-32768",
        "gemma-7b-it",
    ]

    DEFAULT_MODEL = "llama3-8b-8192"

    # Caller-supplied ``max_tokens`` values are clamped to this ceiling.
    _MAX_OUTPUT_TOKENS = 8000

    # Request options copied from ``**kwargs`` into the API call when present.
    _OPTIONAL_PARAMS = ("temperature", "max_tokens", "top_p", "stop", "seed")

    # Roles accepted by validate_messages().
    _VALID_ROLES = ("system", "user", "assistant")

    # Context window per model; unknown models fall back to
    # _DEFAULT_CONTEXT_LENGTH.
    _CONTEXT_LENGTHS = {
        "llama3-8b-8192": 8192,
        "llama3-70b-8192": 8192,
        "llama2-70b-4096": 4096,
        "mixtral-8x7b-32768": 32768,
        "gemma-7b-it": 8192,
    }
    _DEFAULT_CONTEXT_LENGTH = 8192

    def __init__(self, config):
        """Initialize Groq provider.

        Args:
            config: Provider configuration; ``api_key``, ``model`` and
                ``kwargs`` are read from it.

        Raises:
            ValueError: If no API key is configured and ``GROQ_API_KEY`` is
                not set in the environment.
            ImportError: If the ``groq`` package is not installed.
        """
        self.config = config
        self.api_key = config.api_key or os.getenv("GROQ_API_KEY")
        self.model = config.model or self.DEFAULT_MODEL
        self.kwargs = config.kwargs or {}

        if not self.api_key:
            raise ValueError("GROQ_API_KEY not provided and not found in environment")

        # Imported here rather than at module level, so importing this module
        # does not itself require the groq package.
        try:
            from groq import Groq as GroqClient
        except ImportError as err:
            raise ImportError(
                "Groq package not installed. Install with: pip install groq"
            ) from err

        # Built outside the try block so that an ImportError raised while
        # constructing the client is not reported as "package not installed".
        self.client = GroqClient(api_key=self.api_key)

    def _build_api_params(
        self,
        formatted_messages: List[Dict[str, Any]],
        overrides: Dict[str, Any],
        stream: bool,
    ) -> Dict[str, Any]:
        """Assemble the keyword arguments for ``chat.completions.create``."""
        api_params = {
            "model": self.model,
            "messages": formatted_messages,
            "stream": stream,
        }
        api_params.update(
            {name: overrides[name] for name in self._OPTIONAL_PARAMS if name in overrides}
        )

        # Clamp only real numbers: an explicit ``max_tokens=None`` is passed
        # through unchanged instead of crashing inside min().
        max_tokens = api_params.get("max_tokens")
        if max_tokens is not None:
            api_params["max_tokens"] = min(max_tokens, self._MAX_OUTPUT_TOKENS)

        return api_params

    def complete(self, messages: List[Dict[str, Any]], **kwargs) -> CompletionResponse:
        """Generate completion using Groq API.

        Args:
            messages: List of message dictionaries
            **kwargs: Additional parameters (temperature, max_tokens, top_p,
                stop, seed); other keys are ignored.

        Returns:
            CompletionResponse with generated text and usage info

        Raises:
            ValueError: If ``messages`` fails :meth:`validate_messages`.
            Exception: Any error from the client call is logged and re-raised.
        """
        if not self.validate_messages(messages):
            raise ValueError("Invalid message format")

        api_params = self._build_api_params(
            self.format_messages(messages), kwargs, stream=False
        )

        try:
            response = self.client.chat.completions.create(**api_params)
            choice = response.choices[0]
            usage = response.usage

            return CompletionResponse(
                content=choice.message.content,
                input_tokens=usage.prompt_tokens,
                output_tokens=usage.completion_tokens,
                finish_reason=choice.finish_reason,
                provider_response=response,
            )
        except Exception as e:
            logger.error("Groq API error: %s", e)
            raise

    def format_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Format messages for Groq API.

        Groq uses the same message format as OpenAI. Only the ``role`` and
        ``content`` keys of each message are kept.
        """
        return [{"role": msg["role"], "content": msg["content"]} for msg in messages]

    def validate_messages(self, messages: List[Dict[str, Any]]) -> bool:
        """Validate message format.

        Returns:
            True when ``messages`` is non-empty and every entry is a dict with
            a ``content`` key and a ``role`` of system, user or assistant;
            False otherwise.
        """
        if not messages:
            return False

        return all(
            isinstance(msg, dict)
            and "role" in msg
            and "content" in msg
            and msg["role"] in self._VALID_ROLES
            for msg in messages
        )

    def stream(
        self, messages: List[Dict[str, Any]], **kwargs
    ) -> Generator[str, None, CompletionResponse]:
        """Generate a streaming completion.

        This is a generator function, so validation and the API call happen
        when iteration starts, not when ``stream()`` is called.

        Args:
            messages: List of messages in standard format
            **kwargs: Additional parameters (same keys as :meth:`complete`)

        Yields:
            Text chunks during streaming

        Returns:
            Final CompletionResponse when complete. Token counts are
            estimates from :meth:`count_tokens`.

        Raises:
            ValueError: If ``messages`` fails :meth:`validate_messages`.
            Exception: Any error from the client call is logged and re-raised.
        """
        if not self.validate_messages(messages):
            raise ValueError("Invalid message format")

        formatted_messages = self.format_messages(messages)
        api_params = self._build_api_params(formatted_messages, kwargs, stream=True)

        try:
            chunks = self.client.chat.completions.create(**api_params)

            pieces = []
            finish_reason = "stop"  # fallback if no chunk reports one

            for chunk in chunks:
                # Skip chunks that carry no choices instead of raising
                # IndexError on them.
                if not chunk.choices:
                    continue
                choice = chunk.choices[0]
                # Keep the last non-empty finish reason a chunk reports,
                # rather than always claiming "stop".
                finish_reason = getattr(choice, "finish_reason", None) or finish_reason
                text = choice.delta.content
                if text:
                    pieces.append(text)
                    yield text

            full_content = "".join(pieces)

            # Estimate tokens for streaming (Groq doesn't provide usage in stream)
            return CompletionResponse(
                content=full_content,
                input_tokens=self.count_tokens(str(formatted_messages)),
                output_tokens=self.count_tokens(full_content),
                finish_reason=finish_reason,
            )
        except Exception as e:
            logger.error("Groq streaming error: %s", e)
            raise

    def count_tokens(self, text: str) -> int:
        """Count tokens in the given text.

        Args:
            text: Text to count tokens for

        Returns:
            Number of tokens (estimated as 4/3 of the whitespace-separated
            word count, rounded down)
        """
        # Groq doesn't provide a token counter, so estimate
        return len(text.split()) * 4 // 3

    @property
    def supports_images(self) -> bool:
        """Check if this provider/model supports image inputs."""
        # Groq doesn't currently support multimodal inputs
        return False

    @property
    def supports_streaming(self) -> bool:
        """Check if this provider/model supports streaming."""
        return True

    @property
    def max_context_length(self) -> int:
        """Get maximum context length for this model."""
        return self._CONTEXT_LENGTHS.get(self.model, self._DEFAULT_CONTEXT_LENGTH)
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
# Auto-register when module is imported
|
|
248
|
-
# Import-time side effect: hands GroqProvider to register_provider (imported
# from .provider_factory) under the key Provider.GROQ.value.
register_provider(Provider.GROQ.value, GroqProvider)
|
|
@@ -1,250 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
# Time-stamp: "2025-05-31 10:25:00"
|
|
4
|
-
# Author: ywatanabe
|
|
5
|
-
# File: ./src/scitex/ai/genai/image_processor.py
|
|
6
|
-
|
|
7
|
-
"""
|
|
8
|
-
Handles image processing for multimodal AI inputs.
|
|
9
|
-
|
|
10
|
-
This module provides image processing functionality including:
|
|
11
|
-
- Image resizing to fit token limits
|
|
12
|
-
- Base64 encoding for API transmission
|
|
13
|
-
- Multiple format support (file path, bytes, PIL Image)
|
|
14
|
-
- Format validation
|
|
15
|
-
"""
|
|
16
|
-
|
|
17
|
-
import base64
|
|
18
|
-
import io
|
|
19
|
-
from typing import Union, Tuple, Optional
|
|
20
|
-
from PIL import Image
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class ImageProcessor:
    """Processes images for multimodal AI inputs.

    Example
    -------
    >>> processor = ImageProcessor()
    >>> # Process image from file
    >>> base64_str = processor.process_image("path/to/image.jpg", max_size=512)
    >>> print(base64_str[:50])
    /9j/4AAQSkZJRgABAQEASABIAAD/2wBDAAgGBgcGBQgHBw...

    >>> # Process PIL Image
    >>> from PIL import Image
    >>> img = Image.new('RGB', (100, 100), color='red')
    >>> base64_str = processor.process_image(img)
    """

    def __init__(self):
        """Initialize image processor."""
        # Lower-case file extensions (with leading dot) accepted by
        # validate_image().
        self.supported_formats = {".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp"}

    def process_image(
        self, image: Union[str, bytes, "Image.Image"], max_size: int = 512
    ) -> str:
        """Process an image for API transmission.

        Parameters
        ----------
        image : Union[str, bytes, Image.Image]
            Image as file path, data URL / base64 string, bytes, or PIL Image
        max_size : int
            Maximum dimension (width or height) in pixels

        Returns
        -------
        str
            Base64 encoded image string (JPEG-encoded, the default of
            ``to_base64``)
        """
        pil_image = self._to_pil_image(image)

        # Only shrink; images already within the limit are left untouched.
        if max(pil_image.size) > max_size:
            pil_image = self.resize_image(pil_image, max_size)

        return self.to_base64(pil_image)

    def _to_pil_image(self, image: Union[str, bytes, "Image.Image"]) -> "Image.Image":
        """Convert various image formats to PIL Image.

        Parameters
        ----------
        image : Union[str, bytes, Image.Image]
            A PIL Image (returned as is), raw encoded bytes, or a string that
            is tried as a ``data:image`` URL, then a file path, then bare
            base64.

        Returns
        -------
        Image.Image
            PIL Image object

        Raises
        ------
        ValueError
            If the input type is unsupported, a data URL has no ``,``
            separator, or a string can be loaded neither as a path nor as
            base64.
        """
        if isinstance(image, Image.Image):
            return image

        if isinstance(image, bytes):
            return Image.open(io.BytesIO(image))

        if not isinstance(image, str):
            raise ValueError(f"Unsupported image type: {type(image)}")

        if image.startswith("data:image"):
            # "data:image/png;base64,<payload>" -> keep what follows the comma.
            _, separator, payload = image.partition(",")
            if not separator:
                raise ValueError("Malformed data URL: missing ',' before the payload")
            return Image.open(io.BytesIO(base64.b64decode(payload)))

        # Assume it's a file path first.
        try:
            return Image.open(image)
        except Exception as path_error:
            # Maybe it's already base64 encoded.
            try:
                return Image.open(io.BytesIO(base64.b64decode(image)))
            except Exception:
                # Report the path failure: it is usually the more informative
                # of the two errors.
                raise ValueError(
                    f"Could not load image from string: {path_error}"
                ) from path_error

    def resize_image(self, image: "Image.Image", max_size: int) -> "Image.Image":
        """Resize image to fit within max_size while maintaining aspect ratio.

        The longer side becomes exactly ``max_size`` (so a smaller image is
        enlarged when this method is called directly).

        Parameters
        ----------
        image : Image.Image
            PIL Image to resize
        max_size : int
            Maximum dimension in pixels

        Returns
        -------
        Image.Image
            Resized PIL Image

        Raises
        ------
        ValueError
            If ``max_size`` is smaller than 1.
        """
        if max_size < 1:
            raise ValueError(f"max_size must be at least 1, got {max_size}")

        width, height = image.size
        aspect_ratio = width / height

        # The shorter side is clamped to 1 px: for extreme aspect ratios the
        # scaled value truncates to 0, which resize() rejects.
        if width > height:
            new_width = max_size
            new_height = max(1, int(max_size / aspect_ratio))
        else:
            new_height = max_size
            new_width = max(1, int(max_size * aspect_ratio))

        # Use high-quality resampling; fall back to the module-level constant
        # on Pillow versions that have no Image.Resampling enum.
        resample = getattr(Image, "Resampling", Image).LANCZOS
        return image.resize((new_width, new_height), resample)

    def to_base64(self, image: "Image.Image", format: str = "JPEG") -> str:
        """Convert PIL Image to base64 string.

        Parameters
        ----------
        image : Image.Image
            PIL Image to encode
        format : str
            Output format (JPEG, PNG, etc.), passed to ``Image.save``

        Returns
        -------
        str
            Base64 encoded image string
        """
        is_jpeg = format.upper() == "JPEG"

        if is_jpeg and image.mode in ("RGBA", "LA", "P"):
            # JPEG cannot store transparency, so flatten onto white. Going
            # through RGBA first means palette and grayscale+alpha images are
            # composited with their alpha as the mask as well.
            rgba = image.convert("RGBA")
            background = Image.new("RGB", rgba.size, (255, 255, 255))
            background.paste(rgba, mask=rgba.split()[-1])
            image = background

        # Pass ``quality`` only for JPEG: handing ``quality=None`` to other
        # encoders can fail in those that read the option.
        save_options = {"quality": 95} if is_jpeg else {}

        buffer = io.BytesIO()
        image.save(buffer, format=format, **save_options)

        return base64.b64encode(buffer.getvalue()).decode("utf-8")

    def get_image_info(self, image: Union[str, bytes, "Image.Image"]) -> dict:
        """Get information about an image.

        Parameters
        ----------
        image : Union[str, bytes, Image.Image]
            Image to analyze

        Returns
        -------
        dict
            Keys ``width``, ``height``, ``mode``, ``format`` and ``size_mb``
            (the estimate from ``_estimate_size_mb``)
        """
        pil_image = self._to_pil_image(image)

        return {
            "width": pil_image.width,
            "height": pil_image.height,
            "mode": pil_image.mode,
            "format": pil_image.format,
            "size_mb": self._estimate_size_mb(pil_image),
        }

    def _estimate_size_mb(self, image: "Image.Image") -> float:
        """Estimate image size in megabytes.

        Parameters
        ----------
        image : Image.Image
            PIL Image

        Returns
        -------
        float
            Estimated size in MB
        """
        # Rough estimate: the number of characters in the mode string stands
        # in for bytes per pixel (e.g. "RGB" -> 3, "RGBA" -> 4).
        bytes_per_pixel = len(image.mode)
        total_bytes = image.width * image.height * bytes_per_pixel
        return total_bytes / (1024 * 1024)

    def validate_image(self, image_path: str) -> bool:
        """Validate if a file is a supported image format.

        Parameters
        ----------
        image_path : str
            Path to image file

        Returns
        -------
        bool
            True if the path ends with a supported extension and the file
            opens and passes ``Image.verify``; False otherwise (including
            when ``image_path`` is not a string)
        """
        if not isinstance(image_path, str):
            return False

        # Require a real ".ext" suffix; a dot-less name such as "jpg" is not
        # treated as having an extension.
        if not image_path.lower().endswith(tuple(self.supported_formats)):
            return False

        # Any failure to open or verify means "not a valid image". The with
        # block closes the file handle either way.
        try:
            with Image.open(image_path) as img:
                img.verify()
        except Exception:
            return False
        return True

    def __repr__(self) -> str:
        """String representation of ImageProcessor."""
        return f"ImageProcessor(supported_formats={self.supported_formats})"
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
# EOF
|
|
@@ -1,214 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
# -*- coding: utf-8 -*-
|
|
3
|
-
# Time-stamp: "2024-11-13 20:25:55 (ywatanabe)"
|
|
4
|
-
# File: ./scitex_repo/src/scitex/ai/genai/llama_provider.py
|
|
5
|
-
|
|
6
|
-
"""Llama provider implementation using the new component-based architecture.
|
|
7
|
-
|
|
8
|
-
This module provides integration with local Llama models through the official Llama library.
|
|
9
|
-
It supports loading and running Llama models locally with full control over model parameters.
|
|
10
|
-
"""
|
|
11
|
-
|
|
12
|
-
import os
|
|
13
|
-
from typing import Dict, List, Iterator, Optional, Any
|
|
14
|
-
|
|
15
|
-
try:
|
|
16
|
-
from llama import Llama as _Llama
|
|
17
|
-
from llama import Dialog
|
|
18
|
-
except ImportError:
|
|
19
|
-
_Llama = None
|
|
20
|
-
Dialog = None
|
|
21
|
-
print(
|
|
22
|
-
"Warning: llama package not installed. "
|
|
23
|
-
"Install with the official Meta Llama repository instructions."
|
|
24
|
-
)
|
|
25
|
-
|
|
26
|
-
from .base_provider import BaseProvider, ProviderConfig
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
class LlamaProvider(BaseProvider):
    """Llama provider implementation for local model inference."""

    SUPPORTED_MODELS = [
        "Meta-Llama-3-8B",
        "Meta-Llama-3-70B",
        "Meta-Llama-3.1-8B",
        "Meta-Llama-3.1-70B",
        "Meta-Llama-3.1-405B",
        "Llama-2-7b",
        "Llama-2-13b",
        "Llama-2-70b",
    ]

    DEFAULT_MODEL = "Meta-Llama-3-8B"

    def __init__(self, config: ProviderConfig):
        """Initialize Llama provider.

        Args:
            config: Provider configuration. ``model`` and ``max_tokens`` are
                read directly; ``ckpt_dir``, ``tokenizer_path``,
                ``max_seq_len`` and ``max_batch_size`` are optional
                attributes with defaults derived here.

        Raises:
            ImportError: If the ``llama`` package could not be imported.
            RuntimeError: If building the model fails.
        """
        self.config = config
        self.model_name = config.model or self.DEFAULT_MODEL

        # Llama-specific configuration; paths default to a directory named
        # after the model.
        self.ckpt_dir = getattr(config, "ckpt_dir", None) or f"{self.model_name}/"
        self.tokenizer_path = (
            getattr(config, "tokenizer_path", None)
            or f"{self.model_name}/tokenizer.model"
        )
        self.max_seq_len = getattr(config, "max_seq_len", 32_768)
        self.max_batch_size = getattr(config, "max_batch_size", 4)
        self.max_gen_len = config.max_tokens

        # Fail before touching os.environ when the package is missing.
        if _Llama is None:
            raise ImportError(
                "Llama package is not installed. Please install it from the official Meta repository."
            )

        # Configure environment variables for distributed inference
        self._setup_environment()

        try:
            self.generator = _Llama.build(
                ckpt_dir=self.ckpt_dir,
                tokenizer_path=self.tokenizer_path,
                max_seq_len=self.max_seq_len,
                max_batch_size=self.max_batch_size,
            )
        except Exception as e:
            raise RuntimeError(f"Failed to load Llama model: {str(e)}") from e

    def _setup_environment(self):
        """Set up environment variables for distributed inference.

        Variables that are already set keep their value; missing ones get a
        single-process default.
        """
        defaults = {
            "MASTER_ADDR": "localhost",
            "MASTER_PORT": "12355",
            "WORLD_SIZE": "1",
            "RANK": "0",
        }
        for key, value in defaults.items():
            os.environ.setdefault(key, value)

    def validate_messages(self, messages: List[Dict[str, Any]]) -> None:
        """Validate message format.

        Args:
            messages: List of message dictionaries

        Raises:
            ValueError: If ``messages`` is empty, or a message lacks ``role``
                or ``content``, or has a role other than system, user or
                assistant
        """
        if not messages:
            raise ValueError("Messages cannot be empty")

        for msg in messages:
            if "role" not in msg:
                raise ValueError(f"Missing role in message: {msg}")
            if "content" not in msg:
                raise ValueError(f"Missing content in message: {msg}")
            if msg["role"] not in ["system", "user", "assistant"]:
                raise ValueError(f"Invalid role: {msg['role']}")

    def format_messages(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Format messages for Llama API.

        Args:
            messages: List of message dictionaries

        Returns:
            A new list: the configured system prompt (if any) as a system
            message, followed by ``messages`` unchanged
        """
        formatted = []

        # Add system prompt if configured
        if self.config.system_prompt:
            formatted.append({"role": "system", "content": self.config.system_prompt})

        formatted.extend(messages)
        return formatted

    def complete(self, messages: List[Dict[str, Any]], **kwargs) -> Dict[str, Any]:
        """Generate a completion.

        Args:
            messages: List of message dictionaries
            **kwargs: Per-call overrides. ``temperature``, ``max_tokens`` and
                ``top_p`` are honoured; a missing or ``None`` value falls
                back to the configured value (temperature 1.0 and top_p 0.9
                when nothing is configured).

        Returns:
            Dict with ``content``, ``model``, ``usage`` (whitespace-based
            token estimates) and ``finish_reason``

        Raises:
            ValueError: If ``messages`` is invalid.
            RuntimeError: If inference fails.
        """
        self.validate_messages(messages)
        formatted_messages = self.format_messages(messages)

        # chat_completion takes a batch of dialogs; send a batch of one.
        dialogs = [formatted_messages]

        # Compare against None rather than truthiness so an explicit
        # temperature of 0.0 is kept instead of being replaced by 1.0.
        temperature = kwargs.get("temperature")
        if temperature is None:
            temperature = self.config.temperature
        if temperature is None:
            temperature = 1.0

        max_gen_len = kwargs.get("max_tokens")
        if max_gen_len is None:
            max_gen_len = self.max_gen_len

        params = {
            "max_gen_len": max_gen_len,
            "temperature": temperature,
            "top_p": kwargs.get("top_p", 0.9),
        }

        try:
            results = self.generator.chat_completion(dialogs, **params)
            content = results[0]["generation"]["content"]

            # Estimate token counts (Llama doesn't provide exact counts).
            # str() keeps the estimate from failing on non-string content.
            prompt_tokens = len(
                " ".join(str(msg["content"]) for msg in formatted_messages).split()
            )
            completion_tokens = len(content.split())

            return {
                "content": content,
                "model": self.model_name,
                "usage": {
                    "prompt_tokens": prompt_tokens,
                    "completion_tokens": completion_tokens,
                    "total_tokens": prompt_tokens + completion_tokens,
                },
                "finish_reason": "stop",
            }
        except Exception as e:
            raise RuntimeError(f"Llama inference error: {str(e)}") from e

    def stream(
        self, messages: List[Dict[str, Any]], **kwargs
    ) -> Iterator[Dict[str, Any]]:
        """Stream a completion.

        Note: this does not stream from the model. It runs :meth:`complete`
        to the end and then yields the result one character at a time.

        Args:
            messages: List of message dictionaries
            **kwargs: Additional parameters, forwarded to :meth:`complete`

        Yields:
            One dict per character (``content``, ``model``), then a final
            dict with empty ``content``, ``usage`` and ``finish_reason``
        """
        response = self.complete(messages, **kwargs)

        for char in response["content"]:
            yield {
                "content": char,
                "model": self.model_name,
            }

        # Final chunk with usage info
        yield {
            "content": "",
            "usage": response["usage"],
            "finish_reason": "stop",
        }
|