dasmixer-core 0.6.0a2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97) hide show
  1. dasmixer_core-0.6.0a2/PKG-INFO +83 -0
  2. dasmixer_core-0.6.0a2/README.md +39 -0
  3. dasmixer_core-0.6.0a2/pyproject.toml +61 -0
  4. dasmixer_core-0.6.0a2/src/dasmixer/api/__init__.py +5 -0
  5. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/__init__.py +0 -0
  6. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/peptides/__init__.py +1 -0
  7. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/peptides/matching.py +161 -0
  8. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/peptides/protein_map.py +539 -0
  9. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/ppm/__init__.py +2 -0
  10. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/ppm/dataclasses.py +16 -0
  11. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/ppm/seqfixer.py +613 -0
  12. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/proteins/__init__.py +0 -0
  13. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/proteins/enrich.py +43 -0
  14. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/proteins/lfq.py +115 -0
  15. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/proteins/map_identifications.py +59 -0
  16. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/proteins/sempai/__init__.py +96 -0
  17. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/proteins/sempai/algorithms.py +433 -0
  18. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/proteins/sempai/exceptions.py +33 -0
  19. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/proteins/sempai/prediction.py +615 -0
  20. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/proteins/sempai/protein.py +688 -0
  21. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/proteins/sempai/sample.py +369 -0
  22. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/proteins/sempai/utils.py +353 -0
  23. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/spectra/__init__.py +21 -0
  24. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/spectra/coverage_worker.py +196 -0
  25. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/spectra/identification_processor.py +257 -0
  26. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/spectra/ion_match.py +285 -0
  27. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/spectra/plot_flow.py +35 -0
  28. dasmixer_core-0.6.0a2/src/dasmixer/api/calculations/spectra/plot_matches.py +224 -0
  29. dasmixer_core-0.6.0a2/src/dasmixer/api/config.py +226 -0
  30. dasmixer_core-0.6.0a2/src/dasmixer/api/export/__init__.py +1 -0
  31. dasmixer_core-0.6.0a2/src/dasmixer/api/export/joined_export.py +217 -0
  32. dasmixer_core-0.6.0a2/src/dasmixer/api/export/mgf_export.py +324 -0
  33. dasmixer_core-0.6.0a2/src/dasmixer/api/export/mztab_export.py +320 -0
  34. dasmixer_core-0.6.0a2/src/dasmixer/api/export/shared_state.py +43 -0
  35. dasmixer_core-0.6.0a2/src/dasmixer/api/export/system_export.py +204 -0
  36. dasmixer_core-0.6.0a2/src/dasmixer/api/inputs/__init__.py +80 -0
  37. dasmixer_core-0.6.0a2/src/dasmixer/api/inputs/base.py +67 -0
  38. dasmixer_core-0.6.0a2/src/dasmixer/api/inputs/peptides/MQ_Evidences.py +96 -0
  39. dasmixer_core-0.6.0a2/src/dasmixer/api/inputs/peptides/PLGS.py +133 -0
  40. dasmixer_core-0.6.0a2/src/dasmixer/api/inputs/peptides/PowerNovo2.py +65 -0
  41. dasmixer_core-0.6.0a2/src/dasmixer/api/inputs/peptides/__init__.py +26 -0
  42. dasmixer_core-0.6.0a2/src/dasmixer/api/inputs/peptides/base.py +121 -0
  43. dasmixer_core-0.6.0a2/src/dasmixer/api/inputs/peptides/table_importer.py +528 -0
  44. dasmixer_core-0.6.0a2/src/dasmixer/api/inputs/proteins/__init__.py +1 -0
  45. dasmixer_core-0.6.0a2/src/dasmixer/api/inputs/proteins/fasta.py +201 -0
  46. dasmixer_core-0.6.0a2/src/dasmixer/api/inputs/registry.py +155 -0
  47. dasmixer_core-0.6.0a2/src/dasmixer/api/inputs/registry_new.py +155 -0
  48. dasmixer_core-0.6.0a2/src/dasmixer/api/inputs/spectra/__init__.py +10 -0
  49. dasmixer_core-0.6.0a2/src/dasmixer/api/inputs/spectra/base.py +100 -0
  50. dasmixer_core-0.6.0a2/src/dasmixer/api/inputs/spectra/mgf.py +178 -0
  51. dasmixer_core-0.6.0a2/src/dasmixer/api/inputs/spectra/plgs_mgf_with_leid.py +5 -0
  52. dasmixer_core-0.6.0a2/src/dasmixer/api/plugin_loader.py +351 -0
  53. dasmixer_core-0.6.0a2/src/dasmixer/api/project/__init__.py +6 -0
  54. dasmixer_core-0.6.0a2/src/dasmixer/api/project/array_utils.py +34 -0
  55. dasmixer_core-0.6.0a2/src/dasmixer/api/project/core/__init__.py +6 -0
  56. dasmixer_core-0.6.0a2/src/dasmixer/api/project/core/base.py +124 -0
  57. dasmixer_core-0.6.0a2/src/dasmixer/api/project/core/lifecycle.py +188 -0
  58. dasmixer_core-0.6.0a2/src/dasmixer/api/project/dataclasses.py +245 -0
  59. dasmixer_core-0.6.0a2/src/dasmixer/api/project/migrations.py +122 -0
  60. dasmixer_core-0.6.0a2/src/dasmixer/api/project/mixins/__init__.py +27 -0
  61. dasmixer_core-0.6.0a2/src/dasmixer/api/project/mixins/fast_ident_match_mixin.py +5 -0
  62. dasmixer_core-0.6.0a2/src/dasmixer/api/project/mixins/identification_mixin.py +510 -0
  63. dasmixer_core-0.6.0a2/src/dasmixer/api/project/mixins/import_project_mixin.py +498 -0
  64. dasmixer_core-0.6.0a2/src/dasmixer/api/project/mixins/peptide_mixin.py +622 -0
  65. dasmixer_core-0.6.0a2/src/dasmixer/api/project/mixins/plot_mixin.py +249 -0
  66. dasmixer_core-0.6.0a2/src/dasmixer/api/project/mixins/protein_mixin.py +832 -0
  67. dasmixer_core-0.6.0a2/src/dasmixer/api/project/mixins/query_mixin.py +49 -0
  68. dasmixer_core-0.6.0a2/src/dasmixer/api/project/mixins/report_mixin.py +80 -0
  69. dasmixer_core-0.6.0a2/src/dasmixer/api/project/mixins/sample_mixin.py +399 -0
  70. dasmixer_core-0.6.0a2/src/dasmixer/api/project/mixins/spectra_mixin.py +345 -0
  71. dasmixer_core-0.6.0a2/src/dasmixer/api/project/mixins/subset_mixin.py +104 -0
  72. dasmixer_core-0.6.0a2/src/dasmixer/api/project/mixins/tool_mixin.py +107 -0
  73. dasmixer_core-0.6.0a2/src/dasmixer/api/project/project.py +75 -0
  74. dasmixer_core-0.6.0a2/src/dasmixer/api/project/project_spectra_mapping.py +58 -0
  75. dasmixer_core-0.6.0a2/src/dasmixer/api/project/schema.py +258 -0
  76. dasmixer_core-0.6.0a2/src/dasmixer/api/reporting/__init__.py +12 -0
  77. dasmixer_core-0.6.0a2/src/dasmixer/api/reporting/_icons.py +17 -0
  78. dasmixer_core-0.6.0a2/src/dasmixer/api/reporting/base.py +611 -0
  79. dasmixer_core-0.6.0a2/src/dasmixer/api/reporting/registry.py +66 -0
  80. dasmixer_core-0.6.0a2/src/dasmixer/api/reporting/report_form.py +112 -0
  81. dasmixer_core-0.6.0a2/src/dasmixer/api/reporting/reports/__init__.py +17 -0
  82. dasmixer_core-0.6.0a2/src/dasmixer/api/reporting/reports/coverage_report.py +294 -0
  83. dasmixer_core-0.6.0a2/src/dasmixer/api/reporting/reports/pca_report.py +321 -0
  84. dasmixer_core-0.6.0a2/src/dasmixer/api/reporting/reports/sample_report.py +84 -0
  85. dasmixer_core-0.6.0a2/src/dasmixer/api/reporting/reports/toolmatch_report.py +233 -0
  86. dasmixer_core-0.6.0a2/src/dasmixer/api/reporting/reports/upset.py +291 -0
  87. dasmixer_core-0.6.0a2/src/dasmixer/api/reporting/reports/volcano_report.py +212 -0
  88. dasmixer_core-0.6.0a2/src/dasmixer/api/reporting/templates/report.html.j2 +213 -0
  89. dasmixer_core-0.6.0a2/src/dasmixer/utils/__init__.py +5 -0
  90. dasmixer_core-0.6.0a2/src/dasmixer/utils/lic.py +18 -0
  91. dasmixer_core-0.6.0a2/src/dasmixer/utils/logger.py +54 -0
  92. dasmixer_core-0.6.0a2/src/dasmixer/utils/mq_evidences.py +54 -0
  93. dasmixer_core-0.6.0a2/src/dasmixer/utils/ppm.py +159 -0
  94. dasmixer_core-0.6.0a2/src/dasmixer/utils/seek_files.py +25 -0
  95. dasmixer_core-0.6.0a2/src/dasmixer/utils/seqfixer_utils.py +168 -0
  96. dasmixer_core-0.6.0a2/src/dasmixer/utils/show_pathways.py +31 -0
  97. dasmixer_core-0.6.0a2/src/dasmixer/versions.py +13 -0
@@ -0,0 +1,83 @@
1
+ Metadata-Version: 2.4
2
+ Name: dasmixer-core
3
+ Version: 0.6.0a2
4
+ Summary: DASMixer core library — project API, calculations, and data import
5
+ Author: gluck
6
+ Author-email: glucksistemi@gmail.com
7
+ Requires-Python: <4.0, >=3.11
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.11
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Classifier: Programming Language :: Python :: 3.13
12
+ Classifier: Programming Language :: Python :: 3.14
13
+ Provides-Extra: all
14
+ Provides-Extra: plotly
15
+ Provides-Extra: proteins
16
+ Requires-Dist: aiocsv (>=1.4.0,<2.0.0)
17
+ Requires-Dist: aiofiles (>=25.1.0,<26.0.0)
18
+ Requires-Dist: aiosqlite (>=0.22.1,<0.23.0)
19
+ Requires-Dist: dasmixer-core[plotly,proteins] ; extra == "all"
20
+ Requires-Dist: docxtpl (>=0.20.2,<0.21.0)
21
+ Requires-Dist: html-for-docx (>=1.1.3,<2.0.0)
22
+ Requires-Dist: jinja2 (>=3.1.6,<4.0.0)
23
+ Requires-Dist: kaleido (>=1.2.0,<2.0.0) ; extra == "plotly"
24
+ Requires-Dist: mztabwriter[pandas] (>=0.1.0,<0.2.0)
25
+ Requires-Dist: npysearch (>=1.3.1,<2.0.0) ; (sys_platform != "win32") and (extra == "proteins")
26
+ Requires-Dist: npysearch-win (>=1.3.1,<2.0.0) ; (sys_platform == "win32") and (extra == "proteins")
27
+ Requires-Dist: numpy (>=2.4.1,<3.0.0)
28
+ Requires-Dist: openpyxl (>=3.1.5,<4.0.0)
29
+ Requires-Dist: pandas (>=2.3.3,<3.0.0)
30
+ Requires-Dist: parse (>=1.20.2,<2.0.0)
31
+ Requires-Dist: peptacular (>=2.5.1,<3.0.0)
32
+ Requires-Dist: plotly (>=6.5.2,<7.0.0) ; extra == "plotly"
33
+ Requires-Dist: pydantic (>=2.12.5,<3.0.0)
34
+ Requires-Dist: pydantic-settings (>=2.12.0,<3.0.0)
35
+ Requires-Dist: pyteomics (>=4.7.5,<5.0.0)
36
+ Requires-Dist: scikit-learn (>=1.8.0,<2.0.0)
37
+ Requires-Dist: smart-round (>=1.0.1,<2.0.0)
38
+ Requires-Dist: tabulate (>=0.9.0,<0.10.0)
39
+ Requires-Dist: typer (>=0.21.1,<0.22.0)
40
+ Requires-Dist: uniprot-meta-tool (>=0.2.1,<0.3.0)
41
+ Requires-Dist: xlrd (>=2.0.2,<3.0.0)
42
+ Project-URL: Homepage, https://github.com/protdb/dasmixer
43
+ Description-Content-Type: text/markdown
44
+
45
+ # DASMixer Core
46
+
47
+ Core library for proteomics data project management, calculations, and data import.
48
+
49
+ ## Features
50
+
51
+ - **Project management** — create, open, save `.dasmix` files (SQLite)
52
+ - **Data import** — MGF (spectra), PowerNovo2 / MaxQuant / PLGS (identifications), FASTA (proteins)
53
+ - **Calculations** — ion matching (b/y/a/c/x/z), de novo PPM correction (SeqFixer), coverage calculation
54
+ - **Peptide-to-protein mapping** (npysearch BLAST)
55
+ - **LFQ quantification** — emPAI, iBAQ, NSAF, Top3 (via semPAI)
56
+ - **Reports** — PCA, Volcano, UpSet, Coverage, Sample Summary (Plotly-based)
57
+ - **Export** — HTML, DOCX, XLSX, mzTab
58
+
59
+ ## Installation
60
+
61
+ ```bash
62
+ pip install dasmixer-core
63
+ ```
64
+
65
+ With optional extras:
66
+
67
+ ```bash
68
+ pip install "dasmixer-core[plotly]" # Plotly + Kaleido for charts and export
69
+ pip install "dasmixer-core[proteins]" # npysearch for BLAST-like search
70
+ pip install "dasmixer-core[all]" # Full installation
71
+ ```
72
+
73
+ ## Usage
74
+
75
+ ```python
76
+ from dasmixer.api.project.project import Project
77
+
78
+ async with Project(path="study.dasmix", create_if_not_exists=True) as project:
79
+     samples = await project.get_samples()
80
+     ...
81
+ ```
82
+
83
+ Documentation: https://github.com/protdb/dasmixer
@@ -0,0 +1,39 @@
1
+ # DASMixer Core
2
+
3
+ Core library for proteomics data project management, calculations, and data import.
4
+
5
+ ## Features
6
+
7
+ - **Project management** — create, open, save `.dasmix` files (SQLite)
8
+ - **Data import** — MGF (spectra), PowerNovo2 / MaxQuant / PLGS (identifications), FASTA (proteins)
9
+ - **Calculations** — ion matching (b/y/a/c/x/z), de novo PPM correction (SeqFixer), coverage calculation
10
+ - **Peptide-to-protein mapping** (npysearch BLAST)
11
+ - **LFQ quantification** — emPAI, iBAQ, NSAF, Top3 (via semPAI)
12
+ - **Reports** — PCA, Volcano, UpSet, Coverage, Sample Summary (Plotly-based)
13
+ - **Export** — HTML, DOCX, XLSX, mzTab
14
+
15
+ ## Installation
16
+
17
+ ```bash
18
+ pip install dasmixer-core
19
+ ```
20
+
21
+ With optional extras:
22
+
23
+ ```bash
24
+ pip install "dasmixer-core[plotly]" # Plotly + Kaleido for charts and export
25
+ pip install "dasmixer-core[proteins]" # npysearch for BLAST-like search
26
+ pip install "dasmixer-core[all]" # Full installation
27
+ ```
28
+
29
+ ## Usage
30
+
31
+ ```python
32
+ from dasmixer.api.project.project import Project
33
+
34
+ async with Project(path="study.dasmix", create_if_not_exists=True) as project:
35
+     samples = await project.get_samples()
36
+     ...
37
+ ```
38
+
39
+ Documentation: https://github.com/protdb/dasmixer
@@ -0,0 +1,61 @@
1
+ [project]
2
+ name = "dasmixer-core"
3
+ version = "0.6.0a2"
4
+ description = "DASMixer core library — project API, calculations, and data import"
5
+ authors = [
6
+ {name = "gluck", email = "glucksistemi@gmail.com"}
7
+ ]
8
+ readme = "README.md"
9
+ requires-python = "<4.0, >=3.11"
10
+ urls = {Homepage = "https://github.com/protdb/dasmixer"}
11
+ dependencies = [
12
+ "pandas >=2.3.3,<3.0.0",
13
+ "numpy >=2.4.1,<3.0.0",
14
+ "aiosqlite >=0.22.1,<0.23.0",
15
+ "pydantic >=2.12.5,<3.0.0",
16
+ "pydantic-settings >=2.12.0,<3.0.0",
17
+ "typer >=0.21.1,<0.22.0",
18
+ "pyteomics >=4.7.5,<5.0.0",
19
+ "peptacular >=2.5.1,<3.0.0",
20
+ "uniprot-meta-tool >=0.2.1,<0.3.0",
21
+ "aiofiles >=25.1.0,<26.0.0",
22
+ "aiocsv >=1.4.0,<2.0.0",
23
+ "parse >=1.20.2,<2.0.0",
24
+ "tabulate >=0.9.0,<0.10.0",
25
+ "scikit-learn >=1.8.0,<2.0.0",
26
+ "jinja2 >=3.1.6,<4.0.0",
27
+ "docxtpl >=0.20.2,<0.21.0",
28
+ "html-for-docx >=1.1.3,<2.0.0",
29
+ "openpyxl >=3.1.5,<4.0.0",
30
+ "xlrd >=2.0.2,<3.0.0",
31
+ "smart-round >=1.0.1,<2.0.0",
32
+ "mztabwriter[pandas] >=0.1.0,<0.2.0",
33
+ ]
34
+
35
+ [project.optional-dependencies]
36
+ plotly = [
37
+ "plotly >=6.5.2,<7.0.0",
38
+ "kaleido >=1.2.0,<2.0.0",
39
+ ]
40
+ proteins = [
41
+ "npysearch >=1.3.1,<2.0.0 ; sys_platform != 'win32'",
42
+ "npysearch-win >=1.3.1,<2.0.0 ; sys_platform == 'win32'",
43
+ ]
44
+ all = [
45
+ "dasmixer-core[plotly,proteins]",
46
+ ]
47
+
48
+ [build-system]
49
+ requires = ["poetry-core>=2.0.0,<3.0.0"]
50
+ build-backend = "poetry.core.masonry.api"
51
+
52
+ [tool.poetry]
53
+ packages = [
54
+ {include = "dasmixer/api", from = "src"},
55
+ {include = "dasmixer/utils", from = "src"},
56
+ {include = "dasmixer/versions.py", from = "src"},
57
+ ]
58
+
59
+ [tool.poetry.dependencies]
60
+ npysearch = {version = ">=1.3.1,<2", optional = true, markers = "sys_platform != 'win32'"}
61
+ npysearch-win = {version = ">=1.3.1,<2", optional = true, markers = "sys_platform == 'win32'"}
@@ -0,0 +1,5 @@
1
+ """DASMixer API package."""
2
+
3
+ from .project.project import Project
4
+
5
+ __all__ = ['Project']
@@ -0,0 +1 @@
1
+ """Peptide identification and matching."""
@@ -0,0 +1,161 @@
1
+ """Peptide identification matching and selection."""
2
+ from typing import Literal
3
+
4
+ import pandas as pd
5
+
6
+ from dasmixer.api.project.project import Project
7
+ from dasmixer.utils.logger import logger
8
+
9
+
10
async def select_preferred_identifications(
    project: Project,
    criterion: str,
    tool_settings: dict[int, dict]
) -> int:
    """
    Select and store the preferred identification for every spectrum.

    For each spectra file the identifications of every configured tool are
    loaded, filtered by the tool's thresholds, merged, and the best remaining
    identification per spectrum is written back with
    ``project.set_preferred_identification``.

    Args:
        project: Project instance
        criterion: Selection criterion — "ppm" (smallest absolute PPM error
            wins) or "intensity" (largest ``intensity_coverage`` wins)
        tool_settings: Tool-specific settings, mapping tool_id to a dict.
            Keys read by this function:
            - ignore_criteria: Skip all filtering for this tool (bool, default False)
            - max_ppm: Maximum allowed absolute PPM error (float, default 50000)
            - min_score: Minimum identification score (float, default 0)
            - min_ion_intensity_coverage: Minimum intensity coverage (float, required
              unless ignore_criteria is set)
            - denovo_correction: If true, ``max_ppm`` is replaced by a fixed
              limit of 50000 (bool, default False)
            - min_peptide_length: Minimum peptide length (int, default 7)
            - max_peptide_length: Maximum peptide length (int, default 30)
            Any other keys are ignored here.

    Returns:
        Number of spectra for which a preferred identification was set

    Raises:
        ValueError: If ``criterion`` is neither "ppm" nor "intensity"
        KeyError: If a tool without ``ignore_criteria`` lacks
            ``min_ion_intensity_coverage``
    """
    logger.info(f"Starting preferred identification selection (criterion: {criterion})")
    logger.debug(f"Tool settings: {tool_settings}")

    if criterion not in ("ppm", "intensity"):
        raise ValueError(f"Invalid criterion: {criterion}. Must be 'ppm' or 'intensity'")

    if criterion == "ppm":
        rank_column, ascending = "ppm", True
    else:
        rank_column, ascending = "intensity_coverage", False

    # PPM limit applied instead of max_ppm when de novo correction is enabled.
    denovo_ppm_limit = 50000

    counter = 0
    spectra_files = await project.get_spectra_files()
    for _, spectra_file in spectra_files.iterrows():
        frames = []
        for tool_id, tool_params in tool_settings.items():
            idents = await project.get_identifications(spectra_file['id'], tool_id)

            if tool_params.get("ignore_criteria", False):
                if not idents.empty:
                    frames.append(idents.copy())
                continue

            max_ppm = tool_params.get("max_ppm", 50000)
            min_score = tool_params.get("min_score", 0)
            min_ion_intensity_coverage = tool_params["min_ion_intensity_coverage"]
            min_len = tool_params.get("min_peptide_length", 7)
            max_len = tool_params.get("max_peptide_length", 30)

            # Nothing to filter; an empty result may not even carry the
            # columns used below.
            if idents.empty:
                continue

            if tool_params.get("denovo_correction", False):
                ppm_limit = denovo_ppm_limit
            else:
                ppm_limit = max_ppm

            # Boolean mask instead of in-place column edits, so the frame
            # returned by the project is never mutated. NaN compares False,
            # which drops rows with missing values just like the thresholds
            # would.
            keep = (
                (idents['ppm'].abs() <= ppm_limit)
                & (idents['score'] >= min_score)
                & (idents['intensity_coverage'] >= min_ion_intensity_coverage)
                & idents['canonical_sequence'].str.len().between(min_len, max_len)
            )
            filtered = idents[keep]
            if not filtered.empty:
                frames.append(filtered.copy())

        # pd.concat raises ValueError on an empty list.
        if not frames:
            continue
        all_idents = pd.concat(frames, ignore_index=True)

        rank_values = all_idents[rank_column]
        if criterion == "ppm":
            # Rank by magnitude for every tool, including those that skipped
            # filtering; a signed value would favour negative errors.
            rank_values = rank_values.abs()

        # One stable sort plus drop_duplicates yields the best row per
        # spectrum; ties resolve to the first row in tool order.
        best = (
            all_idents.assign(_rank=rank_values)
            .sort_values("_rank", ascending=ascending, kind="stable")
            .drop_duplicates("spectre_id", keep="first")
        )
        best_by_spectrum = dict(
            zip(best["spectre_id"].tolist(), best["id"].tolist())
        )

        spectras = await project.get_spectra(spectra_file['id'])
        for spectra_id in spectras['id'].tolist():
            best_id = best_by_spectrum.get(spectra_id)
            if best_id is None:
                continue
            # Plain ints keep numpy/float scalars out of the storage layer.
            await project.set_preferred_identification(int(spectra_id), int(best_id))
            counter += 1

    return counter
96
+
97
+
98
async def calculate_preferred_identifications_for_file(
    project: Project,
    spectra_file_id: int,
    criterion: Literal['ppm', 'intensity'],
    tool_settings: dict[int, dict]
) -> list[int]:
    """
    Calculate preferred identification IDs for a single spectra file.

    Candidate identifications are fetched per tool through
    ``project.get_idents_for_preferred`` (which applies the thresholds),
    merged, and one identification per ``spectre_id`` is chosen. Nothing is
    written to the project; the caller decides what to do with the IDs.

    Args:
        project: Project instance
        spectra_file_id: ID of spectra file to process
        criterion: "ppm" (smallest absolute PPM error wins) or "intensity"
            (largest ``intensity_coverage`` wins; falls back to "ppm" when
            that column is not present in the fetched data)
        tool_settings: Tool-specific settings, mapping tool_id to a dict.
            Keys read by this function:
            - max_ppm (default 50)
            - min_score (default 0)
            - min_ion_intensity_coverage (required)
            - min_peptide_length (default 7) / max_peptide_length (default 30)
            - min_spectre_peaks (default 1)
            - min_top_peaks (default 1)
            - min_ions_covered (default 1)
            - denovo_correction (default False): fetch with the wider
              ``denovo_correction_ppm`` limit, then keep rows whose smaller
              of |ppm| and |matched_ppm| is within ``max_ppm``
            - denovo_correction_ppm (default 50000)

    Returns:
        List of identification IDs that should be marked as preferred,
        at most one per spectrum; empty when nothing qualifies

    Raises:
        ValueError: If ``criterion`` is neither "ppm" nor "intensity"
        KeyError: If a tool's settings lack ``min_ion_intensity_coverage``
    """
    if criterion not in ("ppm", "intensity"):
        raise ValueError(f"Invalid criterion: {criterion}. Must be 'ppm' or 'intensity'")

    frames = []
    for tool_id, tool_params in tool_settings.items():
        max_ppm = tool_params.get("max_ppm", 50)
        min_score = tool_params.get("min_score", 0)
        min_ion_intensity_coverage = tool_params["min_ion_intensity_coverage"]
        min_len = tool_params.get("min_peptide_length", 7)
        max_len = tool_params.get("max_peptide_length", 30)
        min_peaks = tool_params.get("min_spectre_peaks", 1)
        top_peaks_count = tool_params.get("min_top_peaks", 1)
        min_ions = tool_params.get("min_ions_covered", 1)
        denovo_correction = tool_params.get("denovo_correction", False)
        denovo_correction_ppm = tool_params.get("denovo_correction_ppm", 50000)

        idents = await project.get_idents_for_preferred(
            spectra_file_id=spectra_file_id,
            tool_id=tool_id,
            min_score=min_score,
            max_abs_ppm=max_ppm if not denovo_correction else denovo_correction_ppm,
            intensity_coverage=min_ion_intensity_coverage,
            canonical_length=(min_len, max_len),
            spectre_peaks_count=min_peaks,
            ions_matched=min_ions,
            top_peaks_covered=top_peaks_count,
        )
        logger.debug(idents)
        logger.debug(f"{tool_id} {spectra_file_id}")

        # An empty result contributes no candidates and may lack the columns
        # used below.
        if idents.empty:
            continue

        # Work on a copy so the frame handed out by the project stays intact.
        idents = idents.copy()
        if denovo_correction:
            abs_ppm = pd.to_numeric(idents['ppm'], errors="coerce").abs()
            abs_matched = pd.to_numeric(idents['matched_ppm'], errors="coerce").abs()
            # Vectorised min(|ppm|, |matched_ppm|): take matched_ppm only
            # where it is strictly smaller, so a missing matched_ppm leaves
            # |ppm| in place.
            idents['min_ppm'] = abs_ppm.mask(abs_matched < abs_ppm, abs_matched)
            idents = idents[idents['min_ppm'] <= max_ppm]
        elif 'ppm' in idents.columns:
            idents['min_ppm'] = pd.to_numeric(idents['ppm'], errors="coerce").abs()
        else:
            # No PPM information: keep the rows but mark them unrankable.
            idents['min_ppm'] = float('nan')

        if not idents.empty:
            frames.append(idents)

    # pd.concat raises ValueError on an empty list.
    if not frames:
        return []
    df = pd.concat(frames, ignore_index=True)

    use_intensity = criterion == "intensity"
    if use_intensity and "intensity_coverage" not in df.columns:
        logger.warning(
            "Column 'intensity_coverage' is not available; "
            "falling back to the 'ppm' criterion"
        )
        use_intensity = False
    rank_column = "intensity_coverage" if use_intensity else "min_ppm"

    # Rows with no ranking value cannot win, and an all-NaN group would make
    # idxmin/idxmax yield NaN and break the .loc lookup below.
    df = df.dropna(subset=[rank_column])
    if df.empty:
        return []

    ranked = df.groupby('spectre_id')[rank_column]
    best_rows = ranked.idxmax() if use_intensity else ranked.idxmin()
    return [int(x) for x in df.loc[best_rows, 'id']]