mars-ms 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mars_ms-0.1.3/.agent/workflows/agents.md +51 -0
- mars_ms-0.1.3/.github/dependabot.yml +30 -0
- mars_ms-0.1.3/.github/workflows/publish.yml +49 -0
- mars_ms-0.1.3/.github/workflows/tests.yml +33 -0
- mars_ms-0.1.3/.gitignore +90 -0
- mars_ms-0.1.3/CLAUDE.md +47 -0
- mars_ms-0.1.3/LICENSE +21 -0
- mars_ms-0.1.3/PKG-INFO +255 -0
- mars_ms-0.1.3/README.md +218 -0
- mars_ms-0.1.3/Skyline-PRISM-Report/Skyline-PRISM.skyr +28 -0
- mars_ms-0.1.3/check_features.py +73 -0
- mars_ms-0.1.3/mars/__init__.py +30 -0
- mars_ms-0.1.3/mars/calibration.py +678 -0
- mars_ms-0.1.3/mars/cli.py +572 -0
- mars_ms-0.1.3/mars/library.py +840 -0
- mars_ms-0.1.3/mars/matching.py +682 -0
- mars_ms-0.1.3/mars/mzml.py +565 -0
- mars_ms-0.1.3/mars/temperature.py +199 -0
- mars_ms-0.1.3/mars/visualization.py +1541 -0
- mars_ms-0.1.3/mars_ms.egg-info/PKG-INFO +255 -0
- mars_ms-0.1.3/mars_ms.egg-info/SOURCES.txt +34 -0
- mars_ms-0.1.3/mars_ms.egg-info/dependency_links.txt +1 -0
- mars_ms-0.1.3/mars_ms.egg-info/entry_points.txt +2 -0
- mars_ms-0.1.3/mars_ms.egg-info/requires.txt +17 -0
- mars_ms-0.1.3/mars_ms.egg-info/top_level.txt +1 -0
- mars_ms-0.1.3/pyproject.toml +69 -0
- mars_ms-0.1.3/release-notes/RELEASE_NOTES_v0.1.0.md +84 -0
- mars_ms-0.1.3/release-notes/RELEASE_NOTES_v0.1.1.md +17 -0
- mars_ms-0.1.3/release-notes/RELEASE_NOTES_v0.1.2.md +183 -0
- mars_ms-0.1.3/release-notes/RELEASE_NOTES_v0.1.3.md +137 -0
- mars_ms-0.1.3/setup.cfg +4 -0
- mars_ms-0.1.3/tests/__init__.py +1 -0
- mars_ms-0.1.3/tests/test_calibration.py +189 -0
- mars_ms-0.1.3/tests/test_library.py +278 -0
- mars_ms-0.1.3/tests/test_matching.py +105 -0
- mars_ms-0.1.3/tests/test_mzml.py +148 -0
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: How to run tests and linting for the mars project
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# Mars Development Workflow
|
|
6
|
+
|
|
7
|
+
## Running Tests
|
|
8
|
+
// turbo
|
|
9
|
+
```bash
|
|
10
|
+
cd /home/maccoss/GitHub-Repo/maccoss/mars
|
|
11
|
+
pytest tests/ -v
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Running Linter (Ruff)
|
|
15
|
+
// turbo
|
|
16
|
+
```bash
|
|
17
|
+
cd /home/maccoss/GitHub-Repo/maccoss/mars
|
|
18
|
+
ruff check mars/
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Auto-fix Linter Issues
|
|
22
|
+
// turbo
|
|
23
|
+
```bash
|
|
24
|
+
cd /home/maccoss/GitHub-Repo/maccoss/mars
|
|
25
|
+
ruff check mars/ --fix
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Running Full CI Check
|
|
29
|
+
// turbo
|
|
30
|
+
```bash
|
|
31
|
+
cd /home/maccoss/GitHub-Repo/maccoss/mars
|
|
32
|
+
ruff check mars/ && pytest tests/ -v
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Running Calibration on Example Data
|
|
36
|
+
```bash
|
|
37
|
+
cd /home/maccoss/GitHub-Repo/maccoss/mars
|
|
38
|
+
mars calibrate \
|
|
39
|
+
--mzml "example-data/Ste-2024-12-02_HeLa_20msIIT_GPFDIA_*.mzML" \
|
|
40
|
+
--prism-csv example-data/Stellar-HeLa-GPF-PRISM.csv \
|
|
41
|
+
--tolerance 0.3 \
|
|
42
|
+
--min-intensity 500 \
|
|
43
|
+
--output-dir example-data/output/
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Installing in Development Mode
|
|
47
|
+
// turbo
|
|
48
|
+
```bash
|
|
49
|
+
cd /home/maccoss/GitHub-Repo/maccoss/mars
|
|
50
|
+
pip install -e .
|
|
51
|
+
```
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Dependabot configuration for automated dependency updates
|
|
2
|
+
# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates
|
|
3
|
+
|
|
4
|
+
version: 2
|
|
5
|
+
updates:
|
|
6
|
+
# Python dependencies (pip)
|
|
7
|
+
- package-ecosystem: "pip"
|
|
8
|
+
directory: "/"
|
|
9
|
+
schedule:
|
|
10
|
+
interval: "weekly"
|
|
11
|
+
day: "monday"
|
|
12
|
+
open-pull-requests-limit: 5
|
|
13
|
+
commit-message:
|
|
14
|
+
prefix: "deps"
|
|
15
|
+
labels:
|
|
16
|
+
- "dependencies"
|
|
17
|
+
- "python"
|
|
18
|
+
|
|
19
|
+
# GitHub Actions
|
|
20
|
+
- package-ecosystem: "github-actions"
|
|
21
|
+
directory: "/"
|
|
22
|
+
schedule:
|
|
23
|
+
interval: "weekly"
|
|
24
|
+
day: "monday"
|
|
25
|
+
open-pull-requests-limit: 3
|
|
26
|
+
commit-message:
|
|
27
|
+
prefix: "ci"
|
|
28
|
+
labels:
|
|
29
|
+
- "dependencies"
|
|
30
|
+
- "github-actions"
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
release:
|
|
5
|
+
types: [published]
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
build:
|
|
9
|
+
name: Build distribution
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v6
|
|
13
|
+
|
|
14
|
+
- name: Set up Python
|
|
15
|
+
uses: actions/setup-python@v6
|
|
16
|
+
with:
|
|
17
|
+
python-version: "3.12"
|
|
18
|
+
|
|
19
|
+
- name: Install build dependencies
|
|
20
|
+
run: python -m pip install --upgrade pip build
|
|
21
|
+
|
|
22
|
+
- name: Build package
|
|
23
|
+
run: python -m build
|
|
24
|
+
|
|
25
|
+
- name: Store distribution packages
|
|
26
|
+
uses: actions/upload-artifact@v6
|
|
27
|
+
with:
|
|
28
|
+
name: python-package-distributions
|
|
29
|
+
path: dist/
|
|
30
|
+
|
|
31
|
+
publish-to-pypi:
|
|
32
|
+
name: Publish to PyPI
|
|
33
|
+
needs: build
|
|
34
|
+
runs-on: ubuntu-latest
|
|
35
|
+
environment:
|
|
36
|
+
name: pypi
|
|
37
|
+
url: https://pypi.org/p/mars-ms
|
|
38
|
+
permissions:
|
|
39
|
+
id-token: write # Required for trusted publishing
|
|
40
|
+
|
|
41
|
+
steps:
|
|
42
|
+
- name: Download distribution packages
|
|
43
|
+
uses: actions/download-artifact@v7
|
|
44
|
+
with:
|
|
45
|
+
name: python-package-distributions
|
|
46
|
+
path: dist/
|
|
47
|
+
|
|
48
|
+
- name: Publish to PyPI
|
|
49
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
name: Tests
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.10", "3.11", "3.12"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v6
|
|
18
|
+
|
|
19
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
20
|
+
uses: actions/setup-python@v6
|
|
21
|
+
with:
|
|
22
|
+
python-version: ${{ matrix.python-version }}
|
|
23
|
+
|
|
24
|
+
- name: Install dependencies
|
|
25
|
+
run: |
|
|
26
|
+
python -m pip install --upgrade pip
|
|
27
|
+
pip install -e ".[dev]"
|
|
28
|
+
|
|
29
|
+
- name: Run tests
|
|
30
|
+
run: pytest tests/ -v --tb=short
|
|
31
|
+
|
|
32
|
+
- name: Lint with ruff
|
|
33
|
+
run: ruff check mars/
|
mars_ms-0.1.3/.gitignore
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# PyInstaller
|
|
30
|
+
*.manifest
|
|
31
|
+
*.spec
|
|
32
|
+
|
|
33
|
+
# Installer logs
|
|
34
|
+
pip-log.txt
|
|
35
|
+
pip-delete-this-directory.txt
|
|
36
|
+
|
|
37
|
+
# Unit test / coverage reports
|
|
38
|
+
htmlcov/
|
|
39
|
+
.tox/
|
|
40
|
+
.nox/
|
|
41
|
+
.coverage
|
|
42
|
+
.coverage.*
|
|
43
|
+
.cache
|
|
44
|
+
nosetests.xml
|
|
45
|
+
coverage.xml
|
|
46
|
+
*.cover
|
|
47
|
+
*.py,cover
|
|
48
|
+
.hypothesis/
|
|
49
|
+
.pytest_cache/
|
|
50
|
+
|
|
51
|
+
# Translations
|
|
52
|
+
*.mo
|
|
53
|
+
*.pot
|
|
54
|
+
|
|
55
|
+
# Environments
|
|
56
|
+
.env
|
|
57
|
+
.venv
|
|
58
|
+
env/
|
|
59
|
+
venv/
|
|
60
|
+
ENV/
|
|
61
|
+
env.bak/
|
|
62
|
+
venv.bak/
|
|
63
|
+
|
|
64
|
+
# IDE settings
|
|
65
|
+
.idea/
|
|
66
|
+
.vscode/
|
|
67
|
+
*.swp
|
|
68
|
+
*.swo
|
|
69
|
+
*~
|
|
70
|
+
|
|
71
|
+
# Jupyter Notebook
|
|
72
|
+
.ipynb_checkpoints
|
|
73
|
+
|
|
74
|
+
# pyenv
|
|
75
|
+
.python-version
|
|
76
|
+
|
|
77
|
+
# mypy
|
|
78
|
+
.mypy_cache/
|
|
79
|
+
.dmypy.json
|
|
80
|
+
dmypy.json
|
|
81
|
+
|
|
82
|
+
# Ruff
|
|
83
|
+
.ruff_cache/
|
|
84
|
+
|
|
85
|
+
# Mac
|
|
86
|
+
.DS_Store
|
|
87
|
+
|
|
88
|
+
# Project-specific
|
|
89
|
+
example-data/
|
|
90
|
+
example-astral-data/
|
mars_ms-0.1.3/CLAUDE.md
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# AGENTS.md
|
|
2
|
+
|
|
3
|
+
This file provides context and instructions for AI agents working on the Mars repository.
|
|
4
|
+
|
|
5
|
+
## Repository Overview
|
|
6
|
+
|
|
7
|
+
Mars (Mass Accuracy Recalibration System) is a tool for calibrating DIA mass spectrometry data from the Thermo Stellar instrument. It uses XGBoost to learn m/z corrections from spectral library matches.
|
|
8
|
+
|
|
9
|
+
## Continuous Integration (CI/CD)
|
|
10
|
+
|
|
11
|
+
The repository uses GitHub Actions for CI/CD, defined in `.github/workflows/`.
|
|
12
|
+
|
|
13
|
+
### Workflows
|
|
14
|
+
|
|
15
|
+
1. **Tests (`tests.yml`)**
|
|
16
|
+
* **Triggers:** Push to `main`, Pull Requests to `main`.
|
|
17
|
+
* **Actions:**
|
|
18
|
+
* Sets up Python 3.10, 3.11, 3.12.
|
|
19
|
+
* Installs dependencies with `pip install -e ".[dev]"`.
|
|
20
|
+
* Runs tests using `pytest tests/ -v --tb=short`.
|
|
21
|
+
* Runs linting with `ruff check mars/`.
|
|
22
|
+
|
|
23
|
+
2. **Publish to PyPI (`publish.yml`)**
|
|
24
|
+
* **Triggers:** Release published.
|
|
25
|
+
* **Actions:**
|
|
26
|
+
* Builds the package (`python -m build`).
|
|
27
|
+
* Publishes to PyPI using Trusted Publishing (OIDC).
|
|
28
|
+
* Requires the tag (e.g., `v0.1.0`) to match the release.
|
|
29
|
+
|
|
30
|
+
## Common Development Tasks
|
|
31
|
+
|
|
32
|
+
* **Install for dev:** `pip install -e ".[dev]"`
|
|
33
|
+
* **Run tests:** `pytest tests/`
|
|
34
|
+
* **Lint:** `ruff check .`
|
|
35
|
+
* **Build package:** `python -m build`
|
|
36
|
+
|
|
37
|
+
## Release Notes
|
|
38
|
+
|
|
39
|
+
When making bug fixes or improvements during development:
|
|
40
|
+
|
|
41
|
+
* **Update the current release notes:** Add any fixes or changes to the current version's release notes file in `release-notes/` (e.g., `RELEASE_NOTES_v0.1.3.md`).
|
|
42
|
+
* **Be specific:** Document what was fixed and why.
|
|
43
|
+
* **Group related changes:** Use appropriate sections (Bug Fixes, Changes, New Features, etc.).
|
|
44
|
+
|
|
45
|
+
## Style Guidelines
|
|
46
|
+
|
|
47
|
+
* **No Emojis:** Do not use emojis in any output, documentation, source code comments, or Jupyter notebooks. Keep all text professional and plain.
|
mars_ms-0.1.3/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Mike MacCoss
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
mars_ms-0.1.3/PKG-INFO
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mars-ms
|
|
3
|
+
Version: 0.1.3
|
|
4
|
+
Summary: Mass Accuracy Recalibration System
|
|
5
|
+
Author-email: MacCoss Lab <maccoss@uw.edu>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/maccoss/mars
|
|
8
|
+
Project-URL: Repository, https://github.com/maccoss/mars
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Science/Research
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: numpy>=1.24
|
|
21
|
+
Requires-Dist: pandas>=2.0
|
|
22
|
+
Requires-Dist: pyarrow>=14.0
|
|
23
|
+
Requires-Dist: pyteomics>=4.6
|
|
24
|
+
Requires-Dist: psims>=1.3
|
|
25
|
+
Requires-Dist: xgboost>=2.0
|
|
26
|
+
Requires-Dist: scikit-learn>=1.3
|
|
27
|
+
Requires-Dist: matplotlib>=3.7
|
|
28
|
+
Requires-Dist: seaborn>=0.12
|
|
29
|
+
Requires-Dist: click>=8.0
|
|
30
|
+
Requires-Dist: tqdm>=4.65
|
|
31
|
+
Requires-Dist: lxml>=4.9
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
34
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
35
|
+
Requires-Dist: ruff>=0.1; extra == "dev"
|
|
36
|
+
Dynamic: license-file
|
|
37
|
+
|
|
38
|
+
# MARS: Mass Accuracy Recalibration System
|
|
39
|
+
|
|
40
|
+
[PyPI version](https://pypi.org/project/mars-ms/)
|
|
41
|
+
[Supported Python versions](https://pypi.org/project/mars-ms/)
|
|
42
|
+
[License: MIT](https://github.com/maccoss/mars/blob/main/LICENSE)
|
|
43
|
+
|
|
44
|
+
Mass recalibration tool for DIA mass spectrometry data from the ThermoFisher Stellar.
|
|
45
|
+
|
|
46
|
+
## Overview
|
|
47
|
+
|
|
48
|
+
Mars learns m/z calibration corrections from spectral library fragment matches. The XGBoost model accounts for:
|
|
49
|
+
|
|
50
|
+
- **Fragment m/z**: Mass-dependent calibration bias
|
|
51
|
+
- **Peak intensity**: Higher intensity peaks provide more reliable calibration
|
|
52
|
+
- **Absolute time**: Calibration drift over the acquisition run
|
|
53
|
+
- **Spectrum TIC**: Space charge effects from high ion current
|
|
54
|
+
- **Ion injection time**: Signal accumulation duration effects
|
|
55
|
+
- **Precursor m/z**: DIA isolation window-specific effects
|
|
56
|
+
- **RF temperatures**: Thermal effects from RF amplifier (RFA2) and electronics (RFC2)
|
|
57
|
+
|
|
58
|
+
## How It Works
|
|
59
|
+
|
|
60
|
+
1. **Fragment matching**: For each DIA MS2 spectrum, Mars finds library peptides where:
|
|
61
|
+
- The precursor m/z falls within the DIA isolation window
|
|
62
|
+
- The spectrum RT is within the peptide's elution window
|
|
63
|
+
|
|
64
|
+
2. **Peak selection**: For each expected fragment, Mars selects the **most intense** peak within the m/z tolerance (not the closest), filtering for minimum intensity
|
|
65
|
+
|
|
66
|
+
3. **Model training**: Each matched fragment becomes a training point with up to 22 features (see [Model Features](#model-features)) and target: `delta_mz`
|
|
67
|
+
|
|
68
|
+
4. **Calibration**: The trained model predicts m/z corrections for all peaks in the mzML
|
|
69
|
+
|
|
70
|
+
## Installation
|
|
71
|
+
|
|
72
|
+
### From PyPI (recommended)
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
pip install mars-ms
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### From source
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
git clone https://github.com/maccoss/mars.git
|
|
82
|
+
cd mars
|
|
83
|
+
pip install -e .
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
**Requirements**: Python 3.10+, pyteomics, xgboost, numpy, pandas, matplotlib, seaborn, click
|
|
87
|
+
|
|
88
|
+
## Usage
|
|
89
|
+
|
|
90
|
+
### With PRISM CSV (Recommended)
|
|
91
|
+
|
|
92
|
+
Use a CSV file created using this [Skyline report](Skyline-PRISM-Report/Skyline-PRISM.skyr) for accurate RT windows:
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
mars calibrate \
|
|
96
|
+
--mzml data.mzML \
|
|
97
|
+
--prism-csv prism_report.csv \
|
|
98
|
+
--tolerance 0.2 \
|
|
99
|
+
--min-intensity 500 \
|
|
100
|
+
--max-isolation-window 5.0 \
|
|
101
|
+
--output-dir output/
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Basic Usage
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
mars calibrate --mzml data.mzML --library library.blib --output-dir output/
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### Batch Processing
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
# Multiple files with wildcard
|
|
114
|
+
mars calibrate --mzml "*.mzML" --library library.blib --output-dir output/
|
|
115
|
+
|
|
116
|
+
# All files in directory
|
|
117
|
+
mars calibrate --mzml-dir /path/to/data/ --library library.blib --output-dir output/
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
## Options
|
|
121
|
+
|
|
122
|
+
| Option | Default | Description |
|
|
123
|
+
|--------|---------|-------------|
|
|
124
|
+
| `--mzml` | - | Path to mzML file or glob pattern |
|
|
125
|
+
| `--mzml-dir` | - | Directory containing mzML files |
|
|
126
|
+
| `--library` | - | Path to blib spectral library (ignored if using PRISM Skyline Report) |
|
|
127
|
+
| `--prism-csv` | - | PRISM Skyline CSV with Start/End Time columns |
|
|
128
|
+
| `--tolerance` | 0.7 | m/z tolerance for matching (Th), ignored if `--tolerance-ppm` is set |
|
|
129
|
+
| `--tolerance-ppm` | - | m/z tolerance for matching in ppm (e.g., 10 for Astral), overrides `--tolerance` |
|
|
130
|
+
| `--min-intensity` | 500 | Minimum peak intensity for matching |
|
|
131
|
+
| `--max-isolation-window` | - | Maximum isolation window width (m/z) to include |
|
|
132
|
+
| `--temperature-dir` | - | Directory with RF temperature CSV files |
|
|
133
|
+
| `--output-dir` | `.` | Output directory |
|
|
134
|
+
| `--model-path` | - | Path to save/load calibration model |
|
|
135
|
+
| `--no-recalibrate` | - | Only train model, don't write mzML |
|
|
136
|
+
|
|
137
|
+
## RT Window Behavior
|
|
138
|
+
|
|
139
|
+
- **With `--prism-csv`**: Uses exact `Start Time` and `End Time` from Skyline
|
|
140
|
+
- **Without `--prism-csv`**: Uses ±5 seconds around the blib library RT
|
|
141
|
+
|
|
142
|
+
## Isolation Window Filtering
|
|
143
|
+
|
|
144
|
+
Some DIA methods use wide isolation windows (e.g., 20-30 m/z) that may reduce calibration accuracy. Use `--max-isolation-window` to exclude these:
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
# Exclude windows wider than 5 m/z
|
|
148
|
+
mars calibrate --mzml data.mzML --prism-csv report.csv --max-isolation-window 5.0
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
This filters spectra during both model training and mzML recalibration. Typical narrow DIA windows (~1 m/z) are retained.
|
|
152
|
+
|
|
153
|
+
## Output Files
|
|
154
|
+
|
|
155
|
+
| File | Description |
|
|
156
|
+
|------|-------------|
|
|
157
|
+
| `{input}-mars.mzML` | Recalibrated mzML file |
|
|
158
|
+
| `mars_model.pkl` | Trained XGBoost calibration model |
|
|
159
|
+
| `mars_qc_histogram.png` | Delta m/z distribution (before/after) |
|
|
160
|
+
| `mars_qc_heatmap.png` | 2D heatmap (RT × m/z, color = delta) |
|
|
161
|
+
| `mars_qc_intensity_vs_error.png` | Intensity vs mass error hexbin |
|
|
162
|
+
| `mars_qc_rt_vs_error.png` | RT vs mass error hexbin |
|
|
163
|
+
| `mars_qc_mz_vs_error.png` | Fragment m/z vs mass error hexbin |
|
|
164
|
+
| `mars_qc_tic_vs_error.png` | TIC vs mass error hexbin |
|
|
165
|
+
| `mars_qc_injection_time_vs_error.png` | Injection time vs mass error hexbin |
|
|
166
|
+
| `mars_qc_tic_injection_time_vs_error.png` | TIC×injection time vs mass error hexbin |
|
|
167
|
+
| `mars_qc_fragment_ions_vs_error.png` | Fragment ions vs mass error hexbin |
|
|
168
|
+
| `mars_qc_rfa2_temperature_vs_error.png` | RFA2 temperature vs error (if available) |
|
|
169
|
+
| `mars_qc_rfc2_temperature_vs_error.png` | RFC2 temperature vs error (if available) |
|
|
170
|
+
| `mars_qc_feature_importance.png` | Model feature importance |
|
|
171
|
+
| `mars_qc_summary.txt` | Calibration statistics |
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
## Model Features
|
|
175
|
+
|
|
176
|
+
The XGBoost model uses up to 22 features to predict m/z corrections:
|
|
177
|
+
|
|
178
|
+
1. `precursor_mz` - DIA isolation window center
|
|
179
|
+
2. `fragment_mz` - Fragment m/z being calibrated
|
|
180
|
+
3. `absolute_time` - Time relative to first acquisition (seconds)
|
|
181
|
+
4. `log_tic` - Log10 of spectrum total ion current
|
|
182
|
+
5. `log_intensity` - Log10 of peak intensity
|
|
183
|
+
6. `injection_time` - Ion injection time (seconds)
|
|
184
|
+
7. `tic_injection_time` - TIC × injection time product
|
|
185
|
+
8. `fragment_ions` - Fragment intensity × injection time (total ions, not rate)
|
|
186
|
+
9. `ions_above_0_1` - Total ions in (X+0.5, X+1.5] Th range above fragment m/z
|
|
187
|
+
10. `ions_above_1_2` - Total ions in (X+1.5, X+2.5] Th range above fragment m/z
|
|
188
|
+
11. `ions_above_2_3` - Total ions in (X+2.5, X+3.5] Th range above fragment m/z
|
|
189
|
+
12. `ions_below_0_1` - Total ions in (X-1.5, X-0.5] Th range below fragment m/z
|
|
190
|
+
13. `ions_below_1_2` - Total ions in (X-2.5, X-1.5] Th range below fragment m/z
|
|
191
|
+
14. `ions_below_2_3` - Total ions in (X-3.5, X-2.5] Th range below fragment m/z
|
|
192
|
+
15. `adjacent_ratio_0_1` - ions_above_0_1 / fragment_ions (relative adjacent density)
|
|
193
|
+
16. `adjacent_ratio_1_2` - ions_above_1_2 / fragment_ions
|
|
194
|
+
17. `adjacent_ratio_2_3` - ions_above_2_3 / fragment_ions
|
|
195
|
+
18. `adjacent_ratio_below_0_1` - ions_below_0_1 / fragment_ions
|
|
196
|
+
19. `adjacent_ratio_below_1_2` - ions_below_1_2 / fragment_ions
|
|
197
|
+
20. `adjacent_ratio_below_2_3` - ions_below_2_3 / fragment_ions
|
|
198
|
+
21. `rfa2_temp` - RF amplifier temperature (°C)
|
|
199
|
+
22. `rfc2_temp` - RF electronics temperature (°C)
|
|
200
|
+
|
|
201
|
+
**Note**: Features 6-20 are only included if injection time data is available in the mzML files. Features 21-22 are only included if temperature CSV files are provided. Features with universally missing data are automatically excluded.
|
|
202
|
+
|
|
203
|
+
## RF Temperature Data
|
|
204
|
+
|
|
205
|
+
Mars can incorporate RF temperature data to model thermal effects on mass accuracy. Temperature data is loaded from CSV files produced by Thermo's chromatogram export.
|
|
206
|
+
|
|
207
|
+
### Temperature File Format
|
|
208
|
+
|
|
209
|
+
Temperature CSV files should be in Thermo's chromatogram export format:
|
|
210
|
+
- 3 header lines (skipped)
|
|
211
|
+
- Columns: `Time(min)`, temperature value
|
|
212
|
+
|
|
213
|
+
Example naming convention:
|
|
214
|
+
```
|
|
215
|
+
RFA2-Sample_Name.csv # RF amplifier temperature
|
|
216
|
+
RFC2-Sample_Name.csv # RF electronics temperature
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
### Usage with Temperature Data
|
|
220
|
+
|
|
221
|
+
```bash
|
|
222
|
+
mars calibrate \
|
|
223
|
+
--mzml data.mzML \
|
|
224
|
+
--prism-csv report.csv \
|
|
225
|
+
--temperature-dir /path/to/temperature_csvs/ \
|
|
226
|
+
--output-dir output/
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
Mars automatically finds temperature files matching each mzML filename and interpolates temperature values at each spectrum's retention time.
|
|
230
|
+
|
|
231
|
+
## Python API
|
|
232
|
+
|
|
233
|
+
```python
|
|
234
|
+
from mars import load_blib, read_dia_spectra, match_library_to_spectra, MzCalibrator
|
|
235
|
+
|
|
236
|
+
# Load library and match
|
|
237
|
+
library = load_blib("library.blib")
|
|
238
|
+
spectra = read_dia_spectra("data.mzML")
|
|
239
|
+
matches = match_library_to_spectra(library, spectra, mz_tolerance=0.2, min_intensity=1500)
|
|
240
|
+
|
|
241
|
+
# Train and save model
|
|
242
|
+
calibrator = MzCalibrator()
|
|
243
|
+
calibrator.fit(matches)
|
|
244
|
+
calibrator.save("model.pkl")
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
## Requirements
|
|
248
|
+
|
|
249
|
+
- **Spectral library**: blib format from Skyline with fragment annotations
|
|
250
|
+
- **mzML files**: DIA data from Thermo Stellar (or a similar unit-resolution instrument)
|
|
251
|
+
- **PRISM CSV** (optional): Skyline report with `Start Time`, `End Time`, `Replicate Name` columns
|
|
252
|
+
|
|
253
|
+
## License
|
|
254
|
+
|
|
255
|
+
MIT
|