evoseer-utils 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- evoseer_utils-0.1.0/PKG-INFO +141 -0
- evoseer_utils-0.1.0/README.md +124 -0
- evoseer_utils-0.1.0/pyproject.toml +52 -0
- evoseer_utils-0.1.0/src/__init__.py +5 -0
- evoseer_utils-0.1.0/src/db_connection.py +80 -0
- evoseer_utils-0.1.0/src/mutations.py +406 -0
- evoseer_utils-0.1.0/src/output_description.py +131 -0
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: evoseer-utils
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Shared library for mutation management across modules
|
|
5
|
+
Author: Your Name
|
|
6
|
+
Author-email: your.email@example.com
|
|
7
|
+
Requires-Python: >=3.9,<4.0
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
+
Requires-Dist: pydantic (>=2.0,<3.0)
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
|
|
17
|
+
# Mutation Library
|
|
18
|
+
|
|
19
|
+
Shared library for mutation management across modules.
|
|
20
|
+
|
|
21
|
+
## Components
|
|
22
|
+
|
|
23
|
+
### `DbConnection` - Singleton DB connection
|
|
24
|
+
```python
|
|
25
|
+
from libs import DbConnection
|
|
26
|
+
|
|
27
|
+
DbConnection.set_db_path("mutations.db")
|
|
28
|
+
conn = DbConnection.get_connection()
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
### `Mutation` - Pydantic model with DB integration
|
|
32
|
+
|
|
33
|
+
#### States
|
|
34
|
+
- `"full"`: Has both id and (chrom, pos, ref, alt)
|
|
35
|
+
- `"miss_id"`: Has coordinates, missing id
|
|
36
|
+
- `"miss_attributes"`: Has id, missing coordinates
|
|
37
|
+
|
|
38
|
+
#### Creation patterns
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
# With coordinates (lazy load id)
|
|
42
|
+
mut = Mutation(chrom=17, pos=7577548, ref="C", alt="T")
|
|
43
|
+
|
|
44
|
+
# With id (lazy load attributes)
|
|
45
|
+
mut = Mutation(id=123)
|
|
46
|
+
|
|
47
|
+
# With both
|
|
48
|
+
mut = Mutation(id=123, chrom=17, pos=7577548, ref="C", alt="T")
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
#### Methods
|
|
52
|
+
|
|
53
|
+
**Instance methods:**
|
|
54
|
+
```python
|
|
55
|
+
mut.fetch_id_from_db() # Get id from coordinates
|
|
56
|
+
mut.fetch_attributes_from_db() # Get coordinates from id
|
|
57
|
+
mut.ensure_in_db() # Create if missing, return id
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
**Class methods (batch):**
|
|
61
|
+
```python
|
|
62
|
+
Mutation.fetch_ids_from_db_batch(mutations)
|
|
63
|
+
Mutation.fetch_attributes_from_db_batch(mutations)
|
|
64
|
+
Mutation.ensure_in_db_batch(mutations)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
## Usage in modules with OutputDescription (fully automatic)
|
|
68
|
+
|
|
69
|
+
`OutputDescription` is a base class that provides automatic DB insertion for module outputs.
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
from pydantic import Field
|
|
73
|
+
from typing import ClassVar, List
|
|
74
|
+
from libs import OutputDescription, DbConnection, Mutation
|
|
75
|
+
|
|
76
|
+
class MyModuleOutput(OutputDescription):
|
|
77
|
+
table_name: ClassVar[str] = "tool_mymodule"
|
|
78
|
+
db_fields: ClassVar[List[str]] = ["my_score", "my_prediction"]
|
|
79
|
+
|
|
80
|
+
my_score: float = Field(..., description="Module score")
|
|
81
|
+
my_prediction: str = Field(..., description="Prediction")
|
|
82
|
+
|
|
83
|
+
# Setup
|
|
84
|
+
DbConnection.set_db_path("mutations.db")
|
|
85
|
+
|
|
86
|
+
# Single insertion (automatic table creation + mutation insertion)
|
|
87
|
+
output = MyModuleOutput(
|
|
88
|
+
mutation=Mutation(chrom=17, pos=7577548, ref="C", alt="T"),
|
|
89
|
+
version="1.0.0", # Required field (free text)
|
|
90
|
+
my_score=0.85,
|
|
91
|
+
my_prediction="pathogenic"
|
|
92
|
+
)
|
|
93
|
+
output.insert_to_db() # Creates table if needed, ensures mutation exists, inserts
|
|
94
|
+
|
|
95
|
+
# Batch insertion
|
|
96
|
+
outputs = [...]
|
|
97
|
+
MyModuleOutput.insert_batch_to_db(outputs)
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
**What happens automatically:**
|
|
101
|
+
- Table creation with correct SQL types (inferred from Python types)
|
|
102
|
+
- Mutation insertion/lookup
|
|
103
|
+
- Index creation on mutation_id
|
|
104
|
+
- `version` column automatically added to the table and included in every insert
|
|
105
|
+
- INSERT OR REPLACE (idempotent)
|
|
106
|
+
|
|
107
|
+
**Note:** The `version` field is required in all `OutputDescription` subclasses. Its format is free text.
|
|
108
|
+
|
|
109
|
+
## Chromosome encoding
|
|
110
|
+
|
|
111
|
+
- Autosomes: `1-22`
|
|
112
|
+
- X: `23`
|
|
113
|
+
- Y: `24`
|
|
114
|
+
|
|
115
|
+
Helper functions:
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
from libs.src.mutations import chrom_to_int, int_to_chrom
|
|
119
|
+
|
|
120
|
+
chrom_to_int("chr17") # 17
|
|
121
|
+
chrom_to_int("chrX") # 23
|
|
122
|
+
int_to_chrom(23) # "chrX"
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Tests
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
# From project root
|
|
129
|
+
.venv/bin/python3 libs/tests/test_mutations_lib.py
|
|
130
|
+
|
|
131
|
+
# Or use the test runner
|
|
132
|
+
libs/tests/run_tests.sh
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## Examples
|
|
136
|
+
|
|
137
|
+
```bash
|
|
138
|
+
python3 example_mutations_lib.py
|
|
139
|
+
python3 modules/boostdm/output_description_example.py
|
|
140
|
+
```
|
|
141
|
+
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# Mutation Library
|
|
2
|
+
|
|
3
|
+
Shared library for mutation management across modules.
|
|
4
|
+
|
|
5
|
+
## Components
|
|
6
|
+
|
|
7
|
+
### `DbConnection` - Singleton DB connection
|
|
8
|
+
```python
|
|
9
|
+
from libs import DbConnection
|
|
10
|
+
|
|
11
|
+
DbConnection.set_db_path("mutations.db")
|
|
12
|
+
conn = DbConnection.get_connection()
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
### `Mutation` - Pydantic model with DB integration
|
|
16
|
+
|
|
17
|
+
#### States
|
|
18
|
+
- `"full"`: Has both id and (chrom, pos, ref, alt)
|
|
19
|
+
- `"miss_id"`: Has coordinates, missing id
|
|
20
|
+
- `"miss_attributes"`: Has id, missing coordinates
|
|
21
|
+
|
|
22
|
+
#### Creation patterns
|
|
23
|
+
|
|
24
|
+
```python
|
|
25
|
+
# With coordinates (lazy load id)
|
|
26
|
+
mut = Mutation(chrom=17, pos=7577548, ref="C", alt="T")
|
|
27
|
+
|
|
28
|
+
# With id (lazy load attributes)
|
|
29
|
+
mut = Mutation(id=123)
|
|
30
|
+
|
|
31
|
+
# With both
|
|
32
|
+
mut = Mutation(id=123, chrom=17, pos=7577548, ref="C", alt="T")
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
#### Methods
|
|
36
|
+
|
|
37
|
+
**Instance methods:**
|
|
38
|
+
```python
|
|
39
|
+
mut.fetch_id_from_db() # Get id from coordinates
|
|
40
|
+
mut.fetch_attributes_from_db() # Get coordinates from id
|
|
41
|
+
mut.ensure_in_db() # Create if missing, return id
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
**Class methods (batch):**
|
|
45
|
+
```python
|
|
46
|
+
Mutation.fetch_ids_from_db_batch(mutations)
|
|
47
|
+
Mutation.fetch_attributes_from_db_batch(mutations)
|
|
48
|
+
Mutation.ensure_in_db_batch(mutations)
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Usage in modules with OutputDescription (fully automatic)
|
|
52
|
+
|
|
53
|
+
`OutputDescription` is a base class that provides automatic DB insertion for module outputs.
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
from pydantic import Field
|
|
57
|
+
from typing import ClassVar, List
|
|
58
|
+
from libs import OutputDescription, DbConnection, Mutation
|
|
59
|
+
|
|
60
|
+
class MyModuleOutput(OutputDescription):
|
|
61
|
+
table_name: ClassVar[str] = "tool_mymodule"
|
|
62
|
+
db_fields: ClassVar[List[str]] = ["my_score", "my_prediction"]
|
|
63
|
+
|
|
64
|
+
my_score: float = Field(..., description="Module score")
|
|
65
|
+
my_prediction: str = Field(..., description="Prediction")
|
|
66
|
+
|
|
67
|
+
# Setup
|
|
68
|
+
DbConnection.set_db_path("mutations.db")
|
|
69
|
+
|
|
70
|
+
# Single insertion (automatic table creation + mutation insertion)
|
|
71
|
+
output = MyModuleOutput(
|
|
72
|
+
mutation=Mutation(chrom=17, pos=7577548, ref="C", alt="T"),
|
|
73
|
+
version="1.0.0", # Required field (free text)
|
|
74
|
+
my_score=0.85,
|
|
75
|
+
my_prediction="pathogenic"
|
|
76
|
+
)
|
|
77
|
+
output.insert_to_db() # Creates table if needed, ensures mutation exists, inserts
|
|
78
|
+
|
|
79
|
+
# Batch insertion
|
|
80
|
+
outputs = [...]
|
|
81
|
+
MyModuleOutput.insert_batch_to_db(outputs)
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
**What happens automatically:**
|
|
85
|
+
- Table creation with correct SQL types (inferred from Python types)
|
|
86
|
+
- Mutation insertion/lookup
|
|
87
|
+
- Index creation on mutation_id
|
|
88
|
+
- `version` column automatically added to the table and included in every insert
|
|
89
|
+
- INSERT OR REPLACE (idempotent)
|
|
90
|
+
|
|
91
|
+
**Note:** The `version` field is required in all `OutputDescription` subclasses. Its format is free text.
|
|
92
|
+
|
|
93
|
+
## Chromosome encoding
|
|
94
|
+
|
|
95
|
+
- Autosomes: `1-22`
|
|
96
|
+
- X: `23`
|
|
97
|
+
- Y: `24`
|
|
98
|
+
|
|
99
|
+
Helper functions:
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
from libs.src.mutations import chrom_to_int, int_to_chrom
|
|
103
|
+
|
|
104
|
+
chrom_to_int("chr17") # 17
|
|
105
|
+
chrom_to_int("chrX") # 23
|
|
106
|
+
int_to_chrom(23) # "chrX"
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## Tests
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
# From project root
|
|
113
|
+
.venv/bin/python3 libs/tests/test_mutations_lib.py
|
|
114
|
+
|
|
115
|
+
# Or use the test runner
|
|
116
|
+
libs/tests/run_tests.sh
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Examples
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
python3 example_mutations_lib.py
|
|
123
|
+
python3 modules/boostdm/output_description_example.py
|
|
124
|
+
```
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "evoseer-utils"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Shared library for mutation management across modules"
|
|
5
|
+
authors = ["Your Name <your.email@example.com>"]
|
|
6
|
+
readme = "README.md"
|
|
7
|
+
packages = [{include = "src"}]
|
|
8
|
+
|
|
9
|
+
[tool.poetry.dependencies]
|
|
10
|
+
python = "^3.9"
|
|
11
|
+
pydantic = "^2.0"
|
|
12
|
+
|
|
13
|
+
[tool.poetry.group.dev.dependencies]
|
|
14
|
+
pytest = "^8.0"
|
|
15
|
+
ruff = "^0.8"
|
|
16
|
+
pre-commit = "^4.0"
|
|
17
|
+
|
|
18
|
+
[build-system]
|
|
19
|
+
requires = ["poetry-core"]
|
|
20
|
+
build-backend = "poetry.core.masonry.api"
|
|
21
|
+
|
|
22
|
+
[tool.pytest.ini_options]
|
|
23
|
+
testpaths = ["tests"]
|
|
24
|
+
python_files = ["test_*.py"]
|
|
25
|
+
python_classes = ["Test*"]
|
|
26
|
+
python_functions = ["test_*"]
|
|
27
|
+
addopts = [
|
|
28
|
+
"-v",
|
|
29
|
+
"--strict-markers",
|
|
30
|
+
"--tb=short",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[tool.ruff]
|
|
34
|
+
line-length = 100
|
|
35
|
+
target-version = "py39"
|
|
36
|
+
|
|
37
|
+
[tool.ruff.lint]
|
|
38
|
+
select = [
|
|
39
|
+
"E", # pycodestyle errors
|
|
40
|
+
"W", # pycodestyle warnings
|
|
41
|
+
"F", # pyflakes
|
|
42
|
+
"I", # isort
|
|
43
|
+
"N", # pep8-naming
|
|
44
|
+
"UP", # pyupgrade
|
|
45
|
+
"B", # flake8-bugbear
|
|
46
|
+
"C4", # flake8-comprehensions
|
|
47
|
+
]
|
|
48
|
+
ignore = []
|
|
49
|
+
|
|
50
|
+
[tool.ruff.lint.per-file-ignores]
|
|
51
|
+
"__init__.py" = ["F401"] # Allow unused imports in __init__.py
|
|
52
|
+
"tests/*" = ["D"] # Disable docstring requirements in tests
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Gestion de la connexion à la base de données SQLite (singleton)
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import sqlite3
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Optional
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class DbConnection:
    """
    Singleton-style holder for one shared SQLite connection.

    The configured path and the open connection are stored on the class, so
    every caller of the class methods sees the same state.

    Usage:
        DbConnection.set_db_path("mutations.db")
        conn = DbConnection.get_connection()
    """

    _instance: Optional["DbConnection"] = None
    _connection: Optional[sqlite3.Connection] = None
    _db_path: Optional[str] = None

    def __new__(cls):
        # Reuse the cached instance; only the very first call allocates one.
        cached = cls._instance
        if cached is not None:
            return cached
        cls._instance = super().__new__(cls)
        return cls._instance

    @classmethod
    def set_db_path(cls, db_path: str) -> None:
        """
        Configure the path of the SQLite database file.

        Setting the path that is already configured is a no-op. Setting a
        different path closes any open connection so the next
        ``get_connection()`` call opens the new file.

        Args:
            db_path: Path to the SQLite file
        """
        if cls._db_path == db_path:
            return

        # A connection opened against the previous path must not be reused.
        cls.close()
        cls._db_path = db_path

    @classmethod
    def get_connection(cls) -> sqlite3.Connection:
        """
        Return the shared SQLite connection, opening it on first use.

        Returns:
            The SQLite connection, with ``row_factory`` set to ``sqlite3.Row``

        Raises:
            RuntimeError: If no database path has been configured
            FileNotFoundError: If the configured database file does not exist
        """
        db_path = cls._db_path
        if db_path is None:
            raise RuntimeError("Database path not set. Call DbConnection.set_db_path() first.")

        if not Path(db_path).exists():
            raise FileNotFoundError(
                f"Database file not found: {db_path}. Run init_database.py first."
            )

        if cls._connection is None:
            opened = sqlite3.connect(db_path)
            opened.row_factory = sqlite3.Row  # rows can be indexed by column name
            cls._connection = opened

        return cls._connection

    @classmethod
    def close(cls) -> None:
        """Close the shared connection, if one is open."""
        if cls._connection is None:
            return
        cls._connection.close()
        cls._connection = None

    @classmethod
    def is_configured(cls) -> bool:
        """Return True once a database path has been set."""
        return cls._db_path is not None
|
|
@@ -0,0 +1,406 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Modèle Pydantic pour les mutations génomiques
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Literal, Optional
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, field_validator, model_validator
|
|
8
|
+
|
|
9
|
+
from .db_connection import DbConnection
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def chrom_to_int(chrom_str: str) -> Optional[int]:
    """
    Convert a chromosome label to its integer code.

    Accepts labels with or without a ``chr`` prefix, in any letter case
    (``"chr17"``, ``"17"``, ``"chrX"``, ``"x"``), as well as plain integers.

    Args:
        chrom_str: Chromosome label (anything convertible with ``str()``)

    Returns:
        The integer parsed from the label, 23 for X, 24 for Y, or None for
        mitochondrial labels (M / MT) and for anything that is not an integer.
    """
    label = str(chrom_str).strip().upper()

    # The label has just been upper-cased, so the prefix to drop is "CHR".
    # (Searching for lowercase "chr" at this point can never match.)
    if label.startswith("CHR"):
        label = label[3:]

    if label == "X":
        return 23
    if label == "Y":
        return 24
    if label in ("M", "MT"):
        return None

    try:
        return int(label)
    except ValueError:
        return None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def int_to_chrom(chrom_int: int) -> str:
    """
    Convert an integer chromosome code to a ``chr``-prefixed label.

    23 becomes ``"chrX"``, 24 becomes ``"chrY"``; any other value is
    formatted as-is after the ``chr`` prefix.
    """
    suffix = "X" if chrom_int == 23 else "Y" if chrom_int == 24 else chrom_int
    return f"chr{suffix}"
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class Mutation(BaseModel):
    """
    Model for a genomic mutation.

    Attributes:
        id: Mutation ID in the DB (auto-generated)
        chrom: Chromosome (1-22, 23=X, 24=Y)
        pos: Genomic position
        ref: Reference allele
        alt: Alternate allele

    Validation:
        - Either id is provided
        - Or (chrom, pos, ref, alt) are all provided
        - Or both

    Usage:
        # With the ID only (lazy load of the attributes)
        mutation = Mutation(id=123)
        mutation.fetch_attributes_from_db()

        # With coordinates (lazy load of the ID)
        mutation = Mutation(chrom=17, pos=7577548, ref="C", alt="T")
        mutation.fetch_id_from_db()

        # With everything
        mutation = Mutation(id=123, chrom=17, pos=7577548, ref="C", alt="T")
    """

    id: Optional[int] = None
    chrom: Optional[int] = None
    pos: Optional[int] = None
    ref: Optional[str] = None
    alt: Optional[str] = None

    @field_validator("chrom", mode="before")
    @classmethod
    def normalize_chrom(cls, v):
        """Normalize the raw ``chrom`` input through ``chrom_to_int``."""
        # An explicit None means "no chromosome given"; keep it as None
        # without routing it through the string conversion.
        if v is None:
            return None
        return chrom_to_int(v)

    def _has_coords(self) -> bool:
        """Return True when chrom, pos, ref and alt are all set."""
        return all(
            value is not None for value in (self.chrom, self.pos, self.ref, self.alt)
        )

    @model_validator(mode="after")
    def validate_mutation(self):
        """
        Check that the model has an id, or (chrom, pos, ref, alt), or both.

        Raises:
            ValueError: If neither an id nor a full set of coordinates is
                given, or if only some of the coordinates are given
        """
        has_id = self.id is not None
        has_coords = self._has_coords()

        if not has_id and not has_coords:
            raise ValueError("Must provide either 'id' or all of (chrom, pos, ref, alt)")

        # Coordinates are all-or-nothing: reject a partially filled set.
        partial_coords = any(
            value is not None for value in (self.chrom, self.pos, self.ref, self.alt)
        )
        if partial_coords and not has_coords:
            raise ValueError("If providing coordinates, must provide all of (chrom, pos, ref, alt)")

        return self

    @property
    def state(self) -> Literal["full", "miss_id", "miss_attributes"]:
        """
        Return the completeness state of the mutation.

        Returns:
            - "full": id and attributes present
            - "miss_id": attributes present, id missing
            - "miss_attributes": attributes missing (id expected to be present)
        """
        has_coords = self._has_coords()

        if self.id is not None and has_coords:
            return "full"
        if has_coords:
            return "miss_id"
        return "miss_attributes"

    def fetch_id_from_db(self) -> Optional[int]:
        """
        Look up the mutation ID in the DB from (chrom, pos, ref, alt).

        Updates ``self.id`` when a row is found.

        Returns:
            The mutation ID, or None if no matching row exists

        Raises:
            ValueError: If the coordinates are not complete
        """
        if self.state == "miss_attributes":
            raise ValueError("Cannot fetch ID: coordinates (chrom, pos, ref, alt) are required")

        conn = DbConnection.get_connection()
        cursor = conn.cursor()

        cursor.execute(
            """
            SELECT id FROM mutations
            WHERE chrom=? AND pos=? AND ref=? AND alt=?
            """,
            (self.chrom, self.pos, self.ref, self.alt),
        )

        result = cursor.fetchone()
        if result:
            self.id = result["id"]
            return self.id

        return None

    def fetch_attributes_from_db(self) -> bool:
        """
        Load (chrom, pos, ref, alt) from the DB using the ID.

        Updates the four coordinate attributes when a row is found.

        Returns:
            True if a row was found, False otherwise

        Raises:
            ValueError: If the ID is not set
        """
        if self.id is None:
            raise ValueError("Cannot fetch attributes: id is required")

        conn = DbConnection.get_connection()
        cursor = conn.cursor()

        cursor.execute(
            """
            SELECT chrom, pos, ref, alt FROM mutations
            WHERE id=?
            """,
            (self.id,),
        )

        result = cursor.fetchone()
        if result:
            self.chrom = result["chrom"]
            self.pos = result["pos"]
            self.ref = result["ref"]
            self.alt = result["alt"]
            return True

        return False

    def ensure_in_db(self, annotate: bool = True) -> int:
        """
        Make sure the mutation exists in the DB, creating it if needed.

        Updates ``self.id``.

        Args:
            annotate: If True, a newly created mutation is annotated with
                its genomic context

        Returns:
            The mutation ID

        Raises:
            ValueError: If the coordinates are not complete
        """
        if self.state == "miss_attributes":
            raise ValueError("Cannot ensure in DB: coordinates (chrom, pos, ref, alt) are required")

        # If an ID is already set, trust it only if that row really exists.
        if self.id is not None:
            conn = DbConnection.get_connection()
            cursor = conn.cursor()
            cursor.execute("SELECT id FROM mutations WHERE id=?", (self.id,))
            if cursor.fetchone():
                return self.id

        # Otherwise look the mutation up by its coordinates.
        existing_id = self.fetch_id_from_db()
        if existing_id is not None:
            return existing_id

        # Not found: create the row.
        conn = DbConnection.get_connection()
        cursor = conn.cursor()

        cursor.execute(
            """
            INSERT INTO mutations (chrom, pos, ref, alt)
            VALUES (?, ?, ?, ?)
            """,
            (self.chrom, self.pos, self.ref, self.alt),
        )

        self.id = cursor.lastrowid

        if annotate:
            self._annotate_mutation()

        # Single commit covers both the insert and its annotations.
        conn.commit()
        return self.id

    def _annotate_mutation(self) -> None:
        """
        Annotate the mutation with its genomic context.

        Internal helper called by ``ensure_in_db`` / ``ensure_in_db_batch``.
        Replaces any existing annotation rows for this mutation with one row
        per overlapping feature, or a single row with a NULL feature when no
        feature overlaps. Does not commit; the caller is responsible for that.
        """
        if self.id is None or self.chrom is None or self.pos is None:
            return

        conn = DbConnection.get_connection()
        cursor = conn.cursor()

        # Drop previous annotations so the result reflects the current features.
        cursor.execute("DELETE FROM mutation_annotations WHERE mutation_id = ?", (self.id,))

        # Find features whose interval contains the mutation position.
        cursor.execute(
            """
            SELECT gf.id
            FROM genomic_features gf
            JOIN genes g ON gf.gene_id = g.id
            WHERE g.chrom = ? AND gf.feature_start <= ? AND gf.feature_end >= ?
            """,
            (self.chrom, self.pos, self.pos),
        )

        feature_ids = [row["id"] for row in cursor.fetchall()]

        if feature_ids:
            cursor.executemany(
                """
                INSERT INTO mutation_annotations (mutation_id, feature_id)
                VALUES (?, ?)
                """,
                [(self.id, feature_id) for feature_id in feature_ids],
            )
        else:
            # Intergenic: record the mutation with no feature.
            cursor.execute(
                """
                INSERT INTO mutation_annotations (mutation_id, feature_id)
                VALUES (?, NULL)
                """,
                (self.id,),
            )

    @classmethod
    def fetch_ids_from_db_batch(cls, mutations: list["Mutation"]) -> None:
        """
        Look up the IDs for a batch of mutations (modified in place).

        Args:
            mutations: Mutations to resolve (must have chrom, pos, ref, alt)

        Raises:
            ValueError: If a mutation does not have complete coordinates
        """
        conn = DbConnection.get_connection()
        cursor = conn.cursor()

        for mutation in mutations:
            if mutation.state == "miss_attributes":
                raise ValueError(f"Mutation {mutation} missing coordinates")

            cursor.execute(
                """
                SELECT id FROM mutations
                WHERE chrom=? AND pos=? AND ref=? AND alt=?
                """,
                (mutation.chrom, mutation.pos, mutation.ref, mutation.alt),
            )

            result = cursor.fetchone()
            if result:
                mutation.id = result["id"]

    @classmethod
    def fetch_attributes_from_db_batch(cls, mutations: list["Mutation"]) -> None:
        """
        Load the coordinates for a batch of mutations (modified in place).

        Args:
            mutations: Mutations to resolve (must have id)

        Raises:
            ValueError: If a mutation has no ID
        """
        conn = DbConnection.get_connection()
        cursor = conn.cursor()

        for mutation in mutations:
            if mutation.id is None:
                raise ValueError(f"Mutation {mutation} missing id")

            cursor.execute(
                """
                SELECT chrom, pos, ref, alt FROM mutations
                WHERE id=?
                """,
                (mutation.id,),
            )

            result = cursor.fetchone()
            if result:
                mutation.chrom = result["chrom"]
                mutation.pos = result["pos"]
                mutation.ref = result["ref"]
                mutation.alt = result["alt"]

    @classmethod
    def ensure_in_db_batch(cls, mutations: list["Mutation"], annotate: bool = True) -> None:
        """
        Make sure every mutation exists in the DB, creating missing ones.

        Modifies the mutations in place to set their IDs. Distinct objects in
        the batch that share the same (chrom, pos, ref, alt) receive the same
        ID; the row is inserted only once.

        Args:
            mutations: Mutations to store (must have chrom, pos, ref, alt)
            annotate: If True, newly created mutations are annotated with
                their genomic context

        Raises:
            ValueError: If a mutation does not have complete coordinates
        """
        # First, pick up the IDs of mutations that are already stored.
        cls.fetch_ids_from_db_batch(mutations)

        conn = DbConnection.get_connection()
        cursor = conn.cursor()

        # IDs created during this call, keyed by coordinates. The lookup above
        # ran before any insert, so duplicates inside the batch would otherwise
        # each be inserted as a new row.
        created_ids = {}

        for mutation in mutations:
            if mutation.id is not None:
                continue

            key = (mutation.chrom, mutation.pos, mutation.ref, mutation.alt)
            known_id = created_ids.get(key)
            if known_id is not None:
                mutation.id = known_id
                continue

            cursor.execute(
                """
                INSERT INTO mutations (chrom, pos, ref, alt)
                VALUES (?, ?, ?, ?)
                """,
                key,
            )

            mutation.id = cursor.lastrowid
            created_ids[key] = mutation.id

            if annotate:
                mutation._annotate_mutation()

        conn.commit()

    def __repr__(self) -> str:
        """Return a compact representation that depends on ``state``."""
        state = self.state
        if state == "full":
            chrom_str = int_to_chrom(self.chrom)
            return f"Mutation(id={self.id}, {chrom_str}:{self.pos} {self.ref}>{self.alt})"
        if state == "miss_id":
            chrom_str = int_to_chrom(self.chrom)
            return f"Mutation({chrom_str}:{self.pos} {self.ref}>{self.alt}, id=?)"
        return f"Mutation(id={self.id}, coords=?)"
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
from typing import ClassVar, Optional, get_type_hints
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel
|
|
4
|
+
|
|
5
|
+
from src.db_connection import DbConnection
|
|
6
|
+
from src.mutations import Mutation
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class OutputDescription(BaseModel):
    """
    Base class for module outputs that can be written to the SQLite DB.

    Subclasses declare:
        - ``_table_name``: base table name (stored with an ``annot_`` prefix)
        - ``db_fields``: names of the attributes written as columns
        - ``version``: value written to the ``version`` column

    NOTE(review): ``version`` is declared as a ClassVar here, so it is read
    from the class rather than from constructor arguments - confirm this
    matches how subclasses are expected to supply it.
    """

    mutation: Mutation
    version: ClassVar[str]

    _table_name: ClassVar[str]
    db_fields: ClassVar[list[str]]

    @property
    def table_name(self) -> str:
        # The "annot_" prefix allows automatic views based on that prefix.
        return "annot_" + self._table_name

    @classmethod
    def _class_table_name(cls) -> str:
        """Resolve the default table name without needing an instance."""
        resolved = cls.table_name
        # Accessed on the class, the ``table_name`` property yields the
        # property object itself, not a string; compute the name directly.
        # A subclass that overrides ``table_name`` with a plain string
        # attribute is used as-is.
        if isinstance(resolved, property):
            return "annot_" + cls._table_name
        return resolved

    @classmethod
    def _get_all_db_fields(cls) -> list[str]:
        """Return the column names to write: ``version`` plus ``db_fields``."""
        return ["version"] + list(cls.db_fields)

    @classmethod
    def _python_type_to_sql(cls, python_type: type) -> str:
        """
        Map a Python type annotation to a SQLite column type.

        ``Optional[X]`` / ``Union[X, None]`` / ``X | None`` and ``ClassVar[X]``
        are unwrapped to ``X`` first. Unknown types map to ``TEXT``.
        """
        import types
        from typing import Union, get_args, get_origin

        type_map = {
            int: "INTEGER",
            float: "REAL",
            str: "TEXT",
            bool: "INTEGER",
        }

        # get_type_hints() reports class-level declarations as ClassVar[X].
        if get_origin(python_type) is ClassVar:
            inner = get_args(python_type)
            if inner:
                python_type = inner[0]

        # ``X | None`` (Python 3.10+) has origin types.UnionType rather than
        # typing.Union; that attribute does not exist on 3.9.
        origin = get_origin(python_type)
        pep604_union = getattr(types, "UnionType", None)
        if origin is Union or (pep604_union is not None and origin is pep604_union):
            members = [arg for arg in get_args(python_type) if arg is not type(None)]
            if members:
                python_type = members[0]

        return type_map.get(python_type, "TEXT")

    @classmethod
    def _ensure_table_exists(cls, table_name: str) -> None:
        """
        Create the output table and its ``mutation_id`` index if missing.

        Column types are derived from the class's type hints; fields without
        a type hint default to ``TEXT``.
        """
        conn = DbConnection.get_connection()
        cursor = conn.cursor()

        type_hints = get_type_hints(cls)
        columns = ["id INTEGER PRIMARY KEY AUTOINCREMENT", "mutation_id INTEGER NOT NULL UNIQUE"]

        for field_name in cls._get_all_db_fields():
            field_type = type_hints.get(field_name, str)
            sql_type = cls._python_type_to_sql(field_type)
            columns.append(f"{field_name} {sql_type}")

        columns.append("FOREIGN KEY (mutation_id) REFERENCES mutations(id) ON DELETE CASCADE")

        # Identifiers cannot be bound as SQL parameters; table and column
        # names come from the class definition, not from runtime input.
        cursor.execute(f"""
            CREATE TABLE IF NOT EXISTS {table_name} (
                {', '.join(columns)}
            )
        """)

        cursor.execute(f"""
            CREATE INDEX IF NOT EXISTS idx_{table_name}_mutation
            ON {table_name}(mutation_id)
        """)

        conn.commit()

    @classmethod
    def _insert_statement(cls, table_name: str, all_fields: list[str]) -> str:
        """Build the parameterized INSERT OR REPLACE statement for a table."""
        fields = ["mutation_id"] + all_fields
        placeholders = ", ".join(["?"] * len(fields))
        return f"""
            INSERT OR REPLACE INTO {table_name}
            ({', '.join(fields)})
            VALUES ({placeholders})
            """

    def insert_to_db(self, table_name: Optional[str] = None) -> None:
        """
        Write this output to the DB.

        Creates the table if needed, makes sure the mutation row exists, then
        inserts (or replaces) the row for this mutation and commits.

        Args:
            table_name: Target table; defaults to ``self.table_name``
        """
        if table_name is None:
            table_name = self.table_name

        self._ensure_table_exists(table_name)
        self.mutation.ensure_in_db()

        conn = DbConnection.get_connection()
        cursor = conn.cursor()

        all_fields = self._get_all_db_fields()
        values = [self.mutation.id] + [getattr(self, field) for field in all_fields]

        cursor.execute(self._insert_statement(table_name, all_fields), values)

        conn.commit()

    @classmethod
    def insert_batch_to_db(
        cls, outputs: list["OutputDescription"], table_name: Optional[str] = None
    ) -> None:
        """
        Write a batch of outputs to the DB in one statement batch.

        Creates the table if needed, makes sure every mutation row exists,
        then inserts (or replaces) one row per output and commits.

        Args:
            outputs: Outputs to store
            table_name: Target table; defaults to the class's table name
        """
        if table_name is None:
            table_name = cls._class_table_name()

        cls._ensure_table_exists(table_name)

        mutations = [output.mutation for output in outputs]
        Mutation.ensure_in_db_batch(mutations)

        conn = DbConnection.get_connection()
        cursor = conn.cursor()

        all_fields = cls._get_all_db_fields()
        values_list = [
            [output.mutation.id] + [getattr(output, field) for field in all_fields]
            for output in outputs
        ]

        cursor.executemany(cls._insert_statement(table_name, all_fields), values_list)

        conn.commit()
|