evoseer-utils 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,141 @@
1
+ Metadata-Version: 2.1
2
+ Name: evoseer-utils
3
+ Version: 0.1.0
4
+ Summary: Shared library for mutation management across modules
5
+ Author: Your Name
6
+ Author-email: your.email@example.com
7
+ Requires-Python: >=3.9,<4.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.9
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Requires-Dist: pydantic (>=2.0,<3.0)
15
+ Description-Content-Type: text/markdown
16
+
17
+ # Mutation Library
18
+
19
+ Shared library for mutation management across modules.
20
+
21
+ ## Components
22
+
23
+ ### `DbConnection` - Singleton DB connection
24
+ ```python
25
+ from libs import DbConnection
26
+
27
+ DbConnection.set_db_path("mutations.db")
28
+ conn = DbConnection.get_connection()
29
+ ```
30
+
31
+ ### `Mutation` - Pydantic model with DB integration
32
+
33
+ #### States
34
+ - `"full"`: Has both id and (chrom, pos, ref, alt)
35
+ - `"miss_id"`: Has coordinates, missing id
36
+ - `"miss_attributes"`: Has id, missing coordinates
37
+
38
+ #### Creation patterns
39
+
40
+ ```python
41
+ # With coordinates (lazy load id)
42
+ mut = Mutation(chrom=17, pos=7577548, ref="C", alt="T")
43
+
44
+ # With id (lazy load attributes)
45
+ mut = Mutation(id=123)
46
+
47
+ # With both
48
+ mut = Mutation(id=123, chrom=17, pos=7577548, ref="C", alt="T")
49
+ ```
50
+
51
+ #### Methods
52
+
53
+ **Instance methods:**
54
+ ```python
55
+ mut.fetch_id_from_db() # Get id from coordinates
56
+ mut.fetch_attributes_from_db() # Get coordinates from id
57
+ mut.ensure_in_db() # Create if missing, return id
58
+ ```
59
+
60
+ **Class methods (batch):**
61
+ ```python
62
+ Mutation.fetch_ids_from_db_batch(mutations)
63
+ Mutation.fetch_attributes_from_db_batch(mutations)
64
+ Mutation.ensure_in_db_batch(mutations)
65
+ ```
66
+
67
+ ## Usage in modules with OutputDescription (fully automatic)
68
+
69
+ `OutputDescription` is a base class that provides automatic DB insertion for module outputs.
70
+
71
+ ```python
72
+ from pydantic import Field
73
+ from typing import ClassVar, List
74
+ from libs import OutputDescription, DbConnection, Mutation
75
+
76
+ class MyModuleOutput(OutputDescription):
77
+ table_name: ClassVar[str] = "tool_mymodule"
78
+ db_fields: ClassVar[List[str]] = ["my_score", "my_prediction"]
79
+
80
+ my_score: float = Field(..., description="Module score")
81
+ my_prediction: str = Field(..., description="Prediction")
82
+
83
+ # Setup
84
+ DbConnection.set_db_path("mutations.db")
85
+
86
+ # Single insertion (automatic table creation + mutation insertion)
87
+ output = MyModuleOutput(
88
+ mutation=Mutation(chrom=17, pos=7577548, ref="C", alt="T"),
89
+ version="1.0.0", # Required field (free text)
90
+ my_score=0.85,
91
+ my_prediction="pathogenic"
92
+ )
93
+ output.insert_to_db() # Creates table if needed, ensures mutation exists, inserts
94
+
95
+ # Batch insertion
96
+ outputs = [...]
97
+ MyModuleOutput.insert_batch_to_db(outputs)
98
+ ```
99
+
100
+ **What happens automatically:**
101
+ - Table creation with correct SQL types (inferred from Python types)
102
+ - Mutation insertion/lookup
103
+ - Index creation on mutation_id
104
+ - `version` field automatically added to table and insertion
105
+ - INSERT OR REPLACE (idempotent)
106
+
107
+ **Note:** The `version` field is required in all `OutputDescription` subclasses; its format is free text.
108
+
109
+ ## Chromosome encoding
110
+
111
+ - Autosomes: `1-22`
112
+ - X: `23`
113
+ - Y: `24`
114
+
115
+ Helper functions:
116
+
117
+ ```python
118
+ from libs.src.mutations import chrom_to_int, int_to_chrom
119
+
120
+ chrom_to_int("chr17") # 17
121
+ chrom_to_int("chrX") # 23
122
+ int_to_chrom(23) # "chrX"
123
+ ```
124
+
125
+ ## Tests
126
+
127
+ ```bash
128
+ # From project root
129
+ .venv/bin/python3 libs/tests/test_mutations_lib.py
130
+
131
+ # Or use the test runner
132
+ libs/tests/run_tests.sh
133
+ ```
134
+
135
+ ## Examples
136
+
137
+ ```bash
138
+ python3 example_mutations_lib.py
139
+ python3 modules/boostdm/output_description_example.py
140
+ ```
141
+
@@ -0,0 +1,124 @@
1
+ # Mutation Library
2
+
3
+ Shared library for mutation management across modules.
4
+
5
+ ## Components
6
+
7
+ ### `DbConnection` - Singleton DB connection
8
+ ```python
9
+ from libs import DbConnection
10
+
11
+ DbConnection.set_db_path("mutations.db")
12
+ conn = DbConnection.get_connection()
13
+ ```
14
+
15
+ ### `Mutation` - Pydantic model with DB integration
16
+
17
+ #### States
18
+ - `"full"`: Has both id and (chrom, pos, ref, alt)
19
+ - `"miss_id"`: Has coordinates, missing id
20
+ - `"miss_attributes"`: Has id, missing coordinates
21
+
22
+ #### Creation patterns
23
+
24
+ ```python
25
+ # With coordinates (lazy load id)
26
+ mut = Mutation(chrom=17, pos=7577548, ref="C", alt="T")
27
+
28
+ # With id (lazy load attributes)
29
+ mut = Mutation(id=123)
30
+
31
+ # With both
32
+ mut = Mutation(id=123, chrom=17, pos=7577548, ref="C", alt="T")
33
+ ```
34
+
35
+ #### Methods
36
+
37
+ **Instance methods:**
38
+ ```python
39
+ mut.fetch_id_from_db() # Get id from coordinates
40
+ mut.fetch_attributes_from_db() # Get coordinates from id
41
+ mut.ensure_in_db() # Create if missing, return id
42
+ ```
43
+
44
+ **Class methods (batch):**
45
+ ```python
46
+ Mutation.fetch_ids_from_db_batch(mutations)
47
+ Mutation.fetch_attributes_from_db_batch(mutations)
48
+ Mutation.ensure_in_db_batch(mutations)
49
+ ```
50
+
51
+ ## Usage in modules with OutputDescription (fully automatic)
52
+
53
+ `OutputDescription` is a base class that provides automatic DB insertion for module outputs.
54
+
55
+ ```python
56
+ from pydantic import Field
57
+ from typing import ClassVar, List
58
+ from libs import OutputDescription, DbConnection, Mutation
59
+
60
+ class MyModuleOutput(OutputDescription):
61
+ table_name: ClassVar[str] = "tool_mymodule"
62
+ db_fields: ClassVar[List[str]] = ["my_score", "my_prediction"]
63
+
64
+ my_score: float = Field(..., description="Module score")
65
+ my_prediction: str = Field(..., description="Prediction")
66
+
67
+ # Setup
68
+ DbConnection.set_db_path("mutations.db")
69
+
70
+ # Single insertion (automatic table creation + mutation insertion)
71
+ output = MyModuleOutput(
72
+ mutation=Mutation(chrom=17, pos=7577548, ref="C", alt="T"),
73
+ version="1.0.0", # Required field (free text)
74
+ my_score=0.85,
75
+ my_prediction="pathogenic"
76
+ )
77
+ output.insert_to_db() # Creates table if needed, ensures mutation exists, inserts
78
+
79
+ # Batch insertion
80
+ outputs = [...]
81
+ MyModuleOutput.insert_batch_to_db(outputs)
82
+ ```
83
+
84
+ **What happens automatically:**
85
+ - Table creation with correct SQL types (inferred from Python types)
86
+ - Mutation insertion/lookup
87
+ - Index creation on mutation_id
88
+ - `version` field automatically added to table and insertion
89
+ - INSERT OR REPLACE (idempotent)
90
+
91
+ **Note:** The `version` field is required in all `OutputDescription` subclasses; its format is free text.
92
+
93
+ ## Chromosome encoding
94
+
95
+ - Autosomes: `1-22`
96
+ - X: `23`
97
+ - Y: `24`
98
+
99
+ Helper functions:
100
+
101
+ ```python
102
+ from libs.src.mutations import chrom_to_int, int_to_chrom
103
+
104
+ chrom_to_int("chr17") # 17
105
+ chrom_to_int("chrX") # 23
106
+ int_to_chrom(23) # "chrX"
107
+ ```
108
+
109
+ ## Tests
110
+
111
+ ```bash
112
+ # From project root
113
+ .venv/bin/python3 libs/tests/test_mutations_lib.py
114
+
115
+ # Or use the test runner
116
+ libs/tests/run_tests.sh
117
+ ```
118
+
119
+ ## Examples
120
+
121
+ ```bash
122
+ python3 example_mutations_lib.py
123
+ python3 modules/boostdm/output_description_example.py
124
+ ```
@@ -0,0 +1,52 @@
1
+ [tool.poetry]
2
+ name = "evoseer-utils"
3
+ version = "0.1.0"
4
+ description = "Shared library for mutation management across modules"
5
+ authors = ["Your Name <your.email@example.com>"]
6
+ readme = "README.md"
7
+ packages = [{include = "src"}]
8
+
9
+ [tool.poetry.dependencies]
10
+ python = "^3.9"
11
+ pydantic = "^2.0"
12
+
13
+ [tool.poetry.group.dev.dependencies]
14
+ pytest = "^8.0"
15
+ ruff = "^0.8"
16
+ pre-commit = "^4.0"
17
+
18
+ [build-system]
19
+ requires = ["poetry-core"]
20
+ build-backend = "poetry.core.masonry.api"
21
+
22
+ [tool.pytest.ini_options]
23
+ testpaths = ["tests"]
24
+ python_files = ["test_*.py"]
25
+ python_classes = ["Test*"]
26
+ python_functions = ["test_*"]
27
+ addopts = [
28
+ "-v",
29
+ "--strict-markers",
30
+ "--tb=short",
31
+ ]
32
+
33
+ [tool.ruff]
34
+ line-length = 100
35
+ target-version = "py39"
36
+
37
+ [tool.ruff.lint]
38
+ select = [
39
+ "E", # pycodestyle errors
40
+ "W", # pycodestyle warnings
41
+ "F", # pyflakes
42
+ "I", # isort
43
+ "N", # pep8-naming
44
+ "UP", # pyupgrade
45
+ "B", # flake8-bugbear
46
+ "C4", # flake8-comprehensions
47
+ ]
48
+ ignore = []
49
+
50
+ [tool.ruff.lint.per-file-ignores]
51
+ "__init__.py" = ["F401"] # Allow unused imports in __init__.py
52
+ "tests/*" = ["D"] # Disable docstring requirements in tests
@@ -0,0 +1,5 @@
1
+ from src.db_connection import DbConnection
2
+ from src.mutations import Mutation
3
+ from src.output_description import OutputDescription
4
+
5
+ __all__ = ["DbConnection", "Mutation", "OutputDescription"]
@@ -0,0 +1,80 @@
1
+ """
2
+ SQLite database connection management (singleton)
3
+ """
4
+
5
+ import sqlite3
6
+ from pathlib import Path
7
+ from typing import Optional
8
+
9
+
10
+ class DbConnection:
11
+ """
12
+ Singleton pour gérer la connexion à la base de données SQLite
13
+
14
+ Usage:
15
+ DbConnection.set_db_path("mutations.db")
16
+ conn = DbConnection.get_connection()
17
+ """
18
+
19
+ _instance: Optional["DbConnection"] = None
20
+ _connection: Optional[sqlite3.Connection] = None
21
+ _db_path: Optional[str] = None
22
+
23
+ def __new__(cls):
24
+ if cls._instance is None:
25
+ cls._instance = super().__new__(cls)
26
+ return cls._instance
27
+
28
+ @classmethod
29
+ def set_db_path(cls, db_path: str) -> None:
30
+ """
31
+ Configure le chemin vers la base de données
32
+
33
+ Args:
34
+ db_path: Chemin vers le fichier SQLite
35
+ """
36
+ if cls._db_path != db_path:
37
+ # Fermer l'ancienne connexion si elle existe
38
+ if cls._connection is not None:
39
+ cls._connection.close()
40
+ cls._connection = None
41
+
42
+ cls._db_path = db_path
43
+
44
+ @classmethod
45
+ def get_connection(cls) -> sqlite3.Connection:
46
+ """
47
+ Retourne la connexion SQLite (crée si nécessaire)
48
+
49
+ Returns:
50
+ Connexion SQLite
51
+
52
+ Raises:
53
+ RuntimeError: Si le chemin DB n'a pas été configuré
54
+ """
55
+ if cls._db_path is None:
56
+ raise RuntimeError("Database path not set. Call DbConnection.set_db_path() first.")
57
+
58
+ if not Path(cls._db_path).exists():
59
+ raise FileNotFoundError(
60
+ f"Database file not found: {cls._db_path}. " f"Run init_database.py first."
61
+ )
62
+
63
+ # Créer la connexion si elle n'existe pas
64
+ if cls._connection is None:
65
+ cls._connection = sqlite3.connect(cls._db_path)
66
+ cls._connection.row_factory = sqlite3.Row # Accès par nom de colonne
67
+
68
+ return cls._connection
69
+
70
+ @classmethod
71
+ def close(cls) -> None:
72
+ """Ferme la connexion à la base de données"""
73
+ if cls._connection is not None:
74
+ cls._connection.close()
75
+ cls._connection = None
76
+
77
+ @classmethod
78
+ def is_configured(cls) -> bool:
79
+ """Vérifie si la connexion est configurée"""
80
+ return cls._db_path is not None
@@ -0,0 +1,406 @@
1
+ """
2
+ Pydantic model for genomic mutations
3
+ """
4
+
5
+ from typing import Literal, Optional
6
+
7
+ from pydantic import BaseModel, field_validator, model_validator
8
+
9
+ from .db_connection import DbConnection
10
+
11
+
12
def chrom_to_int(chrom_str: str) -> Optional[int]:
    """
    Convert a chromosome label to its integer encoding.

    Accepts labels with or without a "chr" prefix, in any letter case, as well
    as plain integers (e.g. "chr17", "17", 17, "chrX", "x").

    Args:
        chrom_str: Chromosome label or number

    Returns:
        The parsed integer for numeric labels, 23 for X, 24 for Y, and None
        for None, mitochondrial labels (M / MT) and anything that cannot be
        parsed as an integer. Numeric values are not range-checked.
    """
    if chrom_str is None:
        return None

    label = str(chrom_str).strip().upper()
    # The label is upper-cased first, so the prefix to strip is "CHR".
    # (Stripping lower-case "chr" after upper() never matched anything.)
    if label.startswith("CHR"):
        label = label[3:]

    if label == "X":
        return 23
    if label == "Y":
        return 24
    if label in ("M", "MT"):
        return None

    try:
        return int(label)
    except ValueError:
        return None
26
+
27
+
28
def int_to_chrom(chrom_int: int) -> str:
    """
    Convert an integer chromosome code to its "chr"-prefixed label.

    23 maps to "chrX" and 24 to "chrY"; any other value is formatted as
    "chr<value>".
    """
    if chrom_int == 23:
        return "chrX"
    if chrom_int == 24:
        return "chrY"
    return f"chr{chrom_int}"
36
+
37
+
38
+ class Mutation(BaseModel):
39
+ """
40
+ Modèle pour une mutation génomique
41
+
42
+ Attributs:
43
+ id: ID de la mutation dans la DB (auto-généré)
44
+ chrom: Chromosome (1-22, 23=X, 24=Y)
45
+ pos: Position génomique
46
+ ref: Allèle de référence
47
+ alt: Allèle alternatif
48
+
49
+ Validation:
50
+ - Soit id est fourni
51
+ - Soit (chrom, pos, ref, alt) sont tous fournis
52
+ - Soit les deux
53
+
54
+ Usage:
55
+ # Avec ID seulement (lazy load des attributs)
56
+ mutation = Mutation(id=123)
57
+ mutation.fetch_attributes_from_db()
58
+
59
+ # Avec coordonnées (lazy load de l'ID)
60
+ mutation = Mutation(chrom=17, pos=7577548, ref="C", alt="T")
61
+ mutation.fetch_id_from_db()
62
+
63
+ # Avec tout
64
+ mutation = Mutation(id=123, chrom=17, pos=7577548, ref="C", alt="T")
65
+ """
66
+
67
+ id: Optional[int] = None
68
+ chrom: Optional[int] = None
69
+ pos: Optional[int] = None
70
+ ref: Optional[str] = None
71
+ alt: Optional[str] = None
72
+
73
+ @field_validator("chrom", mode="before")
74
+ @classmethod
75
+ def normalize_chrom(cls, v):
76
+ return chrom_to_int(v)
77
+
78
+ @model_validator(mode="after")
79
+ def validate_mutation(self):
80
+ """
81
+ Valide qu'on a soit id, soit (chrom, pos, ref, alt), soit les deux
82
+ """
83
+ has_id = self.id is not None
84
+ has_coords = all(
85
+ [
86
+ self.chrom is not None,
87
+ self.pos is not None,
88
+ self.ref is not None,
89
+ self.alt is not None,
90
+ ]
91
+ )
92
+
93
+ if not has_id and not has_coords:
94
+ raise ValueError("Must provide either 'id' or all of (chrom, pos, ref, alt)")
95
+
96
+ # Vérifier que si on a des coordonnées partielles, elles sont complètes
97
+ coord_fields = [self.chrom, self.pos, self.ref, self.alt]
98
+ partial_coords = any(f is not None for f in coord_fields)
99
+
100
+ if partial_coords and not has_coords:
101
+ raise ValueError("If providing coordinates, must provide all of (chrom, pos, ref, alt)")
102
+
103
+ return self
104
+
105
+ @property
106
+ def state(self) -> Literal["full", "miss_id", "miss_attributes"]:
107
+ """
108
+ Retourne l'état de la mutation
109
+
110
+ Returns:
111
+ - "full": id et attributs présents
112
+ - "miss_id": attributs présents, id manquant
113
+ - "miss_attributes": id présent, attributs manquants
114
+ """
115
+ has_id = self.id is not None
116
+ has_coords = all(
117
+ [
118
+ self.chrom is not None,
119
+ self.pos is not None,
120
+ self.ref is not None,
121
+ self.alt is not None,
122
+ ]
123
+ )
124
+
125
+ if has_id and has_coords:
126
+ return "full"
127
+ elif has_coords:
128
+ return "miss_id"
129
+ else:
130
+ return "miss_attributes"
131
+
132
+ def fetch_id_from_db(self) -> Optional[int]:
133
+ """
134
+ Récupère l'ID de la mutation depuis la DB via (chrom, pos, ref, alt)
135
+ Met à jour self.id si trouvé
136
+
137
+ Returns:
138
+ ID de la mutation ou None si non trouvée
139
+
140
+ Raises:
141
+ ValueError: Si les coordonnées ne sont pas complètes
142
+ """
143
+ if self.state == "miss_attributes":
144
+ raise ValueError("Cannot fetch ID: coordinates (chrom, pos, ref, alt) are required")
145
+
146
+ conn = DbConnection.get_connection()
147
+ cursor = conn.cursor()
148
+
149
+ cursor.execute(
150
+ """
151
+ SELECT id FROM mutations
152
+ WHERE chrom=? AND pos=? AND ref=? AND alt=?
153
+ """,
154
+ (self.chrom, self.pos, self.ref, self.alt),
155
+ )
156
+
157
+ result = cursor.fetchone()
158
+ if result:
159
+ self.id = result["id"]
160
+ return self.id
161
+
162
+ return None
163
+
164
+ def fetch_attributes_from_db(self) -> bool:
165
+ """
166
+ Récupère les attributs de la mutation depuis la DB via l'ID
167
+ Met à jour (chrom, pos, ref, alt) si trouvés
168
+
169
+ Returns:
170
+ True si trouvé, False sinon
171
+
172
+ Raises:
173
+ ValueError: Si l'ID n'est pas fourni
174
+ """
175
+ if self.id is None:
176
+ raise ValueError("Cannot fetch attributes: id is required")
177
+
178
+ conn = DbConnection.get_connection()
179
+ cursor = conn.cursor()
180
+
181
+ cursor.execute(
182
+ """
183
+ SELECT chrom, pos, ref, alt FROM mutations
184
+ WHERE id=?
185
+ """,
186
+ (self.id,),
187
+ )
188
+
189
+ result = cursor.fetchone()
190
+ if result:
191
+ self.chrom = result["chrom"]
192
+ self.pos = result["pos"]
193
+ self.ref = result["ref"]
194
+ self.alt = result["alt"]
195
+ return True
196
+
197
+ return False
198
+
199
+ def ensure_in_db(self, annotate: bool = True) -> int:
200
+ """
201
+ S'assure que la mutation existe dans la DB (crée si nécessaire)
202
+ Met à jour self.id
203
+
204
+ Args:
205
+ annotate: Si True, annote automatiquement avec le contexte génomique
206
+
207
+ Returns:
208
+ ID de la mutation
209
+
210
+ Raises:
211
+ ValueError: Si les coordonnées ne sont pas complètes
212
+ """
213
+ if self.state == "miss_attributes":
214
+ raise ValueError("Cannot ensure in DB: coordinates (chrom, pos, ref, alt) are required")
215
+
216
+ # Si on a déjà l'ID, on vérifie qu'il existe
217
+ if self.id is not None:
218
+ conn = DbConnection.get_connection()
219
+ cursor = conn.cursor()
220
+ cursor.execute("SELECT id FROM mutations WHERE id=?", (self.id,))
221
+ if cursor.fetchone():
222
+ return self.id
223
+
224
+ # Sinon, chercher par coordonnées
225
+ existing_id = self.fetch_id_from_db()
226
+ if existing_id is not None:
227
+ return existing_id
228
+
229
+ # Si pas trouvé, créer
230
+ conn = DbConnection.get_connection()
231
+ cursor = conn.cursor()
232
+
233
+ cursor.execute(
234
+ """
235
+ INSERT INTO mutations (chrom, pos, ref, alt)
236
+ VALUES (?, ?, ?, ?)
237
+ """,
238
+ (self.chrom, self.pos, self.ref, self.alt),
239
+ )
240
+
241
+ self.id = cursor.lastrowid
242
+
243
+ # Annoter si demandé
244
+ if annotate:
245
+ self._annotate_mutation()
246
+
247
+ conn.commit()
248
+ return self.id
249
+
250
+ def _annotate_mutation(self) -> None:
251
+ """
252
+ Annote la mutation avec le contexte génomique
253
+ (méthode interne, appelée par ensure_in_db)
254
+ """
255
+ if self.id is None or self.chrom is None or self.pos is None:
256
+ return
257
+
258
+ conn = DbConnection.get_connection()
259
+ cursor = conn.cursor()
260
+
261
+ # Supprimer les anciennes annotations
262
+ cursor.execute("DELETE FROM mutation_annotations WHERE mutation_id = ?", (self.id,))
263
+
264
+ # Trouver les features qui chevauchent
265
+ cursor.execute(
266
+ """
267
+ SELECT gf.id
268
+ FROM genomic_features gf
269
+ JOIN genes g ON gf.gene_id = g.id
270
+ WHERE g.chrom = ? AND gf.feature_start <= ? AND gf.feature_end >= ?
271
+ """,
272
+ (self.chrom, self.pos, self.pos),
273
+ )
274
+
275
+ feature_ids = [row["id"] for row in cursor.fetchall()]
276
+
277
+ if feature_ids:
278
+ for feature_id in feature_ids:
279
+ cursor.execute(
280
+ """
281
+ INSERT INTO mutation_annotations (mutation_id, feature_id)
282
+ VALUES (?, ?)
283
+ """,
284
+ (self.id, feature_id),
285
+ )
286
+ else:
287
+ # Intergenic
288
+ cursor.execute(
289
+ """
290
+ INSERT INTO mutation_annotations (mutation_id, feature_id)
291
+ VALUES (?, NULL)
292
+ """,
293
+ (self.id,),
294
+ )
295
+
296
+ @classmethod
297
+ def fetch_ids_from_db_batch(cls, mutations: list["Mutation"]) -> None:
298
+ """
299
+ Récupère les IDs pour un batch de mutations (modifie en place)
300
+
301
+ Args:
302
+ mutations: Liste de mutations (doivent avoir chrom, pos, ref, alt)
303
+
304
+ Raises:
305
+ ValueError: Si une mutation n'a pas de coordonnées complètes
306
+ """
307
+ conn = DbConnection.get_connection()
308
+ cursor = conn.cursor()
309
+
310
+ for mutation in mutations:
311
+ if mutation.state == "miss_attributes":
312
+ raise ValueError(f"Mutation {mutation} missing coordinates")
313
+
314
+ cursor.execute(
315
+ """
316
+ SELECT id FROM mutations
317
+ WHERE chrom=? AND pos=? AND ref=? AND alt=?
318
+ """,
319
+ (mutation.chrom, mutation.pos, mutation.ref, mutation.alt),
320
+ )
321
+
322
+ result = cursor.fetchone()
323
+ if result:
324
+ mutation.id = result["id"]
325
+
326
+ @classmethod
327
+ def fetch_attributes_from_db_batch(cls, mutations: list["Mutation"]) -> None:
328
+ """
329
+ Récupère les attributs pour un batch de mutations (modifie en place)
330
+
331
+ Args:
332
+ mutations: Liste de mutations (doivent avoir id)
333
+
334
+ Raises:
335
+ ValueError: Si une mutation n'a pas d'ID
336
+ """
337
+ conn = DbConnection.get_connection()
338
+ cursor = conn.cursor()
339
+
340
+ for mutation in mutations:
341
+ if mutation.id is None:
342
+ raise ValueError(f"Mutation {mutation} missing id")
343
+
344
+ cursor.execute(
345
+ """
346
+ SELECT chrom, pos, ref, alt FROM mutations
347
+ WHERE id=?
348
+ """,
349
+ (mutation.id,),
350
+ )
351
+
352
+ result = cursor.fetchone()
353
+ if result:
354
+ mutation.chrom = result["chrom"]
355
+ mutation.pos = result["pos"]
356
+ mutation.ref = result["ref"]
357
+ mutation.alt = result["alt"]
358
+
359
+ @classmethod
360
+ def ensure_in_db_batch(cls, mutations: list["Mutation"], annotate: bool = True) -> None:
361
+ """
362
+ S'assure que toutes les mutations existent dans la DB (crée si nécessaire)
363
+ Modifie les mutations en place pour ajouter les IDs
364
+
365
+ Args:
366
+ mutations: Liste de mutations (doivent avoir chrom, pos, ref, alt)
367
+ annotate: Si True, annote automatiquement avec le contexte génomique
368
+
369
+ Raises:
370
+ ValueError: Si une mutation n'a pas de coordonnées complètes
371
+ """
372
+ # D'abord, essayer de récupérer les IDs existants
373
+ cls.fetch_ids_from_db_batch(mutations)
374
+
375
+ # Créer les mutations qui n'existent pas
376
+ conn = DbConnection.get_connection()
377
+ cursor = conn.cursor()
378
+
379
+ for mutation in mutations:
380
+ if mutation.id is None:
381
+ # Créer la mutation
382
+ cursor.execute(
383
+ """
384
+ INSERT INTO mutations (chrom, pos, ref, alt)
385
+ VALUES (?, ?, ?, ?)
386
+ """,
387
+ (mutation.chrom, mutation.pos, mutation.ref, mutation.alt),
388
+ )
389
+
390
+ mutation.id = cursor.lastrowid
391
+
392
+ # Annoter si demandé
393
+ if annotate:
394
+ mutation._annotate_mutation()
395
+
396
+ conn.commit()
397
+
398
+ def __repr__(self) -> str:
399
+ if self.state == "full":
400
+ chrom_str = int_to_chrom(self.chrom)
401
+ return f"Mutation(id={self.id}, {chrom_str}:{self.pos} {self.ref}>{self.alt})"
402
+ elif self.state == "miss_id":
403
+ chrom_str = int_to_chrom(self.chrom)
404
+ return f"Mutation({chrom_str}:{self.pos} {self.ref}>{self.alt}, id=?)"
405
+ else:
406
+ return f"Mutation(id={self.id}, coords=?)"
@@ -0,0 +1,131 @@
1
+ from typing import ClassVar, Optional, get_type_hints
2
+
3
+ from pydantic import BaseModel
4
+
5
+ from src.db_connection import DbConnection
6
+ from src.mutations import Mutation
7
+
8
+
9
+ class OutputDescription(BaseModel):
10
+ mutation: Mutation
11
+ version: ClassVar[str]
12
+
13
+ _table_name: ClassVar[str]
14
+ db_fields: ClassVar[list[str]]
15
+
16
+ @property
17
+ def table_name(self) -> str:
18
+ # we do that because it allows automatic views based on "annot_" prefix
19
+ return "annot_" + self._table_name
20
+
21
+ @classmethod
22
+ def _get_all_db_fields(cls) -> list[str]:
23
+ # Always include version automatically
24
+ return ["version"] + cls.db_fields
25
+
26
+ @classmethod
27
+ def _python_type_to_sql(cls, python_type: type) -> str:
28
+ type_map = {
29
+ int: "INTEGER",
30
+ float: "REAL",
31
+ str: "TEXT",
32
+ bool: "INTEGER",
33
+ }
34
+ # Handle Optional types
35
+ origin = getattr(python_type, "__origin__", None)
36
+ if origin is type(None) or str(python_type).startswith("typing.Union"):
37
+ args = getattr(python_type, "__args__", ())
38
+ if args:
39
+ python_type = args[0] if args[0] is not type(None) else args[1]
40
+
41
+ return type_map.get(python_type, "TEXT")
42
+
43
+ @classmethod
44
+ def _ensure_table_exists(cls, table_name: str) -> None:
45
+ conn = DbConnection.get_connection()
46
+ cursor = conn.cursor()
47
+
48
+ type_hints = get_type_hints(cls)
49
+ columns = ["id INTEGER PRIMARY KEY AUTOINCREMENT", "mutation_id INTEGER NOT NULL UNIQUE"]
50
+
51
+ for field_name in cls._get_all_db_fields():
52
+ field_type = type_hints.get(field_name, str)
53
+ sql_type = cls._python_type_to_sql(field_type)
54
+ columns.append(f"{field_name} {sql_type}")
55
+
56
+ columns.append("FOREIGN KEY (mutation_id) REFERENCES mutations(id) ON DELETE CASCADE")
57
+
58
+ cursor.execute(f"""
59
+ CREATE TABLE IF NOT EXISTS {table_name} (
60
+ {', '.join(columns)}
61
+ )
62
+ """)
63
+
64
+ cursor.execute(f"""
65
+ CREATE INDEX IF NOT EXISTS idx_{table_name}_mutation
66
+ ON {table_name}(mutation_id)
67
+ """)
68
+
69
+ conn.commit()
70
+
71
+ def insert_to_db(self, table_name: Optional[str] = None) -> None:
72
+ if table_name is None:
73
+ table_name = self.table_name
74
+
75
+ self._ensure_table_exists(table_name)
76
+ self.mutation.ensure_in_db()
77
+
78
+ conn = DbConnection.get_connection()
79
+ cursor = conn.cursor()
80
+
81
+ all_fields = self._get_all_db_fields()
82
+ fields = ["mutation_id"] + all_fields
83
+ values = [self.mutation.id] + [getattr(self, field) for field in all_fields]
84
+
85
+ placeholders = ", ".join(["?"] * len(values))
86
+
87
+ cursor.execute(
88
+ f"""
89
+ INSERT OR REPLACE INTO {table_name}
90
+ ({', '.join(fields)})
91
+ VALUES ({placeholders})
92
+ """,
93
+ values,
94
+ )
95
+
96
+ conn.commit()
97
+
98
+ @classmethod
99
+ def insert_batch_to_db(
100
+ cls, outputs: list["OutputDescription"], table_name: Optional[str] = None
101
+ ) -> None:
102
+ if table_name is None:
103
+ table_name = cls.table_name
104
+
105
+ cls._ensure_table_exists(table_name)
106
+
107
+ mutations = [output.mutation for output in outputs]
108
+ Mutation.ensure_in_db_batch(mutations)
109
+
110
+ conn = DbConnection.get_connection()
111
+ cursor = conn.cursor()
112
+
113
+ all_fields = cls._get_all_db_fields()
114
+ fields = ["mutation_id"] + all_fields
115
+ placeholders = ", ".join(["?"] * len(fields))
116
+
117
+ values_list = []
118
+ for output in outputs:
119
+ values = [output.mutation.id] + [getattr(output, field) for field in all_fields]
120
+ values_list.append(values)
121
+
122
+ cursor.executemany(
123
+ f"""
124
+ INSERT OR REPLACE INTO {table_name}
125
+ ({', '.join(fields)})
126
+ VALUES ({placeholders})
127
+ """,
128
+ values_list,
129
+ )
130
+
131
+ conn.commit()