keggtangled 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Elisa Márquez Zavala
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,17 @@
1
+ Metadata-Version: 2.4
2
+ Name: keggtangled
3
+ Version: 0.1.0
4
+ Summary: A KEGG tool for managing compounds, reactions, and pathways
5
+ Author-email: Elisa Márquez Zavala <emarquez@lcg.unam.mx>
6
+ Maintainer-email: Elisa Márquez Zavala <emarquez@lcg.unam.mx>
7
+ Requires-Python: >=3.8
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: biopython>=1.79
11
+ Project-URL: Homepage, https://github.com/emarquezz/KEGGtangled
12
+ Project-URL: Issues, https://github.com/emarquezz/KEGGtangled/issues
13
+ Project-URL: Repository, https://github.com/emarquezz/KEGGtangled
14
+
15
+ # KEGGtangled
16
+ Tie all your KEGG data together. Untangle it your way.
17
+
@@ -0,0 +1,2 @@
1
+ # KEGGtangled
2
+ Tie all your KEGG data together. Untangle it your way.
@@ -0,0 +1,28 @@
1
+ [build-system]
2
+ requires = ["flit_core>=3.12"]
3
+ build-backend = "flit_core.buildapi"
4
+
5
+ [project]
6
+ name = "keggtangled"
7
+ dynamic = ["version"]
8
+ description = "A KEGG tool for managing compounds, reactions, and pathways"
9
+ readme = "README.md"
10
+ authors = [
11
+ { name = "Elisa Márquez Zavala", email = "emarquez@lcg.unam.mx" },
12
+ ]
13
+ maintainers = [
14
+ { name = "Elisa Márquez Zavala", email = "emarquez@lcg.unam.mx" },
15
+ ]
16
+ license = { text = "MIT" }
17
+ requires-python = ">=3.8"
18
+ dependencies = [
19
+ "biopython>=1.79",
20
+ ]
21
+
22
+ [project.urls]
23
+ Homepage = "https://github.com/emarquezz/KEGGtangled"
24
+ Repository = "https://github.com/emarquezz/KEGGtangled"
25
+ Issues = "https://github.com/emarquezz/KEGGtangled/issues"
26
+
27
+ [tool.flit.module]
28
+ name = "keggtangled"
@@ -0,0 +1,522 @@
1
+ #!/usr/bin/env python
2
+ # coding: utf-8
3
+ __version__ = "0.1.0"
4
+
5
+ import re
6
+ import os
7
+ import json
8
+ import io
9
+ import hashlib
10
+ from Bio.KEGG.REST import kegg_link, kegg_get
11
+ from Bio.KEGG.KGML.KGML_parser import read as kgml_read
12
+
13
+ # ----------------------------------------------------------------------
14
+ # Pre‑compiled regular expressions
15
+ # ----------------------------------------------------------------------
16
# Captures the K number (e.g. 'K00163') from a bracketed annotation of the
# form '[KO:K00163]'; only brackets holding exactly one K number match.
KO_RE = re.compile(r'\[KO:(K\d+)\]')
17
+
18
+ # ----------------------------------------------------------------------
19
+ # Compound class
20
+ # ----------------------------------------------------------------------
21
class Compound:
    """
    A single KEGG compound (e.g., C00022) tied to the organism it was
    looked up for.

    Attributes are plain fields; ``reactions`` starts empty and is filled
    in by whoever links the compound to reaction IDs.
    """

    def __init__(self, compound_id, organism):
        # Identity and owning organism (the organism supplies KO / gene lookups).
        self.id = compound_id
        self.organism = organism
        # Descriptive fields, unknown until populated by the caller.
        self.name = None
        self.formula = None
        self.mass = None  # molecular weight (string from KEGG)
        # IDs of the reactions this compound takes part in.
        self.reactions = set()

    def __repr__(self):
        return f"Compound({self.id}, {self.name})"

    def get_kos(self):
        """Collect every KO mapped to any reaction this compound is linked to."""
        reaction_to_kos = self.organism._reaction_to_kos
        per_reaction = (reaction_to_kos.get(rxn_id, set()) for rxn_id in self.reactions)
        return set().union(*per_reaction)

    def get_genes(self):
        """Collect every gene reachable from this compound via its KOs."""
        lookup = self.organism.get_genes_for_ko
        per_ko = (lookup(ko) for ko in self.get_kos())
        return set().union(*per_ko)
49
+
50
+
51
+ # ----------------------------------------------------------------------
52
+ # Reaction class
53
+ # ----------------------------------------------------------------------
54
class Reaction:
    """
    A KEGG reaction as seen from one organism.

    At construction time the reaction's KOs are looked up on the organism
    and only those KOs that map to at least one gene are kept in
    ``ko_to_genes``.
    """

    def __init__(self, reaction_id, organism):
        self.reaction_id = reaction_id
        self.organism = organism

        # pathway_id -> {type, substrates, products, substrates_read, products_read,
        #                formula_kegg, formula_read}
        self.formula_per_pathway = {}

        # Pair each KO of this reaction with its genes, dropping gene-less KOs.
        candidate_kos = organism._reaction_to_kos.get(reaction_id, set())
        ko_gene_pairs = ((ko, organism.get_genes_for_ko(ko)) for ko in candidate_kos)
        self.ko_to_genes = {ko: genes for ko, genes in ko_gene_pairs if genes}

    def __repr__(self):
        n_kos = len(self.ko_to_genes)
        n_formulas = len(self.formula_per_pathway)
        return (f"Reaction({self.reaction_id}, {self.organism.org_code}) – "
                f"{n_kos} KOs mapped to genes, "
                f"{n_formulas} pathway formulas")

    def get_genes(self):
        """Return the union of all gene sets stored in ``ko_to_genes``."""
        return set().union(*self.ko_to_genes.values())

    def get_kos(self):
        """Return the KOs that have genes, as a list."""
        return list(self.ko_to_genes)
83
+
84
+
85
+ # ----------------------------------------------------------------------
86
+ # Pathway class
87
+ # ----------------------------------------------------------------------
88
class Pathway:
    """
    A pathway described by the gene IDs and reaction IDs that belong to it.

    ``gene_kos`` is an optional mapping whose keys become ``gene_ids``;
    reactions are added afterwards with :meth:`add_reactions`.
    """

    def __init__(self, pathway_id, gene_kos=None):
        self.id = pathway_id
        # Only the keys (gene IDs) of the mapping are retained.
        self.gene_ids = set() if gene_kos is None else set(gene_kos.keys())
        self.reaction_ids = set()

    def add_reactions(self, reaction_ids):
        """Merge the given reaction IDs into this pathway's reaction set."""
        self.reaction_ids |= set(reaction_ids)

    def __repr__(self):
        n_genes = len(self.gene_ids)
        n_reactions = len(self.reaction_ids)
        return f"Pathway({self.id}, {n_genes} genes, {n_reactions} reactions)"
101
+
102
+
103
+ # ----------------------------------------------------------------------
104
+ # KGML fetcher (cached)
105
+ # ----------------------------------------------------------------------
106
def get_pathway_kgml(pathway_id, cache_dir="kegg_cache"):
    """
    Return a parsed KGML pathway object (Bio.KEGG.KGML.Pathway).

    The raw XML is cached on disk as ``<cache_dir>/<id>.kgml`` and reused on
    later calls.

    Args:
        pathway_id: Pathway identifier, with or without a leading ``path:``.
        cache_dir: Directory holding the cached KGML files (created if needed).

    Returns:
        Whatever ``kgml_read`` produces for the pathway's KGML text.

    Raises:
        Any exception raised by ``kegg_get`` (download) or ``kgml_read``
        (parsing) is propagated to the caller.
    """
    os.makedirs(cache_dir, exist_ok=True)

    # Normalise the identifier so 'path:xxx' and 'xxx' share one cache entry
    # and the file name never contains a ':' (invalid on some file systems).
    prefix = "path:"
    bare_id = pathway_id[len(prefix):] if pathway_id.startswith(prefix) else pathway_id
    full_id = f"{prefix}{bare_id}"
    cache_file = os.path.join(cache_dir, f"{bare_id}.kgml")

    raw_kgml = ""
    if os.path.exists(cache_file):
        with open(cache_file, 'r', encoding='utf-8') as f:
            raw_kgml = f.read()

    # An empty cache file (e.g. left behind by an earlier empty response) is
    # treated as a miss so it cannot break the pathway permanently.
    if not raw_kgml.strip():
        raw_kgml = kegg_get(full_id, "kgml").read()
        if raw_kgml.strip():
            with open(cache_file, 'w', encoding='utf-8') as f:
                f.write(raw_kgml)

    return kgml_read(io.StringIO(raw_kgml))
124
+
125
+
126
+ # ----------------------------------------------------------------------
127
+ # Organism class (fully integrated with Compound, Reaction, Pathway)
128
+ # ----------------------------------------------------------------------
129
class Organism:
    """
    KEGG data for one organism, cross-indexed in memory and cached on disk.

    On construction the KO<->gene and KO<->reaction link tables are loaded
    (from JSON files in ``cache_dir`` when present, otherwise through
    ``kegg_link``) together with any cached compound details. Pathways are
    loaded on demand with :meth:`load_pathway`, which also creates the
    ``Reaction`` and ``Compound`` objects it encounters.

    Args:
        org_code: KEGG organism code handed to ``kegg_link``.
        batch_size: Number of KOs joined into one KO->reaction link request.
        cache_dir: Directory for all cache files (created if needed).
    """

    # One '[KO:...]' annotation; the group may hold one or more K numbers.
    _KO_BLOCK_RE = re.compile(r'\[KO:([^\]]*)\]')
    _KO_ID_RE = re.compile(r'K\d+')

    def __init__(self, org_code, batch_size=10, cache_dir="kegg_cache"):
        self.org_code = org_code
        self.batch_size = batch_size
        self.cache_dir = cache_dir
        os.makedirs(self.cache_dir, exist_ok=True)

        self.pathways = {}                # pathway_id -> Pathway
        self.reactions = {}               # reaction_id -> Reaction
        self._pathway_reaction_map = {}   # reaction_id -> set of pathway IDs
        self._gene_pathway_map = {}       # locus_tag -> set of pathway IDs
        self._ko_to_reactions = {}        # KO -> set of reaction IDs
        self._ko_to_genes = {}            # KO -> set of locus tags
        self._reaction_to_kos = {}        # reaction_id -> set of KOs
        self._gene_to_kos = {}            # reverse mapping: locus_tag -> set of KOs

        # Compound objects dictionary
        self._compounds = {}              # "C00022" -> Compound instance
        self._compound_cache_file = os.path.join(cache_dir, f"{org_code}_compounds.json")

        # Bulk pre-fetching
        self._load_all_ko_genes()          # builds _ko_to_genes and _gene_to_kos
        self._prefetch_all_ko_reactions()  # builds _ko_to_reactions and _reaction_to_kos
        self._load_compounds()

    # ------------------------------------------------------------------
    # Small parsing helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _iter_link_pairs(raw):
        """Yield (first, second) columns of each tab-separated line; skip short lines."""
        for line in raw.splitlines():
            fields = line.split("\t")
            if len(fields) >= 2:
                yield fields[0], fields[1]

    @staticmethod
    def _iter_section_entries(flat_text, keyword):
        """
        Yield ``(tokens, line)`` for every row of one flat-file section.

        The section starts at a line beginning with ``keyword`` (whose
        keyword token is dropped from ``tokens``) and continues over the
        following lines that start with a space.
        """
        in_section = False
        for line in flat_text.splitlines():
            if line.startswith(keyword):
                in_section = True
                yield line.split()[1:], line
            elif in_section:
                if line.startswith(" "):
                    yield line.split(), line
                else:
                    in_section = False

    @staticmethod
    def _parse_compound_flat_text(raw):
        """
        Extract ``name``, ``formula`` and ``mass`` from a compound flat file.

        Returns a dict with those three keys; a value is ``None`` when the
        corresponding field is absent. ``mass`` is taken from ``MOL_WEIGHT``
        and falls back to a ``MASS`` field when no ``MOL_WEIGHT`` is present.
        """
        details = {"name": None, "formula": None, "mass": None}
        fallback_mass = None
        for line in raw.splitlines():
            if line[:1].isspace():
                continue  # continuation rows (e.g. further synonyms)
            fields = line.split(maxsplit=1)
            if len(fields) < 2:
                continue
            key, value = fields[0], fields[1].strip()
            if key == "NAME":
                if details["name"] is None:
                    # Synonyms are ';'-terminated; keep the first, bare name.
                    details["name"] = value.rstrip(";").strip() or None
            elif key == "FORMULA":
                details["formula"] = value
            elif key == "MOL_WEIGHT":
                details["mass"] = value.split()[0]
            elif key == "MASS":
                fallback_mass = value.split()[0]
        if details["mass"] is None:
            details["mass"] = fallback_mass
        return details

    @staticmethod
    def _reaction_ids_from_kgml_name(name):
        """Split a KGML reaction name ('rn:R1 rn:R2') into bare IDs ['R1', 'R2']."""
        return [token.split(':')[-1] for token in name.split()]

    # ------------------------------------------------------------------
    # Caching helper
    # ------------------------------------------------------------------
    def _cache_get(self, key, subdir="", fetcher_func=None, *args, **kwargs):
        """
        Return the text cached for ``key``, fetching and storing it on a miss.

        The cache file lives in ``cache_dir/subdir`` and is named after the
        MD5 hex digest of ``str(key)``. On a miss ``fetcher_func(*args,
        **kwargs)`` is called and its return value is written and returned.
        """
        cache_subdir = os.path.join(self.cache_dir, subdir)
        os.makedirs(cache_subdir, exist_ok=True)
        key_hash = hashlib.md5(str(key).encode()).hexdigest()
        cache_file = os.path.join(cache_subdir, f"{key_hash}.txt")
        if os.path.exists(cache_file):
            with open(cache_file, 'r', encoding='utf-8') as f:
                return f.read()
        raw_data = fetcher_func(*args, **kwargs)
        with open(cache_file, 'w', encoding='utf-8') as f:
            f.write(raw_data)
        return raw_data

    # ------------------------------------------------------------------
    # KO-gene mapping (JSON) + reverse gene->KO mapping
    # ------------------------------------------------------------------
    def _load_all_ko_genes(self):
        """Populate ``_ko_to_genes`` (from JSON cache or KEGG) and ``_gene_to_kos``."""
        json_file = os.path.join(self.cache_dir, f"{self.org_code}_ko_genes.json")
        if os.path.exists(json_file):
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            self._ko_to_genes = {ko: set(genes) for ko, genes in data.items()}
        else:
            raw = kegg_link("ko", self.org_code).read().strip()
            for gene_part, ko_part in self._iter_link_pairs(raw):
                locus = gene_part.split(":", 1)[-1]
                self._ko_to_genes.setdefault(ko_part, set()).add(locus)

            # Do not pin an empty table on disk: an empty answer would
            # otherwise be reused on every later run.
            if self._ko_to_genes:
                json_data = {ko: sorted(genes) for ko, genes in self._ko_to_genes.items()}
                with open(json_file, 'w', encoding='utf-8') as f:
                    json.dump(json_data, f, indent=2)

        # Build reverse mapping (gene -> KOs)
        self._gene_to_kos = {}
        for ko, genes in self._ko_to_genes.items():
            for gene in genes:
                self._gene_to_kos.setdefault(gene, set()).add(ko)

    # ------------------------------------------------------------------
    # KO-reaction mapping (JSON)
    # ------------------------------------------------------------------
    def _prefetch_all_ko_reactions(self):
        """
        Populate ``_ko_to_reactions`` and ``_reaction_to_kos`` for all known KOs.

        Both tables are read from their JSON cache files when both exist.
        Otherwise KOs are queried in batches of ``batch_size``; a batch with
        an empty answer is retried one KO at a time. The result is written
        back to the two JSON files.
        """
        all_kos = list(self._ko_to_genes.keys())
        if not all_kos:
            return

        ko_reactions_file = os.path.join(self.cache_dir, f"{self.org_code}_ko_reactions.json")
        reaction_kos_file = os.path.join(self.cache_dir, f"{self.org_code}_reaction_kos.json")

        if os.path.exists(ko_reactions_file) and os.path.exists(reaction_kos_file):
            with open(ko_reactions_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            self._ko_to_reactions = {ko: set(rxn_list) for ko, rxn_list in data.items()}
            with open(reaction_kos_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            self._reaction_to_kos = {rn: set(ko_list) for rn, ko_list in data.items()}
            return

        for start in range(0, len(all_kos), self.batch_size):
            batch = all_kos[start:start + self.batch_size]
            ko_query = "+".join(batch)
            raw = self._cache_get(
                f"link/rn/{ko_query}",
                subdir="ko_reactions_batches",
                # Bind the query as a default so the closure is not late-bound.
                fetcher_func=lambda query=ko_query: kegg_link("rn", query).read().strip(),
            )
            if not raw:
                for ko in batch:
                    self._get_reactions_for_ko_fallback(ko)
                continue

            for ko_id, rn_part in self._iter_link_pairs(raw):
                # split(":", 1)[-1] tolerates an ID without a database prefix.
                rn_id = rn_part.split(":", 1)[-1]
                self._ko_to_reactions.setdefault(ko_id, set()).add(rn_id)

        # Rebuild the reverse table from scratch so both stay in sync.
        self._reaction_to_kos = {}
        for ko, rxn_set in self._ko_to_reactions.items():
            for rn in rxn_set:
                self._reaction_to_kos.setdefault(rn, set()).add(ko)

        with open(ko_reactions_file, 'w', encoding='utf-8') as f:
            json.dump({ko: sorted(rxn_set) for ko, rxn_set in self._ko_to_reactions.items()},
                      f, indent=2)
        with open(reaction_kos_file, 'w', encoding='utf-8') as f:
            json.dump({rn: sorted(ko_set) for rn, ko_set in self._reaction_to_kos.items()},
                      f, indent=2)

    def _get_reactions_for_ko_fallback(self, ko):
        """Query the reactions of a single KO and record them in both tables."""
        raw = kegg_link("rn", ko).read().strip()
        rxn_set = {rn_part.split(":", 1)[-1] for _, rn_part in self._iter_link_pairs(raw)}
        self._ko_to_reactions[ko] = rxn_set
        for rn in rxn_set:
            self._reaction_to_kos.setdefault(rn, set()).add(ko)

    # ------------------------------------------------------------------
    # Compound management
    # ------------------------------------------------------------------
    def _load_compounds(self):
        """Load cached compound info (name, formula, mass) from JSON."""
        if not os.path.exists(self._compound_cache_file):
            return
        with open(self._compound_cache_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
        for cid, info in data.items():
            comp = Compound(cid, self)
            comp.name = info.get("name")
            comp.formula = info.get("formula")
            comp.mass = info.get("mass")
            self._compounds[cid] = comp
            # (reaction sets will be populated when pathways are loaded)

    def _save_compounds(self):
        """Save compound names, formulas, masses to JSON (reaction sets are transient)."""
        data = {
            cid: {"name": comp.name, "formula": comp.formula, "mass": comp.mass}
            for cid, comp in self._compounds.items()
            if comp.name  # only save if at least a name is known
        }
        with open(self._compound_cache_file, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)

    def get_compound(self, compound_id, fetch_if_missing=True):
        """
        Return a Compound object. If not yet loaded, create it.

        When the compound is created here and ``fetch_if_missing`` is true,
        its details (name, formula, mass) are fetched from KEGG. A compound
        that already exists is returned as-is.
        """
        if compound_id not in self._compounds:
            self._compounds[compound_id] = Compound(compound_id, self)
            if fetch_if_missing:
                self._fetch_compound_details(compound_id)
        return self._compounds[compound_id]

    def _fetch_compound_details(self, compound_id):
        """
        Fetch compound flat file from KEGG and populate name, formula, mass.

        An already-known name is kept. Failures are reported with a printed
        warning instead of being raised (best effort).
        """
        comp = self._compounds.get(compound_id)
        if comp is None:
            return
        try:
            full_id = f"cpd:{compound_id}" if not compound_id.startswith("cpd:") else compound_id
            details = self._parse_compound_flat_text(kegg_get(full_id).read())
            if comp.name is None:
                comp.name = details["name"]
            if details["formula"] is not None:
                comp.formula = details["formula"]
            if details["mass"] is not None:
                comp.mass = details["mass"]
            self._save_compounds()
        except Exception as e:
            print(f"Warning: could not fetch details for compound {compound_id}: {e}")

    def _parse_compound_names_from_text(self, flat_text):
        """
        Extract compound IDs and names from the COMPOUND section of a pathway flat file.
        Updates Compound objects in self._compounds and saves the cache.
        """
        for tokens, _line in self._iter_section_entries(flat_text, "COMPOUND"):
            if len(tokens) >= 2:  # need both an ID and a name
                comp = self.get_compound(tokens[0], fetch_if_missing=False)
                comp.name = ' '.join(tokens[1:])
        self._save_compounds()

    # ------------------------------------------------------------------
    # Public lookups
    # ------------------------------------------------------------------
    def get_genes_for_ko(self, ko):
        """Return the set of locus tags mapped to ``ko`` (empty set if unknown)."""
        return self._ko_to_genes.get(ko, set())

    def get_reactions_for_ko(self, ko):
        """Return the set of reaction IDs mapped to ``ko`` (empty set if unknown)."""
        return self._ko_to_reactions.get(ko, set())

    def get_kos_for_gene(self, locus_tag):
        """Return the set of KOs associated with a gene locus tag."""
        return self._gene_to_kos.get(locus_tag, set())

    # ------------------------------------------------------------------
    # Pathway loading (with compound integration)
    # ------------------------------------------------------------------
    def _parse_gene_kos_from_text(self, flat_text):
        """
        Map gene IDs to their KOs from the GENE section of a pathway flat file.

        Returns ``{gene_id: {"ko:Kxxxxx", ...}}``; genes without a KO
        annotation are omitted. A bracket listing several space-separated
        K numbers contributes all of them.
        """
        gene_kos = {}
        for tokens, line in self._iter_section_entries(flat_text, "GENE"):
            if not tokens:
                continue
            kos = {
                f"ko:{token}"
                for block in self._KO_BLOCK_RE.findall(line)
                for token in block.split()
                if self._KO_ID_RE.fullmatch(token)
            }
            if kos:
                gene_kos[tokens[0]] = kos
        return gene_kos

    def _read_pathway_flat_file(self, pathway_id):
        """Return the pathway flat file text, using ``cache_dir/pathways`` as cache."""
        cache_file = os.path.join(self.cache_dir, "pathways", f"{pathway_id}.txt")
        os.makedirs(os.path.dirname(cache_file), exist_ok=True)

        if os.path.exists(cache_file):
            with open(cache_file, 'r', encoding='utf-8') as f:
                return f.read()

        flat_text = kegg_get(pathway_id).read()
        if flat_text.strip():  # never cache an empty response
            with open(cache_file, 'w', encoding='utf-8') as f:
                f.write(flat_text)
        return flat_text

    def _readable_names(self, compound_names, rxn_ids):
        """Return display names for KGML compound names and link them to ``rxn_ids``."""
        readable = []
        for kegg_name in compound_names:
            cid = kegg_name.split(':')[-1]
            comp = self.get_compound(cid, fetch_if_missing=False)
            readable.append(comp.name if comp.name else cid)
            comp.reactions.update(rxn_ids)
        return readable

    def _attach_kgml_reaction(self, pw, kgml_rxn):
        """Record one KGML reaction (formulas, compound links) on pathway ``pw``."""
        # A KGML reaction name may list several IDs; register every one.
        rxn_ids = self._reaction_ids_from_kgml_name(kgml_rxn.name)

        # Substrates / products as 'cpd:C00022'
        substrates_kegg = [s.name for s in kgml_rxn.substrates]
        products_kegg = [p.name for p in kgml_rxn.products]

        # Short IDs for formulas
        subs_short = [s.split(':')[-1] for s in substrates_kegg]
        prod_short = [p.split(':')[-1] for p in products_kegg]
        arrow = ' <=> ' if kgml_rxn.type == 'reversible' else ' --> '
        has_participants = bool(subs_short or prod_short)

        # Readable names (using Compound objects)
        substrates_read = self._readable_names(substrates_kegg, rxn_ids)
        products_read = self._readable_names(products_kegg, rxn_ids)

        # Both formulas are empty when the reaction lists no participants.
        formula_kegg = ''
        formula_read = ''
        if has_participants:
            formula_kegg = ' + '.join(subs_short) + arrow + ' + '.join(prod_short)
            formula_read = ' + '.join(substrates_read) + arrow + ' + '.join(products_read)

        for rxn_id in rxn_ids:
            rxn_obj = self.get_reaction(rxn_id)
            # Fresh lists per reaction so the records do not alias each other.
            rxn_obj.formula_per_pathway[pw.id] = {
                'type': kgml_rxn.type,
                'substrates': list(substrates_kegg),
                'products': list(products_kegg),
                'substrates_read': list(substrates_read),
                'products_read': list(products_read),
                'formula_kegg': formula_kegg,
                'formula_read': formula_read
            }
            pw.reaction_ids.add(rxn_id)

    def load_pathway(self, pathway_id):
        """
        Load pathway, extract genes, reactions, and compounds; attach formulas.

        A pathway that was loaded before is returned from ``self.pathways``.
        A KGML download/parse failure is reported with a printed warning and
        the pathway is returned without KGML-derived data.
        """
        if pathway_id in self.pathways:
            return self.pathways[pathway_id]

        # 1. Load / fetch the flat file
        flat_text = self._read_pathway_flat_file(pathway_id)
        gene_kos = self._parse_gene_kos_from_text(flat_text)
        pw = Pathway(pathway_id, gene_kos)
        self.pathways[pathway_id] = pw

        # 2. Update compound names from the flat file and save
        self._parse_compound_names_from_text(flat_text)

        # 3. Add reactions from KO annotations (from flat file)
        all_kos = set().union(*gene_kos.values()) if gene_kos else set()
        for ko in all_kos:
            for rn_id in self._ko_to_reactions.get(ko, set()):
                pw.reaction_ids.add(rn_id)
                self.get_reaction(rn_id)  # creates the Reaction if missing

        # 4. Parse KGML and attach per-pathway formulas, link compounds to reactions
        try:
            kgml = get_pathway_kgml(pathway_id, self.cache_dir)
        except Exception as e:
            print(f"Warning: could not fetch/parse KGML for {pathway_id}: {e}")
            kgml = None

        if kgml is not None:
            for kgml_rxn in kgml.reactions:
                self._attach_kgml_reaction(pw, kgml_rxn)

        # 5. Cross-reference maps
        for rn_id in pw.reaction_ids:
            self._pathway_reaction_map.setdefault(rn_id, set()).add(pathway_id)
        for locus in pw.gene_ids:
            self._gene_pathway_map.setdefault(locus, set()).add(pathway_id)

        return pw

    # ------------------------------------------------------------------
    # Convenience methods
    # ------------------------------------------------------------------
    def get_pathway(self, pathway_id):
        """Alias for :meth:`load_pathway`."""
        return self.load_pathway(pathway_id)

    def get_reaction(self, reaction_id):
        """Return the Reaction for ``reaction_id``, creating it on first use."""
        if reaction_id not in self.reactions:
            self.reactions[reaction_id] = Reaction(reaction_id, self)
        return self.reactions[reaction_id]

    def get_pathways_for_reaction(self, reaction_id):
        """Return IDs of the loaded pathways containing ``reaction_id``."""
        return self._pathway_reaction_map.get(reaction_id, set())

    def get_pathways_for_gene(self, locus_tag):
        """Return IDs of the loaded pathways containing ``locus_tag``."""
        return self._gene_pathway_map.get(locus_tag, set())

    def get_genes_for_pathway(self, pathway_id):
        """Return the gene IDs of a loaded pathway (empty set if not loaded)."""
        pw = self.pathways.get(pathway_id)
        return pw.gene_ids if pw else set()

    def get_reactions_for_pathway(self, pathway_id):
        """Return the reaction IDs of a loaded pathway (empty set if not loaded)."""
        pw = self.pathways.get(pathway_id)
        return pw.reaction_ids if pw else set()