debase 0.5.1__tar.gz → 0.6.0__tar.gz
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {debase-0.5.1/src/debase.egg-info → debase-0.6.0}/PKG-INFO +1 -1
- {debase-0.5.1 → debase-0.6.0}/src/debase/_version.py +1 -1
- {debase-0.5.1 → debase-0.6.0}/src/debase/lineage_format.py +9 -46
- {debase-0.5.1 → debase-0.6.0/src/debase.egg-info}/PKG-INFO +1 -1
- {debase-0.5.1 → debase-0.6.0}/.gitignore +0 -0
- {debase-0.5.1 → debase-0.6.0}/LICENSE +0 -0
- {debase-0.5.1 → debase-0.6.0}/MANIFEST.in +0 -0
- {debase-0.5.1 → debase-0.6.0}/README.md +0 -0
- {debase-0.5.1 → debase-0.6.0}/environment.yml +0 -0
- {debase-0.5.1 → debase-0.6.0}/pyproject.toml +0 -0
- {debase-0.5.1 → debase-0.6.0}/setup.cfg +0 -0
- {debase-0.5.1 → debase-0.6.0}/setup.py +0 -0
- {debase-0.5.1 → debase-0.6.0}/src/__init__.py +0 -0
- {debase-0.5.1 → debase-0.6.0}/src/debase/__init__.py +0 -0
- {debase-0.5.1 → debase-0.6.0}/src/debase/__main__.py +0 -0
- {debase-0.5.1 → debase-0.6.0}/src/debase/build_db.py +0 -0
- {debase-0.5.1 → debase-0.6.0}/src/debase/cleanup_sequence.py +0 -0
- {debase-0.5.1 → debase-0.6.0}/src/debase/enzyme_lineage_extractor.py +0 -0
- {debase-0.5.1 → debase-0.6.0}/src/debase/reaction_info_extractor.py +0 -0
- {debase-0.5.1 → debase-0.6.0}/src/debase/substrate_scope_extractor.py +0 -0
- {debase-0.5.1 → debase-0.6.0}/src/debase/wrapper.py +0 -0
- {debase-0.5.1 → debase-0.6.0}/src/debase.egg-info/SOURCES.txt +0 -0
- {debase-0.5.1 → debase-0.6.0}/src/debase.egg-info/dependency_links.txt +0 -0
- {debase-0.5.1 → debase-0.6.0}/src/debase.egg-info/entry_points.txt +0 -0
- {debase-0.5.1 → debase-0.6.0}/src/debase.egg-info/requires.txt +0 -0
- {debase-0.5.1 → debase-0.6.0}/src/debase.egg-info/top_level.txt +0 -0
@@ -35,7 +35,6 @@ import logging
|
|
35
35
|
import os
|
36
36
|
import pickle
|
37
37
|
import re
|
38
|
-
import sqlite3
|
39
38
|
import sys
|
40
39
|
import time
|
41
40
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
@@ -137,8 +136,7 @@ SUBSTRATE_CACHE_FILE: Path = CACHE_DIR / "substrate_smiles_cache.pkl"
|
|
137
136
|
CANONICAL_CACHE_FILE: Path = CACHE_DIR / "canonical_smiles_cache.pkl"
|
138
137
|
CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
139
138
|
|
140
|
-
#
|
141
|
-
PUBCHEM_DB_PATH: Path = Path(__file__).parent.parent.parent / "data" / "iupac2smiles.db"
|
139
|
+
# API endpoints for IUPAC to SMILES conversion --------------------------------------
|
142
140
|
|
143
141
|
# Gemini API configuration -----------------------------------------------------------
|
144
142
|
GEMINI_API_KEY: str = os.environ.get("GEMINI_API_KEY", "")
|
@@ -323,37 +321,7 @@ SUBSTRATE_CACHE: Dict[str, str] = _load_pickle(SUBSTRATE_CACHE_FILE)
|
|
323
321
|
CANONICAL_CACHE: Dict[str, str] = _load_pickle(CANONICAL_CACHE_FILE)
|
324
322
|
|
325
323
|
|
326
|
-
# ---
|
327
|
-
class PubChemDB:
|
328
|
-
"""Very thin wrapper around a local SQLite mapping IUPAC -> SMILES."""
|
329
|
-
|
330
|
-
def __init__(self, path: Path | str) -> None:
|
331
|
-
self.path = Path(path)
|
332
|
-
self._conn: Optional[sqlite3.Connection] = None
|
333
|
-
if not self.path.exists():
|
334
|
-
log.warning("Local PubChem DB not found at %s", self.path)
|
335
|
-
|
336
|
-
def _connect(self) -> sqlite3.Connection:
|
337
|
-
if self._conn is None:
|
338
|
-
self._conn = sqlite3.connect(str(self.path))
|
339
|
-
return self._conn
|
340
|
-
|
341
|
-
def lookup(self, name: str) -> Optional[str]:
|
342
|
-
if not self.path.exists():
|
343
|
-
return None
|
344
|
-
sql = "SELECT smiles FROM x WHERE name = ? LIMIT 1"
|
345
|
-
try:
|
346
|
-
# Create a new connection for thread safety
|
347
|
-
conn = sqlite3.connect(str(self.path))
|
348
|
-
cur = conn.execute(sql, (name.lower(),))
|
349
|
-
row = cur.fetchone()
|
350
|
-
conn.close()
|
351
|
-
return row[0] if row else None
|
352
|
-
except Exception: # pragma: no cover
|
353
|
-
return None
|
354
|
-
|
355
|
-
|
356
|
-
PC_DB = PubChemDB(PUBCHEM_DB_PATH)
|
324
|
+
# --- Removed local database - using only online APIs -------------------------------
|
357
325
|
|
358
326
|
|
359
327
|
# === 5. SEQUENCE / MUTATION HELPERS ================================================
|
@@ -481,12 +449,7 @@ def _name_to_smiles(name: str, is_substrate: bool) -> str:
|
|
481
449
|
if not name or name.lower() in ['nan', 'none', 'null', 'n/a', 'na', '']:
|
482
450
|
return ""
|
483
451
|
|
484
|
-
# 1.
|
485
|
-
db_smiles = PC_DB.lookup(name)
|
486
|
-
if db_smiles:
|
487
|
-
return db_smiles
|
488
|
-
|
489
|
-
# 2. OPSIN (if installed) ---------------------------------------------------
|
452
|
+
# 1. OPSIN (if installed) - fast and reliable for IUPAC names
|
490
453
|
try:
|
491
454
|
import subprocess
|
492
455
|
|
@@ -503,12 +466,7 @@ def _name_to_smiles(name: str, is_substrate: bool) -> str:
|
|
503
466
|
except FileNotFoundError:
|
504
467
|
pass # OPSIN not installed
|
505
468
|
|
506
|
-
#
|
507
|
-
gemini_smiles = search_smiles_with_gemini(name)
|
508
|
-
if gemini_smiles:
|
509
|
-
return gemini_smiles
|
510
|
-
|
511
|
-
# 4. PubChem PUG REST (online) ---------------------------------------------
|
469
|
+
# 2. PubChem PUG REST API (online) - comprehensive database
|
512
470
|
try:
|
513
471
|
import requests
|
514
472
|
|
@@ -521,6 +479,11 @@ def _name_to_smiles(name: str, is_substrate: bool) -> str:
|
|
521
479
|
return pug_smiles
|
522
480
|
except Exception: # pragma: no cover
|
523
481
|
pass
|
482
|
+
|
483
|
+
# 3. Gemini search (for complex compounds) - AI fallback
|
484
|
+
gemini_smiles = search_smiles_with_gemini(name)
|
485
|
+
if gemini_smiles:
|
486
|
+
return gemini_smiles
|
524
487
|
|
525
488
|
# Return empty string if all methods fail
|
526
489
|
return ""
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|