debase 0.5.1__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
debase/_version.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """Version information."""
2
2
 
3
- __version__ = "0.5.1"
3
+ __version__ = "0.6.0"
debase/lineage_format.py CHANGED
@@ -35,7 +35,6 @@ import logging
35
35
  import os
36
36
  import pickle
37
37
  import re
38
- import sqlite3
39
38
  import sys
40
39
  import time
41
40
  from concurrent.futures import ThreadPoolExecutor, as_completed
@@ -137,8 +136,7 @@ SUBSTRATE_CACHE_FILE: Path = CACHE_DIR / "substrate_smiles_cache.pkl"
137
136
  CANONICAL_CACHE_FILE: Path = CACHE_DIR / "canonical_smiles_cache.pkl"
138
137
  CACHE_DIR.mkdir(parents=True, exist_ok=True)
139
138
 
140
- # Local PubChem DB (optional) --------------------------------------------------------
141
- PUBCHEM_DB_PATH: Path = Path(__file__).parent.parent.parent / "data" / "iupac2smiles.db"
139
+ # API endpoints for IUPAC to SMILES conversion --------------------------------------
142
140
 
143
141
  # Gemini API configuration -----------------------------------------------------------
144
142
  GEMINI_API_KEY: str = os.environ.get("GEMINI_API_KEY", "")
@@ -323,37 +321,7 @@ SUBSTRATE_CACHE: Dict[str, str] = _load_pickle(SUBSTRATE_CACHE_FILE)
323
321
  CANONICAL_CACHE: Dict[str, str] = _load_pickle(CANONICAL_CACHE_FILE)
324
322
 
325
323
 
326
- # --- Database lookup ---------------------------------------------------------------
327
- class PubChemDB:
328
- """Very thin wrapper around a local SQLite mapping IUPAC -> SMILES."""
329
-
330
- def __init__(self, path: Path | str) -> None:
331
- self.path = Path(path)
332
- self._conn: Optional[sqlite3.Connection] = None
333
- if not self.path.exists():
334
- log.warning("Local PubChem DB not found at %s", self.path)
335
-
336
- def _connect(self) -> sqlite3.Connection:
337
- if self._conn is None:
338
- self._conn = sqlite3.connect(str(self.path))
339
- return self._conn
340
-
341
- def lookup(self, name: str) -> Optional[str]:
342
- if not self.path.exists():
343
- return None
344
- sql = "SELECT smiles FROM x WHERE name = ? LIMIT 1"
345
- try:
346
- # Create a new connection for thread safety
347
- conn = sqlite3.connect(str(self.path))
348
- cur = conn.execute(sql, (name.lower(),))
349
- row = cur.fetchone()
350
- conn.close()
351
- return row[0] if row else None
352
- except Exception: # pragma: no cover
353
- return None
354
-
355
-
356
- PC_DB = PubChemDB(PUBCHEM_DB_PATH)
324
+ # --- Removed local database - using only online APIs -------------------------------
357
325
 
358
326
 
359
327
  # === 5. SEQUENCE / MUTATION HELPERS ================================================
@@ -481,12 +449,7 @@ def _name_to_smiles(name: str, is_substrate: bool) -> str:
481
449
  if not name or name.lower() in ['nan', 'none', 'null', 'n/a', 'na', '']:
482
450
  return ""
483
451
 
484
- # 1. Local DB (fast, offline)
485
- db_smiles = PC_DB.lookup(name)
486
- if db_smiles:
487
- return db_smiles
488
-
489
- # 2. OPSIN (if installed) ---------------------------------------------------
452
+ # 1. OPSIN (if installed) - fast and reliable for IUPAC names
490
453
  try:
491
454
  import subprocess
492
455
 
@@ -503,12 +466,7 @@ def _name_to_smiles(name: str, is_substrate: bool) -> str:
503
466
  except FileNotFoundError:
504
467
  pass # OPSIN not installed
505
468
 
506
- # 3. Gemini search (for complex compounds) ---------------------------------
507
- gemini_smiles = search_smiles_with_gemini(name)
508
- if gemini_smiles:
509
- return gemini_smiles
510
-
511
- # 4. PubChem PUG REST (online) ---------------------------------------------
469
+ # 2. PubChem PUG REST API (online) - comprehensive database
512
470
  try:
513
471
  import requests
514
472
 
@@ -521,6 +479,11 @@ def _name_to_smiles(name: str, is_substrate: bool) -> str:
521
479
  return pug_smiles
522
480
  except Exception: # pragma: no cover
523
481
  pass
482
+
483
+ # 3. Gemini search (for complex compounds) - AI fallback
484
+ gemini_smiles = search_smiles_with_gemini(name)
485
+ if gemini_smiles:
486
+ return gemini_smiles
524
487
 
525
488
  # Return empty string if all methods fail
526
489
  return ""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: debase
3
- Version: 0.5.1
3
+ Version: 0.6.0
4
4
  Summary: Enzyme lineage analysis and sequence extraction package
5
5
  Home-page: https://github.com/YuemingLong/DEBase
6
6
  Author: DEBase Team
@@ -1,16 +1,16 @@
1
1
  debase/__init__.py,sha256=YeKveGj_8fwuu5ozoK2mUU86so_FjiCwsvg1d_lYVZU,586
2
2
  debase/__main__.py,sha256=LbxYt2x9TG5Ced7LpzzX_8gkWyXeZSlVHzqHfqAiPwQ,160
3
- debase/_version.py,sha256=iDuv12GVbaAFXFufv3yqwn-3Hwv9Kua4nJZQ-gUNJXw,49
3
+ debase/_version.py,sha256=C3pfAAiv8eypvKmkKXrxXIwx9sLe4TtShkzld5GJJ7k,49
4
4
  debase/build_db.py,sha256=bW574GxsL1BJtDwM19urLbciPcejLzfraXZPpzm09FQ,7167
5
5
  debase/cleanup_sequence.py,sha256=qKAou871Eri4SDQMz-XCfD3D2BuuINxSxzJZMACJ7p4,73313
6
6
  debase/enzyme_lineage_extractor.py,sha256=C2rVFyM84TvDy7hvk_xIeVSdh1F6WSe4QQB8B8QrPC4,168026
7
- debase/lineage_format.py,sha256=Omb3oug0oEfQLcC_5XsbACvTDV7PFIIlGRtOhxC7Nwo,57844
7
+ debase/lineage_format.py,sha256=BE8uW1XUCmxlcYKiD7QveF4r99xObfGf1vP1rZzJTV8,56525
8
8
  debase/reaction_info_extractor.py,sha256=9QXbtp0RSP6QMqQ_azBWDceGIqiw2JPCg3eJ0Ba_lxA,167849
9
9
  debase/substrate_scope_extractor.py,sha256=ydU6iZVRw3fLyQ8kIQs6ZuruBMvM4mMXIeGuPgCUOn4,115956
10
10
  debase/wrapper.py,sha256=0z1BRvs3pzuPV_sgJxrBVmX_IXqwX3tB4u0GXdSgR3c,24568
11
- debase-0.5.1.dist-info/licenses/LICENSE,sha256=5sk9_tcNmr1r2iMIUAiioBo7wo38u8BrPlO7f0seqgE,1075
12
- debase-0.5.1.dist-info/METADATA,sha256=GoaBFl0kdh8dtrApBTMoLWH6fe5GYLiSYC5JrohbPcI,4047
13
- debase-0.5.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
- debase-0.5.1.dist-info/entry_points.txt,sha256=hUcxA1b4xORu-HHBFTe9u2KTdbxPzt0dwz95_6JNe9M,48
15
- debase-0.5.1.dist-info/top_level.txt,sha256=2BUeq-4kmQr0Rhl06AnRzmmZNs8WzBRK9OcJehkcdk8,7
16
- debase-0.5.1.dist-info/RECORD,,
11
+ debase-0.6.0.dist-info/licenses/LICENSE,sha256=5sk9_tcNmr1r2iMIUAiioBo7wo38u8BrPlO7f0seqgE,1075
12
+ debase-0.6.0.dist-info/METADATA,sha256=o_Iq-jjk_ekBLLheIQZuLEeC7tWbduq70b5jqfiCmro,4047
13
+ debase-0.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
+ debase-0.6.0.dist-info/entry_points.txt,sha256=hUcxA1b4xORu-HHBFTe9u2KTdbxPzt0dwz95_6JNe9M,48
15
+ debase-0.6.0.dist-info/top_level.txt,sha256=2BUeq-4kmQr0Rhl06AnRzmmZNs8WzBRK9OcJehkcdk8,7
16
+ debase-0.6.0.dist-info/RECORD,,
File without changes