PyPI - debase - Versions diffs - 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl - Mend

debase-0.1.3-py3-none-any.whl → debase-0.1.4-py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

debase/_version.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """Version information."""
-__version__ = "0.1.3"
+__version__ = "0.1.4"

debase/enzyme_lineage_extractor.py CHANGED Viewed

@@ -800,15 +800,36 @@ def identify_evolution_locations(
                         _dump(f"=== CAMPAIGN MAPPING PROMPT ===\nLocation: {location_str}\n{'='*80}\n\n{mapping_prompt}", mapping_file)
                     response = model.generate_content(mapping_prompt)
-                    campaign_id = _extract_text(response).strip().strip('"')
+                    response_text = _extract_text(response).strip()
+                    # Extract just the campaign_id from the response
+                    # Look for the campaign_id pattern in the response
+                    campaign_id = None
+                    for campaign in campaigns:
+                        if hasattr(campaign, 'campaign_id') and campaign.campaign_id in response_text:
+                            campaign_id = campaign.campaign_id
+                            break
+                    # If not found, try to extract the last line or quoted string
+                    if not campaign_id:
+                        # Try to find quoted string
+                        quoted_match = re.search(r'"([^"]+)"', response_text)
+                        if quoted_match:
+                            campaign_id = quoted_match.group(1)
+                        else:
+                            # Take the last non-empty line
+                            lines = [line.strip() for line in response_text.split('\n') if line.strip()]
+                            if lines:
+                                campaign_id = lines[-1].strip('"')
                     # Save mapping response to debug if provided
                     if debug_dir:
                         response_file = debug_path / f"campaign_mapping_response_{location_str.replace(' ', '_')}_{int(time.time())}.txt"
-                        _dump(f"=== CAMPAIGN MAPPING RESPONSE ===\nLocation: {location_str}\nMapped to: {campaign_id}\n{'='*80}\n\n{_extract_text(response)}", response_file)
+                        _dump(f"=== CAMPAIGN MAPPING RESPONSE ===\nLocation: {location_str}\nFull response:\n{response_text}\nExtracted campaign_id: {campaign_id}\n{'='*80}", response_file)
                     # Add campaign_id to location
-                    loc['campaign_id'] = campaign_id
+                    if campaign_id:
+                        loc['campaign_id'] = campaign_id
                     log.info(f"Mapped {location_str} to campaign: {campaign_id}")
                 except Exception as exc:
                     log.warning(f"Failed to map location to campaign: {exc}")
@@ -2038,8 +2059,15 @@ def run_pipeline(
     sequences = get_sequences(full_text, model, pdf_paths=pdf_paths, debug_dir=debug_dir)
     # 4a. Try PDB extraction if no sequences found -----------------------------
-    if not sequences or all(s.aa_seq is None for s in sequences):
-        log.info("No sequences found in paper, attempting PDB extraction...")
+    # Check if we need PDB sequences (no sequences or only partial sequences)
+    MIN_PROTEIN_LENGTH = 50  # Most proteins are >50 AA
+    needs_pdb = (not sequences or
+                 all(s.aa_seq is None or (s.aa_seq and len(s.aa_seq) < MIN_PROTEIN_LENGTH)
+                     for s in sequences))
+    if needs_pdb:
+        log.info("No full-length sequences found in paper (only partial sequences < %d AA), attempting PDB extraction...",
+                 MIN_PROTEIN_LENGTH)
         # Extract PDB IDs from all PDFs
         pdb_ids = []

{debase-0.1.3.dist-info → debase-0.1.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: debase
-Version: 0.1.3
+Version: 0.1.4
 Summary: Enzyme lineage analysis and sequence extraction package
 Home-page: https://github.com/YuemingLong/DEBase
 Author: DEBase Team
@@ -61,14 +61,70 @@ Enzyme lineage analysis and sequence extraction package with advanced parallel p
 ## Installation
+### Quick Install (PyPI)
 ```bash
 pip install debase
 ```
+### Development Setup with Conda (Recommended)
+1. **Clone the repository**
+```bash
+git clone https://github.com/YuemingLong/DEBase.git
+cd DEBase
+```
+2. **Create conda environment from provided file**
+```bash
+conda env create -f environment.yml
+conda activate debase
+```
+3. **Install DEBase in development mode**
+```bash
+pip install -e .
+```
+### Manual Setup
+If you prefer to set up the environment manually:
+```bash
+# Create new conda environment
+conda create -n debase python=3.9
+conda activate debase
+# Install conda packages
+conda install -c conda-forge pandas numpy matplotlib seaborn jupyter jupyterlab openpyxl biopython requests tqdm
+# Install RDKit (optional - used for SMILES canonicalization)
+conda install -c conda-forge rdkit
+# Install pip-only packages
+pip install PyMuPDF google-generativeai debase
+```
+**Note about RDKit**: RDKit is optional and only used for canonicalizing SMILES strings in the output. If not installed, DEBase will still function normally but SMILES strings won't be standardized.
 ## Requirements
 - Python 3.8 or higher
 - A Gemini API key (set as environment variable `GEMINI_API_KEY`)
+### Setting up Gemini API Key
+```bash
+# Option 1: Export in your shell
+export GEMINI_API_KEY="your-api-key-here"
+# Option 2: Add to ~/.bashrc or ~/.zshrc for persistence
+echo 'export GEMINI_API_KEY="your-api-key-here"' >> ~/.bashrc
+source ~/.bashrc
+# Option 3: Create .env file in project directory
+echo 'GEMINI_API_KEY=your-api-key-here' > .env
+```
 ## Recent Updates
 - **Campaign-Aware Extraction**: Automatically detects and processes multiple directed evolution campaigns in a single paper

{debase-0.1.3.dist-info → debase-0.1.4.dist-info}/RECORD RENAMED Viewed

@@ -1,17 +1,17 @@
 debase/PIPELINE_FLOW.md,sha256=S4nQyZlX39-Bchw1gQWPK60sHiFpB1eWHqo5GR9oTY8,4741
 debase/__init__.py,sha256=YeKveGj_8fwuu5ozoK2mUU86so_FjiCwsvg1d_lYVZU,586
 debase/__main__.py,sha256=LbxYt2x9TG5Ced7LpzzX_8gkWyXeZSlVHzqHfqAiPwQ,160
-debase/_version.py,sha256=92QgGO0ZoG0AhULGdcMTX2RSEJkv8UZrDw2peYQOh4U,49
+debase/_version.py,sha256=mcDHWqAxAKwMNAAyHmpWVDTK-zafQ1kQjmiwnsZbUD4,49
 debase/build_db.py,sha256=bW574GxsL1BJtDwM19urLbciPcejLzfraXZPpzm09FQ,7167
 debase/cleanup_sequence.py,sha256=QyhUqvTBVFTGM7ebAHmP3tif3Jq-8hvoLApYwAJtpH4,32702
-debase/enzyme_lineage_extractor.py,sha256=sJ9Lz7Usse5NqdoZatoOEDMwbMYEgNH1HCLIGS9avn8,87774
+debase/enzyme_lineage_extractor.py,sha256=s1kPOomvJjfMSN5odxeyXNmxiaOzXyOZICr4YUWU6j8,89288
 debase/lineage_format.py,sha256=mACni9M1RXA_1tIyDZJpStQoutd_HLG2qQMAORTusZs,30045
 debase/reaction_info_extractor.py,sha256=6wWj4IyUNSugNjxpwMGjABSAp68yHABaz_7ZRjh9GEk,112162
 debase/substrate_scope_extractor.py,sha256=dbve8q3K7ggA3A6EwB-KK9L19BnMNgPZMZ05G937dSY,82262
 debase/wrapper.py,sha256=lTx375a57EVuXcZ_roXaj5UDj8HjRcb5ViNaSgPN4Ik,10352
-debase-0.1.3.dist-info/licenses/LICENSE,sha256=5sk9_tcNmr1r2iMIUAiioBo7wo38u8BrPlO7f0seqgE,1075
-debase-0.1.3.dist-info/METADATA,sha256=WUJha43ZPKgGDNZD1DYu8CfJwxUOj09kzuPSqfwe96s,9382
-debase-0.1.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-debase-0.1.3.dist-info/entry_points.txt,sha256=hUcxA1b4xORu-HHBFTe9u2KTdbxPzt0dwz95_6JNe9M,48
-debase-0.1.3.dist-info/top_level.txt,sha256=2BUeq-4kmQr0Rhl06AnRzmmZNs8WzBRK9OcJehkcdk8,7
-debase-0.1.3.dist-info/RECORD,,
+debase-0.1.4.dist-info/licenses/LICENSE,sha256=5sk9_tcNmr1r2iMIUAiioBo7wo38u8BrPlO7f0seqgE,1075
+debase-0.1.4.dist-info/METADATA,sha256=fZwXCP1i1s0VNq7Ds5bd2ys3pONgaV1XCe_edUkQdRU,10789
+debase-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+debase-0.1.4.dist-info/entry_points.txt,sha256=hUcxA1b4xORu-HHBFTe9u2KTdbxPzt0dwz95_6JNe9M,48
+debase-0.1.4.dist-info/top_level.txt,sha256=2BUeq-4kmQr0Rhl06AnRzmmZNs8WzBRK9OcJehkcdk8,7
+debase-0.1.4.dist-info/RECORD,,

{debase-0.1.3.dist-info → debase-0.1.4.dist-info}/WHEEL RENAMED Viewed

File without changes

{debase-0.1.3.dist-info → debase-0.1.4.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{debase-0.1.3.dist-info → debase-0.1.4.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{debase-0.1.3.dist-info → debase-0.1.4.dist-info}/top_level.txt RENAMED Viewed

File without changes

debase 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl

debase-0.1.3-py3-none-any.whl → debase-0.1.4-py3-none-any.whl