PyPI - debase - Versions diffs - 0.1.7__tar.gz → 0.1.8__tar.gz - Mend

debase 0.1.7 (tar.gz) → 0.1.8 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

{debase-0.1.7 → debase-0.1.8}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: debase
-Version: 0.1.7
+Version: 0.1.8
 Summary: Enzyme lineage analysis and sequence extraction package
 Home-page: https://github.com/YuemingLong/DEBase
 Author: DEBase Team

{debase-0.1.7 → debase-0.1.8}/src/debase/_version.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """Version information."""
-__version__ = "0.1.7"
+__version__ = "0.1.8"

{debase-0.1.7 → debase-0.1.8}/src/debase/enzyme_lineage_extractor.py RENAMED Viewed

@@ -2005,6 +2005,10 @@ Return ONLY a JSON object mapping lineage variant IDs to sequence variant IDs. I
                         log.info(f"Matched {lineage_id} -> {seq_id} using Gemini")
+                # Log the final state after all matches
+                matched_count = (~df['aa_seq'].isna()).sum()
+                log.info(f"After Gemini matching: {matched_count}/{len(df)} variants have sequences")
             except Exception as e:
                 log.warning(f"Failed to match variants using Gemini: {e}")
@@ -2025,6 +2029,12 @@ Return ONLY a JSON object mapping lineage variant IDs to sequence variant IDs. I
     # 5. Sort rows: primary by generation, then by variant_id
     df = df.sort_values(["generation", "variant_id"], kind="mergesort")
+    # Debug: Log final merge state
+    seq_count = (~df['aa_seq'].isna()).sum()
+    log.info(f"_merge_lineage_and_sequences returning: {len(df)} variants, {seq_count} with sequences")
+    if seq_count > 0:
+        log.info(f"Sample variant with sequence: {df[~df['aa_seq'].isna()].iloc[0]['variant_id']}")
     return df
 # --- 8.3  Public API -----------------------------------------------------------
@@ -2053,6 +2063,10 @@ def merge_and_score(
     if missing_rate > 0.5:
         log.warning(">50%% of variants lack sequences (%d / %d)", df["aa_seq"].isna().sum(), len(df))
+    # Debug log before returning
+    seq_count = (~df['aa_seq'].isna()).sum() if 'aa_seq' in df else 0
+    log.info(f"merge_and_score returning: {len(df)} variants, {seq_count} with sequences")
     return df
 # -------------------------------------------------------------------- end 8 ---
@@ -2236,6 +2250,14 @@ def run_pipeline(
         output_csv_path = Path(output_csv)
         # Save final data with sequences using same filename (overwrites lineage-only)
         sequence_path = output_csv_path.parent / "enzyme_lineage_data.csv"
+        # Debug: Log what we're about to save
+        seq_count = (~df_final['aa_seq'].isna()).sum() if 'aa_seq' in df_final else 0
+        log.info(f"About to save CSV: {len(df_final)} variants, {seq_count} with sequences")
+        if seq_count > 0 and 'aa_seq' in df_final:
+            with_seq = df_final[~df_final['aa_seq'].isna()]
+            log.info(f"First variant with sequence: {with_seq.iloc[0]['variant_id']} has {len(with_seq.iloc[0]['aa_seq'])} AA")
         df_final.to_csv(sequence_path, index=False)
         log.info(
             "Overwrote with final results -> %s (%.1f kB)",

{debase-0.1.7 → debase-0.1.8}/src/debase.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: debase
-Version: 0.1.7
+Version: 0.1.8
 Summary: Enzyme lineage analysis and sequence extraction package
 Home-page: https://github.com/YuemingLong/DEBase
 Author: DEBase Team