debase 0.1.7__tar.gz → 0.1.8__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (30)
  1. {debase-0.1.7 → debase-0.1.8}/PKG-INFO +1 -1
  2. {debase-0.1.7 → debase-0.1.8}/src/debase/_version.py +1 -1
  3. {debase-0.1.7 → debase-0.1.8}/src/debase/enzyme_lineage_extractor.py +22 -0
  4. {debase-0.1.7 → debase-0.1.8}/src/debase.egg-info/PKG-INFO +1 -1
  5. {debase-0.1.7 → debase-0.1.8}/.gitignore +0 -0
  6. {debase-0.1.7 → debase-0.1.8}/CONTRIBUTING.md +0 -0
  7. {debase-0.1.7 → debase-0.1.8}/LICENSE +0 -0
  8. {debase-0.1.7 → debase-0.1.8}/MANIFEST.in +0 -0
  9. {debase-0.1.7 → debase-0.1.8}/README.md +0 -0
  10. {debase-0.1.7 → debase-0.1.8}/docs/README.md +0 -0
  11. {debase-0.1.7 → debase-0.1.8}/docs/examples/README.md +0 -0
  12. {debase-0.1.7 → debase-0.1.8}/environment.yml +0 -0
  13. {debase-0.1.7 → debase-0.1.8}/pyproject.toml +0 -0
  14. {debase-0.1.7 → debase-0.1.8}/setup.cfg +0 -0
  15. {debase-0.1.7 → debase-0.1.8}/setup.py +0 -0
  16. {debase-0.1.7 → debase-0.1.8}/src/__init__.py +0 -0
  17. {debase-0.1.7 → debase-0.1.8}/src/debase/PIPELINE_FLOW.md +0 -0
  18. {debase-0.1.7 → debase-0.1.8}/src/debase/__init__.py +0 -0
  19. {debase-0.1.7 → debase-0.1.8}/src/debase/__main__.py +0 -0
  20. {debase-0.1.7 → debase-0.1.8}/src/debase/build_db.py +0 -0
  21. {debase-0.1.7 → debase-0.1.8}/src/debase/cleanup_sequence.py +0 -0
  22. {debase-0.1.7 → debase-0.1.8}/src/debase/lineage_format.py +0 -0
  23. {debase-0.1.7 → debase-0.1.8}/src/debase/reaction_info_extractor.py +0 -0
  24. {debase-0.1.7 → debase-0.1.8}/src/debase/substrate_scope_extractor.py +0 -0
  25. {debase-0.1.7 → debase-0.1.8}/src/debase/wrapper.py +0 -0
  26. {debase-0.1.7 → debase-0.1.8}/src/debase.egg-info/SOURCES.txt +0 -0
  27. {debase-0.1.7 → debase-0.1.8}/src/debase.egg-info/dependency_links.txt +0 -0
  28. {debase-0.1.7 → debase-0.1.8}/src/debase.egg-info/entry_points.txt +0 -0
  29. {debase-0.1.7 → debase-0.1.8}/src/debase.egg-info/requires.txt +0 -0
  30. {debase-0.1.7 → debase-0.1.8}/src/debase.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: debase
3
- Version: 0.1.7
3
+ Version: 0.1.8
4
4
  Summary: Enzyme lineage analysis and sequence extraction package
5
5
  Home-page: https://github.com/YuemingLong/DEBase
6
6
  Author: DEBase Team
@@ -1,3 +1,3 @@
1
1
  """Version information."""
2
2
 
3
- __version__ = "0.1.7"
3
+ __version__ = "0.1.8"
@@ -2005,6 +2005,10 @@ Return ONLY a JSON object mapping lineage variant IDs to sequence variant IDs. I
2005
2005
 
2006
2006
  log.info(f"Matched {lineage_id} -> {seq_id} using Gemini")
2007
2007
 
2008
+ # Log the final state after all matches
2009
+ matched_count = (~df['aa_seq'].isna()).sum()
2010
+ log.info(f"After Gemini matching: {matched_count}/{len(df)} variants have sequences")
2011
+
2008
2012
  except Exception as e:
2009
2013
  log.warning(f"Failed to match variants using Gemini: {e}")
2010
2014
 
@@ -2025,6 +2029,12 @@ Return ONLY a JSON object mapping lineage variant IDs to sequence variant IDs. I
2025
2029
  # 5. Sort rows: primary by generation, then by variant_id
2026
2030
  df = df.sort_values(["generation", "variant_id"], kind="mergesort")
2027
2031
 
2032
+ # Debug: Log final merge state
2033
+ seq_count = (~df['aa_seq'].isna()).sum()
2034
+ log.info(f"_merge_lineage_and_sequences returning: {len(df)} variants, {seq_count} with sequences")
2035
+ if seq_count > 0:
2036
+ log.info(f"Sample variant with sequence: {df[~df['aa_seq'].isna()].iloc[0]['variant_id']}")
2037
+
2028
2038
  return df
2029
2039
 
2030
2040
  # --- 8.3 Public API -----------------------------------------------------------
@@ -2053,6 +2063,10 @@ def merge_and_score(
2053
2063
  if missing_rate > 0.5:
2054
2064
  log.warning(">50%% of variants lack sequences (%d / %d)", df["aa_seq"].isna().sum(), len(df))
2055
2065
 
2066
+ # Debug log before returning
2067
+ seq_count = (~df['aa_seq'].isna()).sum() if 'aa_seq' in df else 0
2068
+ log.info(f"merge_and_score returning: {len(df)} variants, {seq_count} with sequences")
2069
+
2056
2070
  return df
2057
2071
 
2058
2072
  # -------------------------------------------------------------------- end 8 ---
@@ -2236,6 +2250,14 @@ def run_pipeline(
2236
2250
  output_csv_path = Path(output_csv)
2237
2251
  # Save final data with sequences using same filename (overwrites lineage-only)
2238
2252
  sequence_path = output_csv_path.parent / "enzyme_lineage_data.csv"
2253
+
2254
+ # Debug: Log what we're about to save
2255
+ seq_count = (~df_final['aa_seq'].isna()).sum() if 'aa_seq' in df_final else 0
2256
+ log.info(f"About to save CSV: {len(df_final)} variants, {seq_count} with sequences")
2257
+ if seq_count > 0 and 'aa_seq' in df_final:
2258
+ with_seq = df_final[~df_final['aa_seq'].isna()]
2259
+ log.info(f"First variant with sequence: {with_seq.iloc[0]['variant_id']} has {len(with_seq.iloc[0]['aa_seq'])} AA")
2260
+
2239
2261
  df_final.to_csv(sequence_path, index=False)
2240
2262
  log.info(
2241
2263
  "Overwrote with final results -> %s (%.1f kB)",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: debase
3
- Version: 0.1.7
3
+ Version: 0.1.8
4
4
  Summary: Enzyme lineage analysis and sequence extraction package
5
5
  Home-page: https://github.com/YuemingLong/DEBase
6
6
  Author: DEBase Team
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes