PyPI - debase - Versions diffs - 0.1.18__py3-none-any.whl → 0.1.19__py3-none-any.whl - Mend

debase 0.1.18__py3-none-any.whl → 0.1.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

debase/_version.py CHANGED Viewed

@@ -1,3 +1,3 @@
 """Version information."""
-__version__ = "0.1.18"
+__version__ = "0.1.19"

debase/reaction_info_extractor.py CHANGED Viewed

@@ -927,12 +927,77 @@ Ignore locations that contain data for other campaigns.
     # ------------------------------------------------------------------
     # 6.3 Extract metrics in batch
     # ------------------------------------------------------------------
+    def _validate_location_exists(self, ref: str) -> bool:
+        """Verify that the referenced location actually exists in the document."""
+        # Search for the actual reference in the document
+        for page_num in range(len(self.doc)):
+            page = self.doc[page_num]
+            text = page.get_text()
+            # Look for table references like "Table 1", "Table S1", etc.
+            if re.search(rf'\b{re.escape(ref)}\b', text, re.IGNORECASE):
+                return True
+        return False
+    def _validate_context(self, snippet: str, enzyme_list: List[str], ref: str) -> bool:
+        """Validate that the context contains meaningful content for extraction."""
+        if not snippet or len(snippet.strip()) < 50:
+            LOGGER.warning("Insufficient context for extraction from %s - skipping", ref)
+            return False
+        # Check if context actually mentions the enzymes we're looking for
+        enzyme_mentions = sum(1 for enzyme in enzyme_list if enzyme.lower() in snippet.lower())
+        if enzyme_mentions == 0:
+            LOGGER.warning("No enzyme mentions found in context for %s - skipping", ref)
+            return False
+        # Check for performance-related keywords
+        performance_keywords = ['yield', 'selectivity', 'conversion', 'ee', 'er', 'ttn', 'ton', 'tof', '%', 'percent']
+        has_performance_data = any(keyword in snippet.lower() for keyword in performance_keywords)
+        if not has_performance_data:
+            LOGGER.warning("No performance metrics found in context for %s - skipping", ref)
+            return False
+        LOGGER.info("Context validated for %s: %d chars, %d enzyme mentions", ref, len(snippet), enzyme_mentions)
+        return True
+    def _validate_response(self, data: Dict, enzyme_list: List[str], ref: str) -> bool:
+        """Validate that the response contains meaningful data for the requested enzymes."""
+        if not data or not isinstance(data, dict):
+            LOGGER.warning("Invalid response format from %s - skipping", ref)
+            return False
+        # Check if we got data for at least one enzyme
+        enzymes_with_data = 0
+        for enzyme in enzyme_list:
+            enzyme_data = data.get(enzyme, {})
+            if isinstance(enzyme_data, dict) and enzyme_data:
+                # Check if there's at least one non-null metric
+                metrics = ['yield', 'ttn', 'ton', 'selectivity', 'conversion', 'tof', 'activity']
+                has_metric = any(enzyme_data.get(metric) is not None for metric in metrics)
+                if has_metric:
+                    enzymes_with_data += 1
+        if enzymes_with_data == 0:
+            LOGGER.warning("No valid metrics found in response from %s - skipping", ref)
+            return False
+        LOGGER.info("Response validated for %s: %d enzymes with data", ref, enzymes_with_data)
+        return True
     def extract_metrics_batch(self, enzyme_list: List[str], ref: str) -> List[Dict[str, Any]]:
         """Extract performance metrics for multiple enzymes from the identified location in batch."""
         ref_lc = ref.lower()
         image_b64: Optional[str] = None
+        # First, validate that the location actually exists in the document
+        if not self._validate_location_exists(ref):
+            LOGGER.warning("Location %s not found in document - skipping", ref)
+            return []
         # Add campaign context if available
         campaign_context = ""
         if self.campaign_filter:
@@ -953,6 +1018,10 @@ Ignore locations that contain data for other campaigns.
         else:
             snippet = self._page_with_reference(ref) or ""
+        # Validate context before sending to Gemini
+        if not image_b64 and not self._validate_context(snippet, enzyme_list, ref):
+            return []
         enzyme_names = "\n".join([f"- {enzyme}" for enzyme in enzyme_list])
         if image_b64:
@@ -977,6 +1046,10 @@ Ignore locations that contain data for other campaigns.
                 image_b64=image_b64
             )
+            # Validate response has meaningful data
+            if not self._validate_response(data, enzyme_list, ref):
+                return []
             # Handle the response format - expecting a dict with enzyme names as keys
             results = []
             if isinstance(data, dict):

{debase-0.1.18.dist-info → debase-0.1.19.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: debase
-Version: 0.1.18
+Version: 0.1.19
 Summary: Enzyme lineage analysis and sequence extraction package
 Home-page: https://github.com/YuemingLong/DEBase
 Author: DEBase Team

{debase-0.1.18.dist-info → debase-0.1.19.dist-info}/RECORD RENAMED Viewed

@@ -1,17 +1,17 @@
 debase/PIPELINE_FLOW.md,sha256=S4nQyZlX39-Bchw1gQWPK60sHiFpB1eWHqo5GR9oTY8,4741
 debase/__init__.py,sha256=YeKveGj_8fwuu5ozoK2mUU86so_FjiCwsvg1d_lYVZU,586
 debase/__main__.py,sha256=LbxYt2x9TG5Ced7LpzzX_8gkWyXeZSlVHzqHfqAiPwQ,160
-debase/_version.py,sha256=Qd1kKsssesKE5FvJnDdAuZsx_BrxTSJJyt68SK99D54,50
+debase/_version.py,sha256=VbYiJzmzValsIDmCyQWPabFFsmy_TQ_Qp35j2mo-UKc,50
 debase/build_db.py,sha256=bW574GxsL1BJtDwM19urLbciPcejLzfraXZPpzm09FQ,7167
 debase/cleanup_sequence.py,sha256=QyhUqvTBVFTGM7ebAHmP3tif3Jq-8hvoLApYwAJtpH4,32702
 debase/enzyme_lineage_extractor.py,sha256=xbNKkIMRCM2dYHsX24vWX1EsQINaGSWBj-iTX10B8Mw,117057
 debase/lineage_format.py,sha256=IS9ig-Uv7KxtI9enZKM6YgQ7sitqwOo4cdXbOy38J3s,34232
-debase/reaction_info_extractor.py,sha256=W9CS0puFTdhJ_T2Fpy931EgnjOCsHHjbtU6RdnzDlhw,113140
+debase/reaction_info_extractor.py,sha256=otj8D3MnrThhUR_xOCc3sSVIw8hrCKnB4OY6y6NnaWA,116674
 debase/substrate_scope_extractor.py,sha256=9XDF-DxOqB63AwaVceAMvg7BcjoTQXE_pG2c_seM_DA,100698
 debase/wrapper.py,sha256=V9bs8ZiyCpJHMM5VuN74kiKdkQRVU6vyvLKCrO1BUB8,20890
-debase-0.1.18.dist-info/licenses/LICENSE,sha256=5sk9_tcNmr1r2iMIUAiioBo7wo38u8BrPlO7f0seqgE,1075
-debase-0.1.18.dist-info/METADATA,sha256=XvSrveJ0Y40c53JYUfiveaQNJ3qoEkxaQ61n3_--1cQ,10790
-debase-0.1.18.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-debase-0.1.18.dist-info/entry_points.txt,sha256=hUcxA1b4xORu-HHBFTe9u2KTdbxPzt0dwz95_6JNe9M,48
-debase-0.1.18.dist-info/top_level.txt,sha256=2BUeq-4kmQr0Rhl06AnRzmmZNs8WzBRK9OcJehkcdk8,7
-debase-0.1.18.dist-info/RECORD,,
+debase-0.1.19.dist-info/licenses/LICENSE,sha256=5sk9_tcNmr1r2iMIUAiioBo7wo38u8BrPlO7f0seqgE,1075
+debase-0.1.19.dist-info/METADATA,sha256=i1dFEB8kPkfTt8q8hJpAAAkZA29T2kb1bzPFMjzPdJU,10790
+debase-0.1.19.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+debase-0.1.19.dist-info/entry_points.txt,sha256=hUcxA1b4xORu-HHBFTe9u2KTdbxPzt0dwz95_6JNe9M,48
+debase-0.1.19.dist-info/top_level.txt,sha256=2BUeq-4kmQr0Rhl06AnRzmmZNs8WzBRK9OcJehkcdk8,7
+debase-0.1.19.dist-info/RECORD,,

{debase-0.1.18.dist-info → debase-0.1.19.dist-info}/WHEEL RENAMED Viewed

File without changes

{debase-0.1.18.dist-info → debase-0.1.19.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{debase-0.1.18.dist-info → debase-0.1.19.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{debase-0.1.18.dist-info → debase-0.1.19.dist-info}/top_level.txt RENAMED Viewed

File without changes

debase 0.1.18__py3-none-any.whl → 0.1.19__py3-none-any.whl

debase 0.1.18__py3-none-any.whl → 0.1.19__py3-none-any.whl