debase 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- debase/_version.py +1 -1
- debase/enzyme_lineage_extractor.py +14 -8
- debase/lineage_format.py +335 -56
- debase/reaction_info_extractor.py +60 -32
- debase/substrate_scope_extractor.py +366 -93
- debase/wrapper.py +37 -11
- {debase-0.4.1.dist-info → debase-0.4.2.dist-info}/METADATA +1 -1
- debase-0.4.2.dist-info/RECORD +16 -0
- debase-0.4.1.dist-info/RECORD +0 -16
- {debase-0.4.1.dist-info → debase-0.4.2.dist-info}/WHEEL +0 -0
- {debase-0.4.1.dist-info → debase-0.4.2.dist-info}/entry_points.txt +0 -0
- {debase-0.4.1.dist-info → debase-0.4.2.dist-info}/licenses/LICENSE +0 -0
- {debase-0.4.1.dist-info → debase-0.4.2.dist-info}/top_level.txt +0 -0
debase/reaction_info_extractor.py

```diff
@@ -1332,12 +1332,28 @@ class ReactionExtractor:
             y_offset += pix.height * scale
 
         # Convert the page to a pixmap
-
+        # Limit zoom factor to avoid creating excessively large images
+        # Gemini has limits on image size (approx 20MB or 20 megapixels)
+        zoom = 5.0
+        estimated_pixels = (max_width * zoom) * (total_height * zoom)
+        max_pixels = 20_000_000 # 20 megapixels
+
+        if estimated_pixels > max_pixels:
+            # Calculate appropriate zoom to stay under limit
+            zoom = min(5.0, (max_pixels / (max_width * total_height)) ** 0.5)
+            LOGGER.warning(f"Reducing zoom from 5.0 to {zoom:.2f} to stay under {max_pixels/1e6:.1f} megapixel limit")
+
+        mat = fitz.Matrix(zoom, zoom)
         combined_pix = page.get_pixmap(matrix=mat)
         combined_pix = self._ensure_rgb_pixmap(combined_pix)
 
         # Convert to PNG and return
         img_bytes = combined_pix.tobytes("png")
+
+        # Check final size
+        final_size_mb = len(img_bytes) / (1024 * 1024)
+        if final_size_mb > 20:
+            LOGGER.warning(f"Combined image is {final_size_mb:.1f}MB, may be too large for vision API")
         output_doc.close()
 
         # Save debug file if available
```
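Read on its own, the zoom cap added above is the observation that rendered pixel count grows with the square of the zoom factor, so the largest zoom that fits a pixel budget is the square root of budget / (width x height). Below is a minimal, self-contained sketch of that arithmetic; the function name capped_zoom and the standalone form are illustrative only and not part of debase, which inlines the same calculation using its own max_width and total_height values.

```python
# Illustrative sketch of the zoom-capping arithmetic above; not debase API.
def capped_zoom(width: float, height: float,
                preferred_zoom: float = 5.0,
                max_pixels: int = 20_000_000) -> float:
    """Largest zoom <= preferred_zoom whose rendered image stays under max_pixels."""
    estimated_pixels = (width * preferred_zoom) * (height * preferred_zoom)
    if estimated_pixels <= max_pixels:
        return preferred_zoom
    # Pixel count scales with zoom**2, so solve (w*z) * (h*z) <= max_pixels for z.
    return min(preferred_zoom, (max_pixels / (width * height)) ** 0.5)


if __name__ == "__main__":
    # Three stacked US-Letter pages (612 x 2376 pt) would be about 36 MP at 5x,
    # so the cap drops the zoom to roughly 3.7.
    print(round(capped_zoom(612, 3 * 792), 2))
```

The megabyte check after PNG encoding is a separate guard: staying under 20 megapixels does not guarantee the encoded PNG stays under the roughly 20 MB request limit mentioned in the comment, so the code only logs a warning rather than failing.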
```diff
@@ -2317,39 +2333,51 @@ Different campaigns may use different model reactions and substrates.
                 }
             )
 
-            response = model.generate_content(content_parts)
-
-            # Track token usage if available
             try:
-
-                input_tokens = getattr(response.usage_metadata, 'prompt_token_count', 0)
-                output_tokens = getattr(response.usage_metadata, 'candidates_token_count', 0)
-                if input_tokens or output_tokens:
-                    try:
-                        from .wrapper import add_token_usage
-                        add_token_usage('reaction_info_extractor', input_tokens, output_tokens)
-                    except ImportError:
-                        pass # wrapper not available
-            except Exception:
-                pass # token tracking is best-effort
-
-            # Parse JSON from response
-            if response and response.text:
-                # Save debug output
-                if self.debug_dir:
-                    timestamp = int(time.time())
-                    _dump(prompt, self.debug_dir / f"model_reaction_multimodal_prompt_{timestamp}.txt")
-                    _dump(response.text, self.debug_dir / f"model_reaction_multimodal_response_{timestamp}.txt")
+                response = model.generate_content(content_parts)
 
-            #
-
-
-
-
-
-
-
-
+                # Track token usage if available
+                try:
+                    if hasattr(response, 'usage_metadata'):
+                        input_tokens = getattr(response.usage_metadata, 'prompt_token_count', 0)
+                        output_tokens = getattr(response.usage_metadata, 'candidates_token_count', 0)
+                        if input_tokens or output_tokens:
+                            try:
+                                from .wrapper import add_token_usage
+                                add_token_usage('reaction_info_extractor', input_tokens, output_tokens)
+                            except ImportError:
+                                pass # wrapper not available
+                except Exception:
+                    pass # token tracking is best-effort
+
+                # Parse JSON from response
+                if response and response.text:
+                    # Save debug output
+                    if self.debug_dir:
+                        timestamp = int(time.time())
+                        _dump(prompt, self.debug_dir / f"model_reaction_multimodal_prompt_{timestamp}.txt")
+                        _dump(response.text, self.debug_dir / f"model_reaction_multimodal_response_{timestamp}.txt")
+
+                    # Extract JSON from response
+                    text = response.text.strip()
+                    if text.startswith("```json"):
+                        text = text[7:]
+                    if text.endswith("```"):
+                        text = text[:-3]
+                    data = json.loads(text.strip())
+                else:
+                    raise ValueError("Empty response from multimodal model")
+            except Exception as vision_error:
+                LOGGER.error("Vision API call failed: %s", vision_error)
+                LOGGER.info("Falling back to text-only extraction")
+                # Fall back to text-only extraction
+                data = generate_json_with_retry(
+                    self.model,
+                    prompt,
+                    temperature=self.cfg.model_reaction_temperature,
+                    debug_dir=self.debug_dir,
+                    tag="model_reaction_fallback"
+                )
         else:
             # Fall back to text-only extraction
             data = generate_json_with_retry(
```
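The substantive change in this hunk is that the multimodal call, the token accounting, and the JSON parsing now all live inside a single try/except whose handler logs the failure and falls back to the existing text-only generate_json_with_retry path; an empty response is promoted to a ValueError so it takes the same fallback route, and a Markdown code fence around the JSON is stripped before json.loads. A minimal sketch of that control flow is below; call_vision and call_text_only are hypothetical stand-ins, not debase functions.

```python
# Illustrative sketch of the vision-with-text-fallback shape above; the
# call_vision / call_text_only callables are hypothetical stand-ins.
import json
import logging

LOGGER = logging.getLogger(__name__)


def extract_with_fallback(call_vision, call_text_only) -> dict:
    """Try the multimodal path first; on any failure, fall back to text-only."""
    try:
        raw = call_vision()
        if not raw:
            raise ValueError("Empty response from multimodal model")
        return json.loads(raw)
    except Exception as vision_error:
        LOGGER.error("Vision call failed: %s", vision_error)
        LOGGER.info("Falling back to text-only extraction")
        return call_text_only()


if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    # Simulate a failing vision call so the fallback path runs.
    data = extract_with_fallback(
        call_vision=lambda: "",                  # empty response triggers the fallback
        call_text_only=lambda: {"yield": None},  # stand-in for the text-only extractor
    )
    print(data)
```

Token-usage bookkeeping stays best-effort: it sits in its own try/except and the import of add_token_usage from .wrapper may fail silently, so accounting problems never abort an extraction.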