debase 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1332,12 +1332,28 @@ class ReactionExtractor:
1332
1332
  y_offset += pix.height * scale
1333
1333
 
1334
1334
  # Convert the page to a pixmap
1335
- mat = fitz.Matrix(5.0, 5.0) # 5x zoom for quality
1335
+ # Limit zoom factor to avoid creating excessively large images
1336
+ # Gemini has limits on image size (approx 20MB or 20 megapixels)
1337
+ zoom = 5.0
1338
+ estimated_pixels = (max_width * zoom) * (total_height * zoom)
1339
+ max_pixels = 20_000_000 # 20 megapixels
1340
+
1341
+ if estimated_pixels > max_pixels:
1342
+ # Calculate appropriate zoom to stay under limit
1343
+ zoom = min(5.0, (max_pixels / (max_width * total_height)) ** 0.5)
1344
+ LOGGER.warning(f"Reducing zoom from 5.0 to {zoom:.2f} to stay under {max_pixels/1e6:.1f} megapixel limit")
1345
+
1346
+ mat = fitz.Matrix(zoom, zoom)
1336
1347
  combined_pix = page.get_pixmap(matrix=mat)
1337
1348
  combined_pix = self._ensure_rgb_pixmap(combined_pix)
1338
1349
 
1339
1350
  # Convert to PNG and return
1340
1351
  img_bytes = combined_pix.tobytes("png")
1352
+
1353
+ # Check final size
1354
+ final_size_mb = len(img_bytes) / (1024 * 1024)
1355
+ if final_size_mb > 20:
1356
+ LOGGER.warning(f"Combined image is {final_size_mb:.1f}MB, may be too large for vision API")
1341
1357
  output_doc.close()
1342
1358
 
1343
1359
  # Save debug file if available
@@ -2317,39 +2333,51 @@ Different campaigns may use different model reactions and substrates.
2317
2333
  }
2318
2334
  )
2319
2335
 
2320
- response = model.generate_content(content_parts)
2321
-
2322
- # Track token usage if available
2323
2336
  try:
2324
- if hasattr(response, 'usage_metadata'):
2325
- input_tokens = getattr(response.usage_metadata, 'prompt_token_count', 0)
2326
- output_tokens = getattr(response.usage_metadata, 'candidates_token_count', 0)
2327
- if input_tokens or output_tokens:
2328
- try:
2329
- from .wrapper import add_token_usage
2330
- add_token_usage('reaction_info_extractor', input_tokens, output_tokens)
2331
- except ImportError:
2332
- pass # wrapper not available
2333
- except Exception:
2334
- pass # token tracking is best-effort
2335
-
2336
- # Parse JSON from response
2337
- if response and response.text:
2338
- # Save debug output
2339
- if self.debug_dir:
2340
- timestamp = int(time.time())
2341
- _dump(prompt, self.debug_dir / f"model_reaction_multimodal_prompt_{timestamp}.txt")
2342
- _dump(response.text, self.debug_dir / f"model_reaction_multimodal_response_{timestamp}.txt")
2337
+ response = model.generate_content(content_parts)
2343
2338
 
2344
- # Extract JSON from response
2345
- text = response.text.strip()
2346
- if text.startswith("```json"):
2347
- text = text[7:]
2348
- if text.endswith("```"):
2349
- text = text[:-3]
2350
- data = json.loads(text.strip())
2351
- else:
2352
- raise ValueError("Empty response from multimodal model")
2339
+ # Track token usage if available
2340
+ try:
2341
+ if hasattr(response, 'usage_metadata'):
2342
+ input_tokens = getattr(response.usage_metadata, 'prompt_token_count', 0)
2343
+ output_tokens = getattr(response.usage_metadata, 'candidates_token_count', 0)
2344
+ if input_tokens or output_tokens:
2345
+ try:
2346
+ from .wrapper import add_token_usage
2347
+ add_token_usage('reaction_info_extractor', input_tokens, output_tokens)
2348
+ except ImportError:
2349
+ pass # wrapper not available
2350
+ except Exception:
2351
+ pass # token tracking is best-effort
2352
+
2353
+ # Parse JSON from response
2354
+ if response and response.text:
2355
+ # Save debug output
2356
+ if self.debug_dir:
2357
+ timestamp = int(time.time())
2358
+ _dump(prompt, self.debug_dir / f"model_reaction_multimodal_prompt_{timestamp}.txt")
2359
+ _dump(response.text, self.debug_dir / f"model_reaction_multimodal_response_{timestamp}.txt")
2360
+
2361
+ # Extract JSON from response
2362
+ text = response.text.strip()
2363
+ if text.startswith("```json"):
2364
+ text = text[7:]
2365
+ if text.endswith("```"):
2366
+ text = text[:-3]
2367
+ data = json.loads(text.strip())
2368
+ else:
2369
+ raise ValueError("Empty response from multimodal model")
2370
+ except Exception as vision_error:
2371
+ LOGGER.error("Vision API call failed: %s", vision_error)
2372
+ LOGGER.info("Falling back to text-only extraction")
2373
+ # Fall back to text-only extraction
2374
+ data = generate_json_with_retry(
2375
+ self.model,
2376
+ prompt,
2377
+ temperature=self.cfg.model_reaction_temperature,
2378
+ debug_dir=self.debug_dir,
2379
+ tag="model_reaction_fallback"
2380
+ )
2353
2381
  else:
2354
2382
  # Fall back to text-only extraction
2355
2383
  data = generate_json_with_retry(