debase 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
debase/_version.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """Version information."""
2
2
 
3
- __version__ = "0.4.0"
3
+ __version__ = "0.4.1"
@@ -309,68 +309,28 @@ def extract_figure_image(pdf_paths: List[Path], figure_ref: str) -> Optional[str
309
309
 
310
310
  log.info("Found figure caption on page %d at y=%.0f", page_num + 1, caption_rect.y0)
311
311
 
312
- # Extract multi-page region including the figure and content below
313
- # The figure should be between the top of the viewable area and extend to subsequent pages
312
+ # Extract just the figure with its caption, avoiding excessive white space
314
313
  page_rect = page.rect
315
314
 
316
- # Define the region to extract
317
- # Extract everything above the caption plus additional content from subsequent pages
318
- top_margin = 0 # Start from the very top of the page
319
- additional_pages = 2 # Number of additional pages to include
320
- left_margin = 0 # Use full page width
321
- right_margin = 0
322
-
323
- # Calculate the figure region for the first page
324
- fig_top = top_margin
325
- fig_bottom = max(caption_rect.y0 + 150, page_rect.height) # At least 150px below caption or full page
326
- fig_left = left_margin
327
- fig_right = page_rect.width - right_margin
328
-
329
- # Create list to store all page images
330
- page_images = []
315
+ # Calculate the figure region on current page only
316
+ # Extract from top of page to just below the caption
317
+ fig_top = 0 # Start from top of page
318
+ fig_bottom = min(caption_rect.y0 + 200, page_rect.height) # 200px below caption, but not more than page height
319
+ fig_left = 0 # Full width
320
+ fig_right = page_rect.width
331
321
 
332
- # Extract first page (from top to bottom)
322
+ # Extract only the figure region (no additional pages to avoid white space)
333
323
  clip_rect = fitz.Rect(fig_left, fig_top, fig_right, fig_bottom)
334
324
  mat = fitz.Matrix(2, 2) # 2x zoom for better quality
335
325
  pix = page.get_pixmap(clip=clip_rect, matrix=mat)
336
- page_images.append(pix)
337
326
 
338
- # Extract additional pages if they exist
339
- for additional_page_offset in range(1, additional_pages + 1):
340
- next_page_num = page_num + additional_page_offset
341
- if next_page_num < doc.page_count:
342
- next_page = doc.load_page(next_page_num)
343
- next_page_rect = next_page.rect
344
-
345
- # Extract full page for additional pages
346
- next_clip_rect = fitz.Rect(0, 0, next_page_rect.width, next_page_rect.height)
347
- next_pix = next_page.get_pixmap(clip=next_clip_rect, matrix=mat)
348
- page_images.append(next_pix)
349
- log.info("Added page %d to multi-page extraction", next_page_num + 1)
350
-
351
- # Combine all page images vertically
352
- if len(page_images) == 1:
353
- # Single page extraction
354
- combined_pix = page_images[0]
355
- else:
356
- # Multi-page extraction - combine vertically
357
- total_width = max(pix.width for pix in page_images)
358
- total_height = sum(pix.height for pix in page_images)
359
-
360
- # Create a new pixmap to hold the combined image
361
- combined_pix = fitz.Pixmap(fitz.csRGB, fitz.IRect(0, 0, total_width, total_height))
362
- combined_pix.clear_with(255) # White background
363
-
364
- current_y = 0
365
- for pix in page_images:
366
- # Copy each page image to the combined image
367
- combined_pix.copy(pix, fitz.IRect(0, current_y, pix.width, current_y + pix.height))
368
- current_y += pix.height
327
+ log.info("Extracted figure region: %.0fx%.0f pixels from page %d",
328
+ pix.width, pix.height, page_num + 1)
369
329
 
370
330
  # Convert to PNG
371
- img_bytes = combined_pix.tobytes("png")
372
- log.info("Extracted multi-page figure region: %dx%d pixels from %d pages starting at page %d",
373
- combined_pix.width, combined_pix.height, len(page_images), page_num + 1)
331
+ img_bytes = pix.tobytes("png")
332
+ log.info("Extracted figure region: %dx%d pixels from page %d",
333
+ pix.width, pix.height, page_num + 1)
374
334
 
375
335
  return b64encode(img_bytes).decode()
376
336
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: debase
3
- Version: 0.4.0
3
+ Version: 0.4.1
4
4
  Summary: Enzyme lineage analysis and sequence extraction package
5
5
  Home-page: https://github.com/YuemingLong/DEBase
6
6
  Author: DEBase Team
@@ -1,16 +1,16 @@
1
1
  debase/__init__.py,sha256=YeKveGj_8fwuu5ozoK2mUU86so_FjiCwsvg1d_lYVZU,586
2
2
  debase/__main__.py,sha256=LbxYt2x9TG5Ced7LpzzX_8gkWyXeZSlVHzqHfqAiPwQ,160
3
- debase/_version.py,sha256=X9pfcQjm1Y8ILtLtdscGnfFKSp5XWTeamXgSHPOw2K0,49
3
+ debase/_version.py,sha256=qJBDmAhFTv4pYwvc3Umy_Lwc_v8doVBVCiysT7Eoh3E,49
4
4
  debase/build_db.py,sha256=bW574GxsL1BJtDwM19urLbciPcejLzfraXZPpzm09FQ,7167
5
5
  debase/cleanup_sequence.py,sha256=ngxb_tPekjCWvampAjyuFqK4wLk_meFSj_TwfvOxheQ,33978
6
6
  debase/enzyme_lineage_extractor.py,sha256=laIw9A5AuJ_kJe9h6Fp_WzMh_ctCN31bo2b2-RKrFd4,124019
7
7
  debase/lineage_format.py,sha256=IS9ig-Uv7KxtI9enZKM6YgQ7sitqwOo4cdXbOy38J3s,34232
8
8
  debase/reaction_info_extractor.py,sha256=xRyYoQKqSzer-k8FZwg55nDd0D-6QBc0F-HAyfvisG0,150368
9
- debase/substrate_scope_extractor.py,sha256=ny4n_J4SDFQnxhCHHHan1xouqM8FkueJm_z-hm6gr-o,103761
9
+ debase/substrate_scope_extractor.py,sha256=JLXHEEeMDFiFQRt8gVCnnhimrxDF23-z0jq3N4-3gn8,101469
10
10
  debase/wrapper.py,sha256=TGU5eq0qWTrkRR35ztsp8WMb1E9Nt64BdbHuYHROmYA,24279
11
- debase-0.4.0.dist-info/licenses/LICENSE,sha256=5sk9_tcNmr1r2iMIUAiioBo7wo38u8BrPlO7f0seqgE,1075
12
- debase-0.4.0.dist-info/METADATA,sha256=Qpvyi4nbq_wmhbl_089pRIlGAubVxjwVfX1eUSK3lLY,10789
13
- debase-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
- debase-0.4.0.dist-info/entry_points.txt,sha256=hUcxA1b4xORu-HHBFTe9u2KTdbxPzt0dwz95_6JNe9M,48
15
- debase-0.4.0.dist-info/top_level.txt,sha256=2BUeq-4kmQr0Rhl06AnRzmmZNs8WzBRK9OcJehkcdk8,7
16
- debase-0.4.0.dist-info/RECORD,,
11
+ debase-0.4.1.dist-info/licenses/LICENSE,sha256=5sk9_tcNmr1r2iMIUAiioBo7wo38u8BrPlO7f0seqgE,1075
12
+ debase-0.4.1.dist-info/METADATA,sha256=kSH58QfBv6WGb8Ds3mcei-DUmWQormuSyPHNOmpbcQ8,10789
13
+ debase-0.4.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
14
+ debase-0.4.1.dist-info/entry_points.txt,sha256=hUcxA1b4xORu-HHBFTe9u2KTdbxPzt0dwz95_6JNe9M,48
15
+ debase-0.4.1.dist-info/top_level.txt,sha256=2BUeq-4kmQr0Rhl06AnRzmmZNs8WzBRK9OcJehkcdk8,7
16
+ debase-0.4.1.dist-info/RECORD,,
File without changes