rpdfium 0.4.3 → 0.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: acc10e2700ca1e013088725b09fd05d4dee4f9fcf7acdcddba499306f24ec9f7
4
- data.tar.gz: 9f1f2bb6b40e61e457589d7756eff6fdb85f99310cea86c0a41a187da6f4e32a
3
+ metadata.gz: b20ddc3e261b5d9a8a8fa0d84c8ff65e26a6a952a7e3d57dcfe64680944c652c
4
+ data.tar.gz: cf5c332e8231777062ecd593458b2192794dec3429a2875cf679279ee4ac6a5f
5
5
  SHA512:
6
- metadata.gz: 07a60e4c5304e1650f583a691e324daf0f2b5fcbb7f1efdd8d333ac8a1fc7e48b7230e23d9cda9c0d8b73e8382c5faa7bf0737e3230bebf55cefe13d822e474d
7
- data.tar.gz: 2dc6accfa959ced0852d1b48c729eab47556a26eb0ee3e94bc6d3644d7f5d78530ccfd70244a6aa88ea7e2cac4c6a46973db8d8f2838c8b4cad5eccade42a53d
6
+ metadata.gz: d59b9039332abdf90ee46032ac7b6501b99b753e9045da3526c410e2806a53b2b5dd416ec83c254d2692a5926f64ff0aea8c1c5859633a26a26e84a43fd2cdb5
7
+ data.tar.gz: 7e218d1c1f6632ae871fa6b0453de3e97a256a9e9871788f4bed34a93c3ee71d91bee3042712ab083be963fec38fa5aa9bb20a608970bf45e7358115ddf554ed
data/CHANGELOG.md CHANGED
@@ -8,6 +8,20 @@ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
8
8
 
9
9
  ## [Unreleased]
10
10
 
11
+ ## [0.4.4] - 2026-06-18
12
+
13
+ ### Changed
14
+
15
+ - **`Page#words` allocates ~77% fewer objects.** The row/word pipeline was
16
+ rebuilt to avoid transient allocations: the by-row sort now uses a
17
+ comparator block instead of `sort_by { [top, x0] }` (no per-char 2-element
18
+ key array nor Schwartzian pair array), the per-row sort runs in place with
19
+ `sort!`, and `word_from_chars` folds `text`/`top`/`bottom` in a single pass
20
+ instead of three separate `map`+`join`/`min`/`max` passes (each of which
21
+ allocated an intermediate array). Measured on a real 3567-char page (438
22
+ words): 6552 → 1534 allocated objects per call (1.84 → 0.43 objects/char).
23
+ Behavior is unchanged.
24
+
11
25
  ## [0.4.3] - 2026-06-16
12
26
 
13
27
  ### Fixed
data/lib/rpdfium/page.rb CHANGED
@@ -876,13 +876,20 @@ module Rpdfium
876
876
  cs = chars(**char_opts)
877
877
  return [] if cs.empty?
878
878
 
879
- # Group into rows by y
880
- rows = group_consecutive(cs.sort_by { |c| [c[:top], c[:x0]] }) do |a, b|
879
+ # Group into rows by y. Comparator sort instead of sort_by avoids one
880
+ # 2-element key array per char (plus the Schwartzian pair array) — the
881
+ # dominant transient allocation on pages with thousands of chars.
882
+ sorted_cs = cs.sort do |a, b|
883
+ c = a[:top] <=> b[:top]
884
+ c.zero? ? a[:x0] <=> b[:x0] : c
885
+ end
886
+ rows = group_consecutive(sorted_cs) do |a, b|
881
887
  (a[:top] - b[:top]).abs <= y_tolerance
882
888
  end
883
889
 
884
890
  rows.flat_map do |row|
885
- sorted = row.sort_by { |c| c[:x0] }
891
+ # In-place: `row` is a fresh group array, no need for a new copy.
892
+ sorted = row.sort! { |a, b| a[:x0] <=> b[:x0] }
886
893
  # Split on gap > x_tolerance or explicit space
887
894
  word_groups = []
888
895
  buf = []
@@ -1595,14 +1602,27 @@ module Rpdfium
1595
1602
  end
1596
1603
 
1597
1604
  def word_from_chars(chars)
1605
+ # Single pass: fold text/top/bottom together instead of 3 separate
1606
+ # map+join/min/max passes (each allocating an intermediate array).
1607
+ first = chars.first
1608
+ text = +''
1609
+ top = first[:top]
1610
+ bottom = first[:bottom]
1611
+ chars.each do |c|
1612
+ text << c[:char].to_s
1613
+ t = c[:top]
1614
+ b = c[:bottom]
1615
+ top = t if t < top
1616
+ bottom = b if b > bottom
1617
+ end
1598
1618
  {
1599
- text: chars.map { |c| c[:char] }.join,
1600
- x0: chars.first[:x0],
1619
+ text: text,
1620
+ x0: first[:x0],
1601
1621
  x1: chars.last[:x1],
1602
- top: chars.map { |c| c[:top] }.min,
1603
- bottom: chars.map { |c| c[:bottom] }.max,
1604
- fontsize: chars.first[:fontsize],
1605
- font: chars.first[:font],
1622
+ top: top,
1623
+ bottom: bottom,
1624
+ fontsize: first[:fontsize],
1625
+ font: first[:font],
1606
1626
  chars: chars
1607
1627
  }
1608
1628
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Rpdfium
4
- VERSION = "0.4.3"
4
+ VERSION = "0.4.4"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rpdfium
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.3
4
+ version: 0.4.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Roberto Scinocca