@xdarkicex/openclaw-memory-libravdb 1.3.20 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/docs/README.md CHANGED
@@ -9,7 +9,7 @@ to preserve project history and design evolution.
9
9
  - [uninstall.md](./uninstall.md) - Clean shutdown and removal guide for the plugin, daemon, and optional local data.
10
10
  - [architecture.md](./architecture.md) - End-to-end component model, turn lifecycle, compaction flow, and degraded behavior.
11
11
  - [problem.md](./problem.md) - Technical argument for replacing the stock OpenClaw memory lifecycle in this use case.
12
- - [mathematics-v2.md](./mathematics-v2.md) - Formal reference for hybrid scoring, decay, token budgeting, Matryoshka retrieval, compaction, and planned two-pass retrieval.
12
+ - [mathematics-v2.md](./mathematics-v2.md) - Formal reference for hybrid scoring, decay, token budgeting, Matryoshka retrieval, compaction, planned two-pass retrieval, and temporal-compositional projection.
13
13
  - [compaction-evaluation.md](./compaction-evaluation.md) - Real-model benchmark notes for T5 summary confidence, Nomic-space preservation, and the hard preservation gate.
14
14
  - [continuity.md](./continuity.md) - Continuity model for invariant context, preserved recent raw session tail, and retrieved older memory.
15
15
  - [ast-v2.md](./ast-v2.md) - Reviewed authoritative AST partitioning reference for authored Markdown hard invariants, soft invariants, and variant lore.
@@ -1386,3 +1386,488 @@ retaining for future work:
1386
1386
  These ideas are intentionally preserved as future mathematics rather than
1387
1387
  current contract. The present document remains normative only for the formulas
1388
1388
  and invariants already defined above.
1389
+
1390
+ ## 9. Temporal-Compositional Retrieval Extension
1391
+
1392
+ This section defines a narrow, mathematically principled extension to the
1393
+ $\mathrm{Proj}()$ operator that corrects the single-turn-centric failure mode on
1394
+ temporal-compositional queries such as "how many days before $X$ did $Y$
1395
+ happen."
1396
+
1397
+ The extension is self-contained. Every formula in this section is bounded and
1398
+ correct under the existing parameter domains. The assembly law
1399
+ $C_{\mathrm{total}}(q)$, the budget hierarchy, and the runtime invariants in
1400
+ Section 7.10 and [`continuity.md`](./continuity.md) are unchanged. Only the
1401
+ internal definition of $\mathrm{Proj}(\mathcal{V}_{\mathrm{rest}}, q)$ is
1402
+ refined.
1403
+
1404
+ Implemented in: `src/temporal.ts` (planned).
1405
+
1406
+ ### 9.1 Motivation: The Set-Scoring Gap
1407
+
1408
+ The standard Pass-2 score $S_{\mathrm{final}}(d)$ maximizes over individual
1409
+ candidates:
1410
+
1411
+ $$
1412
+ \mathcal{C}_2(q)
1413
+ =
1414
+ \mathrm{TopK}_{d \in \mathcal{C}_1(q)}
1415
+ \left(k_2,\, S_{\mathrm{final}}(d)\right)
1416
+ $$
1417
+
1418
+ This is optimal when the query is answerable from a single best document. It
1419
+ fails when the query requires two complementary date-bearing turns to be
1420
+ jointly present, neither of which is individually the best semantic match.
1421
+
1422
+ The failure pattern is:
1423
+
1424
+ - Turn $A$ covers the query topic broadly, so it earns a high
1425
+ $S_{\mathrm{final}}$ and wins alone.
1426
+ - Turn $B$ contains the missing date anchor, but earns only a moderate
1427
+ $S_{\mathrm{final}}$ and is evicted.
1428
+ - Neither $A$ alone nor $B$ alone answers the question.
1429
+
1430
+ The fix is to move from
1431
+ $\underset{d}{\arg\max}\; S_{\mathrm{final}}(d)$ to a coverage-aware set
1432
+ selector that rewards a set of candidates for jointly maximizing semantic
1433
+ relevance, temporal anchor density, and event-slot coverage while penalizing
1434
+ redundancy automatically via marginal scoring.
1435
+
1436
+ ### 9.2 Temporal Query Indicator $\xi(q)\in[0,1]$
1437
+
1438
+ To avoid mutating the retrieval contract for normal queries, the extension
1439
+ activates only when the query is detected to be temporal-compositional.
1440
+ Define the temporal query indicator using the same saturating-sum pattern as
1441
+ $T(t)$ in [`gating.md`](./gating.md):
1442
+
1443
+ $$
1444
+ \xi(q)
1445
+ =
1446
+ \min\!\left(
1447
+ \frac{\displaystyle\sum_i s_i \cdot \mathbf{1}[\mathrm{tpat}_i(q)]}
1448
+ {\theta_{\xi}^{\mathrm{norm}}},
1449
+ 1
1450
+ \right)
1451
+ $$
1452
+
1453
+ where the shipped temporal patterns $\mathrm{tpat}_i$ are zero-allocation
1454
+ byte-lexer matches over the query text, including but not limited to
1455
+ "how many days", "how long", "before", "after", "since", "first", "earlier",
1456
+ "which came first", "when did", and "between".
1457
+
1458
+ Each pattern carries a weight $s_i > 0$. The default normalization constant is
1459
+ $\theta_{\xi}^{\mathrm{norm}} = 1.5$, so two strong temporal signals saturate
1460
+ $\xi(q)=1$.
1461
+
1462
+ By construction, the $\min(\cdot, 1)$ clamp and non-negative numerator
1463
+ guarantee:
1464
+
1465
+ $$
1466
+ \xi(q)\in[0,1]
1467
+ $$
1468
+
1469
+ If no temporal patterns match, $\xi(q)=0$ and the extension contributes
1470
+ nothing to the scoring formula.
1471
+
1472
+ The extension activates only when $\xi(q)\ge\theta_\xi$, with shipped default
1473
+ $\theta_\xi = 0.3$. Below that threshold, the standard $\mathrm{Proj}$ path
1474
+ executes without modification.
1475
+
1476
+ ### 9.3 Temporal Anchor Density $A(d)\in[0,1]$
1477
+
1478
+ A document's temporal anchor density measures how many explicit date or time
1479
+ expressions it contains, normalized by a bounded saturation constant.
1480
+ Define the anchor count over a lightweight anchor pattern set $\mathcal{P}_A$
1481
+ (ISO dates, relative day expressions, clock times, calendar words, Unix
1482
+ timestamps):
1483
+
1484
+ $$
1485
+ A(d)
1486
+ =
1487
+ \min\!\left(
1488
+ \frac{\displaystyle\sum_j \mathbf{1}[\mathrm{anch}_j(d)]}
1489
+ {\theta_A^{\mathrm{norm}}},
1490
+ 1
1491
+ \right)
1492
+ $$
1493
+
1494
+ The default $\theta_A^{\mathrm{norm}} = 3$, so three or more distinct anchor
1495
+ expressions saturate $A(d)=1$.
1496
+
1497
+ Again, the clamp guarantees:
1498
+
1499
+ $$
1500
+ A(d)\in[0,1]
1501
+ $$
1502
+
1503
+ $A(d)$ is a precomputed document-level scalar. It does not depend on the query
1504
+ and should be cached in the same document-addressed cache $\Psi$ defined in
1505
+ [`ast-v2.md`](./ast-v2.md) Section 7 alongside tier partition and budget
1506
+ metadata. The value must be recomputed whenever a stored document is created,
1507
+ updated, or regenerated by compaction.
1508
+
1509
+ ### 9.4 Event-Slot Extraction and Marginal Coverage $\Delta\Phi$
1510
+
1511
+ #### 9.4.1 Event-Slot Extraction
1512
+
1513
+ For a temporal-compositional query $q$, define the ordered event-slot tuple:
1514
+
1515
+ $$
1516
+ E(q)=\langle e_1, e_2, \dots, e_m \rangle
1517
+ $$
1518
+
1519
+ where each $e_j$ is a short noun-phrase span extracted from $q$ by a
1520
+ lightweight span extractor: named entities plus the main noun phrase preceding
1521
+ and following any detected temporal-pattern word. The extractor returns at
1522
+ most $m_{\max}=4$ slots to bound cost.
1523
+
1524
+ When $|E(q)|=0$, all coverage terms evaluate to zero and the formula degrades
1525
+ cleanly.
1526
+
1527
+ #### 9.4.2 Per-Slot Coverage Indicator
1528
+
1529
+ For each slot $e_j$ and candidate document $d$, define the binary slot-match
1530
+ indicator:
1531
+
1532
+ $$
1533
+ \phi_j(d)
1534
+ =
1535
+ \mathbf{1}\!\left[\varphi(e_j)^\top \varphi(d) \ge \theta_e\right]
1536
+ \in \{0,1\}
1537
+ $$
1538
+
1539
+ where $\varphi(\cdot)$ is the same unit-normalized embedding function defined
1540
+ in Section 7.1, and $\theta_e \in [-1,1]$ is the slot-match similarity
1541
+ threshold, default $\theta_e = 0.50$.
1542
+
1543
+ #### 9.4.3 Marginal Coverage
1544
+
1545
+ For a set $\mathcal{S}$ of already-selected documents, define the marginal
1546
+ coverage of adding $d$:
1547
+
1548
+ $$
1549
+ \Delta\Phi(d, \mathcal{S}, q)
1550
+ =
1551
+ \frac{1}{\max(|E(q)|, 1)}
1552
+ \sum_{j=1}^{|E(q)|}
1553
+ \phi_j(d)
1554
+ \cdot
1555
+ \mathbf{1}\!\left[\nexists d' \in \mathcal{S} : \phi_j(d') = 1\right]
1556
+ $$
1557
+
1558
+ This is the fraction of all event slots in $E(q)$ that $d$ newly covers, that is, slots matched by $d$ that no document in $\mathcal{S}$ already covers.
1559
+
1560
+ The outer factor is in $(0,1]$, the sum counts at most $|E(q)|$ binary terms,
1561
+ and therefore:
1562
+
1563
+ $$
1564
+ \Delta\Phi(d, \mathcal{S}, q)\in[0,1]
1565
+ $$
1566
+
1567
+ The indicator
1568
+ $\mathbf{1}\!\left[\nexists d' \in \mathcal{S} : \phi_j(d') = 1\right]$
1569
+ ensures that slots already covered by a previously selected document
1570
+ contribute zero marginal gain, automatically penalizing redundant anchor turns
1571
+ without a separate explicit penalty term.
1572
+
1573
+ As $|\mathcal{S}|$ grows, $\Delta\Phi(d,\mathcal{S},q)$ is monotone
1574
+ non-increasing: new selections can only cover more slots, leaving fewer
1575
+ uncovered slots for later candidates to gain credit for.
1576
+
1577
+ ### 9.5 Coverage-Augmented Blended Score
1578
+ $S_{\mathrm{proj}}(d,\mathcal{S},q)\in[0,1]$
1579
+
1580
+ Define the coverage-augmented score for candidate $d$ given already-selected
1581
+ set $\mathcal{S}$ and query $q$:
1582
+
1583
+ $$
1584
+ S_{\mathrm{cov}}(d, \mathcal{S}, q)
1585
+ =
1586
+ \mu \cdot S_{\mathrm{final}}(d)
1587
+ + \nu \cdot A(d)
1588
+ + \rho \cdot \Delta\Phi(d, \mathcal{S}, q)
1589
+ $$
1590
+
1591
+ where:
1592
+
1593
+ $$
1594
+ \mu,\nu,\rho\in[0,1],
1595
+ \qquad
1596
+ \mu+\nu+\rho=1
1597
+ $$
1598
+
1599
+ The default shipped weights are $\mu=0.60$, $\nu=0.20$, and $\rho=0.20$.
1600
+
1601
+ Blend this with the standard score using $\xi(q)$ as an interpolation scalar:
1602
+
1603
+ $$
1604
+ S_{\mathrm{proj}}(d, \mathcal{S}, q)
1605
+ =
1606
+ (1 - \xi(q)) \cdot S_{\mathrm{final}}(d)
1607
+ + \xi(q) \cdot S_{\mathrm{cov}}(d, \mathcal{S}, q)
1608
+ $$
1609
+
1610
+ Substituting $S_{\mathrm{cov}}$ yields:
1611
+
1612
+ $$
1613
+ S_{\mathrm{proj}}
1614
+ =
1615
+ \bigl(1 - \xi(1-\mu)\bigr)\cdot S_{\mathrm{final}}
1616
+ + \xi\nu \cdot A
1617
+ + \xi\rho \cdot \Delta\Phi
1618
+ $$
1619
+
1620
+ All coefficients are non-negative, and they sum to one:
1621
+
1622
+ $$
1623
+ \bigl(1 - \xi(1-\mu)\bigr) + \xi\nu + \xi\rho
1624
+ =
1625
+ 1 - \xi + \xi\mu + \xi\nu + \xi\rho
1626
+ =
1627
+ 1 - \xi + \xi(\mu+\nu+\rho)
1628
+ =
1629
+ 1
1630
+ $$
1631
+
1632
+ Because $S_{\mathrm{final}}(d)$, $A(d)$, and
1633
+ $\Delta\Phi(d,\mathcal{S},q)$ all lie in $[0,1]$, this is a proper convex
1634
+ combination, so:
1635
+
1636
+ $$
1637
+ S_{\mathrm{proj}}(d,\mathcal{S},q)\in[0,1]
1638
+ $$
1639
+
1640
+ Degeneracy cases:
1641
+
1642
+ | Condition | Behavior |
1643
+ | --- | --- |
1644
+ | $\xi(q)=0$ | $S_{\mathrm{proj}} = S_{\mathrm{final}}(d)$; standard retrieval unchanged |
1645
+ | $\xi(q)=1$, $\nu=\rho=0$, $\mu=1$ | Explicit no-op configuration; still $S_{\mathrm{proj}} = S_{\mathrm{final}}(d)$ |
1646
+ | $\lvert E(q)\rvert=0$ | $\Delta\Phi=0$ for all $d$; the $\rho$ term vanishes |
1647
+ | $\mathcal{S}=\emptyset$ | $\Delta\Phi$ equals full slot-coverage fraction |
1648
+ | all slots already covered by $\mathcal{S}$ | $\Delta\Phi=0$ for all remaining $d$ |
1649
+
1650
+ Note: the greedy selector below optimizes a submodular coverage term
1651
+ $\Delta\Phi$ augmented with fixed document priors $S_{\mathrm{final}}(d)$ and
1652
+ $A(d)$. The classic $(1-1/e)$ approximation guarantee applies strictly to the
1653
+ coverage component; in practice the blended score preserves greedy usefulness
1654
+ for temporal anchor selection.
1655
+
1656
+ ### 9.6 Temporal Recovery Candidate Set
1657
+ $\mathcal{C}_{\mathrm{rec}}(q)$
1658
+
1659
+ The root cause of the observed benchmark failure is not only that documents are
1660
+ scored incorrectly; it is also that the necessary complementary anchor turn may
1661
+ never enter $\mathcal{C}_2(q)$ because its semantic similarity to the
1662
+ whole-query embedding is too low.
1663
+
1664
+ A bounded recovery pass admits anchor-rich documents below the normal Pass-1
1665
+ threshold:
1666
+
1667
+ $$
1668
+ \mathcal{C}_{\mathrm{rec}}(q)
1669
+ =
1670
+ \mathrm{TopK}_{d \in
1671
+ \left\{d' \in \mathcal{V}_{\mathrm{rest}} :
1672
+ \mathrm{sim}(q,d') \ge \theta_{\mathrm{rec}}\right\}}
1673
+ \left(k_{\mathrm{rec}},\, A(d)\right)
1674
+ \setminus \mathcal{C}_2(q)
1675
+ $$
1676
+
1677
+ where:
1678
+
1679
+ - $\theta_{\mathrm{rec}} < \theta_1$ is a looser semantic floor, default
1680
+ $\theta_{\mathrm{rec}} = 0.15$, preventing pure noise while still admitting
1681
+ anchor-heavy but semantically distant turns.
1682
+ - $k_{\mathrm{rec}}$ is a small cap, default $k_{\mathrm{rec}} = 10$, bounding
1683
+ recovery cost to $O(k_{\mathrm{rec}})$.
1684
+
1685
+ The combined candidate pool for the greedy selector is:
1686
+
1687
+ $$
1688
+ \mathcal{C}_{\mathrm{pool}}(q)
1689
+ =
1690
+ \mathcal{C}_2(q)\cup\mathcal{C}_{\mathrm{rec}}(q)
1691
+ $$
1692
+
1693
+ By construction,
1694
+ $\mathcal{C}_{\mathrm{pool}}(q)\subseteq\mathcal{V}_{\mathrm{rest}}$, so
1695
+ partition integrity is preserved.
1696
+
1697
+ ### 9.7 Greedy Coverage-Aware Selector
1698
+
1699
+ Given $\mathcal{C}_{\mathrm{pool}}(q)$, the selector builds the final chosen
1700
+ set greedily, using the same rank-then-prefix-accept spirit as the existing
1701
+ token-budget packing in Section 7.8.
1702
+
1703
+ Let $k_{\mathrm{cov}}\le k_2$ be the maximum number of anchor turns to select,
1704
+ default $k_{\mathrm{cov}}=3$.
1705
+
1706
+ Initialize:
1707
+
1708
+ $$
1709
+ \mathcal{S}_0 = \emptyset
1710
+ $$
1711
+
1712
+ For $i = 0, 1, \dots, k_{\mathrm{cov}}-1$:
1713
+
1714
+ $$
1715
+ d_i^*
1716
+ =
1717
+ \underset{d \in \mathcal{C}_{\mathrm{pool}}(q)\setminus\mathcal{S}_i}{\arg\max}
1718
+ \;
1719
+ S_{\mathrm{proj}}(d, \mathcal{S}_i, q)
1720
+ $$
1721
+
1722
+ Early stop if:
1723
+
1724
+ $$
1725
+ S_{\mathrm{proj}}(d_i^*, \mathcal{S}_i, q) < \theta_{\mathrm{stop}}
1726
+ $$
1727
+
1728
+ with default $\theta_{\mathrm{stop}}=0.10$. Otherwise:
1729
+
1730
+ $$
1731
+ \mathcal{S}_{i+1} = \mathcal{S}_i \cup \{d_i^*\}
1732
+ $$
1733
+
1734
+ The final selected set $\mathcal{S}^*(q)$ is $\mathcal{S}_{k_{\mathrm{cov}}}$, or the earlier set
1735
+ $\mathcal{S}_i$ at which early stopping triggered or the remaining candidate pool was exhausted.
1736
+
1737
+ Each greedy step scans at most
1738
+ $|\mathcal{C}_{\mathrm{pool}}(q)| \le k_2 + k_{\mathrm{rec}}$ candidates.
1739
+ Total complexity is therefore:
1740
+
1741
+ $$
1742
+ O\!\left(k_{\mathrm{cov}} \cdot (k_2 + k_{\mathrm{rec}})\right)
1743
+ $$
1744
+
1745
+ which is negligible relative to embedding and vector-search cost.
1746
+
1747
+ ### 9.8 Modified Projection Operator
1748
+
1749
+ The temporal extension redefines $\mathrm{Proj}$ conditionally:
1750
+
1751
+ $$
1752
+ \mathrm{Proj}(\mathcal{V}_{\mathrm{rest}}, q)
1753
+ =
1754
+ \begin{cases}
1755
+ \mathcal{S}^*(q)\cup\mathcal{C}_{hop}^{*}(q)
1756
+ & \text{if } \xi(q) \ge \theta_\xi \\[4pt]
1757
+ \mathcal{C}_2(q)\cup\mathcal{C}_{hop}^{*}(q)
1758
+ & \text{otherwise}
1759
+ \end{cases}
1760
+ $$
1761
+
1762
+ The assembly law and budget equations remain unchanged:
1763
+
1764
+ $$
1765
+ C_{\mathrm{total}}(q)=\mathcal{I}_1\cup\mathcal{I}_2^{*}\cup T_{\mathrm{recent}}\cup \mathrm{Proj}(\mathcal{V}_{\mathrm{rest}}, q)
1766
+ $$
1767
+
1768
+ $$
1769
+ \tau_{\mathcal{V}}(q)
1770
+ =
1771
+ \tau-\tau_{\mathcal{I}_1}
1772
+ -\sum_{d\in\mathcal{I}_2^{*}}\mathrm{toks}(d)
1773
+ -\sum_{d\in T_{\mathrm{recent}}}\mathrm{toks}(d)
1774
+ $$
1775
+
1776
+ Documents in $\mathrm{Proj}(\mathcal{V}_{\mathrm{rest}}, q)$ are injected in
1777
+ descending $\sigma(d)$ order until $\tau_{\mathcal{V}}(q)$ is exhausted.
1778
+
1779
+ For documents entering through the temporal selector, the merged score sequence
1780
+ is extended:
1781
+
1782
+ $$
1783
+ \sigma(d)=
1784
+ \begin{cases}
1785
+ S_{\mathrm{proj}}(d, \mathcal{S}^*\setminus\{d\}, q)
1786
+ & d\in\mathcal{S}^*(q) \\
1787
+ S_{hop}(d)
1788
+ & d\in\mathcal{C}_{hop}^{*}(q)
1789
+ \end{cases}
1790
+ $$
1791
+
1792
+ For documents that were already present in $\mathcal{C}_2(q)$, the standard
1793
+ $S_{\mathrm{final}}(d)$ path remains authoritative and duplicates are excluded
1794
+ by construction.
1795
+
1796
+ ### 9.9 Preservation of Section 7.10 Runtime Invariants
1797
+
1798
+ All runtime invariants from Section 7.10 remain preserved:
1799
+
1800
+ 1. Invariant completeness is unaffected because $\mathcal{I}_1$ injection is
1801
+ independent of $\mathrm{Proj}$.
1802
+ 2. Soft invariant order preservation is unaffected because
1803
+ $\mathcal{I}_2^{*}$ is unchanged.
1804
+ 3. Partition integrity is preserved because
1805
+ $\mathcal{C}_{\mathrm{rec}}\subseteq\mathcal{V}_{\mathrm{rest}}$ and
1806
+ $\mathcal{S}^*\subseteq\mathcal{C}_{\mathrm{pool}}
1807
+ \subseteq\mathcal{V}_{\mathrm{rest}}$.
1808
+ 4. Mandatory recent-tail completeness is unaffected because
1809
+ $T_{\mathrm{base}}\subseteq T_{\mathrm{recent}}$ remains independent of
1810
+ $\mathrm{Proj}$.
1811
+ 5. Score boundedness is preserved because
1812
+ $S_{\mathrm{proj}}(d,\mathcal{S},q)\in[0,1]$.
1813
+ 6. Token budget respect is preserved because the result still flows through the
1814
+ same residual variant budget and greedy token packing contract.
1815
+ 7. Compaction boundary safety is preserved because
1816
+ $\mathcal{S}^*\subseteq\mathcal{V}_{\mathrm{rest}}$.
1817
+ 8. Hop termination is unchanged because $\mathcal{C}_{hop}^{*}(q)$ is defined
1818
+ identically.
1819
+ 9. Edge-case safety is preserved by the guards below.
1820
+
1821
+ Edge-case additions:
1822
+
1823
+ - $\mathcal{C}_{\mathrm{pool}}(q)=\emptyset$: the greedy selector returns
1824
+ $\mathcal{S}^*=\emptyset$ and $\mathrm{Proj}$ reduces to
1825
+ $\mathcal{C}_{hop}^{*}(q)$ only.
1826
+ - $|E(q)|=0$: the denominator in $\Delta\Phi$ uses $\max(|E(q)|,1)$, so no
1827
+ division by zero is possible.
1828
+ - $\xi(q)<\theta_\xi$: the conditional routes directly to the existing
1829
+ $\mathcal{C}_2(q)\cup\mathcal{C}_{hop}^{*}(q)$ behavior.
1830
+ - $\tau_{\mathcal{V}}(q)=0$: the selector may compute $\mathcal{S}^*$, but
1831
+ packing injects zero documents and the budget invariant still holds.
1832
+
1833
+ ### 9.10 Symbol Table (Section 9 Additions)
1834
+
1835
+ | Symbol | Domain | Meaning |
1836
+ | --- | --- | --- |
1837
+ | $\xi(q)$ | $[0,1]$ | Temporal-compositional query indicator |
1838
+ | $\theta_\xi$ | $(0,1)$ | Activation threshold for temporal mode |
1839
+ | $\theta_{\xi}^{\mathrm{norm}}$ | $(0,\infty)$ | Saturation normalization for $\xi$ |
1840
+ | $A(d)$ | $[0,1]$ | Temporal anchor density of document $d$ |
1841
+ | $\theta_A^{\mathrm{norm}}$ | $(0,\infty)$ | Saturation normalization for $A$ |
1842
+ | $E(q)$ | ordered tuple of at most $m_{\max}$ slots | Event-slot sequence extracted from $q$ |
1843
+ | $\phi_j(d)$ | $\{0,1\}$ | Binary slot-match indicator |
1844
+ | $\theta_e$ | $[-1,1]$ | Slot-match similarity threshold |
1845
+ | $\Delta\Phi(d,\mathcal{S},q)$ | $[0,1]$ | Marginal event-slot coverage |
1846
+ | $\mu,\nu,\rho$ | $[0,1]$, sum to 1 | Coverage score weights |
1847
+ | $S_{\mathrm{cov}}(d,\mathcal{S},q)$ | $[0,1]$ | Coverage-augmented score |
1848
+ | $S_{\mathrm{proj}}(d,\mathcal{S},q)$ | $[0,1]$ | Final blended projection score |
1849
+ | $\mathcal{C}_{\mathrm{rec}}(q)$ | $\subseteq\mathcal{V}_{\mathrm{rest}}$ | Recovery candidate set |
1850
+ | $\theta_{\mathrm{rec}}$ | $[-1,1]$ | Semantic floor for recovery pass |
1851
+ | $k_{\mathrm{rec}}$ | $\mathbb{Z}_{>0}$ | Recovery set size cap |
1852
+ | $\mathcal{C}_{\mathrm{pool}}(q)$ | $\subseteq\mathcal{V}_{\mathrm{rest}}$ | Combined greedy input pool |
1853
+ | $k_{\mathrm{cov}}$ | $\mathbb{Z}_{>0}, \le k_2$ | Maximum anchor turns to select |
1854
+ | $\theta_{\mathrm{stop}}$ | $[0,1]$ | Early-stop floor for greedy selector |
1855
+ | $\mathcal{S}^*(q)$ | $\subseteq\mathcal{C}_{\mathrm{pool}}$ | Greedy-selected coverage-aware anchor set |
1856
+
1857
+ ### 9.11 Relationship to Existing Sections
1858
+
1859
+ This section is an extension, not a replacement:
1860
+
1861
+ - Section 1 hybrid score $\mathrm{score}(d)$ is unchanged and still feeds
1862
+ $S_{\mathrm{final}}(d)$ as before.
1863
+ - Section 7.5 $S_{\mathrm{final}}(d)$ is the first input to
1864
+ $S_{\mathrm{proj}}$; when $\xi(q)=0$, the two are identical.
1865
+ - Section 7.7 hop expansion $\mathcal{C}_{hop}^{*}$ is unchanged and is
1866
+ unioned with $\mathcal{S}^*$ exactly as before.
1867
+ - Section 7.8 budget arithmetic is unchanged; $\mathrm{Proj}$ is still bounded
1868
+ by $\tau_{\mathcal{V}}(q)$ and still greedy-packed.
1869
+ - [`gating.md`](./gating.md) inspired the saturating-sum pattern for $\xi(q)$,
1870
+ but the two operate on different objects and at different pipeline stages.
1871
+ - [`ast-v2.md`](./ast-v2.md) Section 7's document-addressed cache $\Psi$ should
1872
+ be extended to store the precomputed $A(d)$ value alongside existing tier and
1873
+ budget metadata.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@xdarkicex/openclaw-memory-libravdb",
3
- "version": "1.3.20",
3
+ "version": "1.4.0",
4
4
  "type": "module",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -12,7 +12,13 @@ import {
12
12
  rankSection7VariantCandidates,
13
13
  } from "./scoring.js";
14
14
  import { buildInjectedMemoryMessageContent, buildMemoryHeader, recentIds } from "./recall-utils.js";
15
- import { countTokens, estimateTokens, fitPromptBudget } from "./tokens.js";
15
+ import {
16
+ decideTemporalSelectorGuard,
17
+ detectTemporalQuerySignal,
18
+ rankTemporalRecoveryCandidates,
19
+ } from "./temporal.js";
20
+ import type { TemporalRecoveryRankingResult } from "./temporal.js";
21
+ import { countTokens, estimateTokens, fitPromptBudget, fitPromptBudgetFirstFit } from "./tokens.js";
16
22
  import type { RpcGetter } from "./plugin-runtime.js";
17
23
  import type {
18
24
  ContextAssembleArgs,
@@ -57,6 +63,7 @@ export function buildContextEngineFactory(
57
63
  }
58
64
 
59
65
  return {
66
+ info: { id: "libravdb-memory" },
60
67
  ownsCompaction: true,
61
68
  async bootstrap({ sessionId, userId }: ContextBootstrapArgs) {
62
69
  const rpc = await getRpc();
@@ -190,6 +197,8 @@ export function buildContextEngineFactory(
190
197
  systemPromptAddition: "",
191
198
  } satisfies ContextAssembleResult;
192
199
  }
200
+ const temporalQuery = detectTemporalQuerySignal(queryText);
201
+ const temporalSelectorGuard = decideTemporalSelectorGuard(queryText, temporalQuery);
193
202
 
194
203
  const excluded = recentIds(messages, 4);
195
204
  const cached = recallCache.take({ userId, queryText });
@@ -253,6 +262,8 @@ export function buildContextEngineFactory(
253
262
  cached,
254
263
  excluded,
255
264
  queryText,
265
+ temporalQuery,
266
+ temporalSelectorGuard,
256
267
  sessionId,
257
268
  userId,
258
269
  messages,
@@ -287,6 +298,8 @@ export function buildContextEngineFactory(
287
298
  cached,
288
299
  excluded,
289
300
  queryText,
301
+ temporalQuery,
302
+ temporalSelectorGuard,
290
303
  sessionId,
291
304
  userId,
292
305
  messages,
@@ -303,6 +316,8 @@ export function buildContextEngineFactory(
303
316
  cached: ReturnType<RecallCache<SearchResult>["take"]>;
304
317
  excluded: string[];
305
318
  queryText: string;
319
+ temporalQuery: ReturnType<typeof detectTemporalQuerySignal>;
320
+ temporalSelectorGuard: ReturnType<typeof decideTemporalSelectorGuard>;
306
321
  sessionId: string;
307
322
  userId: string;
308
323
  messages: Array<{ role: string; content: string }>;
@@ -562,6 +577,7 @@ export function buildContextEngineFactory(
562
577
  // it never modifies the C_total(q) output and does not spend from tau_V.
563
578
  let recoveryItems: SearchResult[] = [];
564
579
  let rawUserRecoveryDebug: NonNullable<NonNullable<ContextAssembleResult["_debug"]>["rawUserRecoveryCandidates"]> = [];
580
+ let temporalRecoveryResult: TemporalRecoveryRankingResult | null = null;
565
581
  if (recoveryTrigger.fire || crossSessionRawRecovery) {
566
582
  profiler?.mark("recovery_expand");
567
583
  const recoveryExcludeIDs = [...excluded, ...recentTailIDs, ...theoremSelectedIDs];
@@ -599,14 +615,44 @@ export function buildContextEngineFactory(
599
615
  k: Math.max((cfg.topK ?? 8) * 4, 8),
600
616
  excludeIds: recoveryExcludeIDs,
601
617
  });
602
- const reranked = rankRawUserRecoveryCandidates(
603
- annotateCollection(rawUserResults.results ?? [], `turns:${userId}`),
604
- { queryText },
605
- );
618
+ const annotatedUserResults = annotateCollection(rawUserResults.results ?? [], `turns:${userId}`);
619
+ temporalRecoveryResult = temporalSelectorGuard.shouldApply
620
+ ? rankTemporalRecoveryCandidates(annotatedUserResults, {
621
+ queryText,
622
+ maxSelected: 3,
623
+ nowMs: Date.now(),
624
+ recencyLambda: cfg.recencyLambdaUser ?? 0.00001,
625
+ })
626
+ : null;
627
+ const reranked = temporalRecoveryResult
628
+ ? temporalRecoveryResult
629
+ : rankRawUserRecoveryCandidates(annotatedUserResults, { queryText });
606
630
  if (debugRecovery) {
607
631
  rawUserRecoveryDebug = reranked.debug.slice(0, 8).map((item) => ({
608
- ...item,
632
+ id: item.id,
633
+ text: item.text,
609
634
  selected: false,
635
+ tokenEstimate: estimateTokens(item.text),
636
+ temporalAnchorDensity: "temporalAnchorDensity" in item && typeof item.temporalAnchorDensity === "number"
637
+ ? item.temporalAnchorDensity
638
+ : 0,
639
+ semanticScore: "semanticScore" in item && typeof item.semanticScore === "number"
640
+ ? item.semanticScore
641
+ : 0,
642
+ slotCoverage: "slotCoverage" in item && typeof item.slotCoverage === "number"
643
+ ? item.slotCoverage
644
+ : undefined,
645
+ slotMatches: "slotMatches" in item && Array.isArray(item.slotMatches)
646
+ ? item.slotMatches
647
+ : undefined,
648
+ lexicalCoverage: "lexicalCoverage" in item && typeof item.lexicalCoverage === "number"
649
+ ? item.lexicalCoverage
650
+ : ("slotCoverage" in item && typeof item.slotCoverage === "number" ? item.slotCoverage : 0),
651
+ recencyScore: "recencyScore" in item && typeof item.recencyScore === "number"
652
+ ? item.recencyScore
653
+ : 0,
654
+ finalScore: typeof item.finalScore === "number" ? item.finalScore : 0,
655
+ rationale: typeof item.rationale === "string" ? item.rationale : "",
610
656
  }));
611
657
  }
612
658
  recoveryCandidates.push(
@@ -622,7 +668,7 @@ export function buildContextEngineFactory(
622
668
  );
623
669
  }
624
670
 
625
- const fittedRecovery = fitPromptBudget(
671
+ const fittedRecovery = fitPromptBudgetFirstFit(
626
672
  dedupeRecoveryCandidates(recoveryCandidates),
627
673
  recoveryReserveTokens,
628
674
  );
@@ -667,6 +713,13 @@ export function buildContextEngineFactory(
667
713
  ? {
668
714
  recoveryTriggerFired: recoveryTrigger.fire,
669
715
  crossSessionRawRecovery,
716
+ recoveryReserveTokens,
717
+ temporalQueryIndicator: temporalQuery.indicator,
718
+ temporalQueryActive: temporalQuery.active,
719
+ temporalQueryPatterns: temporalQuery.matchedPatterns,
720
+ temporalSelectorApplied: temporalSelectorGuard.shouldApply,
721
+ temporalSelectorReason: temporalSelectorGuard.reason,
722
+ temporalRecoverySlots: temporalRecoveryResult?.slots,
670
723
  rawUserRecoveryCandidates: rawUserRecoveryDebug,
671
724
  }
672
725
  : undefined,
package/src/scoring.ts CHANGED
@@ -1,4 +1,5 @@
1
1
  import type { SearchResult } from "./types.js";
2
+ import { getTemporalAnchorDensity } from "./temporal.js";
2
3
 
3
4
  interface HybridOptions {
4
5
  alpha?: number;
@@ -41,6 +42,7 @@ interface RawUserRecoveryOptions {
41
42
  export interface RawUserRecoveryDebugCandidate {
42
43
  id: string;
43
44
  text: string;
45
+ temporalAnchorDensity: number;
44
46
  semanticScore: number;
45
47
  lexicalCoverage: number;
46
48
  recencyScore: number;
@@ -319,17 +321,29 @@ export function rankRawUserRecoveryCandidates(
319
321
  const now = opts.nowMs ?? Date.now();
320
322
  const recencyLambda = Math.max(0, opts.recencyLambda ?? 0.00001);
321
323
  const keywords = extractKeywords(opts.queryText);
324
+ const intentPhrases = extractIntentPhrases(opts.queryText);
322
325
 
323
326
  const ranked = items
324
327
  .map((item) => {
325
328
  const semanticScore = clamp01(typeof item.score === "number" ? item.score : 0);
326
329
  const lexicalCoverage = normalizedKeywordCoverage(keywords, item.text);
327
330
  const recencyScore = computeRecencyScore(item, now, recencyLambda);
328
- const finalScore = clamp01((0.30 * semanticScore) + (0.60 * lexicalCoverage) + (0.10 * recencyScore));
331
+ const temporalAnchorDensity = getTemporalAnchorDensity(
332
+ `${typeof item.metadata.collection === "string" ? item.metadata.collection : "unknown"}::${item.id}`,
333
+ item.text,
334
+ );
335
+ const intentAlignmentBonus = computeIntentAlignmentBonus(item.text, intentPhrases);
336
+ const finalScore = clamp01(
337
+ (0.30 * semanticScore) +
338
+ (0.60 * lexicalCoverage) +
339
+ (0.10 * recencyScore) +
340
+ intentAlignmentBonus,
341
+ );
329
342
  const rationale = buildRawUserRecoveryRationale({
330
343
  semanticScore,
331
344
  lexicalCoverage,
332
345
  recencyScore,
346
+ intentAlignmentBonus,
333
347
  });
334
348
 
335
349
  return {
@@ -340,6 +354,7 @@ export function rankRawUserRecoveryCandidates(
340
354
  debug: {
341
355
  id: item.id,
342
356
  text: item.text,
357
+ temporalAnchorDensity,
343
358
  semanticScore,
344
359
  lexicalCoverage,
345
360
  recencyScore,
@@ -473,7 +488,11 @@ function buildRawUserRecoveryRationale(scores: {
473
488
  semanticScore: number;
474
489
  lexicalCoverage: number;
475
490
  recencyScore: number;
491
+ intentAlignmentBonus: number;
476
492
  }): string {
493
+ if (scores.intentAlignmentBonus >= 0.04) {
494
+ return "intent phrase overlap lifted this candidate toward the query's direct ask";
495
+ }
477
496
  const lexicalDelta = scores.lexicalCoverage - scores.semanticScore;
478
497
  if (lexicalDelta > 0.15) {
479
498
  return "lexical coverage lifted this candidate above its semantic score";
@@ -487,6 +506,79 @@ function buildRawUserRecoveryRationale(scores: {
487
506
  return "semantic and lexical scores were balanced";
488
507
  }
489
508
 
509
+ function computeIntentAlignmentBonus(text: string, intentPhrases: string[]): number {
510
+ if (intentPhrases.length === 0) {
511
+ return 0;
512
+ }
513
+ const normalized = normalizeTextForPhraseMatch(text);
514
+ const matched = intentPhrases.filter((phrase) => normalized.includes(phrase)).length;
515
+ if (matched === 0) {
516
+ return 0;
517
+ }
518
+ return Math.min(0.08, matched * 0.02);
519
+ }
520
+
521
+ function extractIntentPhrases(text: string): string[] {
522
+ const terms = normalizeTerms(text).filter((term) => !INTENT_STOPWORDS.has(term));
523
+ const phrases: string[] = [];
524
+ for (let size = 4; size >= 2; size -= 1) {
525
+ for (let i = 0; i <= terms.length - size; i += 1) {
526
+ const phraseTerms = terms.slice(i, i + size);
527
+ if (phraseTerms.some((term) => term.length < 3)) {
528
+ continue;
529
+ }
530
+ const phrase = phraseTerms.join(" ");
531
+ if (!phrases.includes(phrase)) {
532
+ phrases.push(phrase);
533
+ }
534
+ }
535
+ }
536
+ return phrases.slice(0, 12);
537
+ }
538
+
539
+ function normalizeTextForPhraseMatch(text: string): string {
540
+ return normalizeTerms(text).join(" ");
541
+ }
542
+
543
+ const INTENT_STOPWORDS = new Set([
544
+ "the",
545
+ "and",
546
+ "for",
547
+ "with",
548
+ "that",
549
+ "this",
550
+ "have",
551
+ "from",
552
+ "your",
553
+ "what",
554
+ "when",
555
+ "where",
556
+ "which",
557
+ "would",
558
+ "could",
559
+ "should",
560
+ "about",
561
+ "into",
562
+ "some",
563
+ "before",
564
+ "after",
565
+ "them",
566
+ "they",
567
+ "been",
568
+ "just",
569
+ "want",
570
+ "looking",
571
+ "look",
572
+ "help",
573
+ "need",
574
+ "recommend",
575
+ "suggestions",
576
+ "suggest",
577
+ "advice",
578
+ "think",
579
+ "also",
580
+ ]);
581
+
490
582
  function extractKeywords(text: string): string[] {
491
583
  const tokens = normalizeTerms(text);
492
584
  const seen = new Set<string>();
package/src/sidecar.ts CHANGED
@@ -1,3 +1,4 @@
1
+ import fs from "node:fs";
1
2
  import net from "node:net";
2
3
  import os from "node:os";
3
4
  import path from "node:path";
@@ -268,13 +269,42 @@ export function daemonProvisioningHint(): string {
268
269
  }
269
270
 
270
271
  export function defaultEndpoint(platform = process.platform, homeDir = os.homedir()): string {
272
+ // Honour the daemon's own env var first (set by Homebrew LaunchAgent / systemd unit).
273
+ const envEndpoint = process.env.LIBRAVDB_RPC_ENDPOINT?.trim();
274
+ if (envEndpoint && isConfiguredEndpoint(envEndpoint)) {
275
+ return envEndpoint;
276
+ }
277
+
271
278
  if (platform === "win32") {
272
279
  return "tcp:127.0.0.1:37421";
273
280
  }
281
+
282
+ const sockName = "libravdb.sock";
283
+ const candidateDirs = [
284
+ // User-local (npm plugin convention)
285
+ homeDir?.trim() ? path.join(homeDir, ".clawdb", "run") : null,
286
+ // Homebrew (Apple Silicon) — matches the Homebrew formula LaunchAgent
287
+ "/opt/homebrew/var/clawdb/run",
288
+ // Homebrew (Intel Mac) / manual Linux installs
289
+ "/usr/local/var/clawdb/run",
290
+ ].filter((d): d is string => d !== null);
291
+
292
+ for (const dir of candidateDirs) {
293
+ const sockPath = path.join(dir, sockName);
294
+ try {
295
+ if (fs.existsSync(sockPath)) {
296
+ return `unix:${sockPath}`;
297
+ }
298
+ } catch {
299
+ // Permission error or similar — skip this candidate.
300
+ }
301
+ }
302
+
303
+ // Fallback to the original user-local path so error messages stay familiar.
274
304
  const baseDir = homeDir?.trim()
275
305
  ? path.join(homeDir, ".clawdb", "run")
276
306
  : path.join(".", ".clawdb", "run");
277
- return `unix:${path.join(baseDir, "libravdb.sock")}`;
307
+ return `unix:${path.join(baseDir, sockName)}`;
278
308
  }
279
309
 
280
310
  export function buildSidecarEnv(cfg: PluginConfig): Record<string, string> {
@@ -0,0 +1,433 @@
1
+ import type { SearchResult } from "./types.js";
2
+
3
/**
 * Query phrasings that signal a temporal question.
 *
 * Each entry groups alternative regular expressions under one label; an entry
 * counts once, with its `weight`, when any of its patterns matches the query.
 * All patterns are case-insensitive and non-global, so `RegExp.prototype.test`
 * carries no state between calls.
 */
const TEMPORAL_PATTERN_WEIGHTS: { label: string; weight: number; patterns: RegExp[] }[] = [
  { label: "how many days", weight: 1.0, patterns: [/\bhow\s+many\s+days\b/i] },
  { label: "how long", weight: 0.9, patterns: [/\bhow\s+long\b/i] },
  { label: "before or after", weight: 0.8, patterns: [/\bbefore\b/i, /\bafter\b/i] },
  { label: "since or between", weight: 0.7, patterns: [/\bsince\b/i, /\bbetween\b/i] },
  {
    label: "first or earlier",
    weight: 0.8,
    patterns: [/\bfirst\b/i, /\bearlier\b/i, /\bwhich\s+came\s+first\b/i],
  },
  { label: "when did", weight: 0.7, patterns: [/\bwhen\s+did\b/i] },
];
35
+
36
/**
 * Patterns whose matches count as "temporal anchors" inside a document.
 *
 * All patterns carry the global flag because they are consumed through
 * `String.prototype.matchAll`, which requires it.
 */
const TEMPORAL_ANCHOR_PATTERNS: RegExp[] = [
  // ISO-style dates: 2024-03-09.
  /\b\d{4}-\d{2}-\d{2}\b/g,
  // Slash dates with an optional year: 3/9, 03/09/24, 3/9/2024.
  /\b\d{1,2}\/\d{1,2}(?:\/\d{2,4})?\b/g,
  // Month name (full or abbreviated) + day, optional ordinal suffix and year.
  /\b(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?|jul(?:y)?|aug(?:ust)?|sep(?:t(?:ember)?)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\s+\d{1,2}(?:st|nd|rd|th)?(?:,\s*\d{4})?\b/gi,
  // Bare weekday names.
  /\b(?:monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b/gi,
  // Relative expressions. "last" accepts the same seven weekdays as "next" so
  // that "last friday" is recognised as one unit just like "last saturday".
  /\b(?:today|yesterday|tomorrow|last\s+(?:week|month|year|night|monday|tuesday|wednesday|thursday|friday|saturday|sunday)|next\s+(?:week|month|year|monday|tuesday|wednesday|thursday|friday|saturday|sunday)|mid-?[a-z]+)\b/gi,
  // Clock times with optional am/pm: 9:30, 09:30pm, 9:30 am.
  /\b\d{1,2}:\d{2}(?:\s?[ap]m)?\b/gi,
  // Runs of 10 to 13 digits (presumably epoch seconds/milliseconds; confirm).
  /\b\d{10,13}\b/g,
];

/** Divisor applied to the summed pattern weights before clamping to [0, 1]. */
const TEMPORAL_XI_NORM = 1.5;
/** Minimum normalized indicator for a query to be treated as temporal. */
const TEMPORAL_XI_THRESHOLD = 0.3;
/** Divisor for the unique-anchor count; three or more anchors saturate at 1. */
const TEMPORAL_ANCHOR_NORM = 3;
/** Maximum number of entries kept in `temporalAnchorCache`. */
const TEMPORAL_ANCHOR_CACHE_MAX = 4096;
/** Memoized anchor densities, keyed by document key plus text. */
const temporalAnchorCache = new Map<string, number>();
51
+
52
/**
 * Terms ignored when a temporal query is decomposed into slots.
 *
 * Besides general function words and request verbs, the list contains the
 * temporal cue words themselves ("before", "since", "how", "long", ...), so
 * that slots describe the events being compared rather than the comparison.
 * Each term is listed exactly once.
 */
const TEMPORAL_SLOT_STOPWORDS = new Set([
  // General function words.
  "the", "and", "for", "with", "that", "this", "have", "from", "your",
  "what", "when", "where", "which", "would", "could", "should",
  "about", "into", "some", "them", "they", "been", "just",
  // Request verbs and nouns.
  "want", "looking", "look", "help", "need",
  "recommend", "suggestions", "suggest", "advice", "think", "also",
  // Auxiliaries.
  "did", "does", "do",
  // Temporal cue words.
  "after", "before", "since", "between", "first", "earlier",
  "many", "days", "long", "how",
  "take", "took",
  // Short pronouns.
  "it", "me", "my", "i",
]);
108
+
109
/** Outcome of scanning a query for temporal phrasing. */
export interface TemporalQuerySignal {
  /** Summed weight of the matched pattern groups, normalized into [0, 1]. */
  indicator: number;
  /** Whether `indicator` reached the activation threshold. */
  active: boolean;
  /** Labels of the pattern groups that matched, in table order. */
  matchedPatterns: string[];
}
114
+
115
/** Verdict on whether the temporal selector should run for a query. */
export interface TemporalSelectorGuardDecision {
  /** True only when every guard condition passed. */
  shouldApply: boolean;
  /** Slots extracted from the query, reported regardless of the verdict. */
  slots: string[];
  /** Human-readable explanation of the verdict. */
  reason: string;
}
120
+
121
/** Per-candidate diagnostics emitted by the temporal recovery ranking. */
export interface TemporalRecoveryDebugCandidate {
  /** Identifier of the underlying search result. */
  id: string;
  /** Text of the underlying search result. */
  text: string;
  /** Whether the greedy selection pass picked this candidate. */
  selected: boolean;
  /** Unique temporal anchors found in the text, normalized into [0, 1]. */
  temporalAnchorDensity: number;
  /** Incoming relevance score, clamped into [0, 1]. */
  semanticScore: number;
  /** Exponential-decay recency score. */
  recencyScore: number;
  /** Fraction of query slots this candidate covers. */
  slotCoverage: number;
  /** The query slots this candidate covers. */
  slotMatches: string[];
  /** Blended score used for ordering. */
  finalScore: number;
  /** Short explanation of which signal carried the candidate. */
  rationale: string;
}
133
+
134
/** Everything returned by `rankTemporalRecoveryCandidates`. */
export interface TemporalRecoveryRankingResult {
  /** Selected candidates first, then the rest by descending final score. */
  ranked: SearchResult[];
  /** Diagnostics for every candidate, by descending final score. */
  debug: TemporalRecoveryDebugCandidate[];
  /** Temporal signal detected on the query. */
  temporalQuery: TemporalQuerySignal;
  /** Slots extracted from the query. */
  slots: string[];
}
140
+
141
/**
 * Measures how strongly a query reads as a temporal question.
 *
 * Every group in `TEMPORAL_PATTERN_WEIGHTS` with at least one matching pattern
 * contributes its weight once. The sum is divided by `TEMPORAL_XI_NORM` and
 * clamped into [0, 1]; the query is "active" when that indicator reaches
 * `TEMPORAL_XI_THRESHOLD`.
 *
 * @param queryText - Raw user query.
 * @returns The indicator, the activation flag and the labels that matched.
 */
export function detectTemporalQuerySignal(queryText: string): TemporalQuerySignal {
  const hits = TEMPORAL_PATTERN_WEIGHTS.filter(({ patterns }) =>
    patterns.some((pattern) => pattern.test(queryText)),
  );
  const weightedMatches = hits.reduce((sum, hit) => sum + hit.weight, 0);
  const indicator = clamp01(weightedMatches / TEMPORAL_XI_NORM);

  return {
    indicator,
    active: indicator >= TEMPORAL_XI_THRESHOLD,
    matchedPatterns: hits.map((hit) => hit.label),
  };
}
159
+
160
/**
 * Returns how densely `text` is populated with temporal anchors, in [0, 1].
 *
 * Matches of every pattern in `TEMPORAL_ANCHOR_PATTERNS` are trimmed,
 * lower-cased and de-duplicated; the unique count is divided by
 * `TEMPORAL_ANCHOR_NORM` and clamped. Results are memoized under a key built
 * from `docKey` and the full text, and a cache hit also refreshes the entry's
 * position in the cache.
 *
 * @param docKey - Caller-chosen identifier of the document.
 * @param text - Document text to scan.
 * @returns The anchor density in [0, 1].
 */
export function getTemporalAnchorDensity(docKey: string, text: string): number {
  const cacheKey = `${docKey}\n${text}`;

  const memoized = temporalAnchorCache.get(cacheKey);
  if (typeof memoized === "number") {
    touchTemporalAnchorCache(cacheKey, memoized);
    return memoized;
  }

  const anchors = new Set<string>();
  for (const pattern of TEMPORAL_ANCHOR_PATTERNS) {
    for (const match of text.matchAll(pattern)) {
      const anchor = (match[0] ?? "").trim().toLowerCase();
      if (anchor.length > 0) {
        anchors.add(anchor);
      }
    }
  }

  const density = clamp01(anchors.size / TEMPORAL_ANCHOR_NORM);
  touchTemporalAnchorCache(cacheKey, density);
  return density;
}
182
+
183
/**
 * Re-ranks search results for a temporal query.
 *
 * Each item gets a blended score: 0.40 × semantic score, 0.25 × recency,
 * 0.20 × temporal anchor density and 0.15 × slot coverage, plus 0.05 when the
 * query's temporal signal is active, clamped into [0, 1].
 *
 * Up to `maxSelected` items are then picked greedily. In each pass the
 * not-yet-selected candidate with the highest blended score plus
 * 0.25 × (share of query slots it would newly cover) wins; selection stops
 * early when the best such value is below 0.12. Remaining items follow in
 * descending blended-score order.
 *
 * @param items - Candidates to rank; the inputs are not modified.
 * @param opts.queryText - Query used for signal detection and slot extraction.
 * @param opts.maxSelected - Greedy pick limit (default 3, floored, minimum 1).
 * @param opts.nowMs - Reference time in milliseconds (default `Date.now()`).
 * @param opts.recencyLambda - Decay rate per second of age (default 0.00001,
 *   negative values are treated as 0).
 * @returns Ranked items (copies carrying the blended `finalScore`),
 *   per-candidate debug rows, the query signal and the extracted slots.
 */
export function rankTemporalRecoveryCandidates(
  items: SearchResult[],
  opts: {
    queryText: string;
    maxSelected?: number;
    nowMs?: number;
    recencyLambda?: number;
  },
): TemporalRecoveryRankingResult {
  const temporalQuery = detectTemporalQuerySignal(opts.queryText);
  const slots = extractTemporalSlots(opts.queryText);
  const recencyLambda = Math.max(0, opts.recencyLambda ?? 0.00001);
  const now = opts.nowMs ?? Date.now();
  const maxSelected = Math.max(1, Math.floor(opts.maxSelected ?? 3));

  const activeQueryBonus = temporalQuery.active ? 0.05 : 0;
  const slotDivisor = Math.max(1, slots.length);

  const decorated = items.map((item) => {
    const semanticScore = clamp01(typeof item.finalScore === "number" ? item.finalScore : item.score ?? 0);
    const recencyScore = computeRecencyScore(item, now, recencyLambda);
    const collection = typeof item.metadata.collection === "string" ? item.metadata.collection : "unknown";
    const temporalAnchorDensity = getTemporalAnchorDensity(`${collection}::${item.id}`, item.text);
    const slotHit = computeSlotCoverage(slots, item.text);
    const finalScore = clamp01(
      (0.40 * semanticScore) +
      (0.25 * recencyScore) +
      (0.20 * temporalAnchorDensity) +
      (0.15 * slotHit.coverage) +
      activeQueryBonus,
    );
    return {
      item,
      semanticScore,
      recencyScore,
      temporalAnchorDensity,
      slotCoverage: slotHit.coverage,
      slotMatches: slotHit.matches,
      finalScore,
    };
  });

  type Decorated = (typeof decorated)[number];
  const byFinalScoreDesc = (left: Decorated, right: Decorated): number => right.finalScore - left.finalScore;
  const withFinalScore = (candidate: Decorated): SearchResult => ({
    ...candidate.item,
    finalScore: candidate.finalScore,
  });

  const selectedIDs = new Set<string>();
  const coveredSlots = new Set<string>();
  const selected: SearchResult[] = [];

  for (let pass = 0; pass < maxSelected; pass += 1) {
    let winner: Decorated | null = null;
    let winnerScore = Number.NEGATIVE_INFINITY;

    for (const candidate of decorated) {
      if (selectedIDs.has(candidate.item.id)) {
        continue;
      }
      // Reward candidates for slots that no earlier pick has covered yet.
      const newlyCovered = candidate.slotMatches.filter((slot) => !coveredSlots.has(slot)).length;
      const marginalCoverage = newlyCovered / slotDivisor;
      const combined = candidate.finalScore + (0.25 * marginalCoverage);
      if (combined > winnerScore) {
        winner = candidate;
        winnerScore = combined;
      }
    }

    if (winner === null || winnerScore < 0.12) {
      break;
    }

    selectedIDs.add(winner.item.id);
    winner.slotMatches.forEach((slot) => coveredSlots.add(slot));
    selected.push(withFinalScore(winner));
  }

  const remaining = decorated
    .filter((candidate) => !selectedIDs.has(candidate.item.id))
    .sort(byFinalScoreDesc)
    .map(withFinalScore);

  const debug = [...decorated]
    .sort(byFinalScoreDesc)
    .map((candidate) => ({
      id: candidate.item.id,
      text: candidate.item.text,
      selected: selectedIDs.has(candidate.item.id),
      temporalAnchorDensity: candidate.temporalAnchorDensity,
      semanticScore: candidate.semanticScore,
      recencyScore: candidate.recencyScore,
      slotCoverage: candidate.slotCoverage,
      slotMatches: candidate.slotMatches,
      finalScore: candidate.finalScore,
      rationale: buildTemporalRecoveryRationale(
        candidate.slotCoverage,
        candidate.temporalAnchorDensity,
        candidate.semanticScore,
      ),
    }));

  return { ranked: [...selected, ...remaining], debug, temporalQuery, slots };
}
284
+
285
/**
 * Decides whether the temporal selector should be applied to a query.
 *
 * Three conditions must all hold: the temporal signal is active, at least one
 * of the matched labels is "how many days", "how long", "before or after" or
 * "since or between", and the query decomposes into exactly two slots. The
 * first failing condition determines the reported reason.
 *
 * @param queryText - Raw user query.
 * @param temporalQuery - Precomputed signal; detected from `queryText` when
 *   omitted.
 * @returns The verdict, the extracted slots and a reason string.
 */
export function decideTemporalSelectorGuard(
  queryText: string,
  temporalQuery: TemporalQuerySignal = detectTemporalQuerySignal(queryText),
): TemporalSelectorGuardDecision {
  const slots = extractTemporalSlots(queryText);
  const verdict = (shouldApply: boolean, reason: string): TemporalSelectorGuardDecision => ({
    shouldApply,
    slots,
    reason,
  });

  if (!temporalQuery.active) {
    return verdict(false, "temporal query gate inactive");
  }

  const strongLabels = ["how many days", "how long", "before or after", "since or between"];
  const hasStrongPattern = temporalQuery.matchedPatterns.some((label) => strongLabels.includes(label));
  if (!hasStrongPattern) {
    return verdict(false, "query lacks strong compositional temporal pattern");
  }

  if (slots.length !== 2) {
    return verdict(false, "query did not resolve to exactly two temporal slots");
  }

  return verdict(true, "strong temporal query with two-slot decomposition");
}
326
+
327
/**
 * Empties the memoized anchor densities so tests start from a clean cache.
 */
export function resetTemporalCachesForTest(): void {
  temporalAnchorCache.clear();
}
330
+
331
/**
 * Breaks a query into at most four "slots", short term sequences that name
 * the events the query relates to each other.
 *
 * The query is cut at the connectives after/before/between/or/and/then and at
 * runs of `? . ! , ;`. Within each clause, terms shorter than three characters
 * and terms in `TEMPORAL_SLOT_STOPWORDS` are dropped. A clause with up to three
 * remaining terms becomes one slot; a longer clause contributes its first four
 * and its last four terms as two slots. When no clause yields a slot, the
 * first four content terms of the whole query are used instead.
 *
 * @param text - Raw user query.
 * @returns Up to four unique slots, in discovery order.
 */
function extractTemporalSlots(text: string): string[] {
  const contentTerms = (fragment: string): string[] =>
    normalizeTerms(fragment).filter((term) => term.length >= 3 && !TEMPORAL_SLOT_STOPWORDS.has(term));

  const slots = new Set<string>();
  const pieces = text.split(/(?:\bafter\b|\bbefore\b|\bbetween\b|\bor\b|\band\b|\bthen\b|[?.!,;]+)/i);

  for (const piece of pieces) {
    const clause = piece.trim();
    if (clause.length === 0) {
      continue;
    }
    const terms = contentTerms(clause);
    if (terms.length === 0) {
      continue;
    }
    if (terms.length > 3) {
      // Long clause: keep both ends. The Set collapses them when they coincide.
      slots.add(terms.slice(0, 4).join(" "));
      slots.add(terms.slice(-4).join(" "));
    } else {
      slots.add(terms.join(" "));
    }
  }

  if (slots.size === 0) {
    const fallback = contentTerms(text);
    if (fallback.length > 0) {
      slots.add(fallback.slice(0, 4).join(" "));
    }
  }

  return Array.from(slots).slice(0, 4);
}
361
+
362
/**
 * Determines which query slots a candidate text covers.
 *
 * A slot counts as covered when at least half of its terms (of three or more
 * characters) occur among the candidate's normalized terms. Slots without any
 * such term can never be covered but still count toward the denominator.
 *
 * @param slots - Slots extracted from the query.
 * @param candidateText - Text of the candidate document.
 * @returns `coverage`, the covered share of all slots, and `matches`, the
 *   covered slots in input order.
 */
function computeSlotCoverage(slots: string[], candidateText: string): { coverage: number; matches: string[] } {
  if (slots.length === 0) {
    return { coverage: 0, matches: [] };
  }

  const vocabulary = new Set(normalizeTerms(candidateText));

  const matches = slots.filter((slot) => {
    const slotTerms = normalizeTerms(slot).filter((term) => term.length >= 3);
    if (slotTerms.length === 0) {
      return false;
    }
    const present = slotTerms.filter((term) => vocabulary.has(term)).length;
    return present / slotTerms.length >= 0.5;
  });

  return {
    coverage: matches.length / Math.max(1, slots.length),
    matches,
  };
}
388
+
389
/**
 * Chooses the debug explanation for a candidate from its three signals.
 *
 * Slot coverage and anchor density each count as supportive at 0.5 or above;
 * semantic similarity is only consulted when neither is, and counts at 0.6 or
 * above.
 *
 * @param slotCoverage - Share of query slots the candidate covers.
 * @param anchorDensity - Temporal anchor density of the candidate.
 * @param semanticScore - Clamped relevance score of the candidate.
 * @returns One of five fixed rationale strings.
 */
function buildTemporalRecoveryRationale(slotCoverage: number, anchorDensity: number, semanticScore: number): string {
  const slotsSupport = slotCoverage >= 0.5;
  const anchorsSupport = anchorDensity >= 0.5;

  if (slotsSupport) {
    return anchorsSupport
      ? "slot coverage and temporal anchors both supported this candidate"
      : "slot coverage lifted this candidate toward the query's subevents";
  }
  if (anchorsSupport) {
    return "temporal anchors lifted this candidate toward the query's date logic";
  }
  return semanticScore >= 0.6
    ? "semantic similarity kept this candidate in the temporal pool"
    : "candidate remained in the bounded temporal recovery pool";
}
404
+
405
/**
 * Exponential-decay recency score: `exp(-recencyLambda × age in seconds)`.
 *
 * The age is the difference between `now` and the numeric `metadata.ts` of the
 * item, divided by 1000. Items without a numeric `ts`, or with a `ts` later
 * than `now`, have age 0 and therefore score 1.
 *
 * @param item - Search result whose `metadata.ts` is read.
 * @param now - Reference time, in the same unit as `metadata.ts`.
 * @param recencyLambda - Decay rate per second of age.
 * @returns The recency score.
 */
function computeRecencyScore(item: SearchResult, now: number, recencyLambda: number): number {
  const rawTs = item.metadata.ts;
  const timestamp = typeof rawTs === "number" ? rawTs : now;
  const elapsed = Math.max(0, now - timestamp);
  const ageSeconds = elapsed / 1000;
  return Math.exp(-recencyLambda * ageSeconds);
}
410
+
411
/**
 * Tokenizes text into lower-case terms.
 *
 * The lower-cased text is split on every run of characters other than ASCII
 * letters, digits and underscore; empty fragments are discarded.
 *
 * @param text - Text to tokenize.
 * @returns The terms in their original order, duplicates included.
 */
function normalizeTerms(text: string): string[] {
  const terms: string[] = [];
  for (const fragment of text.toLowerCase().split(/[^a-z0-9_]+/i)) {
    if (fragment.length > 0) {
      terms.push(fragment);
    }
  }
  return terms;
}
417
+
418
/**
 * Stores `value` under `cacheKey` and marks the entry as most recently used.
 *
 * The entry is removed and re-inserted so it moves to the end of the map's
 * insertion order. If the map then exceeds `TEMPORAL_ANCHOR_CACHE_MAX`, the
 * entry at the front of that order is evicted.
 *
 * @param cacheKey - Key of the entry to write.
 * @param value - Density to store.
 */
function touchTemporalAnchorCache(cacheKey: string, value: number): void {
  // Deleting an absent key is a no-op, so no existence check is needed.
  temporalAnchorCache.delete(cacheKey);
  temporalAnchorCache.set(cacheKey, value);

  if (temporalAnchorCache.size <= TEMPORAL_ANCHOR_CACHE_MAX) {
    return;
  }

  const oldestKey = temporalAnchorCache.keys().next().value;
  if (typeof oldestKey === "string") {
    temporalAnchorCache.delete(oldestKey);
  }
}
430
+
431
/**
 * Clamps a number into the closed interval [0, 1].
 *
 * `NaN` is mapped to 0. `Math.min` and `Math.max` would otherwise propagate
 * it, letting a single invalid score escape the [0, 1] contract and turn
 * score-difference sort comparators into `NaN`.
 *
 * @param value - Any number, including ±Infinity and NaN.
 * @returns `value` limited to [0, 1], or 0 for NaN.
 */
function clamp01(value: number): number {
  if (Number.isNaN(value)) {
    return 0;
  }
  return Math.min(1, Math.max(0, value));
}
package/src/tokens.ts CHANGED
@@ -21,6 +21,22 @@ export function fitPromptBudget(items: SearchResult[], budget: number): SearchRe
21
21
  return selected;
22
22
  }
23
23
 
24
/**
 * Selects items, in input order, whose estimated token costs fit the budget.
 *
 * An item that would push the running total above `budget` is skipped rather
 * than ending the scan, so a later, cheaper item can still be taken.
 *
 * @param items - Candidates in priority order.
 * @param budget - Maximum total of `estimateTokens(item.text)` to admit.
 * @returns The admitted items, in their original relative order.
 */
export function fitPromptBudgetFirstFit(items: SearchResult[], budget: number): SearchResult[] {
  let used = 0;
  return items.filter((item) => {
    const cost = estimateTokens(item.text);
    if (used + cost > budget) {
      return false;
    }
    used += cost;
    return true;
  });
}
39
+
24
40
  export function countTokens(messages: Array<{ content: string }>): number {
25
41
  return messages.reduce((sum, msg) => sum + estimateTokens(msg.content), 0);
26
42
  }
package/src/types.ts CHANGED
@@ -203,12 +203,23 @@ export interface ContextAssembleResult {
203
203
  id: string;
204
204
  text: string;
205
205
  selected: boolean;
206
+ tokenEstimate: number;
207
+ temporalAnchorDensity: number;
206
208
  semanticScore: number;
209
+ slotCoverage?: number;
210
+ slotMatches?: string[];
207
211
  lexicalCoverage: number;
208
212
  recencyScore: number;
209
213
  finalScore: number;
210
214
  rationale: string;
211
215
  }>;
216
+ recoveryReserveTokens?: number;
217
+ temporalQueryIndicator?: number;
218
+ temporalQueryActive?: boolean;
219
+ temporalQueryPatterns?: string[];
220
+ temporalSelectorApplied?: boolean;
221
+ temporalSelectorReason?: string;
222
+ temporalRecoverySlots?: string[];
212
223
  };
213
224
  }
214
225