@xdarkicex/openclaw-memory-libravdb 1.3.20 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/README.md +1 -1
- package/docs/mathematics-v2.md +485 -0
- package/package.json +1 -1
- package/src/context-engine.ts +60 -7
- package/src/scoring.ts +93 -1
- package/src/sidecar.ts +31 -1
- package/src/temporal.ts +433 -0
- package/src/tokens.ts +16 -0
- package/src/types.ts +11 -0
package/docs/README.md
CHANGED
|
@@ -9,7 +9,7 @@ to preserve project history and design evolution.
|
|
|
9
9
|
- [uninstall.md](./uninstall.md) - Clean shutdown and removal guide for the plugin, daemon, and optional local data.
|
|
10
10
|
- [architecture.md](./architecture.md) - End-to-end component model, turn lifecycle, compaction flow, and degraded behavior.
|
|
11
11
|
- [problem.md](./problem.md) - Technical argument for replacing the stock OpenClaw memory lifecycle in this use case.
|
|
12
|
-
- [mathematics-v2.md](./mathematics-v2.md) - Formal reference for hybrid scoring, decay, token budgeting, Matryoshka retrieval, compaction,
|
|
12
|
+
- [mathematics-v2.md](./mathematics-v2.md) - Formal reference for hybrid scoring, decay, token budgeting, Matryoshka retrieval, compaction, planned two-pass retrieval, and temporal-compositional projection.
|
|
13
13
|
- [compaction-evaluation.md](./compaction-evaluation.md) - Real-model benchmark notes for T5 summary confidence, Nomic-space preservation, and the hard preservation gate.
|
|
14
14
|
- [continuity.md](./continuity.md) - Continuity model for invariant context, preserved recent raw session tail, and retrieved older memory.
|
|
15
15
|
- [ast-v2.md](./ast-v2.md) - Reviewed authoritative AST partitioning reference for authored Markdown hard invariants, soft invariants, and variant lore.
|
package/docs/mathematics-v2.md
CHANGED
|
@@ -1386,3 +1386,488 @@ retaining for future work:
|
|
|
1386
1386
|
These ideas are intentionally preserved as future mathematics rather than
|
|
1387
1387
|
current contract. The present document remains normative only for the formulas
|
|
1388
1388
|
and invariants already defined above.
|
|
1389
|
+
|
|
1390
|
+
## 9. Temporal-Compositional Retrieval Extension
|
|
1391
|
+
|
|
1392
|
+
This section defines a narrow, mathematically principled extension to the
|
|
1393
|
+
$\mathrm{Proj}()$ operator that corrects the single-turn-centric failure mode on
|
|
1394
|
+
temporal-compositional queries such as "how many days before $X$ did $Y$
|
|
1395
|
+
happen."
|
|
1396
|
+
|
|
1397
|
+
The extension is self-contained. Every formula in this section is bounded and
|
|
1398
|
+
correct under the existing parameter domains. The assembly law
|
|
1399
|
+
$C_{\mathrm{total}}(q)$, the budget hierarchy, and the runtime invariants in
|
|
1400
|
+
Section 7.10 and [`continuity.md`](./continuity.md) are unchanged. Only the
|
|
1401
|
+
internal definition of $\mathrm{Proj}(\mathcal{V}_{\mathrm{rest}}, q)$ is
|
|
1402
|
+
refined.
|
|
1403
|
+
|
|
1404
|
+
Implemented in: `src/temporal.ts`.
|
|
1405
|
+
|
|
1406
|
+
### 9.1 Motivation: The Set-Scoring Gap
|
|
1407
|
+
|
|
1408
|
+
The standard Pass-2 score $S_{\mathrm{final}}(d)$ maximizes over individual
|
|
1409
|
+
candidates:
|
|
1410
|
+
|
|
1411
|
+
$$
|
|
1412
|
+
\mathcal{C}_2(q)
|
|
1413
|
+
=
|
|
1414
|
+
\mathrm{TopK}_{d \in \mathcal{C}_1(q)}
|
|
1415
|
+
\left(k_2,\, S_{\mathrm{final}}(d)\right)
|
|
1416
|
+
$$
|
|
1417
|
+
|
|
1418
|
+
This is optimal when the query is answerable from a single best document. It
|
|
1419
|
+
fails when the query requires two complementary date-bearing turns to be
|
|
1420
|
+
jointly present, neither of which is individually the best semantic match.
|
|
1421
|
+
|
|
1422
|
+
The failure pattern is:
|
|
1423
|
+
|
|
1424
|
+
- Turn $A$ covers the query topic broadly, so it earns a high
|
|
1425
|
+
$S_{\mathrm{final}}$ and wins alone.
|
|
1426
|
+
- Turn $B$ contains the missing date anchor, but earns only a moderate
|
|
1427
|
+
$S_{\mathrm{final}}$ and is evicted.
|
|
1428
|
+
- Neither $A$ alone nor $B$ alone answers the question.
|
|
1429
|
+
|
|
1430
|
+
The fix is to move from
|
|
1431
|
+
$\underset{d}{\arg\max}\; S_{\mathrm{final}}(d)$ to a coverage-aware set
|
|
1432
|
+
selector that rewards a set of candidates for jointly maximizing semantic
|
|
1433
|
+
relevance, temporal anchor density, and event-slot coverage while penalizing
|
|
1434
|
+
redundancy automatically via marginal scoring.
|
|
1435
|
+
|
|
1436
|
+
### 9.2 Temporal Query Indicator $\xi(q)\in[0,1]$
|
|
1437
|
+
|
|
1438
|
+
To avoid mutating the retrieval contract for normal queries, the extension
|
|
1439
|
+
activates only when the query is detected to be temporal-compositional.
|
|
1440
|
+
Define the temporal query indicator using the same saturating-sum pattern as
|
|
1441
|
+
$T(t)$ in [`gating.md`](./gating.md):
|
|
1442
|
+
|
|
1443
|
+
$$
|
|
1444
|
+
\xi(q)
|
|
1445
|
+
=
|
|
1446
|
+
\min\!\left(
|
|
1447
|
+
\frac{\displaystyle\sum_i s_i \cdot \mathbf{1}[\mathrm{tpat}_i(q)]}
|
|
1448
|
+
{\theta_{\xi}^{\mathrm{norm}}},
|
|
1449
|
+
1
|
|
1450
|
+
\right)
|
|
1451
|
+
$$
|
|
1452
|
+
|
|
1453
|
+
where the shipped temporal patterns $\mathrm{tpat}_i$ are zero-allocation
|
|
1454
|
+
byte-lexer matches over the query text, including but not limited to
|
|
1455
|
+
"how many days", "how long", "before", "after", "since", "first", "earlier",
|
|
1456
|
+
"which came first", "when did", and "between".
|
|
1457
|
+
|
|
1458
|
+
Each pattern carries a weight $s_i > 0$. The default normalization constant is
|
|
1459
|
+
$\theta_{\xi}^{\mathrm{norm}} = 1.5$, so two strong temporal signals (combined weight $\ge 1.5$, e.g. $s_i \ge 0.75$ each) saturate
|
|
1460
|
+
$\xi(q)=1$.
|
|
1461
|
+
|
|
1462
|
+
By construction, the $\min(\cdot, 1)$ clamp and non-negative numerator
|
|
1463
|
+
guarantee:
|
|
1464
|
+
|
|
1465
|
+
$$
|
|
1466
|
+
\xi(q)\in[0,1]
|
|
1467
|
+
$$
|
|
1468
|
+
|
|
1469
|
+
If no temporal patterns match, $\xi(q)=0$ and the extension contributes
|
|
1470
|
+
nothing to the scoring formula.
|
|
1471
|
+
|
|
1472
|
+
The extension activates only when $\xi(q)\ge\theta_\xi$, with shipped default
|
|
1473
|
+
$\theta_\xi = 0.3$. Below that threshold, the standard $\mathrm{Proj}$ path
|
|
1474
|
+
executes without modification.
|
|
1475
|
+
|
|
1476
|
+
### 9.3 Temporal Anchor Density $A(d)\in[0,1]$
|
|
1477
|
+
|
|
1478
|
+
A document's temporal anchor density measures how many explicit date or time
|
|
1479
|
+
expressions it contains, normalized by a bounded saturation constant.
|
|
1480
|
+
Define the anchor count over a lightweight anchor pattern set $\mathcal{P}_A$
|
|
1481
|
+
(ISO dates, relative day expressions, clock times, calendar words, Unix
|
|
1482
|
+
timestamps):
|
|
1483
|
+
|
|
1484
|
+
$$
|
|
1485
|
+
A(d)
|
|
1486
|
+
=
|
|
1487
|
+
\min\!\left(
|
|
1488
|
+
\frac{\displaystyle\sum_j \mathbf{1}[\mathrm{anch}_j(d)]}
|
|
1489
|
+
{\theta_A^{\mathrm{norm}}},
|
|
1490
|
+
1
|
|
1491
|
+
\right)
|
|
1492
|
+
$$
|
|
1493
|
+
|
|
1494
|
+
The default $\theta_A^{\mathrm{norm}} = 3$, so three or more distinct anchor
|
|
1495
|
+
expressions saturate $A(d)=1$.
|
|
1496
|
+
|
|
1497
|
+
Again, the clamp guarantees:
|
|
1498
|
+
|
|
1499
|
+
$$
|
|
1500
|
+
A(d)\in[0,1]
|
|
1501
|
+
$$
|
|
1502
|
+
|
|
1503
|
+
$A(d)$ is a precomputed document-level scalar. It does not depend on the query
|
|
1504
|
+
and should be cached in the same document-addressed cache $\Psi$ defined in
|
|
1505
|
+
[`ast-v2.md`](./ast-v2.md) Section 7 alongside tier partition and budget
|
|
1506
|
+
metadata. The value must be recomputed whenever a stored document is created,
|
|
1507
|
+
updated, or regenerated by compaction.
|
|
1508
|
+
|
|
1509
|
+
### 9.4 Event-Slot Extraction and Marginal Coverage $\Delta\Phi$
|
|
1510
|
+
|
|
1511
|
+
#### 9.4.1 Event-Slot Extraction
|
|
1512
|
+
|
|
1513
|
+
For a temporal-compositional query $q$, define the event-slot set:
|
|
1514
|
+
|
|
1515
|
+
$$
|
|
1516
|
+
E(q)=\langle e_1, e_2, \dots, e_m \rangle
|
|
1517
|
+
$$
|
|
1518
|
+
|
|
1519
|
+
where each $e_j$ is a short noun-phrase span extracted from $q$ by a
|
|
1520
|
+
lightweight span extractor: named entities plus the main noun phrase preceding
|
|
1521
|
+
and following any detected temporal-pattern word. The extractor returns at
|
|
1522
|
+
most $m_{\max}=4$ slots to bound cost.
|
|
1523
|
+
|
|
1524
|
+
When $|E(q)|=0$, all coverage terms evaluate to zero and the formula degrades
|
|
1525
|
+
cleanly.
|
|
1526
|
+
|
|
1527
|
+
#### 9.4.2 Per-Slot Coverage Indicator
|
|
1528
|
+
|
|
1529
|
+
For each slot $e_j$ and candidate document $d$, define the binary slot-match
|
|
1530
|
+
indicator:
|
|
1531
|
+
|
|
1532
|
+
$$
|
|
1533
|
+
\phi_j(d)
|
|
1534
|
+
=
|
|
1535
|
+
\mathbf{1}\!\left[\varphi(e_j)^\top \varphi(d) \ge \theta_e\right]
|
|
1536
|
+
\in \{0,1\}
|
|
1537
|
+
$$
|
|
1538
|
+
|
|
1539
|
+
where $\varphi(\cdot)$ is the same unit-normalized embedding function defined
|
|
1540
|
+
in Section 7.1, and $\theta_e \in [-1,1]$ is the slot-match similarity
|
|
1541
|
+
threshold, default $\theta_e = 0.50$.
|
|
1542
|
+
|
|
1543
|
+
#### 9.4.3 Marginal Coverage
|
|
1544
|
+
|
|
1545
|
+
For a set $\mathcal{S}$ of already-selected documents, define the marginal
|
|
1546
|
+
coverage of adding $d$:
|
|
1547
|
+
|
|
1548
|
+
$$
|
|
1549
|
+
\Delta\Phi(d, \mathcal{S}, q)
|
|
1550
|
+
=
|
|
1551
|
+
\frac{1}{\max(|E(q)|, 1)}
|
|
1552
|
+
\sum_{j=1}^{|E(q)|}
|
|
1553
|
+
\phi_j(d)
|
|
1554
|
+
\cdot
|
|
1555
|
+
\mathbf{1}\!\left[\nexists d' \in \mathcal{S} : \phi_j(d') = 1\right]
|
|
1556
|
+
$$
|
|
1557
|
+
|
|
1558
|
+
This is the fraction of all event slots in $E(q)$ that $d$ newly covers, i.e. slots matched by $d$ that no document in $\mathcal{S}$ already matches.
|
|
1559
|
+
|
|
1560
|
+
The outer factor is in $(0,1]$, the sum counts at most $|E(q)|$ binary terms,
|
|
1561
|
+
and therefore:
|
|
1562
|
+
|
|
1563
|
+
$$
|
|
1564
|
+
\Delta\Phi(d, \mathcal{S}, q)\in[0,1]
|
|
1565
|
+
$$
|
|
1566
|
+
|
|
1567
|
+
The indicator
|
|
1568
|
+
$\mathbf{1}\!\left[\nexists d' \in \mathcal{S} : \phi_j(d') = 1\right]$
|
|
1569
|
+
ensures that slots already covered by a previously selected document
|
|
1570
|
+
contribute zero marginal gain, automatically penalizing redundant anchor turns
|
|
1571
|
+
without a separate explicit penalty term.
|
|
1572
|
+
|
|
1573
|
+
As $|\mathcal{S}|$ grows, $\Delta\Phi(d,\mathcal{S},q)$ is monotone
|
|
1574
|
+
non-increasing: new selections can only cover more slots, leaving fewer
|
|
1575
|
+
uncovered slots for later candidates to gain credit for.
|
|
1576
|
+
|
|
1577
|
+
### 9.5 Coverage-Augmented Blended Score $S_{\mathrm{proj}}(d,\mathcal{S},q)\in[0,1]$
|
|
1578
|
+
|
|
1579
|
+
|
|
1580
|
+
Define the coverage-augmented score for candidate $d$ given already-selected
|
|
1581
|
+
set $\mathcal{S}$ and query $q$:
|
|
1582
|
+
|
|
1583
|
+
$$
|
|
1584
|
+
S_{\mathrm{cov}}(d, \mathcal{S}, q)
|
|
1585
|
+
=
|
|
1586
|
+
\mu \cdot S_{\mathrm{final}}(d)
|
|
1587
|
+
+ \nu \cdot A(d)
|
|
1588
|
+
+ \rho \cdot \Delta\Phi(d, \mathcal{S}, q)
|
|
1589
|
+
$$
|
|
1590
|
+
|
|
1591
|
+
where:
|
|
1592
|
+
|
|
1593
|
+
$$
|
|
1594
|
+
\mu,\nu,\rho\in[0,1],
|
|
1595
|
+
\qquad
|
|
1596
|
+
\mu+\nu+\rho=1
|
|
1597
|
+
$$
|
|
1598
|
+
|
|
1599
|
+
The default shipped weights are $\mu=0.60$, $\nu=0.20$, and $\rho=0.20$.
|
|
1600
|
+
|
|
1601
|
+
Blend this with the standard score using $\xi(q)$ as an interpolation scalar:
|
|
1602
|
+
|
|
1603
|
+
$$
|
|
1604
|
+
S_{\mathrm{proj}}(d, \mathcal{S}, q)
|
|
1605
|
+
=
|
|
1606
|
+
(1 - \xi(q)) \cdot S_{\mathrm{final}}(d)
|
|
1607
|
+
+ \xi(q) \cdot S_{\mathrm{cov}}(d, \mathcal{S}, q)
|
|
1608
|
+
$$
|
|
1609
|
+
|
|
1610
|
+
Substituting $S_{\mathrm{cov}}$ yields:
|
|
1611
|
+
|
|
1612
|
+
$$
|
|
1613
|
+
S_{\mathrm{proj}}
|
|
1614
|
+
=
|
|
1615
|
+
\bigl(1 - \xi(1-\mu)\bigr)\cdot S_{\mathrm{final}}
|
|
1616
|
+
+ \xi\nu \cdot A
|
|
1617
|
+
+ \xi\rho \cdot \Delta\Phi
|
|
1618
|
+
$$
|
|
1619
|
+
|
|
1620
|
+
All coefficients are non-negative, and they sum to one:
|
|
1621
|
+
|
|
1622
|
+
$$
|
|
1623
|
+
\bigl(1 - \xi(1-\mu)\bigr) + \xi\nu + \xi\rho
|
|
1624
|
+
=
|
|
1625
|
+
1 - \xi + \xi\mu + \xi\nu + \xi\rho
|
|
1626
|
+
=
|
|
1627
|
+
1 - \xi + \xi(\mu+\nu+\rho)
|
|
1628
|
+
=
|
|
1629
|
+
1
|
|
1630
|
+
$$
|
|
1631
|
+
|
|
1632
|
+
Because $S_{\mathrm{final}}(d)$, $A(d)$, and
|
|
1633
|
+
$\Delta\Phi(d,\mathcal{S},q)$ all lie in $[0,1]$, this is a proper convex
|
|
1634
|
+
combination, so:
|
|
1635
|
+
|
|
1636
|
+
$$
|
|
1637
|
+
S_{\mathrm{proj}}(d,\mathcal{S},q)\in[0,1]
|
|
1638
|
+
$$
|
|
1639
|
+
|
|
1640
|
+
Degeneracy cases:
|
|
1641
|
+
|
|
1642
|
+
| Condition | Behavior |
|
|
1643
|
+
| --- | --- |
|
|
1644
|
+
| $\xi(q)=0$ | $S_{\mathrm{proj}} = S_{\mathrm{final}}(d)$; standard retrieval unchanged |
|
|
1645
|
+
| $\xi(q)=1$, $\nu=\rho=0$, $\mu=1$ | Explicit no-op configuration; still $S_{\mathrm{proj}} = S_{\mathrm{final}}(d)$ |
|
|
1646
|
+
| $\lvert E(q)\rvert=0$ | $\Delta\Phi=0$ for all $d$; the $\rho$ term vanishes |
|
|
1647
|
+
| $\mathcal{S}=\emptyset$ | $\Delta\Phi$ equals full slot-coverage fraction |
|
|
1648
|
+
| all slots already covered by $\mathcal{S}$ | $\Delta\Phi=0$ for all remaining $d$ |
|
|
1649
|
+
|
|
1650
|
+
Note: the greedy selector below optimizes a submodular coverage term
|
|
1651
|
+
$\Delta\Phi$ augmented with fixed document priors $S_{\mathrm{final}}(d)$ and
|
|
1652
|
+
$A(d)$. The classic $(1-1/e)$ approximation guarantee applies strictly to the
|
|
1653
|
+
coverage component; in practice the blended score preserves greedy usefulness
|
|
1654
|
+
for temporal anchor selection.
|
|
1655
|
+
|
|
1656
|
+
### 9.6 Temporal Recovery Candidate Set $\mathcal{C}_{\mathrm{rec}}(q)$
|
|
1657
|
+
|
|
1658
|
+
|
|
1659
|
+
The root cause of the observed benchmark failure is not only that documents are
|
|
1660
|
+
scored incorrectly; it is also that the necessary complementary anchor turn may
|
|
1661
|
+
never enter $\mathcal{C}_2(q)$ because its semantic similarity to the
|
|
1662
|
+
whole-query embedding is too low.
|
|
1663
|
+
|
|
1664
|
+
A bounded recovery pass admits anchor-rich documents below the normal Pass-1
|
|
1665
|
+
threshold:
|
|
1666
|
+
|
|
1667
|
+
$$
|
|
1668
|
+
\mathcal{C}_{\mathrm{rec}}(q)
|
|
1669
|
+
=
|
|
1670
|
+
\mathrm{TopK}_{d \in
|
|
1671
|
+
\left\{d' \in \mathcal{V}_{\mathrm{rest}} :
|
|
1672
|
+
\mathrm{sim}(q,d') \ge \theta_{\mathrm{rec}}\right\}}
|
|
1673
|
+
\left(k_{\mathrm{rec}},\, A(d)\right)
|
|
1674
|
+
\setminus \mathcal{C}_2(q)
|
|
1675
|
+
$$
|
|
1676
|
+
|
|
1677
|
+
where:
|
|
1678
|
+
|
|
1679
|
+
- $\theta_{\mathrm{rec}} < \theta_1$ is a looser semantic floor, default
|
|
1680
|
+
$\theta_{\mathrm{rec}} = 0.15$, preventing pure noise while still admitting
|
|
1681
|
+
anchor-heavy but semantically distant turns.
|
|
1682
|
+
- $k_{\mathrm{rec}}$ is a small cap, default $k_{\mathrm{rec}} = 10$, bounding
|
|
1683
|
+
the recovery set size, and hence its downstream scoring cost, to $O(k_{\mathrm{rec}})$.
|
|
1684
|
+
|
|
1685
|
+
The combined candidate pool for the greedy selector is:
|
|
1686
|
+
|
|
1687
|
+
$$
|
|
1688
|
+
\mathcal{C}_{\mathrm{pool}}(q)
|
|
1689
|
+
=
|
|
1690
|
+
\mathcal{C}_2(q)\cup\mathcal{C}_{\mathrm{rec}}(q)
|
|
1691
|
+
$$
|
|
1692
|
+
|
|
1693
|
+
By construction,
|
|
1694
|
+
$\mathcal{C}_{\mathrm{pool}}(q)\subseteq\mathcal{V}_{\mathrm{rest}}$, so
|
|
1695
|
+
partition integrity is preserved.
|
|
1696
|
+
|
|
1697
|
+
### 9.7 Greedy Coverage-Aware Selector
|
|
1698
|
+
|
|
1699
|
+
Given $\mathcal{C}_{\mathrm{pool}}(q)$, the selector builds the final chosen
|
|
1700
|
+
set greedily, using the same rank-then-prefix-accept spirit as the existing
|
|
1701
|
+
token-budget packing in Section 7.8.
|
|
1702
|
+
|
|
1703
|
+
Let $k_{\mathrm{cov}}\le k_2$ be the maximum number of anchor turns to select,
|
|
1704
|
+
default $k_{\mathrm{cov}}=3$.
|
|
1705
|
+
|
|
1706
|
+
Initialize:
|
|
1707
|
+
|
|
1708
|
+
$$
|
|
1709
|
+
\mathcal{S}_0 = \emptyset
|
|
1710
|
+
$$
|
|
1711
|
+
|
|
1712
|
+
For $i = 0, 1, \dots, k_{\mathrm{cov}}-1$:
|
|
1713
|
+
|
|
1714
|
+
$$
|
|
1715
|
+
d_i^*
|
|
1716
|
+
=
|
|
1717
|
+
\underset{d \in \mathcal{C}_{\mathrm{pool}}(q)\setminus\mathcal{S}_i}{\arg\max}
|
|
1718
|
+
\;
|
|
1719
|
+
S_{\mathrm{proj}}(d, \mathcal{S}_i, q)
|
|
1720
|
+
$$
|
|
1721
|
+
|
|
1722
|
+
Early stop if:
|
|
1723
|
+
|
|
1724
|
+
$$
|
|
1725
|
+
S_{\mathrm{proj}}(d_i^*, \mathcal{S}_i, q) < \theta_{\mathrm{stop}}
|
|
1726
|
+
$$
|
|
1727
|
+
|
|
1728
|
+
with default $\theta_{\mathrm{stop}}=0.10$. Otherwise:
|
|
1729
|
+
|
|
1730
|
+
$$
|
|
1731
|
+
\mathcal{S}_{i+1} = \mathcal{S}_i \cup \{d_i^*\}
|
|
1732
|
+
$$
|
|
1733
|
+
|
|
1734
|
+
The final selected set is $\mathcal{S}^*(q)=\mathcal{S}_{k_{\mathrm{cov}}}$, or the earlier set $\mathcal{S}_i$ at which
|
|
1735
|
+
early stopping triggered or the candidate pool was exhausted.
|
|
1736
|
+
|
|
1737
|
+
Each greedy step scans at most
|
|
1738
|
+
$|\mathcal{C}_{\mathrm{pool}}(q)| \le k_2 + k_{\mathrm{rec}}$ candidates.
|
|
1739
|
+
Total complexity is therefore:
|
|
1740
|
+
|
|
1741
|
+
$$
|
|
1742
|
+
O\!\left(k_{\mathrm{cov}} \cdot (k_2 + k_{\mathrm{rec}})\right)
|
|
1743
|
+
$$
|
|
1744
|
+
|
|
1745
|
+
which is negligible relative to embedding and vector-search cost.
|
|
1746
|
+
|
|
1747
|
+
### 9.8 Modified Projection Operator
|
|
1748
|
+
|
|
1749
|
+
The temporal extension redefines $\mathrm{Proj}$ conditionally:
|
|
1750
|
+
|
|
1751
|
+
$$
|
|
1752
|
+
\mathrm{Proj}(\mathcal{V}_{\mathrm{rest}}, q)
|
|
1753
|
+
=
|
|
1754
|
+
\begin{cases}
|
|
1755
|
+
\mathcal{S}^*(q)\cup\mathcal{C}_{hop}^{*}(q)
|
|
1756
|
+
& \text{if } \xi(q) \ge \theta_\xi \\[4pt]
|
|
1757
|
+
\mathcal{C}_2(q)\cup\mathcal{C}_{hop}^{*}(q)
|
|
1758
|
+
& \text{otherwise}
|
|
1759
|
+
\end{cases}
|
|
1760
|
+
$$
|
|
1761
|
+
|
|
1762
|
+
The assembly law and budget equations remain unchanged:
|
|
1763
|
+
|
|
1764
|
+
$$
|
|
1765
|
+
C_{\mathrm{total}}(q)=\mathcal{I}_1\cup\mathcal{I}_2^{*}\cup T_{\mathrm{recent}}\cup \mathrm{Proj}(\mathcal{V}_{\mathrm{rest}}, q)
|
|
1766
|
+
$$
|
|
1767
|
+
|
|
1768
|
+
$$
|
|
1769
|
+
\tau_{\mathcal{V}}(q)
|
|
1770
|
+
=
|
|
1771
|
+
\tau-\tau_{\mathcal{I}_1}
|
|
1772
|
+
-\sum_{d\in\mathcal{I}_2^{*}}\mathrm{toks}(d)
|
|
1773
|
+
-\sum_{d\in T_{\mathrm{recent}}}\mathrm{toks}(d)
|
|
1774
|
+
$$
|
|
1775
|
+
|
|
1776
|
+
Documents in $\mathrm{Proj}(\mathcal{V}_{\mathrm{rest}}, q)$ are injected in
|
|
1777
|
+
descending $\sigma(d)$ order until $\tau_{\mathcal{V}}(q)$ is exhausted.
|
|
1778
|
+
|
|
1779
|
+
For documents entering through the temporal selector, the merged score sequence
|
|
1780
|
+
is extended:
|
|
1781
|
+
|
|
1782
|
+
$$
|
|
1783
|
+
\sigma(d)=
|
|
1784
|
+
\begin{cases}
|
|
1785
|
+
S_{\mathrm{proj}}(d, \mathcal{S}^*\setminus\{d\}, q)
|
|
1786
|
+
& d\in\mathcal{S}^*(q) \\
|
|
1787
|
+
S_{hop}(d)
|
|
1788
|
+
& d\in\mathcal{C}_{hop}^{*}(q)
|
|
1789
|
+
\end{cases}
|
|
1790
|
+
$$
|
|
1791
|
+
|
|
1792
|
+
For documents that were already present in $\mathcal{C}_2(q)$, the standard
|
|
1793
|
+
$S_{\mathrm{final}}(d)$ path remains authoritative and duplicates are excluded
|
|
1794
|
+
by construction.
|
|
1795
|
+
|
|
1796
|
+
### 9.9 Preservation of Section 7.10 Runtime Invariants
|
|
1797
|
+
|
|
1798
|
+
All runtime invariants from Section 7.10 remain preserved:
|
|
1799
|
+
|
|
1800
|
+
1. Invariant completeness is unaffected because $\mathcal{I}_1$ injection is
|
|
1801
|
+
independent of $\mathrm{Proj}$.
|
|
1802
|
+
2. Soft invariant order preservation is unaffected because
|
|
1803
|
+
$\mathcal{I}_2^{*}$ is unchanged.
|
|
1804
|
+
3. Partition integrity is preserved because
|
|
1805
|
+
$\mathcal{C}_{\mathrm{rec}}\subseteq\mathcal{V}_{\mathrm{rest}}$ and
|
|
1806
|
+
$\mathcal{S}^*\subseteq\mathcal{C}_{\mathrm{pool}}
|
|
1807
|
+
\subseteq\mathcal{V}_{\mathrm{rest}}$.
|
|
1808
|
+
4. Mandatory recent-tail completeness is unaffected because
|
|
1809
|
+
$T_{\mathrm{base}}\subseteq T_{\mathrm{recent}}$ remains independent of
|
|
1810
|
+
$\mathrm{Proj}$.
|
|
1811
|
+
5. Score boundedness is preserved because
|
|
1812
|
+
$S_{\mathrm{proj}}(d,\mathcal{S},q)\in[0,1]$.
|
|
1813
|
+
6. Token budget respect is preserved because the result still flows through the
|
|
1814
|
+
same residual variant budget and greedy token packing contract.
|
|
1815
|
+
7. Compaction boundary safety is preserved because
|
|
1816
|
+
$\mathcal{S}^*\subseteq\mathcal{V}_{\mathrm{rest}}$.
|
|
1817
|
+
8. Hop termination is unchanged because $\mathcal{C}_{hop}^{*}(q)$ is defined
|
|
1818
|
+
identically.
|
|
1819
|
+
9. Edge-case safety is preserved by the guards below.
|
|
1820
|
+
|
|
1821
|
+
Edge-case additions:
|
|
1822
|
+
|
|
1823
|
+
- $\mathcal{C}_{\mathrm{pool}}(q)=\emptyset$: the greedy selector returns
|
|
1824
|
+
$\mathcal{S}^*=\emptyset$ and $\mathrm{Proj}$ reduces to
|
|
1825
|
+
$\mathcal{C}_{hop}^{*}(q)$ only.
|
|
1826
|
+
- $|E(q)|=0$: the denominator in $\Delta\Phi$ uses $\max(|E(q)|,1)$, so no
|
|
1827
|
+
division by zero is possible.
|
|
1828
|
+
- $\xi(q)<\theta_\xi$: the conditional routes directly to the existing
|
|
1829
|
+
$\mathcal{C}_2(q)\cup\mathcal{C}_{hop}^{*}(q)$ behavior.
|
|
1830
|
+
- $\tau_{\mathcal{V}}(q)=0$: the selector may compute $\mathcal{S}^*$, but
|
|
1831
|
+
packing injects zero documents and the budget invariant still holds.
|
|
1832
|
+
|
|
1833
|
+
### 9.10 Symbol Table (Section 9 Additions)
|
|
1834
|
+
|
|
1835
|
+
| Symbol | Domain | Meaning |
|
|
1836
|
+
| --- | --- | --- |
|
|
1837
|
+
| $\xi(q)$ | $[0,1]$ | Temporal-compositional query indicator |
|
|
1838
|
+
| $\theta_\xi$ | $(0,1)$ | Activation threshold for temporal mode |
|
|
1839
|
+
| $\theta_{\xi}^{\mathrm{norm}}$ | $(0,\infty)$ | Saturation normalization for $\xi$ |
|
|
1840
|
+
| $A(d)$ | $[0,1]$ | Temporal anchor density of document $d$ |
|
|
1841
|
+
| $\theta_A^{\mathrm{norm}}$ | $(0,\infty)$ | Saturation normalization for $A$ |
|
|
1842
|
+
| $E(q)$ | ordered tuple of at most $m_{\max}$ slots | Event-slot sequence extracted from $q$ |
|
|
1843
|
+
| $\phi_j(d)$ | $\{0,1\}$ | Binary slot-match indicator |
|
|
1844
|
+
| $\theta_e$ | $[-1,1]$ | Slot-match similarity threshold |
|
|
1845
|
+
| $\Delta\Phi(d,\mathcal{S},q)$ | $[0,1]$ | Marginal event-slot coverage |
|
|
1846
|
+
| $\mu,\nu,\rho$ | $[0,1]$, sum to 1 | Coverage score weights |
|
|
1847
|
+
| $S_{\mathrm{cov}}(d,\mathcal{S},q)$ | $[0,1]$ | Coverage-augmented score |
|
|
1848
|
+
| $S_{\mathrm{proj}}(d,\mathcal{S},q)$ | $[0,1]$ | Final blended projection score |
|
|
1849
|
+
| $\mathcal{C}_{\mathrm{rec}}(q)$ | $\subseteq\mathcal{V}_{\mathrm{rest}}$ | Recovery candidate set |
|
|
1850
|
+
| $\theta_{\mathrm{rec}}$ | $[-1,1]$ | Semantic floor for recovery pass |
|
|
1851
|
+
| $k_{\mathrm{rec}}$ | $\mathbb{Z}_{>0}$ | Recovery set size cap |
|
|
1852
|
+
| $\mathcal{C}_{\mathrm{pool}}(q)$ | $\subseteq\mathcal{V}_{\mathrm{rest}}$ | Combined greedy input pool |
|
|
1853
|
+
| $k_{\mathrm{cov}}$ | $\mathbb{Z}_{>0}, \le k_2$ | Maximum anchor turns to select |
|
|
1854
|
+
| $\theta_{\mathrm{stop}}$ | $[0,1]$ | Early-stop floor for greedy selector |
|
|
1855
|
+
| $\mathcal{S}^*(q)$ | $\subseteq\mathcal{C}_{\mathrm{pool}}$ | Greedy-selected coverage-aware anchor set |
|
|
1856
|
+
|
|
1857
|
+
### 9.11 Relationship to Existing Sections
|
|
1858
|
+
|
|
1859
|
+
This section is an extension, not a replacement:
|
|
1860
|
+
|
|
1861
|
+
- Section 1 hybrid score $\mathrm{score}(d)$ is unchanged and still feeds
|
|
1862
|
+
$S_{\mathrm{final}}(d)$ as before.
|
|
1863
|
+
- Section 7.5 $S_{\mathrm{final}}(d)$ is the first input to
|
|
1864
|
+
$S_{\mathrm{proj}}$; when $\xi(q)=0$, the two are identical.
|
|
1865
|
+
- Section 7.7 hop expansion $\mathcal{C}_{hop}^{*}$ is unchanged and is
|
|
1866
|
+
unioned with $\mathcal{S}^*$ exactly as before.
|
|
1867
|
+
- Section 7.8 budget arithmetic is unchanged; $\mathrm{Proj}$ is still bounded
|
|
1868
|
+
by $\tau_{\mathcal{V}}(q)$ and still greedy-packed.
|
|
1869
|
+
- [`gating.md`](./gating.md) inspired the saturating-sum pattern for $\xi(q)$,
|
|
1870
|
+
but the two operate on different objects and at different pipeline stages.
|
|
1871
|
+
- [`ast-v2.md`](./ast-v2.md) Section 7's document-addressed cache $\Psi$ should
|
|
1872
|
+
be extended to store the precomputed $A(d)$ value alongside existing tier and
|
|
1873
|
+
budget metadata.
|
package/package.json
CHANGED
package/src/context-engine.ts
CHANGED
|
@@ -12,7 +12,13 @@ import {
|
|
|
12
12
|
rankSection7VariantCandidates,
|
|
13
13
|
} from "./scoring.js";
|
|
14
14
|
import { buildInjectedMemoryMessageContent, buildMemoryHeader, recentIds } from "./recall-utils.js";
|
|
15
|
-
import {
|
|
15
|
+
import {
|
|
16
|
+
decideTemporalSelectorGuard,
|
|
17
|
+
detectTemporalQuerySignal,
|
|
18
|
+
rankTemporalRecoveryCandidates,
|
|
19
|
+
} from "./temporal.js";
|
|
20
|
+
import type { TemporalRecoveryRankingResult } from "./temporal.js";
|
|
21
|
+
import { countTokens, estimateTokens, fitPromptBudget, fitPromptBudgetFirstFit } from "./tokens.js";
|
|
16
22
|
import type { RpcGetter } from "./plugin-runtime.js";
|
|
17
23
|
import type {
|
|
18
24
|
ContextAssembleArgs,
|
|
@@ -57,6 +63,7 @@ export function buildContextEngineFactory(
|
|
|
57
63
|
}
|
|
58
64
|
|
|
59
65
|
return {
|
|
66
|
+
info: { id: "libravdb-memory" },
|
|
60
67
|
ownsCompaction: true,
|
|
61
68
|
async bootstrap({ sessionId, userId }: ContextBootstrapArgs) {
|
|
62
69
|
const rpc = await getRpc();
|
|
@@ -190,6 +197,8 @@ export function buildContextEngineFactory(
|
|
|
190
197
|
systemPromptAddition: "",
|
|
191
198
|
} satisfies ContextAssembleResult;
|
|
192
199
|
}
|
|
200
|
+
const temporalQuery = detectTemporalQuerySignal(queryText);
|
|
201
|
+
const temporalSelectorGuard = decideTemporalSelectorGuard(queryText, temporalQuery);
|
|
193
202
|
|
|
194
203
|
const excluded = recentIds(messages, 4);
|
|
195
204
|
const cached = recallCache.take({ userId, queryText });
|
|
@@ -253,6 +262,8 @@ export function buildContextEngineFactory(
|
|
|
253
262
|
cached,
|
|
254
263
|
excluded,
|
|
255
264
|
queryText,
|
|
265
|
+
temporalQuery,
|
|
266
|
+
temporalSelectorGuard,
|
|
256
267
|
sessionId,
|
|
257
268
|
userId,
|
|
258
269
|
messages,
|
|
@@ -287,6 +298,8 @@ export function buildContextEngineFactory(
|
|
|
287
298
|
cached,
|
|
288
299
|
excluded,
|
|
289
300
|
queryText,
|
|
301
|
+
temporalQuery,
|
|
302
|
+
temporalSelectorGuard,
|
|
290
303
|
sessionId,
|
|
291
304
|
userId,
|
|
292
305
|
messages,
|
|
@@ -303,6 +316,8 @@ export function buildContextEngineFactory(
|
|
|
303
316
|
cached: ReturnType<RecallCache<SearchResult>["take"]>;
|
|
304
317
|
excluded: string[];
|
|
305
318
|
queryText: string;
|
|
319
|
+
temporalQuery: ReturnType<typeof detectTemporalQuerySignal>;
|
|
320
|
+
temporalSelectorGuard: ReturnType<typeof decideTemporalSelectorGuard>;
|
|
306
321
|
sessionId: string;
|
|
307
322
|
userId: string;
|
|
308
323
|
messages: Array<{ role: string; content: string }>;
|
|
@@ -562,6 +577,7 @@ export function buildContextEngineFactory(
|
|
|
562
577
|
// it never modifies the C_total(q) output and does not spend from tau_V.
|
|
563
578
|
let recoveryItems: SearchResult[] = [];
|
|
564
579
|
let rawUserRecoveryDebug: NonNullable<NonNullable<ContextAssembleResult["_debug"]>["rawUserRecoveryCandidates"]> = [];
|
|
580
|
+
let temporalRecoveryResult: TemporalRecoveryRankingResult | null = null;
|
|
565
581
|
if (recoveryTrigger.fire || crossSessionRawRecovery) {
|
|
566
582
|
profiler?.mark("recovery_expand");
|
|
567
583
|
const recoveryExcludeIDs = [...excluded, ...recentTailIDs, ...theoremSelectedIDs];
|
|
@@ -599,14 +615,44 @@ export function buildContextEngineFactory(
|
|
|
599
615
|
k: Math.max((cfg.topK ?? 8) * 4, 8),
|
|
600
616
|
excludeIds: recoveryExcludeIDs,
|
|
601
617
|
});
|
|
602
|
-
const
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
618
|
+
const annotatedUserResults = annotateCollection(rawUserResults.results ?? [], `turns:${userId}`);
|
|
619
|
+
temporalRecoveryResult = temporalSelectorGuard.shouldApply
|
|
620
|
+
? rankTemporalRecoveryCandidates(annotatedUserResults, {
|
|
621
|
+
queryText,
|
|
622
|
+
maxSelected: 3,
|
|
623
|
+
nowMs: Date.now(),
|
|
624
|
+
recencyLambda: cfg.recencyLambdaUser ?? 0.00001,
|
|
625
|
+
})
|
|
626
|
+
: null;
|
|
627
|
+
const reranked = temporalRecoveryResult
|
|
628
|
+
? temporalRecoveryResult
|
|
629
|
+
: rankRawUserRecoveryCandidates(annotatedUserResults, { queryText });
|
|
606
630
|
if (debugRecovery) {
|
|
607
631
|
rawUserRecoveryDebug = reranked.debug.slice(0, 8).map((item) => ({
|
|
608
|
-
|
|
632
|
+
id: item.id,
|
|
633
|
+
text: item.text,
|
|
609
634
|
selected: false,
|
|
635
|
+
tokenEstimate: estimateTokens(item.text),
|
|
636
|
+
temporalAnchorDensity: "temporalAnchorDensity" in item && typeof item.temporalAnchorDensity === "number"
|
|
637
|
+
? item.temporalAnchorDensity
|
|
638
|
+
: 0,
|
|
639
|
+
semanticScore: "semanticScore" in item && typeof item.semanticScore === "number"
|
|
640
|
+
? item.semanticScore
|
|
641
|
+
: 0,
|
|
642
|
+
slotCoverage: "slotCoverage" in item && typeof item.slotCoverage === "number"
|
|
643
|
+
? item.slotCoverage
|
|
644
|
+
: undefined,
|
|
645
|
+
slotMatches: "slotMatches" in item && Array.isArray(item.slotMatches)
|
|
646
|
+
? item.slotMatches
|
|
647
|
+
: undefined,
|
|
648
|
+
lexicalCoverage: "lexicalCoverage" in item && typeof item.lexicalCoverage === "number"
|
|
649
|
+
? item.lexicalCoverage
|
|
650
|
+
: ("slotCoverage" in item && typeof item.slotCoverage === "number" ? item.slotCoverage : 0),
|
|
651
|
+
recencyScore: "recencyScore" in item && typeof item.recencyScore === "number"
|
|
652
|
+
? item.recencyScore
|
|
653
|
+
: 0,
|
|
654
|
+
finalScore: typeof item.finalScore === "number" ? item.finalScore : 0,
|
|
655
|
+
rationale: typeof item.rationale === "string" ? item.rationale : "",
|
|
610
656
|
}));
|
|
611
657
|
}
|
|
612
658
|
recoveryCandidates.push(
|
|
@@ -622,7 +668,7 @@ export function buildContextEngineFactory(
|
|
|
622
668
|
);
|
|
623
669
|
}
|
|
624
670
|
|
|
625
|
-
const fittedRecovery =
|
|
671
|
+
const fittedRecovery = fitPromptBudgetFirstFit(
|
|
626
672
|
dedupeRecoveryCandidates(recoveryCandidates),
|
|
627
673
|
recoveryReserveTokens,
|
|
628
674
|
);
|
|
@@ -667,6 +713,13 @@ export function buildContextEngineFactory(
|
|
|
667
713
|
? {
|
|
668
714
|
recoveryTriggerFired: recoveryTrigger.fire,
|
|
669
715
|
crossSessionRawRecovery,
|
|
716
|
+
recoveryReserveTokens,
|
|
717
|
+
temporalQueryIndicator: temporalQuery.indicator,
|
|
718
|
+
temporalQueryActive: temporalQuery.active,
|
|
719
|
+
temporalQueryPatterns: temporalQuery.matchedPatterns,
|
|
720
|
+
temporalSelectorApplied: temporalSelectorGuard.shouldApply,
|
|
721
|
+
temporalSelectorReason: temporalSelectorGuard.reason,
|
|
722
|
+
temporalRecoverySlots: temporalRecoveryResult?.slots,
|
|
670
723
|
rawUserRecoveryCandidates: rawUserRecoveryDebug,
|
|
671
724
|
}
|
|
672
725
|
: undefined,
|
package/src/scoring.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import type { SearchResult } from "./types.js";
|
|
2
|
+
import { getTemporalAnchorDensity } from "./temporal.js";
|
|
2
3
|
|
|
3
4
|
interface HybridOptions {
|
|
4
5
|
alpha?: number;
|
|
@@ -41,6 +42,7 @@ interface RawUserRecoveryOptions {
|
|
|
41
42
|
export interface RawUserRecoveryDebugCandidate {
|
|
42
43
|
id: string;
|
|
43
44
|
text: string;
|
|
45
|
+
temporalAnchorDensity: number;
|
|
44
46
|
semanticScore: number;
|
|
45
47
|
lexicalCoverage: number;
|
|
46
48
|
recencyScore: number;
|
|
@@ -319,17 +321,29 @@ export function rankRawUserRecoveryCandidates(
|
|
|
319
321
|
const now = opts.nowMs ?? Date.now();
|
|
320
322
|
const recencyLambda = Math.max(0, opts.recencyLambda ?? 0.00001);
|
|
321
323
|
const keywords = extractKeywords(opts.queryText);
|
|
324
|
+
const intentPhrases = extractIntentPhrases(opts.queryText);
|
|
322
325
|
|
|
323
326
|
const ranked = items
|
|
324
327
|
.map((item) => {
|
|
325
328
|
const semanticScore = clamp01(typeof item.score === "number" ? item.score : 0);
|
|
326
329
|
const lexicalCoverage = normalizedKeywordCoverage(keywords, item.text);
|
|
327
330
|
const recencyScore = computeRecencyScore(item, now, recencyLambda);
|
|
328
|
-
const
|
|
331
|
+
const temporalAnchorDensity = getTemporalAnchorDensity(
|
|
332
|
+
`${typeof item.metadata.collection === "string" ? item.metadata.collection : "unknown"}::${item.id}`,
|
|
333
|
+
item.text,
|
|
334
|
+
);
|
|
335
|
+
const intentAlignmentBonus = computeIntentAlignmentBonus(item.text, intentPhrases);
|
|
336
|
+
const finalScore = clamp01(
|
|
337
|
+
(0.30 * semanticScore) +
|
|
338
|
+
(0.60 * lexicalCoverage) +
|
|
339
|
+
(0.10 * recencyScore) +
|
|
340
|
+
intentAlignmentBonus,
|
|
341
|
+
);
|
|
329
342
|
const rationale = buildRawUserRecoveryRationale({
|
|
330
343
|
semanticScore,
|
|
331
344
|
lexicalCoverage,
|
|
332
345
|
recencyScore,
|
|
346
|
+
intentAlignmentBonus,
|
|
333
347
|
});
|
|
334
348
|
|
|
335
349
|
return {
|
|
@@ -340,6 +354,7 @@ export function rankRawUserRecoveryCandidates(
|
|
|
340
354
|
debug: {
|
|
341
355
|
id: item.id,
|
|
342
356
|
text: item.text,
|
|
357
|
+
temporalAnchorDensity,
|
|
343
358
|
semanticScore,
|
|
344
359
|
lexicalCoverage,
|
|
345
360
|
recencyScore,
|
|
@@ -473,7 +488,11 @@ function buildRawUserRecoveryRationale(scores: {
|
|
|
473
488
|
semanticScore: number;
|
|
474
489
|
lexicalCoverage: number;
|
|
475
490
|
recencyScore: number;
|
|
491
|
+
intentAlignmentBonus: number;
|
|
476
492
|
}): string {
|
|
493
|
+
if (scores.intentAlignmentBonus >= 0.04) {
|
|
494
|
+
return "intent phrase overlap lifted this candidate toward the query's direct ask";
|
|
495
|
+
}
|
|
477
496
|
const lexicalDelta = scores.lexicalCoverage - scores.semanticScore;
|
|
478
497
|
if (lexicalDelta > 0.15) {
|
|
479
498
|
return "lexical coverage lifted this candidate above its semantic score";
|
|
@@ -487,6 +506,79 @@ function buildRawUserRecoveryRationale(scores: {
|
|
|
487
506
|
return "semantic and lexical scores were balanced";
|
|
488
507
|
}
|
|
489
508
|
|
|
509
|
+
/**
 * Computes a small additive score bonus for candidate text that contains
 * phrases extracted from the query.
 *
 * Every matched phrase is worth 0.02 and the total is capped at 0.08.
 *
 * @param text - Candidate text to inspect.
 * @param intentPhrases - Space-joined, normalized query phrases.
 * @returns A bonus in the range [0, 0.08]; 0 when nothing matches.
 */
function computeIntentAlignmentBonus(text: string, intentPhrases: string[]): number {
  if (intentPhrases.length === 0) {
    return 0;
  }
  // Both the phrases and the normalized text are space-joined term lists.
  // Padding each side with a space forces matches onto whole-term boundaries,
  // so "cat food" no longer matches inside "bobcat foodie".
  const haystack = ` ${normalizeTextForPhraseMatch(text)} `;
  const matched = intentPhrases.filter((phrase) => haystack.includes(` ${phrase} `)).length;
  return Math.min(0.08, matched * 0.02);
}
|
|
520
|
+
|
|
521
|
+
/**
 * Builds up to 12 distinct query phrases out of consecutive non-stopword
 * terms, longest windows first (4 terms, then 3, then 2).
 *
 * A window is discarded when any of its terms is shorter than 3 characters.
 *
 * @param text - Raw query text.
 * @returns Space-joined phrases in discovery order.
 */
function extractIntentPhrases(text: string): string[] {
  const contentTerms = normalizeTerms(text).filter((term) => !INTENT_STOPWORDS.has(term));
  // A Set keeps first-insertion order, which preserves the discovery order.
  const unique = new Set<string>();

  for (const size of [4, 3, 2]) {
    for (let start = 0; start + size <= contentTerms.length; start += 1) {
      const gram = contentTerms.slice(start, start + size);
      if (gram.every((term) => term.length >= 3)) {
        unique.add(gram.join(" "));
      }
    }
  }

  return [...unique].slice(0, 12);
}
|
|
538
|
+
|
|
539
|
+
/**
 * Reduces text to its normalized terms joined by single spaces, the same
 * shape used for the phrases it is compared against.
 */
function normalizeTextForPhraseMatch(text: string): string {
  const terms = normalizeTerms(text);
  return terms.join(" ");
}
|
|
542
|
+
|
|
543
|
+
/** Terms removed from the query before intent phrases are assembled. */
const INTENT_STOPWORDS = new Set([
  // articles, conjunctions, prepositions, pronouns
  "the", "and", "for", "with", "that", "this", "have", "from", "your",
  // question words and modals
  "what", "when", "where", "which", "would", "could", "should",
  // filler
  "about", "into", "some", "before", "after", "them", "they", "been", "just",
  // request verbs and nouns
  "want", "looking", "look", "help", "need", "recommend", "suggestions", "suggest", "advice", "think", "also",
]);
|
|
581
|
+
|
|
490
582
|
function extractKeywords(text: string): string[] {
|
|
491
583
|
const tokens = normalizeTerms(text);
|
|
492
584
|
const seen = new Set<string>();
|
package/src/sidecar.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
1
2
|
import net from "node:net";
|
|
2
3
|
import os from "node:os";
|
|
3
4
|
import path from "node:path";
|
|
@@ -268,13 +269,42 @@ export function daemonProvisioningHint(): string {
|
|
|
268
269
|
}
|
|
269
270
|
|
|
270
271
|
/**
 * Resolves the endpoint used to reach the daemon when none is configured.
 *
 * Resolution order:
 * 1. `LIBRAVDB_RPC_ENDPOINT`, when set and accepted by `isConfiguredEndpoint`
 *    (the daemon's own variable, set by the Homebrew LaunchAgent / systemd unit).
 * 2. On `win32`, the fixed loopback TCP endpoint.
 * 3. The first existing `libravdb.sock` among the user-local and Homebrew
 *    run directories.
 * 4. The user-local socket path, even if absent, so error messages stay familiar.
 *
 * @param platform - Platform identifier; defaults to `process.platform`.
 * @param homeDir - Home directory; defaults to `os.homedir()`.
 * @returns A `tcp:` or `unix:` endpoint string.
 */
export function defaultEndpoint(platform = process.platform, homeDir = os.homedir()): string {
  const configured = process.env.LIBRAVDB_RPC_ENDPOINT?.trim();
  if (configured && isConfiguredEndpoint(configured)) {
    return configured;
  }

  if (platform === "win32") {
    return "tcp:127.0.0.1:37421";
  }

  const sockName = "libravdb.sock";
  const userRunDir = homeDir?.trim() ? path.join(homeDir, ".clawdb", "run") : null;

  const probeDirs: string[] = [];
  if (userRunDir !== null) {
    // User-local (npm plugin convention)
    probeDirs.push(userRunDir);
  }
  probeDirs.push(
    // Homebrew (Apple Silicon) — matches the Homebrew formula LaunchAgent
    "/opt/homebrew/var/clawdb/run",
    // Homebrew (Intel Mac) / manual Linux installs
    "/usr/local/var/clawdb/run",
  );

  for (const dir of probeDirs) {
    const candidate = path.join(dir, sockName);
    let present = false;
    try {
      present = fs.existsSync(candidate);
    } catch {
      // Permission error or similar — treat this candidate as absent.
    }
    if (present) {
      return `unix:${candidate}`;
    }
  }

  // Nothing found: fall back to the original user-local location.
  const fallbackDir = userRunDir ?? path.join(".", ".clawdb", "run");
  return `unix:${path.join(fallbackDir, sockName)}`;
}
|
|
279
309
|
|
|
280
310
|
export function buildSidecarEnv(cfg: PluginConfig): Record<string, string> {
|
package/src/temporal.ts
ADDED
|
@@ -0,0 +1,433 @@
|
|
|
1
|
+
import type { SearchResult } from "./types.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Weighted query cues for temporal intent. An entry contributes its weight
 * once when any one of its patterns matches the query text.
 */
const TEMPORAL_PATTERN_WEIGHTS: Array<{ label: string; weight: number; patterns: RegExp[] }> = [
  { label: "how many days", weight: 1.0, patterns: [/\bhow\s+many\s+days\b/i] },
  { label: "how long", weight: 0.9, patterns: [/\bhow\s+long\b/i] },
  { label: "before or after", weight: 0.8, patterns: [/\bbefore\b/i, /\bafter\b/i] },
  { label: "since or between", weight: 0.7, patterns: [/\bsince\b/i, /\bbetween\b/i] },
  {
    label: "first or earlier",
    weight: 0.8,
    patterns: [/\bfirst\b/i, /\bearlier\b/i, /\bwhich\s+came\s+first\b/i],
  },
  { label: "when did", weight: 0.7, patterns: [/\bwhen\s+did\b/i] },
];
|
|
35
|
+
|
|
36
|
+
/**
 * Patterns whose matches count as temporal anchors inside candidate text.
 * All are global so they can be enumerated with `String.prototype.matchAll`.
 */
const TEMPORAL_ANCHOR_PATTERNS: RegExp[] = [
  // ISO-style dates: 2024-03-15
  /\b\d{4}-\d{2}-\d{2}\b/g,
  // Slash dates: 3/15 or 3/15/2024
  /\b\d{1,2}\/\d{1,2}(?:\/\d{2,4})?\b/g,
  // Month name followed by a day, optional ordinal suffix and year
  /\b(?:jan(?:uary)?|feb(?:ruary)?|mar(?:ch)?|apr(?:il)?|may|jun(?:e)?|jul(?:y)?|aug(?:ust)?|sep(?:t(?:ember)?)?|oct(?:ober)?|nov(?:ember)?|dec(?:ember)?)\s+\d{1,2}(?:st|nd|rd|th)?(?:,\s*\d{4})?\b/gi,
  // Bare weekday names
  /\b(?:monday|tuesday|wednesday|thursday|friday|saturday|sunday)\b/gi,
  // Relative expressions. "last" accepts every weekday, mirroring "next",
  // so "last Tuesday" is anchored as strongly as "next Tuesday".
  /\b(?:today|yesterday|tomorrow|last\s+(?:week|month|year|night|monday|tuesday|wednesday|thursday|friday|saturday|sunday)|next\s+(?:week|month|year|monday|tuesday|wednesday|thursday|friday|saturday|sunday)|mid-?[a-z]+)\b/gi,
  // Clock times: 9:30, 9:30 pm
  /\b\d{1,2}:\d{2}(?:\s?[ap]m)?\b/gi,
  // 10-13 digit runs (presumably epoch seconds/milliseconds — confirm with callers)
  /\b\d{10,13}\b/g,
];
|
|
45
|
+
|
|
46
|
+
/** Divisor that maps the summed pattern weights onto the query indicator. */
const TEMPORAL_XI_NORM = 1.5;

/** Minimum indicator at which a query is treated as temporally active. */
const TEMPORAL_XI_THRESHOLD = 0.3;

/** Number of unique anchors that saturates the anchor density at 1. */
const TEMPORAL_ANCHOR_NORM = 3;

/** Upper bound on the number of cached anchor densities. */
const TEMPORAL_ANCHOR_CACHE_MAX = 4096;

/** Anchor densities keyed by document key plus text; insertion order tracks recency. */
const temporalAnchorCache = new Map<string, number>();
|
|
51
|
+
|
|
52
|
+
/** Terms ignored when a query is decomposed into temporal slots. */
const TEMPORAL_SLOT_STOPWORDS = new Set([
  // general filler
  "the", "and", "for", "with", "that", "this", "have", "from", "your",
  "what", "when", "where", "which", "would", "could", "should",
  "about", "into", "some", "them", "they", "been", "just",
  // request verbs and nouns
  "want", "looking", "look", "help", "need", "recommend", "suggestions", "suggest", "advice", "think", "also",
  // auxiliaries
  "did", "does", "do",
  // the temporal cue words themselves
  "after", "before", "since", "between", "first", "earlier", "many", "days", "long", "how",
  // duration verbs and pronouns
  "take", "took", "it", "me", "my", "i",
]);
|
|
108
|
+
|
|
109
|
+
/** Result of scanning a query for temporal cue patterns. */
export interface TemporalQuerySignal {
  /** Summed weights of the matched cue groups, normalized and clamped to [0, 1]. */
  indicator: number;
  /** Whether `indicator` reached the activation threshold. */
  active: boolean;
  /** Labels of the cue groups that matched. */
  matchedPatterns: string[];
}
|
|
114
|
+
|
|
115
|
+
/** Verdict on whether the temporal selector should be applied to a query. */
export interface TemporalSelectorGuardDecision {
  /** True when the temporal selector should run. */
  shouldApply: boolean;
  /** Temporal slots extracted from the query. */
  slots: string[];
  /** Human-readable explanation of the verdict. */
  reason: string;
}
|
|
120
|
+
|
|
121
|
+
/** Per-candidate scoring breakdown reported by the temporal recovery ranker. */
export interface TemporalRecoveryDebugCandidate {
  /** Candidate identifier. */
  id: string;
  /** Candidate text. */
  text: string;
  /** Whether the greedy selection pass picked this candidate. */
  selected: boolean;
  /** Density of temporal anchors found in the text, in [0, 1]. */
  temporalAnchorDensity: number;
  /** Clamped upstream relevance score. */
  semanticScore: number;
  /** Recency component of the score. */
  recencyScore: number;
  /** Fraction of the query's slots this candidate covers. */
  slotCoverage: number;
  /** The slots this candidate covers. */
  slotMatches: string[];
  /** Combined temporal recovery score. */
  finalScore: number;
  /** Short explanation of the dominant scoring factor. */
  rationale: string;
}
|
|
133
|
+
|
|
134
|
+
/** Output of the temporal recovery ranker. */
export interface TemporalRecoveryRankingResult {
  /** Greedily selected candidates first, then the rest by descending score. */
  ranked: SearchResult[];
  /** Scoring breakdown for every candidate, by descending score. */
  debug: TemporalRecoveryDebugCandidate[];
  /** Temporal signal detected in the query. */
  temporalQuery: TemporalQuerySignal;
  /** Temporal slots extracted from the query. */
  slots: string[];
}
|
|
140
|
+
|
|
141
|
+
/**
 * Scores how strongly a query asks a temporal question.
 *
 * Each cue group in `TEMPORAL_PATTERN_WEIGHTS` contributes its weight once
 * when any of its patterns matches. The sum is divided by `TEMPORAL_XI_NORM`
 * and clamped to [0, 1].
 *
 * @param queryText - Raw query text.
 * @returns The indicator, its activation flag, and the matched cue labels.
 */
export function detectTemporalQuerySignal(queryText: string): TemporalQuerySignal {
  const hits = TEMPORAL_PATTERN_WEIGHTS.filter((entry) =>
    entry.patterns.some((pattern) => pattern.test(queryText)),
  );
  const totalWeight = hits.reduce((sum, entry) => sum + entry.weight, 0);
  const indicator = clamp01(totalWeight / TEMPORAL_XI_NORM);

  return {
    indicator,
    active: indicator >= TEMPORAL_XI_THRESHOLD,
    matchedPatterns: hits.map((entry) => entry.label),
  };
}
|
|
159
|
+
|
|
160
|
+
/**
 * Returns the density of temporal anchors in `text`, memoized per document.
 *
 * Density is the number of distinct (trimmed, lowercased) anchor matches
 * divided by `TEMPORAL_ANCHOR_NORM`, clamped to [0, 1]. Results are cached
 * under a key built from `docKey` and `text`; a cache hit is re-inserted so
 * it counts as most recently used.
 *
 * @param docKey - Caller-supplied document identity.
 * @param text - Text to scan for anchors.
 * @returns Anchor density in [0, 1].
 */
export function getTemporalAnchorDensity(docKey: string, text: string): number {
  const cacheKey = `${docKey}\n${text}`;
  const hit = temporalAnchorCache.get(cacheKey);
  if (typeof hit === "number") {
    touchTemporalAnchorCache(cacheKey, hit);
    return hit;
  }

  const anchors = new Set(
    TEMPORAL_ANCHOR_PATTERNS
      .flatMap((pattern) => Array.from(text.matchAll(pattern), (match) => match[0]?.trim().toLowerCase()))
      .filter((value) => Boolean(value)),
  );

  const density = clamp01(anchors.size / TEMPORAL_ANCHOR_NORM);
  touchTemporalAnchorCache(cacheKey, density);
  return density;
}
|
|
182
|
+
|
|
183
|
+
/**
 * Ranks recovery candidates for a temporal query.
 *
 * Each candidate's score is
 * `0.40·semantic + 0.25·recency + 0.20·anchorDensity + 0.15·slotCoverage`,
 * plus 0.05 when the query is temporally active, clamped to [0, 1].
 *
 * A greedy pass then picks up to `maxSelected` candidates. Each pass adds
 * `0.25 × (newly covered slots / total slots)` to every remaining candidate
 * and takes the best one. The pass stops once the best combined score falls
 * below 0.12.
 *
 * Candidates are identified by `collection::id`, the same key used for the
 * anchor-density cache. Two candidates that share an `id` but come from
 * different collections are therefore ranked independently, rather than the
 * second being treated as already selected and dropped from the output.
 *
 * @param items - Candidates to rank; the array is not mutated.
 * @param opts - `queryText` is required. `maxSelected` defaults to 3 (minimum 1),
 *   `nowMs` to `Date.now()`, `recencyLambda` to 0.00001 (minimum 0).
 * @returns `ranked` (selected candidates in pick order, then the rest by
 *   descending score), a `debug` breakdown sorted by descending score, the
 *   detected temporal signal, and the extracted slots.
 */
export function rankTemporalRecoveryCandidates(
  items: SearchResult[],
  opts: {
    queryText: string;
    maxSelected?: number;
    nowMs?: number;
    recencyLambda?: number;
  },
): TemporalRecoveryRankingResult {
  const temporalQuery = detectTemporalQuerySignal(opts.queryText);
  const slots = extractTemporalSlots(opts.queryText);
  const recencyLambda = Math.max(0, opts.recencyLambda ?? 0.00001);
  const now = opts.nowMs ?? Date.now();
  const maxSelected = Math.max(1, Math.floor(opts.maxSelected ?? 3));

  const decorated = items.map((item) => {
    const docKey = `${typeof item.metadata.collection === "string" ? item.metadata.collection : "unknown"}::${item.id}`;
    const semanticScore = clamp01(typeof item.finalScore === "number" ? item.finalScore : item.score ?? 0);
    const recencyScore = computeRecencyScore(item, now, recencyLambda);
    const temporalAnchorDensity = getTemporalAnchorDensity(docKey, item.text);
    const { coverage, matches } = computeSlotCoverage(slots, item.text);
    const finalScore = clamp01(
      (0.40 * semanticScore) +
      (0.25 * recencyScore) +
      (0.20 * temporalAnchorDensity) +
      (0.15 * coverage) +
      (temporalQuery.active ? 0.05 : 0),
    );
    return {
      item,
      docKey,
      semanticScore,
      recencyScore,
      temporalAnchorDensity,
      slotCoverage: coverage,
      slotMatches: matches,
      finalScore,
    };
  });
  type Decorated = (typeof decorated)[number];

  const selectedKeys = new Set<string>();
  const coveredSlots = new Set<string>();
  const selected: SearchResult[] = [];

  for (let pass = 0; pass < maxSelected; pass += 1) {
    let best: Decorated | null = null;
    let bestScore = Number.NEGATIVE_INFINITY;

    for (const candidate of decorated) {
      if (selectedKeys.has(candidate.docKey)) {
        continue;
      }
      // Reward only the slots that no earlier pick has covered yet.
      const marginalCoverage =
        candidate.slotMatches.filter((slot) => !coveredSlots.has(slot)).length / Math.max(1, slots.length);
      const combined = candidate.finalScore + (0.25 * marginalCoverage);
      if (combined > bestScore) {
        best = candidate;
        bestScore = combined;
      }
    }

    if (!best || bestScore < 0.12) {
      break;
    }

    selectedKeys.add(best.docKey);
    for (const slot of best.slotMatches) {
      coveredSlots.add(slot);
    }
    selected.push({
      ...best.item,
      finalScore: best.finalScore,
    });
  }

  const byScoreDescending = (left: Decorated, right: Decorated): number => right.finalScore - left.finalScore;

  const remaining = decorated
    .filter((candidate) => !selectedKeys.has(candidate.docKey))
    .sort(byScoreDescending)
    .map((candidate) => ({
      ...candidate.item,
      finalScore: candidate.finalScore,
    }));

  const ranked = [...selected, ...remaining];
  // Sort a copy so `decorated` keeps the caller's input order.
  const debug = [...decorated]
    .sort(byScoreDescending)
    .map((candidate) => ({
      id: candidate.item.id,
      text: candidate.item.text,
      selected: selectedKeys.has(candidate.docKey),
      temporalAnchorDensity: candidate.temporalAnchorDensity,
      semanticScore: candidate.semanticScore,
      recencyScore: candidate.recencyScore,
      slotCoverage: candidate.slotCoverage,
      slotMatches: candidate.slotMatches,
      finalScore: candidate.finalScore,
      rationale: buildTemporalRecoveryRationale(candidate.slotCoverage, candidate.temporalAnchorDensity, candidate.semanticScore),
    }));

  return { ranked, debug, temporalQuery, slots };
}
|
|
284
|
+
|
|
285
|
+
/**
 * Decides whether the temporal selector should run for a query.
 *
 * The selector applies only when all three conditions hold, checked in order:
 * the temporal signal is active, at least one strong compositional cue
 * matched, and the query decomposed into exactly two slots.
 *
 * @param queryText - Raw query text.
 * @param temporalQuery - Precomputed signal; detected from `queryText` when omitted.
 * @returns The verdict, the extracted slots, and the reason for the verdict.
 */
export function decideTemporalSelectorGuard(
  queryText: string,
  temporalQuery: TemporalQuerySignal = detectTemporalQuerySignal(queryText),
): TemporalSelectorGuardDecision {
  const slots = extractTemporalSlots(queryText);
  const strongLabels = ["how many days", "how long", "before or after", "since or between"];

  let rejection: string | undefined;
  if (!temporalQuery.active) {
    rejection = "temporal query gate inactive";
  } else if (!temporalQuery.matchedPatterns.some((label) => strongLabels.includes(label))) {
    rejection = "query lacks strong compositional temporal pattern";
  } else if (slots.length !== 2) {
    rejection = "query did not resolve to exactly two temporal slots";
  }

  if (rejection !== undefined) {
    return { shouldApply: false, slots, reason: rejection };
  }
  return {
    shouldApply: true,
    slots,
    reason: "strong temporal query with two-slot decomposition",
  };
}
|
|
326
|
+
|
|
327
|
+
/** Empties the anchor-density cache; intended for test isolation, per its name. */
export function resetTemporalCachesForTest(): void {
  temporalAnchorCache.clear();
}
|
|
330
|
+
|
|
331
|
+
/**
 * Breaks a query into at most four "slots", short term sequences that name
 * the sub-events the query refers to.
 *
 * The text is split on connective words and punctuation. Within each clause,
 * terms shorter than 3 characters and slot stopwords are dropped. A clause
 * with up to three terms becomes one slot. A longer clause contributes its
 * first four and last four terms. If no clause yields a slot, the first four
 * usable terms of the whole text are used.
 *
 * @param text - Raw query text.
 * @returns Up to four distinct slots in discovery order.
 */
function extractTemporalSlots(text: string): string[] {
  const isSlotTerm = (term: string): boolean => term.length >= 3 && !TEMPORAL_SLOT_STOPWORDS.has(term);
  const found = new Set<string>();

  const pieces = text.split(/(?:\bafter\b|\bbefore\b|\bbetween\b|\bor\b|\band\b|\bthen\b|[?.!,;]+)/i);
  for (const piece of pieces) {
    const clause = piece.trim();
    if (clause.length === 0) {
      continue;
    }
    const terms = normalizeTerms(clause).filter(isSlotTerm);
    if (terms.length === 0) {
      continue;
    }
    if (terms.length > 3) {
      found.add(terms.slice(0, 4).join(" "));
      found.add(terms.slice(-4).join(" "));
    } else {
      found.add(terms.join(" "));
    }
  }

  if (found.size === 0) {
    const wholeTextTerms = normalizeTerms(text).filter(isSlotTerm);
    if (wholeTextTerms.length > 0) {
      found.add(wholeTextTerms.slice(0, 4).join(" "));
    }
  }

  return Array.from(found).slice(0, 4);
}
|
|
361
|
+
|
|
362
|
+
/**
 * Measures how many query slots a candidate text covers.
 *
 * A slot counts as covered when at least half of its terms (those with three
 * or more characters) appear among the candidate's normalized terms.
 *
 * @param slots - Slots extracted from the query.
 * @param candidateText - Text of the candidate.
 * @returns `coverage` as covered slots divided by total slots, and the covered slots.
 */
function computeSlotCoverage(slots: string[], candidateText: string): { coverage: number; matches: string[] } {
  if (slots.length === 0) {
    return { coverage: 0, matches: [] };
  }

  const vocabulary = new Set(normalizeTerms(candidateText));
  const matches = slots.filter((slot) => {
    const slotTerms = normalizeTerms(slot).filter((term) => term.length >= 3);
    if (slotTerms.length === 0) {
      return false;
    }
    const present = slotTerms.filter((term) => vocabulary.has(term)).length;
    return present / slotTerms.length >= 0.5;
  });

  return {
    coverage: matches.length / Math.max(1, slots.length),
    matches,
  };
}
|
|
388
|
+
|
|
389
|
+
/**
 * Picks the sentence that explains the dominant reason a candidate scored as
 * it did. Slot coverage and anchor density (each at 0.5 or above) take
 * precedence over semantic similarity (0.6 or above).
 */
function buildTemporalRecoveryRationale(slotCoverage: number, anchorDensity: number, semanticScore: number): string {
  const slotsSupported = slotCoverage >= 0.5;
  const anchorsSupported = anchorDensity >= 0.5;

  if (slotsSupported) {
    return anchorsSupported
      ? "slot coverage and temporal anchors both supported this candidate"
      : "slot coverage lifted this candidate toward the query's subevents";
  }
  if (anchorsSupported) {
    return "temporal anchors lifted this candidate toward the query's date logic";
  }
  return semanticScore >= 0.6
    ? "semantic similarity kept this candidate in the temporal pool"
    : "candidate remained in the bounded temporal recovery pool";
}
|
|
404
|
+
|
|
405
|
+
/**
 * Exponential recency score: `exp(-recencyLambda × ageSeconds)`.
 *
 * Age is computed as `(now - metadata.ts) / 1000`, i.e. both values are
 * treated as milliseconds. A timestamp in the future counts as age 0.
 * A missing, non-numeric, or non-finite timestamp (including `NaN`) falls
 * back to `now`, so the item scores as brand new rather than producing a
 * `NaN` that would poison downstream scores and sort order.
 *
 * @param item - Candidate whose `metadata.ts` is read.
 * @param now - Reference time.
 * @param recencyLambda - Decay rate per second.
 * @returns A score in (0, 1] for non-negative `recencyLambda`.
 */
function computeRecencyScore(item: SearchResult, now: number, recencyLambda: number): number {
  const rawTs = item.metadata.ts;
  const ts = typeof rawTs === "number" && Number.isFinite(rawTs) ? rawTs : now;
  const ageSeconds = Math.max(0, now - ts) / 1000;
  return Math.exp(-recencyLambda * ageSeconds);
}
|
|
410
|
+
|
|
411
|
+
/**
 * Lowercases the text and returns its maximal runs of ASCII letters, digits,
 * and underscores. Every other character acts as a separator.
 */
function normalizeTerms(text: string): string[] {
  const runs = text.toLowerCase().match(/[a-z0-9_]+/g);
  return runs === null ? [] : [...runs];
}
|
|
417
|
+
|
|
418
|
+
/**
 * Stores `value` under `cacheKey` as the most recently used entry and, if the
 * cache then exceeds `TEMPORAL_ANCHOR_CACHE_MAX`, drops the oldest entry.
 *
 * Relies on `Map` iterating keys in insertion order: deleting before setting
 * moves the key to the end, so the first key is always the oldest.
 */
function touchTemporalAnchorCache(cacheKey: string, value: number): void {
  // `delete` is a no-op when the key is absent.
  temporalAnchorCache.delete(cacheKey);
  temporalAnchorCache.set(cacheKey, value);

  if (temporalAnchorCache.size <= TEMPORAL_ANCHOR_CACHE_MAX) {
    return;
  }
  for (const oldestKey of temporalAnchorCache.keys()) {
    temporalAnchorCache.delete(oldestKey);
    break;
  }
}
|
|
430
|
+
|
|
431
|
+
/**
 * Clamps a number to the closed interval [0, 1].
 *
 * `NaN` maps to 0. `Math.min`/`Math.max` would otherwise propagate it, and a
 * `NaN` score silently breaks comparisons and sort order downstream.
 *
 * @param value - Any number, including `NaN` and the infinities.
 * @returns A finite value in [0, 1].
 */
function clamp01(value: number): number {
  if (Number.isNaN(value)) {
    return 0;
  }
  return Math.min(1, Math.max(0, value));
}
|
package/src/tokens.ts
CHANGED
|
@@ -21,6 +21,22 @@ export function fitPromptBudget(items: SearchResult[], budget: number): SearchRe
|
|
|
21
21
|
return selected;
|
|
22
22
|
}
|
|
23
23
|
|
|
24
|
+
/**
 * First-fit selection under a token budget.
 *
 * Items are visited in order. One that would push the running total past
 * `budget` is skipped and the scan continues, so a later, smaller item can
 * still be taken.
 *
 * @param items - Candidates in priority order.
 * @param budget - Maximum total estimated tokens.
 * @returns The accepted items, in their original order.
 */
export function fitPromptBudgetFirstFit(items: SearchResult[], budget: number): SearchResult[] {
  let spent = 0;
  return items.filter((item) => {
    const cost = estimateTokens(item.text);
    if (spent + cost > budget) {
      return false;
    }
    spent += cost;
    return true;
  });
}
|
|
39
|
+
|
|
24
40
|
/** Sums the estimated token cost of every message's `content`. */
export function countTokens(messages: Array<{ content: string }>): number {
  let total = 0;
  for (const message of messages) {
    total += estimateTokens(message.content);
  }
  return total;
}
|
package/src/types.ts
CHANGED
|
@@ -203,12 +203,23 @@ export interface ContextAssembleResult {
|
|
|
203
203
|
id: string;
|
|
204
204
|
text: string;
|
|
205
205
|
selected: boolean;
|
|
206
|
+
tokenEstimate: number;
|
|
207
|
+
temporalAnchorDensity: number;
|
|
206
208
|
semanticScore: number;
|
|
209
|
+
slotCoverage?: number;
|
|
210
|
+
slotMatches?: string[];
|
|
207
211
|
lexicalCoverage: number;
|
|
208
212
|
recencyScore: number;
|
|
209
213
|
finalScore: number;
|
|
210
214
|
rationale: string;
|
|
211
215
|
}>;
|
|
216
|
+
recoveryReserveTokens?: number;
|
|
217
|
+
temporalQueryIndicator?: number;
|
|
218
|
+
temporalQueryActive?: boolean;
|
|
219
|
+
temporalQueryPatterns?: string[];
|
|
220
|
+
temporalSelectorApplied?: boolean;
|
|
221
|
+
temporalSelectorReason?: string;
|
|
222
|
+
temporalRecoverySlots?: string[];
|
|
212
223
|
};
|
|
213
224
|
}
|
|
214
225
|
|