@loreai/core 0.11.1 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bun/agents-file.d.ts +29 -8
- package/dist/bun/agents-file.d.ts.map +1 -1
- package/dist/bun/config.d.ts +1 -0
- package/dist/bun/config.d.ts.map +1 -1
- package/dist/bun/db.d.ts.map +1 -1
- package/dist/bun/distillation.d.ts +55 -0
- package/dist/bun/distillation.d.ts.map +1 -1
- package/dist/bun/embedding.d.ts +15 -1
- package/dist/bun/embedding.d.ts.map +1 -1
- package/dist/bun/gradient.d.ts +53 -5
- package/dist/bun/gradient.d.ts.map +1 -1
- package/dist/bun/index.d.ts +4 -4
- package/dist/bun/index.d.ts.map +1 -1
- package/dist/bun/index.js +799 -256
- package/dist/bun/index.js.map +4 -4
- package/dist/bun/pattern-extract.d.ts +36 -0
- package/dist/bun/pattern-extract.d.ts.map +1 -0
- package/dist/bun/recall.d.ts +1 -0
- package/dist/bun/recall.d.ts.map +1 -1
- package/dist/bun/search.d.ts +13 -1
- package/dist/bun/search.d.ts.map +1 -1
- package/dist/bun/temporal.d.ts +15 -0
- package/dist/bun/temporal.d.ts.map +1 -1
- package/dist/bun/types.d.ts +41 -1
- package/dist/bun/types.d.ts.map +1 -1
- package/dist/bun/worker-model.d.ts +22 -0
- package/dist/bun/worker-model.d.ts.map +1 -1
- package/dist/node/agents-file.d.ts +29 -8
- package/dist/node/agents-file.d.ts.map +1 -1
- package/dist/node/config.d.ts +1 -0
- package/dist/node/config.d.ts.map +1 -1
- package/dist/node/db.d.ts.map +1 -1
- package/dist/node/distillation.d.ts +55 -0
- package/dist/node/distillation.d.ts.map +1 -1
- package/dist/node/embedding.d.ts +15 -1
- package/dist/node/embedding.d.ts.map +1 -1
- package/dist/node/gradient.d.ts +53 -5
- package/dist/node/gradient.d.ts.map +1 -1
- package/dist/node/index.d.ts +4 -4
- package/dist/node/index.d.ts.map +1 -1
- package/dist/node/index.js +799 -256
- package/dist/node/index.js.map +4 -4
- package/dist/node/pattern-extract.d.ts +36 -0
- package/dist/node/pattern-extract.d.ts.map +1 -0
- package/dist/node/recall.d.ts +1 -0
- package/dist/node/recall.d.ts.map +1 -1
- package/dist/node/search.d.ts +13 -1
- package/dist/node/search.d.ts.map +1 -1
- package/dist/node/temporal.d.ts +15 -0
- package/dist/node/temporal.d.ts.map +1 -1
- package/dist/node/types.d.ts +41 -1
- package/dist/node/types.d.ts.map +1 -1
- package/dist/node/worker-model.d.ts +22 -0
- package/dist/node/worker-model.d.ts.map +1 -1
- package/dist/types/agents-file.d.ts +29 -8
- package/dist/types/agents-file.d.ts.map +1 -1
- package/dist/types/config.d.ts +1 -0
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/db.d.ts.map +1 -1
- package/dist/types/distillation.d.ts +55 -0
- package/dist/types/distillation.d.ts.map +1 -1
- package/dist/types/embedding.d.ts +15 -1
- package/dist/types/embedding.d.ts.map +1 -1
- package/dist/types/gradient.d.ts +53 -5
- package/dist/types/gradient.d.ts.map +1 -1
- package/dist/types/index.d.ts +4 -4
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/pattern-extract.d.ts +36 -0
- package/dist/types/pattern-extract.d.ts.map +1 -0
- package/dist/types/recall.d.ts +1 -0
- package/dist/types/recall.d.ts.map +1 -1
- package/dist/types/search.d.ts +13 -1
- package/dist/types/search.d.ts.map +1 -1
- package/dist/types/temporal.d.ts +15 -0
- package/dist/types/temporal.d.ts.map +1 -1
- package/dist/types/types.d.ts +41 -1
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/worker-model.d.ts +22 -0
- package/dist/types/worker-model.d.ts.map +1 -1
- package/package.json +3 -2
- package/src/agents-file.ts +111 -28
- package/src/config.ts +25 -18
- package/src/curator.ts +2 -2
- package/src/db.ts +83 -4
- package/src/distillation.ts +270 -27
- package/src/embedding.ts +158 -14
- package/src/gradient.ts +398 -227
- package/src/index.ts +13 -5
- package/src/pattern-extract.ts +108 -0
- package/src/recall.ts +142 -6
- package/src/search.ts +37 -1
- package/src/temporal.ts +39 -0
- package/src/types.ts +41 -1
- package/src/worker-model.ts +142 -5
package/dist/bun/index.js
CHANGED
|
@@ -131,6 +131,7 @@ __export(temporal_exports, {
|
|
|
131
131
|
search: () => search2,
|
|
132
132
|
searchScored: () => searchScored,
|
|
133
133
|
store: () => store,
|
|
134
|
+
temporalCnorm: () => temporalCnorm,
|
|
134
135
|
undistilled: () => undistilled,
|
|
135
136
|
undistilledCount: () => undistilledCount
|
|
136
137
|
});
|
|
@@ -145,6 +146,7 @@ function sha256(input) {
|
|
|
145
146
|
// src/db.ts
|
|
146
147
|
import { join, dirname } from "path";
|
|
147
148
|
import { mkdirSync } from "fs";
|
|
149
|
+
import { homedir } from "os";
|
|
148
150
|
var MIGRATIONS = [
|
|
149
151
|
`
|
|
150
152
|
-- Version 1: Initial schema
|
|
@@ -473,11 +475,27 @@ var MIGRATIONS = [
|
|
|
473
475
|
)
|
|
474
476
|
WHERE content LIKE '%' || char(10) || '[tool:%'
|
|
475
477
|
OR content LIKE '%' || char(10) || '[reasoning] %';
|
|
478
|
+
`,
|
|
479
|
+
`
|
|
480
|
+
-- Version 12: Context health diagnostic columns on distillations.
|
|
481
|
+
--
|
|
482
|
+
-- r_compression: k/\u221AN where k = distilled token count, N = source token
|
|
483
|
+
-- count. Values < 1.0 signal likely lossy compression. NULL for rows
|
|
484
|
+
-- created before this migration or for meta-distillations (gen > 0)
|
|
485
|
+
-- where the metric is not computed.
|
|
486
|
+
--
|
|
487
|
+
-- c_norm: normalized variance of relative-existence weights over source
|
|
488
|
+
-- message timestamps. Range [0, 1]; 0 = uniform distribution, 1 = attention
|
|
489
|
+
-- dominated by distant past. NULL for pre-migration rows or meta-distillations.
|
|
490
|
+
--
|
|
491
|
+
-- Both columns are nullable REALs \u2014 cheap to add, no backfill needed.
|
|
492
|
+
ALTER TABLE distillations ADD COLUMN r_compression REAL;
|
|
493
|
+
ALTER TABLE distillations ADD COLUMN c_norm REAL;
|
|
476
494
|
`
|
|
477
495
|
];
|
|
478
496
|
/**
 * Resolve the directory that holds Lore's on-disk data.
 *
 * Uses `$XDG_DATA_HOME` when it is set to a non-empty value, otherwise
 * `<home>/.local/share`, and appends the `opencode-lore` folder name.
 *
 * @returns {string} Path of the data directory.
 */
function dataDir() {
  const { XDG_DATA_HOME: xdgHome } = process.env;
  // An empty string is treated the same as "unset", as in the `||` original.
  const root = xdgHome ? xdgHome : join(homedir(), ".local", "share");
  return join(root, "opencode-lore");
}
|
|
483
501
|
var instance;
|
|
@@ -508,16 +526,47 @@ function migrate(database) {
|
|
|
508
526
|
"SELECT name FROM sqlite_master WHERE type='table' AND name='schema_version'"
|
|
509
527
|
).get();
|
|
510
528
|
const current2 = row ? database.query("SELECT version FROM schema_version").get()?.version ?? 0 : 0;
|
|
511
|
-
if (current2 >= MIGRATIONS.length)
|
|
529
|
+
if (current2 >= MIGRATIONS.length) {
|
|
530
|
+
recoverMissingObjects(database);
|
|
531
|
+
return;
|
|
532
|
+
}
|
|
512
533
|
for (let i = current2; i < MIGRATIONS.length; i++) {
|
|
513
534
|
if (i === VACUUM_MIGRATION_INDEX) {
|
|
514
535
|
database.exec("PRAGMA auto_vacuum = INCREMENTAL");
|
|
515
536
|
database.exec("VACUUM");
|
|
516
537
|
} else {
|
|
517
|
-
|
|
538
|
+
try {
|
|
539
|
+
database.exec(MIGRATIONS[i]);
|
|
540
|
+
} catch (e) {
|
|
541
|
+
if (e instanceof Error && /duplicate column name/i.test(e.message)) {
|
|
542
|
+
const stripped = stripAppliedAlters(MIGRATIONS[i], database);
|
|
543
|
+
if (stripped.trim()) database.exec(stripped);
|
|
544
|
+
} else {
|
|
545
|
+
throw e;
|
|
546
|
+
}
|
|
547
|
+
}
|
|
518
548
|
}
|
|
519
549
|
}
|
|
520
550
|
database.exec(`UPDATE schema_version SET version = ${MIGRATIONS.length}`);
|
|
551
|
+
recoverMissingObjects(database);
|
|
552
|
+
}
|
|
553
|
+
/**
 * Blank out every `ALTER TABLE <t> ADD COLUMN <c> ...;` statement in a
 * migration whose column already exists, so the remainder of the migration
 * can be re-executed after a "duplicate column name" failure.
 *
 * Column names are compared case-insensitively because SQLite itself treats
 * `Foo` and `foo` as the same column when it raises "duplicate column name".
 *
 * @param {string} migration - SQL text of a single migration.
 * @param {object} database - Handle exposing `query(sql).all()`; each row
 *   returned for `PRAGMA table_info` is expected to carry a `name` field.
 * @returns {string} The migration with already-applied ALTERs removed.
 */
function stripAppliedAlters(migration, database) {
  // One PRAGMA per table, no matter how many ALTERs target that table.
  const columnsByTable = new Map();
  const existingColumns = (table) => {
    const key = table.toLowerCase();
    let names = columnsByTable.get(key);
    if (!names) {
      // `table` can only contain \w characters (see the regex below), so
      // interpolating it into the PRAGMA cannot inject SQL.
      const rows = database.query(`PRAGMA table_info(${table})`).all();
      names = new Set(rows.map((row) => String(row.name).toLowerCase()));
      columnsByTable.set(key, names);
    }
    return names;
  };
  return migration.replace(
    /ALTER\s+TABLE\s+(\w+)\s+ADD\s+COLUMN\s+(\w+)\b[^;]*;/gi,
    (statement, table, column) =>
      existingColumns(table).has(column.toLowerCase()) ? "" : statement
  );
}
|
|
563
|
+
/**
 * Ensure the `kv_meta` key/value table exists.
 *
 * `migrate()` calls this both when the schema version is already current and
 * after applying pending migrations. The statement uses `IF NOT EXISTS`, so
 * repeated calls are harmless.
 *
 * @param {object} database - Handle exposing `exec(sql)`.
 */
function recoverMissingObjects(database) {
  const ensureKvMeta = `
    CREATE TABLE IF NOT EXISTS kv_meta (
      key TEXT PRIMARY KEY,
      value TEXT NOT NULL
    );
  `;
  database.exec(ensureKvMeta);
}
|
|
522
571
|
function close() {
|
|
523
572
|
if (instance) {
|
|
@@ -11242,14 +11291,24 @@ function reciprocalRankFusion(lists, k = 60) {
|
|
|
11242
11291
|
}
|
|
11243
11292
|
return [...scores.values()].sort((a, b) => b.score - a.score);
|
|
11244
11293
|
}
|
|
11245
|
-
|
|
11294
|
+
/**
 * Rank items by how many query terms occur verbatim in their text.
 *
 * Terms come from `filterTerms(query)` and are matched case-insensitively as
 * substrings. Items matching no term are dropped; the rest are ordered by
 * descending match count.
 *
 * @param {Array} items - Candidate items.
 * @param {Function} getText - Maps an item to the string to search in.
 * @param {string} query - Raw search query.
 * @returns {Array} Matching items, best first; `[]` when no terms remain.
 */
function exactTermMatchRank(items, getText, query) {
  const needles = filterTerms(query).map((term) => term.toLowerCase());
  if (needles.length === 0) return [];
  const hits = [];
  for (const item of items) {
    const haystack = getText(item).toLowerCase();
    let matches = 0;
    for (const needle of needles) {
      if (haystack.includes(needle)) matches += 1;
    }
    if (matches > 0) hits.push({ item, matches });
  }
  // Array.prototype.sort is stable, so ties keep their input order.
  hits.sort((left, right) => right.matches - left.matches);
  return hits.map((hit) => hit.item);
}
|
|
11304
|
+
async function expandQuery(llm, query, model, sessionID) {
|
|
11246
11305
|
const TIMEOUT_MS = 3e3;
|
|
11247
11306
|
try {
|
|
11248
11307
|
const responseText = await Promise.race([
|
|
11249
11308
|
llm.prompt(
|
|
11250
11309
|
QUERY_EXPANSION_SYSTEM,
|
|
11251
11310
|
`Input: "${query}"`,
|
|
11252
|
-
{ model, workerID: "lore-query-expand" }
|
|
11311
|
+
{ model, workerID: "lore-query-expand", thinking: false, urgent: true, sessionID }
|
|
11253
11312
|
),
|
|
11254
11313
|
new Promise((resolve) => setTimeout(() => resolve(null), TIMEOUT_MS))
|
|
11255
11314
|
]);
|
|
@@ -11423,6 +11482,18 @@ function searchScored(input) {
|
|
|
11423
11482
|
return [];
|
|
11424
11483
|
}
|
|
11425
11484
|
}
|
|
11485
|
+
/**
 * Normalised variance of age-based weights over a set of timestamps.
 *
 * Each timestamp's age (`now - t`) is divided by the total age to form a
 * weight; the variance of those weights around the uniform value `1/n` is
 * then divided by the largest variance possible for `n` non-negative weights
 * summing to 1, `(n - 1) / n^2`. The result is 0 when all ages are equal and
 * 1 when a single timestamp carries all of the age.
 *
 * Timestamps later than `now` (clock skew, out-of-order input) are treated
 * as age 0. Without that clamp the weights could go negative and the result
 * could leave the [0, 1] range.
 *
 * @param {number[]} timestamps - Message timestamps, same unit as `now`.
 * @param {number} [now=Date.now()] - Reference time.
 * @returns {number} Value in [0, 1]; 0 for fewer than two timestamps or when
 *   the total age is not positive.
 */
function temporalCnorm(timestamps, now = Date.now()) {
  const n = timestamps.length;
  if (n < 2) return 0;
  const ages = timestamps.map((t) => Math.max(0, now - t));
  const totalAge = ages.reduce((sum, age) => sum + age, 0);
  if (totalAge <= 0) return 0;
  const uniform = 1 / n;
  const variance =
    ages.reduce((sum, age) => sum + (age / totalAge - uniform) ** 2, 0) / n;
  // n >= 2 here, so maxVariance is strictly positive.
  const maxVariance = (n - 1) / (n * n);
  // Guard against floating-point overshoot just above 1.
  return Math.min(1, variance / maxVariance);
}
|
|
11426
11497
|
function count(projectPath, sessionID) {
|
|
11427
11498
|
const pid = ensureProject(projectPath);
|
|
11428
11499
|
const query = sessionID ? "SELECT COUNT(*) as count FROM temporal_messages WHERE project_id = ? AND session_id = ?" : "SELECT COUNT(*) as count FROM temporal_messages WHERE project_id = ?";
|
|
@@ -25647,11 +25718,15 @@ var LoreConfig = external_exports.object({
|
|
|
25647
25718
|
* Anthropic's April 23 postmortem identified dropping reasoning blocks as
|
|
25648
25719
|
* the root cause of forgetfulness/repetition.
|
|
25649
25720
|
*
|
|
25650
|
-
* `idleResumeMinutes` is the threshold in minutes. Default
|
|
25651
|
-
* Anthropic's
|
|
25721
|
+
* `idleResumeMinutes` is the threshold in minutes. Default 5 — matches
|
|
25722
|
+
* Anthropic's default-tier prompt cache TTL. After 5 min of inactivity the
|
|
25723
|
+
* upstream cache is cold, so preserving byte-identity wastes cache-write cost
|
|
25724
|
+
* for no benefit. Refreshing the caches on resume produces a better-fitting
|
|
25725
|
+
* window at the same cold-write price. Users on Anthropic's extended-cache
|
|
25726
|
+
* tier (1 h TTL) should set this to 60 in `.lore.json`.
|
|
25652
25727
|
* Set to 0 to disable the feature.
|
|
25653
25728
|
*/
|
|
25654
|
-
idleResumeMinutes: external_exports.number().min(0).max(24 * 60).default(
|
|
25729
|
+
idleResumeMinutes: external_exports.number().min(0).max(24 * 60).default(5),
|
|
25655
25730
|
distillation: external_exports.object({
|
|
25656
25731
|
minMessages: external_exports.number().min(3).default(5),
|
|
25657
25732
|
maxSegment: external_exports.number().min(5).default(30),
|
|
@@ -25702,34 +25777,37 @@ var LoreConfig = external_exports.object({
|
|
|
25702
25777
|
* before search, improving recall for ambiguous queries. */
|
|
25703
25778
|
queryExpansion: external_exports.boolean().default(false),
|
|
25704
25779
|
/** Vector embedding search.
|
|
25705
|
-
* Supports multiple providers:
|
|
25706
|
-
* "
|
|
25707
|
-
*
|
|
25708
|
-
*
|
|
25780
|
+
* Supports multiple providers:
|
|
25781
|
+
* - "local" (default): fastembed + ONNX Runtime, no API key needed.
|
|
25782
|
+
* Uses bge-small-en-v1.5 (384 dims). Model downloaded on first use (~33MB),
|
|
25783
|
+
* cached in ~/.cache/fastembed. ~150ms per query embed.
|
|
25784
|
+
* - "voyage": Voyage AI (VOYAGE_API_KEY, voyage-code-3, 1024 dims)
|
|
25785
|
+
* - "openai": OpenAI (OPENAI_API_KEY, text-embedding-3-small, 1536 dims)
|
|
25786
|
+
* Set enabled: false to explicitly disable even with a provider available. */
|
|
25709
25787
|
embeddings: external_exports.object({
|
|
25710
25788
|
/** Enable/disable vector embedding search. Default: true.
|
|
25711
|
-
* Set to false to explicitly disable
|
|
25789
|
+
* Set to false to explicitly disable. */
|
|
25712
25790
|
enabled: external_exports.boolean().default(true),
|
|
25713
|
-
/** Embedding provider. Default: "
|
|
25714
|
-
*
|
|
25791
|
+
/** Embedding provider. Default: "local".
|
|
25792
|
+
* - "local": fastembed + ONNX Runtime, no API key (default model: bge-small-en-v1.5, 384 dims)
|
|
25715
25793
|
* - "voyage": VOYAGE_API_KEY (default model: voyage-code-3, 1024 dims)
|
|
25716
25794
|
* - "openai": OPENAI_API_KEY (default model: text-embedding-3-small, 1536 dims) */
|
|
25717
|
-
provider: external_exports.enum(["voyage", "openai"]).default("
|
|
25795
|
+
provider: external_exports.enum(["local", "voyage", "openai"]).default("local"),
|
|
25718
25796
|
/** Model ID for the embedding provider. Default depends on provider. */
|
|
25719
|
-
model: external_exports.string().default("
|
|
25720
|
-
/** Embedding dimensions. Default: 1024. */
|
|
25721
|
-
dimensions: external_exports.number().min(
|
|
25797
|
+
model: external_exports.string().default("BGESmallENV15"),
|
|
25798
|
+
/** Embedding dimensions. Default: 384 (local) / 1024 (voyage) / 1536 (openai). */
|
|
25799
|
+
dimensions: external_exports.number().min(64).max(2048).default(384)
|
|
25722
25800
|
}).default({
|
|
25723
25801
|
enabled: true,
|
|
25724
|
-
provider: "
|
|
25725
|
-
model: "
|
|
25726
|
-
dimensions:
|
|
25802
|
+
provider: "local",
|
|
25803
|
+
model: "BGESmallENV15",
|
|
25804
|
+
dimensions: 384
|
|
25727
25805
|
})
|
|
25728
25806
|
}).default({
|
|
25729
25807
|
ftsWeights: { title: 6, content: 2, category: 3 },
|
|
25730
25808
|
recallLimit: 10,
|
|
25731
25809
|
queryExpansion: false,
|
|
25732
|
-
embeddings: { enabled: true, provider: "
|
|
25810
|
+
embeddings: { enabled: true, provider: "local", model: "BGESmallENV15", dimensions: 384 }
|
|
25733
25811
|
}),
|
|
25734
25812
|
crossProject: external_exports.boolean().default(false),
|
|
25735
25813
|
agentsFile: external_exports.object({
|
|
@@ -25767,6 +25845,7 @@ __export(embedding_exports, {
|
|
|
25767
25845
|
fromBlob: () => fromBlob,
|
|
25768
25846
|
isAvailable: () => isAvailable,
|
|
25769
25847
|
resetProvider: () => resetProvider,
|
|
25848
|
+
runStartupBackfill: () => runStartupBackfill,
|
|
25770
25849
|
toBlob: () => toBlob,
|
|
25771
25850
|
vectorSearch: () => vectorSearch,
|
|
25772
25851
|
vectorSearchDistillations: () => vectorSearchDistillations
|
|
@@ -25844,9 +25923,43 @@ var OpenAIProvider = class {
|
|
|
25844
25923
|
return sorted.map((d) => new Float32Array(d.embedding));
|
|
25845
25924
|
}
|
|
25846
25925
|
};
|
|
25847
|
-
var
|
|
25848
|
-
|
|
25849
|
-
|
|
25926
|
+
/**
 * Embedding provider backed by the `fastembed` package.
 *
 * The package is imported and the model initialised lazily, on the first
 * call that needs it. Concurrent callers share one in-flight initialisation.
 * If initialisation fails, the cached promise is cleared so a later call can
 * retry instead of replaying the same rejection forever.
 */
var LocalProvider = class {
  maxBatchSize = 256;
  model = null;
  initPromise = null;
  modelName;

  /**
   * @param {string} modelName - Key of fastembed's `EmbeddingModel` enum, or
   *   a value passed through to `FlagEmbedding.init` unchanged when no such
   *   key exists.
   */
  constructor(modelName) {
    this.modelName = modelName;
  }

  /**
   * Return the initialised model, starting initialisation if necessary.
   * @returns {Promise<object>} The fastembed model instance.
   * @throws Whatever `import("fastembed")` or `FlagEmbedding.init` rejects with.
   */
  async getModel() {
    if (this.model) return this.model;
    if (!this.initPromise) {
      this.initPromise = (async () => {
        try {
          const { EmbeddingModel, FlagEmbedding } = await import("fastembed");
          const enumValue = EmbeddingModel[this.modelName];
          const m = await FlagEmbedding.init({
            model: enumValue ?? this.modelName
          });
          this.model = m;
          return m;
        } catch (err) {
          // Forget the failed attempt; the next getModel() call starts over.
          this.initPromise = null;
          throw err;
        }
      })();
    }
    return this.initPromise;
  }

  /**
   * Embed a list of texts.
   *
   * A single text with `inputType === "query"` goes through `queryEmbed`;
   * everything else is streamed through `passageEmbed` batch by batch.
   *
   * @param {string[]} texts - Texts to embed.
   * @param {string} inputType - `"query"` selects the query path for a
   *   single text.
   * @returns {Promise<Float32Array[]>} One vector per input text.
   */
  async embed(texts, inputType) {
    const model = await this.getModel();
    if (inputType === "query" && texts.length === 1) {
      const vec = await model.queryEmbed(texts[0]);
      return [new Float32Array(vec)];
    }
    const results = [];
    for await (const batch of model.passageEmbed(texts)) {
      for (const vec of batch) {
        results.push(new Float32Array(vec));
      }
    }
    return results;
  }
};
|
|
25851
25964
|
var PROVIDER_ENV_KEYS = {
|
|
25852
25965
|
voyage: "VOYAGE_API_KEY",
|
|
@@ -25865,21 +25978,35 @@ function getProvider() {
|
|
|
25865
25978
|
return null;
|
|
25866
25979
|
}
|
|
25867
25980
|
const providerName = cfg.provider;
|
|
25868
|
-
const
|
|
25869
|
-
if (!apiKey) {
|
|
25870
|
-
cachedProvider = null;
|
|
25871
|
-
return null;
|
|
25872
|
-
}
|
|
25873
|
-
const defaults = PROVIDER_DEFAULTS[providerName];
|
|
25874
|
-
const model = cfg.model === defaults?.model ? cfg.model : cfg.model;
|
|
25875
|
-
const dimensions = cfg.dimensions;
|
|
25981
|
+
const model = cfg.model;
|
|
25876
25982
|
switch (providerName) {
|
|
25877
|
-
case "
|
|
25878
|
-
|
|
25983
|
+
case "local": {
|
|
25984
|
+
try {
|
|
25985
|
+
cachedProvider = new LocalProvider(model);
|
|
25986
|
+
} catch {
|
|
25987
|
+
info("local embedding provider unavailable (fastembed not installed)");
|
|
25988
|
+
cachedProvider = null;
|
|
25989
|
+
}
|
|
25990
|
+
break;
|
|
25991
|
+
}
|
|
25992
|
+
case "voyage": {
|
|
25993
|
+
const apiKey = getProviderApiKey(providerName);
|
|
25994
|
+
if (!apiKey) {
|
|
25995
|
+
cachedProvider = null;
|
|
25996
|
+
return null;
|
|
25997
|
+
}
|
|
25998
|
+
cachedProvider = new VoyageProvider(apiKey, model, cfg.dimensions);
|
|
25879
25999
|
break;
|
|
25880
|
-
|
|
25881
|
-
|
|
26000
|
+
}
|
|
26001
|
+
case "openai": {
|
|
26002
|
+
const apiKey = getProviderApiKey(providerName);
|
|
26003
|
+
if (!apiKey) {
|
|
26004
|
+
cachedProvider = null;
|
|
26005
|
+
return null;
|
|
26006
|
+
}
|
|
26007
|
+
cachedProvider = new OpenAIProvider(apiKey, model, cfg.dimensions);
|
|
25882
26008
|
break;
|
|
26009
|
+
}
|
|
25883
26010
|
default:
|
|
25884
26011
|
info(`unknown embedding provider: ${providerName}`);
|
|
25885
26012
|
cachedProvider = null;
|
|
@@ -25984,6 +26111,29 @@ function checkConfigChange() {
|
|
|
25984
26111
|
).run(EMBEDDING_CONFIG_KEY, current2, current2);
|
|
25985
26112
|
return true;
|
|
25986
26113
|
}
|
|
26114
|
+
/**
 * Backfill missing embeddings at startup and log a coverage summary.
 *
 * Does nothing when `isAvailable()` is false. Otherwise it runs the
 * knowledge backfill, then the distillation backfill, counts how many rows
 * carry an embedding, and emits one `info` line.
 *
 * @returns {Promise<void>}
 */
async function runStartupBackfill() {
  if (!isAvailable()) return;

  const knowledgeEmbedded = await backfillEmbeddings();
  const distillationEmbedded = await backfillDistillationEmbeddings();

  // Every count query returns a single row with column `n`.
  const countRows = (sql) => db().query(sql).get().n;
  const kTotal = countRows("SELECT COUNT(*) as n FROM knowledge WHERE confidence > 0.2");
  const kWithEmb = countRows(
    "SELECT COUNT(*) as n FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2"
  );
  const dTotal = countRows(
    "SELECT COUNT(*) as n FROM distillations WHERE archived = 0 AND observations != ''"
  );
  const dWithEmb = countRows(
    "SELECT COUNT(*) as n FROM distillations WHERE embedding IS NOT NULL AND archived = 0"
  );

  const coverage = `coverage: knowledge ${kWithEmb}/${kTotal}, distillations ${dWithEmb}/${dTotal}`;
  const didBackfill = knowledgeEmbedded > 0 || distillationEmbedded > 0;
  const summary = didBackfill
    ? [`backfilled ${knowledgeEmbedded} knowledge + ${distillationEmbedded} distillations`, coverage]
    : [coverage];
  info(`embedding startup: ${summary.join("; ")}`);
}
|
|
25987
26137
|
async function backfillEmbeddings() {
|
|
25988
26138
|
checkConfigChange();
|
|
25989
26139
|
const provider = getProvider();
|
|
@@ -26740,6 +26890,9 @@ function check2(projectPath) {
|
|
|
26740
26890
|
// src/distillation.ts
|
|
26741
26891
|
var distillation_exports = {};
|
|
26742
26892
|
__export(distillation_exports, {
|
|
26893
|
+
backfillMetrics: () => backfillMetrics,
|
|
26894
|
+
compressionRatio: () => compressionRatio,
|
|
26895
|
+
detectSegments: () => detectSegments,
|
|
26743
26896
|
latestMetaObservations: () => latestMetaObservations,
|
|
26744
26897
|
loadForSession: () => loadForSession,
|
|
26745
26898
|
messagesToText: () => messagesToText,
|
|
@@ -26750,6 +26903,72 @@ __export(distillation_exports, {
|
|
|
26750
26903
|
workerSessionIDs: () => workerSessionIDs
|
|
26751
26904
|
});
|
|
26752
26905
|
|
|
26906
|
+
// src/pattern-extract.ts
|
|
26907
|
+
var pattern_extract_exports = {};
|
|
26908
|
+
__export(pattern_extract_exports, {
|
|
26909
|
+
extractPatterns: () => extractPatterns
|
|
26910
|
+
});
|
|
26911
|
+
// Phrase patterns recognised by extractPatterns(). Each entry pairs a global,
// case-insensitive regex with the category it yields and a function that
// turns a match into a title.
var PATTERNS = [
  // Decision patterns
  {
    regex: /decided to (?:use |switch to |go with |adopt )(.+?)(?:\.|,|$)/gi,
    category: "decision",
    titleFn: ([, choice]) => `Decided to use ${choice.trim()}`
  },
  {
    regex: /chose (.+?) over (.+?)(?:\.|,|$)/gi,
    category: "decision",
    titleFn: ([, chosen, rejected]) => `Chose ${chosen.trim()} over ${rejected.trim()}`
  },
  {
    regex: /switched from (.+?) to (.+?)(?:\.|,|$)/gi,
    category: "decision",
    titleFn: ([, before, after]) => `Switched from ${before.trim()} to ${after.trim()}`
  },
  {
    regex: /going with (.+?) (?:because|for|due to)(.+?)(?:\.|,|$)/gi,
    category: "decision",
    titleFn: ([, choice]) => `Going with ${choice.trim()}`
  },
  {
    regex: /migrat(?:ed|ing) (?:from .+? )?to (.+?)(?:\.|,|$)/gi,
    category: "decision",
    titleFn: ([, target]) => `Migrated to ${target.trim()}`
  },
  {
    regex: /adopted (.+?) (?:for|as|instead)(.+?)(?:\.|,|$)/gi,
    category: "decision",
    titleFn: ([, choice]) => `Adopted ${choice.trim()}`
  },
  // Preference patterns
  {
    regex: /prefers? (.+?) (?:over|to|instead of|rather than) (.+?)(?:\.|,|$)/gi,
    category: "preference",
    titleFn: ([, liked, other]) => `Prefers ${liked.trim()} over ${other.trim()}`
  },
  {
    regex: /(?:user |team |we )(?:always |usually |typically )(?:use|prefer|go with) (.+?)(?:\.|,|$)/gi,
    category: "preference",
    titleFn: ([, habit]) => `Typically uses ${habit.trim()}`
  }
];

/**
 * Scan observation text for decision and preference phrases.
 *
 * Patterns are tried in table order. Each match yields
 * `{ category, title, content }`, where `content` is the trimmed matched
 * text. Entries whose title differs only by letter case from an earlier one
 * are dropped.
 *
 * @param {string} observations - Text to scan.
 * @returns {Array<{category: string, title: string, content: string}>}
 */
function extractPatterns(observations) {
  // RegExp#exec coerces its argument to a string; keep that behaviour.
  const text = String(observations);
  const found = [];
  const seenTitles = /* @__PURE__ */ new Set();
  for (const pattern of PATTERNS) {
    // matchAll works on a clone that starts at the regex's lastIndex.
    pattern.regex.lastIndex = 0;
    for (const hit of text.matchAll(pattern.regex)) {
      const title = pattern.titleFn(hit);
      const dedupKey = title.toLowerCase();
      if (seenTitles.has(dedupKey)) continue;
      seenTitles.add(dedupKey);
      found.push({ category: pattern.category, title, content: hit[0].trim() });
    }
  }
  return found;
}
|
|
26971
|
+
|
|
26753
26972
|
// src/gradient.ts
|
|
26754
26973
|
function estimate2(text4) {
|
|
26755
26974
|
return Math.ceil(text4.length / 3);
|
|
@@ -26785,12 +27004,17 @@ function makeSessionState() {
|
|
|
26785
27004
|
lastWindowMessageIDs: /* @__PURE__ */ new Set(),
|
|
26786
27005
|
forceMinLayer: 0,
|
|
26787
27006
|
lastTransformEstimate: 0,
|
|
27007
|
+
ltmTokens: 0,
|
|
26788
27008
|
prefixCache: null,
|
|
26789
27009
|
rawWindowCache: null,
|
|
26790
27010
|
lastTurnAt: 0,
|
|
26791
27011
|
cameOutOfIdle: false,
|
|
27012
|
+
postIdleCompact: false,
|
|
26792
27013
|
consecutiveHighLayer: 0,
|
|
26793
|
-
lastPrefixHash: ""
|
|
27014
|
+
lastPrefixHash: "",
|
|
27015
|
+
bustCount: 0,
|
|
27016
|
+
transformCount: 0,
|
|
27017
|
+
distillationSnapshot: null
|
|
26794
27018
|
};
|
|
26795
27019
|
}
|
|
26796
27020
|
var sessionStates = /* @__PURE__ */ new Map();
|
|
@@ -26811,16 +27035,21 @@ function onIdleResume(sessionID, thresholdMs, now = Date.now()) {
|
|
|
26811
27035
|
if (idleMs < thresholdMs) return { triggered: false };
|
|
26812
27036
|
state.prefixCache = null;
|
|
26813
27037
|
state.rawWindowCache = null;
|
|
27038
|
+
state.distillationSnapshot = null;
|
|
26814
27039
|
state.cameOutOfIdle = true;
|
|
27040
|
+
state.postIdleCompact = true;
|
|
26815
27041
|
return { triggered: true, idleMs };
|
|
26816
27042
|
}
|
|
27043
|
+
/**
 * Read the `lastTurnAt` value recorded for a session.
 *
 * @param {string} sessionID - Session to look up.
 * @returns {number} The stored value, or 0 when the session is unknown or
 *   the value is nullish.
 */
function getLastTurnAt(sessionID) {
  const session = sessionStates.get(sessionID);
  return session?.lastTurnAt ?? 0;
}
|
|
26817
27046
|
/**
 * Test and clear a session's `cameOutOfIdle` flag in one step.
 *
 * @param {string} sessionID - Session to check.
 * @returns {boolean} `true` if the flag was set (it is cleared as a side
 *   effect); `false` if the session is unknown or the flag was not set.
 */
function consumeCameOutOfIdle(sessionID) {
  const session = sessionStates.get(sessionID);
  if (session?.cameOutOfIdle) {
    session.cameOutOfIdle = false;
    return true;
  }
  return false;
}
|
|
26823
|
-
var
|
|
27052
|
+
var ltmTokensFallback = 0;
|
|
26824
27053
|
function setModelLimits(limits) {
|
|
26825
27054
|
contextLimit = limits.context || 2e5;
|
|
26826
27055
|
outputReserved = Math.min(limits.output || 32e3, 32e3);
|
|
@@ -26833,11 +27062,18 @@ function computeLayer0Cap(targetCostPerTurn, cacheReadCostPerToken) {
|
|
|
26833
27062
|
const rawCap = Math.floor(targetCostPerTurn / cacheReadCostPerToken);
|
|
26834
27063
|
return Math.max(rawCap, MIN_LAYER0_FLOOR);
|
|
26835
27064
|
}
|
|
26836
|
-
function setLtmTokens(tokens) {
|
|
26837
|
-
|
|
27065
|
+
/**
 * Record the LTM token count.
 *
 * When a session ID is given, the value is stored on that session's state
 * (obtained through `getSessionState`). The module-level fallback is updated
 * in every case.
 *
 * @param {number} tokens - Token count to store.
 * @param {string} [sessionID] - Session the count belongs to.
 */
function setLtmTokens(tokens, sessionID) {
  const hasSession = Boolean(sessionID);
  if (hasSession) {
    const session = getSessionState(sessionID);
    session.ltmTokens = tokens;
  }
  ltmTokensFallback = tokens;
}
|
|
26839
|
-
function getLtmTokens() {
|
|
26840
|
-
|
|
27071
|
+
/**
 * Read the LTM token count.
 *
 * @param {string} [sessionID] - Session to read from.
 * @returns {number} The session's `ltmTokens` when a session ID is given and
 *   that session has state; otherwise the module-level fallback.
 */
function getLtmTokens(sessionID) {
  // Lookup only: unlike setLtmTokens, this never creates session state.
  const session = sessionID ? sessionStates.get(sessionID) : void 0;
  return session ? session.ltmTokens : ltmTokensFallback;
}
|
|
26842
27078
|
function getLtmBudget(ltmFraction) {
|
|
26843
27079
|
const overhead = calibratedOverhead ?? FIRST_TURN_OVERHEAD;
|
|
@@ -26853,7 +27089,7 @@ function calibrate(actualInput, sessionID, messageCount) {
|
|
|
26853
27089
|
if (sessionID !== void 0) {
|
|
26854
27090
|
const state = getSessionState(sessionID);
|
|
26855
27091
|
state.lastKnownInput = actualInput;
|
|
26856
|
-
state.lastKnownLtm = ltmTokens;
|
|
27092
|
+
state.lastKnownLtm = state.ltmTokens;
|
|
26857
27093
|
if (messageCount !== void 0) state.lastKnownMessageCount = messageCount;
|
|
26858
27094
|
}
|
|
26859
27095
|
}
|
|
@@ -26884,7 +27120,9 @@ function inspectSessionState(sessionID) {
|
|
|
26884
27120
|
hasPrefixCache: state.prefixCache !== null,
|
|
26885
27121
|
hasRawWindowCache: state.rawWindowCache !== null,
|
|
26886
27122
|
cameOutOfIdle: state.cameOutOfIdle,
|
|
26887
|
-
|
|
27123
|
+
postIdleCompact: state.postIdleCompact,
|
|
27124
|
+
lastTurnAt: state.lastTurnAt,
|
|
27125
|
+
distillationSnapshot: state.distillationSnapshot
|
|
26888
27126
|
};
|
|
26889
27127
|
}
|
|
26890
27128
|
function setLastTurnAtForTest(sessionID, ms) {
|
|
@@ -26896,6 +27134,25 @@ function loadDistillations(projectPath, sessionID) {
|
|
|
26896
27134
|
const params = sessionID ? [pid, sessionID] : [pid];
|
|
26897
27135
|
return db().query(query).all(...params);
|
|
26898
27136
|
}
|
|
27137
|
+
/**
 * Load distillation rows, reusing the session's snapshot while the most
 * recent user message is unchanged.
 *
 * The snapshot is keyed by the `info.id` of the last message whose
 * `info.role` is `"user"` (`null` when there is none). On a key mismatch, or
 * when no snapshot exists, rows are reloaded through `loadDistillations`,
 * the snapshot is replaced, and a refresh line is logged.
 *
 * @param {string} projectPath - Passed through to `loadDistillations`.
 * @param {string} sessionID - Passed through to `loadDistillations`.
 * @param {Array} messages - Conversation messages, each with `info.role`
 *   and `info.id`.
 * @param {object} sessState - Session state holding `distillationSnapshot`.
 * @returns {Array} Distillation rows, cached or freshly loaded.
 */
function loadDistillationsCached(projectPath, sessionID, messages, sessState) {
  let lastUserMsgId = null;
  let idx = messages.length;
  // Walk backwards to the newest user message.
  while (idx-- > 0) {
    const { info: meta } = messages[idx];
    if (meta.role === "user") {
      lastUserMsgId = meta.id;
      break;
    }
  }

  const cached = sessState.distillationSnapshot;
  if (!cached || cached.lastUserMsgId !== lastUserMsgId) {
    const rows = loadDistillations(projectPath, sessionID);
    sessState.distillationSnapshot = { rows, lastUserMsgId };
    info(
      `distillation refresh: ${rows.length} rows (user msg ${lastUserMsgId?.substring(0, 16) ?? "none"})`
    );
    return rows;
  }
  return cached.rows;
}
|
|
26899
27156
|
function stripSystemReminders(text4) {
|
|
26900
27157
|
return text4.replace(/<system-reminder>[\s\S]*?<\/system-reminder>\n?/g, (match) => {
|
|
26901
27158
|
const inner = match.match(
|
|
@@ -26948,24 +27205,51 @@ function simpleHash(str) {
|
|
|
26948
27205
|
}
|
|
26949
27206
|
return hash2;
|
|
26950
27207
|
}
|
|
26951
|
-
function
|
|
27208
|
+
/**
 * Extract the file path and optional line range from a read tool's input.
 *
 * JSON input: the path is the first truthy value of `path`, `filePath`,
 * `file`; `offset` and `limit` are kept only when they are numbers.
 * If JSON handling throws for any reason, the input is searched for the
 * first substring that looks like a slash-separated path ending in a 1-5
 * character extension.
 *
 * @param {string} input - Raw tool input.
 * @returns {{path: *, offset: (number|undefined), limit: (number|undefined)}|undefined}
 *   The read descriptor, or `undefined` when no path is found.
 */
function extractReadRange(input) {
  const numberOrUndefined = (value) => (typeof value === "number" ? value : void 0);
  try {
    // The property reads stay inside the try so that non-object JSON such
    // as `null` also lands in the regex fallback.
    const args = JSON.parse(input);
    const path = args.path || args.filePath || args.file;
    return path
      ? { path, offset: numberOrUndefined(args.offset), limit: numberOrUndefined(args.limit) }
      : void 0;
  } catch {
    const found = input.match(/(?:[\w.-]+\/)+[\w.-]+\.\w{1,5}/);
    return found ? { path: found[0], offset: void 0, limit: void 0 } : void 0;
  }
}
|
|
26960
|
-
function
|
|
27222
|
+
/**
 * Decide whether a later read of a file includes everything an earlier read
 * of the same file returned.
 *
 * A read with neither `offset` nor `limit` is a whole-file read. A missing
 * `offset` counts as 1. A read without `limit` is open-ended.
 *
 * @param {{path: string, offset?: number, limit?: number}} later
 * @param {{path: string, offset?: number, limit?: number}} earlier
 * @returns {boolean} `true` when `later` covers `earlier`.
 */
function laterReadCovers(later, earlier) {
  if (later.path !== earlier.path) return false;

  const isWholeFile = (read) => read.offset === void 0 && read.limit === void 0;
  if (isWholeFile(later)) return true;
  // A partial later read can never be shown to cover a whole-file read.
  if (isWholeFile(earlier)) return false;

  const laterStart = later.offset ?? 1;
  const earlierStart = earlier.offset ?? 1;
  const startsEarlyEnough = laterStart <= earlierStart;

  // Open-ended later read: only the start matters.
  if (later.limit === void 0) return startsEarlyEnough;
  // A bounded later read cannot cover an open-ended earlier one.
  if (earlier.limit === void 0) return false;

  return startsEarlyEnough && laterStart + later.limit >= earlierStart + earlier.limit;
}
|
|
27234
|
+
/**
 * Format a read range as a suffix for annotations.
 *
 * @param {{offset?: number, limit?: number}} range
 * @returns {string} `" lines A-B"` when both offset and limit are set,
 *   `" from line A"` when only offset is set, otherwise `""`.
 */
function rangeLabel(range) {
  const { offset, limit } = range;
  if (offset === void 0) return "";
  return limit === void 0
    ? ` from line ${offset}`
    : ` lines ${offset}-${offset + limit - 1}`;
}
|
|
27243
|
+
/**
 * Build the placeholder text that replaces a deduplicated tool output.
 *
 * @param {string} toolName - Tool whose output was dropped; used only when
 *   no file path is given.
 * @param {string} [filePath] - File that was read, if any.
 * @param {{offset?: number, limit?: number}} [range] - Range of the earlier
 *   read; rendered through `rangeLabel` when present.
 * @returns {string} Bracketed annotation text.
 */
function dedupAnnotation(toolName, filePath, range) {
  if (!filePath) {
    return `[duplicate output \u2014 same content as later ${toolName} in this session \u2014 use recall for details]`;
  }
  const suffix = range ? rangeLabel(range) : "";
  return `[earlier read of ${filePath}${suffix} \u2014 see latest read below for current content]`;
}
|
|
26966
27250
|
function deduplicateToolOutputs(messages, currentTurnIdx) {
|
|
26967
27251
|
const contentLatest = /* @__PURE__ */ new Map();
|
|
26968
|
-
const
|
|
27252
|
+
const fileReads = /* @__PURE__ */ new Map();
|
|
26969
27253
|
for (let i = 0; i < messages.length; i++) {
|
|
26970
27254
|
for (const part of messages[i].parts) {
|
|
26971
27255
|
if (!isToolPart(part) || part.state.status !== "completed") continue;
|
|
@@ -26975,8 +27259,15 @@ function deduplicateToolOutputs(messages, currentTurnIdx) {
|
|
|
26975
27259
|
contentLatest.set(key, i);
|
|
26976
27260
|
if (part.tool === "read_file" || part.tool === "read") {
|
|
26977
27261
|
const inputStr = typeof part.state.input === "string" ? part.state.input : JSON.stringify(part.state.input);
|
|
26978
|
-
const
|
|
26979
|
-
if (
|
|
27262
|
+
const range = extractReadRange(inputStr);
|
|
27263
|
+
if (range) {
|
|
27264
|
+
let entries = fileReads.get(range.path);
|
|
27265
|
+
if (!entries) {
|
|
27266
|
+
entries = [];
|
|
27267
|
+
fileReads.set(range.path, entries);
|
|
27268
|
+
}
|
|
27269
|
+
entries.push({ range, msgIdx: i });
|
|
27270
|
+
}
|
|
26980
27271
|
}
|
|
26981
27272
|
}
|
|
26982
27273
|
}
|
|
@@ -26990,20 +27281,30 @@ function deduplicateToolOutputs(messages, currentTurnIdx) {
|
|
|
26990
27281
|
if (!output || output.length < DEDUP_MIN_CHARS) return part;
|
|
26991
27282
|
const contentKey = `${part.tool}:${simpleHash(output)}`;
|
|
26992
27283
|
const isLatestContent = contentLatest.get(contentKey) === msgIdx;
|
|
26993
|
-
let
|
|
26994
|
-
let
|
|
27284
|
+
let readRange;
|
|
27285
|
+
let coveredByLater = false;
|
|
26995
27286
|
if (part.tool === "read_file" || part.tool === "read") {
|
|
26996
27287
|
const inputStr = typeof part.state.input === "string" ? part.state.input : JSON.stringify(part.state.input);
|
|
26997
|
-
|
|
26998
|
-
if (
|
|
27288
|
+
readRange = extractReadRange(inputStr);
|
|
27289
|
+
if (readRange) {
|
|
27290
|
+
const entries = fileReads.get(readRange.path);
|
|
27291
|
+
if (entries) {
|
|
27292
|
+
for (const entry of entries) {
|
|
27293
|
+
if (entry.msgIdx > msgIdx && laterReadCovers(entry.range, readRange)) {
|
|
27294
|
+
coveredByLater = true;
|
|
27295
|
+
break;
|
|
27296
|
+
}
|
|
27297
|
+
}
|
|
27298
|
+
}
|
|
27299
|
+
}
|
|
26999
27300
|
}
|
|
27000
|
-
if (isLatestContent &&
|
|
27301
|
+
if (isLatestContent && !coveredByLater) return part;
|
|
27001
27302
|
partsChanged = true;
|
|
27002
27303
|
return {
|
|
27003
27304
|
...part,
|
|
27004
27305
|
state: {
|
|
27005
27306
|
...part.state,
|
|
27006
|
-
output: dedupAnnotation(part.tool,
|
|
27307
|
+
output: dedupAnnotation(part.tool, readRange?.path, readRange)
|
|
27007
27308
|
}
|
|
27008
27309
|
};
|
|
27009
27310
|
});
|
|
@@ -27023,7 +27324,7 @@ function sanitizeToolParts(messages) {
|
|
|
27023
27324
|
const { status } = part.state;
|
|
27024
27325
|
if (status === "completed" || status === "error") return part;
|
|
27025
27326
|
partsChanged = true;
|
|
27026
|
-
const
|
|
27327
|
+
const existingStart = "time" in part.state ? part.state.time.start : 0;
|
|
27027
27328
|
return {
|
|
27028
27329
|
...part,
|
|
27029
27330
|
state: {
|
|
@@ -27032,8 +27333,8 @@ function sanitizeToolParts(messages) {
|
|
|
27032
27333
|
error: "[tool execution interrupted \u2014 session recovered]",
|
|
27033
27334
|
metadata: "metadata" in part.state ? part.state.metadata : void 0,
|
|
27034
27335
|
time: {
|
|
27035
|
-
start:
|
|
27036
|
-
end:
|
|
27336
|
+
start: existingStart,
|
|
27337
|
+
end: existingStart
|
|
27037
27338
|
}
|
|
27038
27339
|
}
|
|
27039
27340
|
};
|
|
@@ -27057,97 +27358,6 @@ function stripToolOutputs(parts) {
|
|
|
27057
27358
|
};
|
|
27058
27359
|
});
|
|
27059
27360
|
}
|
|
27060
|
-
/**
 * Describe how far `date5` is from `now` in coarse, human-readable units.
 *
 * Past dates keep the original wording ("yesterday", "3 days ago",
 * "2 weeks ago", ...). Dates after `now` are phrased as "tomorrow" /
 * "in 3 days" / "in 2 weeks" instead of producing a negative count such as
 * "-3 days ago".
 *
 * @param {Date} date5 - The date being described.
 * @param {Date} now - Reference point.
 * @returns {string} Relative-time phrase.
 */
function formatRelativeTime(date5, now) {
  const diffMs = now.getTime() - date5.getTime();
  const isFuture = diffMs < 0;
  // Work on the magnitude so flooring behaves the same in both directions.
  const days = Math.floor(Math.abs(diffMs) / (1e3 * 60 * 60 * 24));
  const phrase = (span) => (isFuture ? `in ${span}` : `${span} ago`);

  if (days === 0) return "today";
  if (days === 1) return isFuture ? "tomorrow" : "yesterday";
  if (days < 7) return phrase(`${days} days`);
  if (days < 14) return phrase("1 week");
  if (days < 30) return phrase(`${Math.floor(days / 7)} weeks`);
  if (days < 60) return phrase("1 month");
  if (days < 365) return phrase(`${Math.floor(days / 30)} months`);
  const years = Math.floor(days / 365);
  return phrase(`${years} year${years > 1 ? "s" : ""}`);
}
|
|
27072
|
-
/**
 * Extract a date from free text, trying progressively looser formats.
 *
 * Formats are attempted in order and the first one that yields a valid Date
 * wins:
 *   1. "Month D, YYYY" (comma optional)
 *   2. "Month D-D, YYYY" (uses the first day of the range)
 *   3. "early|mid|late Month YYYY" (mapped to day 7, 15 and 23 respectively)
 *
 * @param {string} s - Text that may contain a date.
 * @returns {Date|null} Parsed date, or null when nothing usable was found.
 */
function parseDateFromContent(s) {
  const dayForQualifier = { early: 7, late: 23 };
  const exactDay = (m) => `${m[1]} ${m[2]}, ${m[3]}`;
  const attempts = [
    { pattern: /([A-Z][a-z]+)\s+(\d{1,2}),?\s+(\d{4})/, toDateString: exactDay },
    { pattern: /([A-Z][a-z]+)\s+(\d{1,2})-\d{1,2},?\s+(\d{4})/, toDateString: exactDay },
    {
      pattern: /(late|early|mid)[- ]?([A-Z][a-z]+)\s+(\d{4})/i,
      // Anything that is not "early" or "late" (i.e. "mid") lands on the 15th.
      toDateString: (m) => `${m[2]} ${dayForQualifier[m[1].toLowerCase()] ?? 15}, ${m[3]}`
    }
  ];

  for (const { pattern, toDateString } of attempts) {
    const found = s.match(pattern);
    if (!found) continue;
    const candidate = new Date(toDateString(found));
    if (!isNaN(candidate.getTime())) return candidate;
  }
  return null;
}
|
|
27091
|
-
/**
 * Append a relative-time hint to inline "(meaning <date>)" and
 * "(estimated <date>)" parentheticals.
 *
 * For each parenthetical whose content parses as a date, the text becomes
 * "(<prefix><date> — <relative time>)". When the date is before `now` and the
 * part of the same line preceding the parenthetical contains a future-intent
 * phrase ("will", "plans to", "going to", ...), ", likely already happened"
 * is added as well. Parentheticals that do not parse are left untouched.
 *
 * @param {string} text4 - Text to annotate.
 * @param {Date} now - Reference point for relative times.
 * @returns {string} Annotated text.
 */
function expandInlineEstimatedDates(text4, now) {
  return text4.replace(
    /\(((?:meaning|estimated)\s+)([^)]+\d{4})\)/gi,
    // The fourth replacer argument is the offset of THIS match. Looking the
    // matched text up with indexOf would always land on its first occurrence
    // and inspect the wrong line when the same parenthetical repeats.
    (match, prefix, dateContent, matchIdx) => {
      const d = parseDateFromContent(dateContent);
      if (!d) return match;
      const rel = formatRelativeTime(d, now);
      const lineStart = text4.lastIndexOf("\n", matchIdx) + 1;
      const linePrefix = text4.slice(lineStart, matchIdx);
      const isFutureIntent = /\b(?:will|plans?\s+to|planning\s+to|going\s+to|intends?\s+to)\b/i.test(
        linePrefix
      );
      if (d < now && isFutureIntent) {
        return `(${prefix}${dateContent} \u2014 ${rel}, likely already happened)`;
      }
      return `(${prefix}${dateContent} \u2014 ${rel})`;
    }
  );
}
|
|
27110
|
-
/**
 * Annotate observation text with relative-time information.
 *
 * Inline estimated dates are expanded first. Then every line of the form
 * "Date: Month D, YYYY" gets " (<relative time>)" appended, and when two
 * consecutive date headers are more than one day apart a "[N ... later]"
 * marker is inserted before the later header.
 *
 * @param {string} text4 - Observation text.
 * @param {Date} now - Reference point for relative times.
 * @returns {string} Annotated text.
 */
function addRelativeTimeToObservations(text4, now) {
  const withInline = expandInlineEstimatedDates(text4, now);

  // Collect every parseable "Date:" header together with its position.
  const headers = [];
  for (const m of withInline.matchAll(/^(Date:\s*)([A-Z][a-z]+ \d{1,2}, \d{4})$/gm)) {
    const date = new Date(m[2]);
    if (isNaN(date.getTime())) continue;
    headers.push({ index: m.index, date, full: m[0], prefix: m[1], ds: m[2] });
  }
  if (!headers.length) return withInline;

  const gapLabel = (gapDays) => {
    if (gapDays < 7) return `[${gapDays} days later]`;
    if (gapDays < 14) return "[1 week later]";
    if (gapDays < 30) return `[${Math.floor(gapDays / 7)} weeks later]`;
    if (gapDays < 60) return "[1 month later]";
    return `[${Math.floor(gapDays / 30)} months later]`;
  };

  let result = "";
  let cursor = 0;
  headers.forEach((curr, i) => {
    // Copy the untouched text between the previous header and this one.
    result += withInline.slice(cursor, curr.index);
    if (i > 0) {
      const prev = headers[i - 1];
      const gapDays = Math.floor((curr.date.getTime() - prev.date.getTime()) / 864e5);
      if (gapDays > 1) {
        // NOTE(review): separator reproduced as "\n<gap>\n\n" from the visible
        // source — confirm no leading whitespace was lost inside the template.
        result += `\n${gapLabel(gapDays)}\n\n`;
      }
    }
    result += `${curr.prefix}${curr.ds} (${formatRelativeTime(curr.date, now)})`;
    cursor = curr.index + curr.full.length;
  });
  return result + withInline.slice(cursor);
}
|
|
27151
27361
|
function buildPrefixMessages(formatted) {
|
|
27152
27362
|
return [
|
|
27153
27363
|
{
|
|
@@ -27204,12 +27414,7 @@ function buildPrefixMessages(formatted) {
|
|
|
27204
27414
|
}
|
|
27205
27415
|
/**
 * Build the prefix messages that carry the formatted distillations.
 *
 * @param {Array} distillations - Distillation rows to format.
 * @returns {Array} Prefix messages, or an empty array when there are no
 *   distillations or formatting produced nothing.
 */
function distilledPrefix(distillations) {
  if (!distillations.length) return [];
  const formatted = formatDistillations(distillations);
  return formatted ? buildPrefixMessages(formatted) : [];
}
|
|
@@ -27229,12 +27434,7 @@ function distilledPrefixCached(distillations, sessionID, sessState) {
|
|
|
27229
27434
|
};
|
|
27230
27435
|
}
|
|
27231
27436
|
const newRows = distillations.slice(prefixCache.rowCount);
|
|
27232
|
-
const
|
|
27233
|
-
const annotated2 = newRows.map((d) => ({
|
|
27234
|
-
...d,
|
|
27235
|
-
observations: addRelativeTimeToObservations(d.observations, now2)
|
|
27236
|
-
}));
|
|
27237
|
-
const deltaText = formatDistillations(annotated2);
|
|
27437
|
+
const deltaText = formatDistillations(newRows);
|
|
27238
27438
|
if (deltaText) {
|
|
27239
27439
|
const fullText2 = prefixCache.cachedText + "\n\n" + deltaText;
|
|
27240
27440
|
const messages2 = buildPrefixMessages(fullText2);
|
|
@@ -27250,12 +27450,7 @@ function distilledPrefixCached(distillations, sessionID, sessState) {
|
|
|
27250
27450
|
return { messages: messages2, tokens: tokens2 };
|
|
27251
27451
|
}
|
|
27252
27452
|
}
|
|
27253
|
-
const
|
|
27254
|
-
const annotated = distillations.map((d) => ({
|
|
27255
|
-
...d,
|
|
27256
|
-
observations: addRelativeTimeToObservations(d.observations, now)
|
|
27257
|
-
}));
|
|
27258
|
-
const fullText = formatDistillations(annotated);
|
|
27453
|
+
const fullText = formatDistillations(distillations);
|
|
27259
27454
|
if (!fullText) {
|
|
27260
27455
|
sessState.prefixCache = null;
|
|
27261
27456
|
return { messages: [], tokens: 0 };
|
|
@@ -27278,29 +27473,40 @@ function tryFitStable(input) {
|
|
|
27278
27473
|
const rawWindowCache = input.sessState.rawWindowCache;
|
|
27279
27474
|
const cacheValid = rawWindowCache !== null && rawWindowCache.sessionID === input.sessionID;
|
|
27280
27475
|
if (cacheValid) {
|
|
27281
|
-
const
|
|
27282
|
-
|
|
27476
|
+
const newMessages = Math.max(0, input.messages.length - rawWindowCache.pinnedTotalCount);
|
|
27477
|
+
const windowSize = rawWindowCache.pinnedRawCount + newMessages;
|
|
27478
|
+
const pinnedIdx = Math.max(0, input.messages.length - windowSize);
|
|
27479
|
+
const pinnedWindow = input.messages.slice(pinnedIdx);
|
|
27480
|
+
const pinnedTokens = pinnedWindow.reduce(
|
|
27481
|
+
(sum, m) => sum + estimateMessage(m),
|
|
27482
|
+
0
|
|
27283
27483
|
);
|
|
27284
|
-
|
|
27285
|
-
|
|
27286
|
-
|
|
27287
|
-
|
|
27288
|
-
|
|
27289
|
-
|
|
27290
|
-
|
|
27291
|
-
|
|
27292
|
-
|
|
27293
|
-
return parts !== msg.parts ? { info: msg.info, parts } : msg;
|
|
27294
|
-
});
|
|
27295
|
-
const total = input.prefixTokens + pinnedTokens;
|
|
27296
|
-
return {
|
|
27297
|
-
messages: [...input.prefix, ...processed],
|
|
27298
|
-
distilledTokens: input.prefixTokens,
|
|
27299
|
-
rawTokens: pinnedTokens,
|
|
27300
|
-
totalTokens: total
|
|
27484
|
+
const highWaterBudget = Math.max(rawWindowCache.pinnedBudget, input.rawBudget);
|
|
27485
|
+
const effectiveBudget = highWaterBudget * 1.15;
|
|
27486
|
+
if (pinnedTokens <= effectiveBudget) {
|
|
27487
|
+
if (pinnedTokens > rawWindowCache.pinnedBudget * 1.15) {
|
|
27488
|
+
input.sessState.rawWindowCache = {
|
|
27489
|
+
...rawWindowCache,
|
|
27490
|
+
pinnedRawCount: pinnedWindow.length,
|
|
27491
|
+
pinnedTotalCount: input.messages.length,
|
|
27492
|
+
pinnedBudget: input.rawBudget
|
|
27301
27493
|
};
|
|
27302
27494
|
}
|
|
27495
|
+
const processed = pinnedWindow.map((msg) => {
|
|
27496
|
+
const parts = cleanParts(msg.parts);
|
|
27497
|
+
return parts !== msg.parts ? { info: msg.info, parts } : msg;
|
|
27498
|
+
});
|
|
27499
|
+
const total = input.prefixTokens + pinnedTokens;
|
|
27500
|
+
return {
|
|
27501
|
+
messages: [...input.prefix, ...processed],
|
|
27502
|
+
distilledTokens: input.prefixTokens,
|
|
27503
|
+
rawTokens: pinnedTokens,
|
|
27504
|
+
totalTokens: total
|
|
27505
|
+
};
|
|
27303
27506
|
}
|
|
27507
|
+
info(
|
|
27508
|
+
`pin-overflow: session=${input.sessionID} pinnedTokens=${pinnedTokens} pinnedBudget=${rawWindowCache.pinnedBudget} effectiveBudget=${Math.round(effectiveBudget)} currentRawBudget=${input.rawBudget} windowSize=${pinnedWindow.length}`
|
|
27509
|
+
);
|
|
27304
27510
|
}
|
|
27305
27511
|
const result = tryFit({
|
|
27306
27512
|
messages: input.messages,
|
|
@@ -27311,11 +27517,13 @@ function tryFitStable(input) {
|
|
|
27311
27517
|
strip: "none"
|
|
27312
27518
|
});
|
|
27313
27519
|
if (result) {
|
|
27314
|
-
const
|
|
27315
|
-
if (
|
|
27520
|
+
const rawMessageCount = result.messages.length - input.prefix.length;
|
|
27521
|
+
if (rawMessageCount > 0) {
|
|
27316
27522
|
input.sessState.rawWindowCache = {
|
|
27317
27523
|
sessionID: input.sessionID,
|
|
27318
|
-
|
|
27524
|
+
pinnedRawCount: rawMessageCount,
|
|
27525
|
+
pinnedTotalCount: input.messages.length,
|
|
27526
|
+
pinnedBudget: input.rawBudget
|
|
27319
27527
|
};
|
|
27320
27528
|
}
|
|
27321
27529
|
}
|
|
@@ -27330,14 +27538,15 @@ function needsUrgentDistillation() {
|
|
|
27330
27538
|
function transformInner(input) {
|
|
27331
27539
|
const cfg = config2();
|
|
27332
27540
|
const overhead = getOverhead();
|
|
27541
|
+
const sid = input.sessionID ?? input.messages[0]?.info.sessionID;
|
|
27542
|
+
const sessState = sid ? getSessionState(sid) : makeSessionState();
|
|
27543
|
+
const sessLtmTokens = sid ? sessState.ltmTokens : ltmTokensFallback;
|
|
27333
27544
|
const usable = Math.max(
|
|
27334
27545
|
0,
|
|
27335
|
-
contextLimit - outputReserved - overhead -
|
|
27546
|
+
contextLimit - outputReserved - overhead - sessLtmTokens
|
|
27336
27547
|
);
|
|
27337
27548
|
const distilledBudget = Math.floor(usable * cfg.budget.distilled);
|
|
27338
|
-
|
|
27339
|
-
const sid = input.sessionID ?? input.messages[0]?.info.sessionID;
|
|
27340
|
-
const sessState = sid ? getSessionState(sid) : makeSessionState();
|
|
27549
|
+
let rawBudget = Math.floor(usable * cfg.budget.raw);
|
|
27341
27550
|
let effectiveMinLayer = sessState.forceMinLayer;
|
|
27342
27551
|
sessState.forceMinLayer = 0;
|
|
27343
27552
|
if (sid && effectiveMinLayer > 0) saveForceMinLayer(sid, 0);
|
|
@@ -27350,17 +27559,26 @@ function transformInner(input) {
|
|
|
27350
27559
|
return result.totalTokens * UNCALIBRATED_SAFETY <= maxInput;
|
|
27351
27560
|
}
|
|
27352
27561
|
if (calibrated && sessState.lastLayer >= 1 && input.messages.length >= sessState.lastKnownMessageCount) {
|
|
27562
|
+
effectiveMinLayer = Math.max(effectiveMinLayer, sessState.lastLayer);
|
|
27563
|
+
}
|
|
27564
|
+
const postIdleCompact = sessState.postIdleCompact;
|
|
27565
|
+
if (postIdleCompact) {
|
|
27566
|
+
sessState.postIdleCompact = false;
|
|
27353
27567
|
effectiveMinLayer = Math.max(effectiveMinLayer, 1);
|
|
27568
|
+
rawBudget = Math.floor(usable * 0.2);
|
|
27569
|
+
info(
|
|
27570
|
+
`post-idle compact: session=${sid} rawBudget=${rawBudget} (${Math.floor(usable * cfg.budget.raw)}\u2192${rawBudget})`
|
|
27571
|
+
);
|
|
27354
27572
|
}
|
|
27355
27573
|
let expectedInput;
|
|
27356
27574
|
if (calibrated) {
|
|
27357
27575
|
const newMessages = sessState.lastWindowMessageIDs.size > 0 ? input.messages.filter((m) => !sessState.lastWindowMessageIDs.has(m.info.id)) : input.messages.slice(-Math.max(0, input.messages.length - sessState.lastKnownMessageCount));
|
|
27358
27576
|
const newMsgTokens = newMessages.reduce((s, m) => s + estimateMessage(m), 0);
|
|
27359
|
-
const ltmDelta =
|
|
27577
|
+
const ltmDelta = sessLtmTokens - sessState.lastKnownLtm;
|
|
27360
27578
|
expectedInput = sessState.lastKnownInput + newMsgTokens + ltmDelta;
|
|
27361
27579
|
} else {
|
|
27362
27580
|
const messageTokens = input.messages.reduce((s, m) => s + estimateMessage(m), 0);
|
|
27363
|
-
expectedInput = messageTokens + overhead +
|
|
27581
|
+
expectedInput = messageTokens + overhead + sessLtmTokens;
|
|
27364
27582
|
}
|
|
27365
27583
|
const layer0Input = calibrated ? expectedInput : expectedInput * UNCALIBRATED_SAFETY;
|
|
27366
27584
|
let layer0Ceiling = maxLayer0Tokens > 0 ? Math.min(maxInput, maxLayer0Tokens) : maxInput;
|
|
@@ -27368,7 +27586,7 @@ function transformInner(input) {
|
|
|
27368
27586
|
layer0Ceiling = Math.floor(layer0Ceiling * 0.7);
|
|
27369
27587
|
}
|
|
27370
27588
|
if (effectiveMinLayer === 0 && layer0Input <= layer0Ceiling) {
|
|
27371
|
-
const messageTokens = calibrated ? expectedInput - (
|
|
27589
|
+
const messageTokens = calibrated ? expectedInput - (sessLtmTokens - sessState.lastKnownLtm) : expectedInput - overhead - sessLtmTokens;
|
|
27372
27590
|
return {
|
|
27373
27591
|
messages: input.messages,
|
|
27374
27592
|
layer: 0,
|
|
@@ -27382,7 +27600,7 @@ function transformInner(input) {
|
|
|
27382
27600
|
}
|
|
27383
27601
|
const turnStart = currentTurnStart(input.messages);
|
|
27384
27602
|
const dedupMessages = deduplicateToolOutputs(input.messages, turnStart);
|
|
27385
|
-
const distillations = sid ?
|
|
27603
|
+
const distillations = sid ? loadDistillationsCached(input.projectPath, sid, input.messages, sessState) : [];
|
|
27386
27604
|
const cached2 = sid ? distilledPrefixCached(distillations, sid, sessState) : (() => {
|
|
27387
27605
|
const msgs = distilledPrefix(distillations);
|
|
27388
27606
|
return { messages: msgs, tokens: msgs.reduce((sum, m) => sum + estimateMessage(m), 0) };
|
|
@@ -27495,12 +27713,27 @@ function transform2(input) {
|
|
|
27495
27713
|
state.lastLayer = result.layer;
|
|
27496
27714
|
state.lastWindowMessageIDs = new Set(result.messages.map((m) => m.info.id));
|
|
27497
27715
|
state.lastTurnAt = Date.now();
|
|
27498
|
-
const
|
|
27499
|
-
|
|
27716
|
+
const prefixFingerprint = result.messages.slice(0, 5).map((m) => {
|
|
27717
|
+
const text4 = m.parts.map((p3) => {
|
|
27718
|
+
if (isTextPart(p3)) return p3.text?.slice(0, 40) ?? "";
|
|
27719
|
+
if (isReasoningPart(p3)) return p3.text?.slice(0, 40) ?? "";
|
|
27720
|
+
return p3.type;
|
|
27721
|
+
}).join("|");
|
|
27722
|
+
return `${m.info.role}:${text4.slice(0, 60)}`;
|
|
27723
|
+
}).join(",");
|
|
27724
|
+
const prefixHash = `${result.layer}:${prefixFingerprint}`;
|
|
27725
|
+
state.transformCount++;
|
|
27500
27726
|
if (state.lastPrefixHash && state.lastPrefixHash !== prefixHash) {
|
|
27727
|
+
state.bustCount++;
|
|
27728
|
+
const rate = state.bustCount / state.transformCount;
|
|
27501
27729
|
info(
|
|
27502
|
-
`cache-bust
|
|
27730
|
+
`cache-bust #${state.bustCount} (${(rate * 100).toFixed(0)}%): session=${sid} layer=${state.lastLayer}\u2192${result.layer} msgs=${state.lastTransformedCount}\u2192${result.messages.length} prefix=${state.lastPrefixHash.slice(0, 30)}\u2192${prefixHash.slice(0, 30)}`
|
|
27503
27731
|
);
|
|
27732
|
+
if (state.transformCount >= 20 && rate > 0.5) {
|
|
27733
|
+
warn(
|
|
27734
|
+
`HIGH BUST RATE: session ${sid} has ${(rate * 100).toFixed(0)}% bust rate (${state.bustCount}/${state.transformCount} transforms)`
|
|
27735
|
+
);
|
|
27736
|
+
}
|
|
27504
27737
|
}
|
|
27505
27738
|
state.lastPrefixHash = prefixHash;
|
|
27506
27739
|
if (result.layer >= 2) {
|
|
@@ -27597,25 +27830,50 @@ function isWorkerSession(sessionID) {
|
|
|
27597
27830
|
}
|
|
27598
27831
|
|
|
27599
27832
|
// src/distillation.ts
|
|
27833
|
+
/**
 * Compression metric for a distillation: distilled token count divided by the
 * square root of the source token count.
 *
 * @param {number} distilledTokens - Tokens in the distilled output.
 * @param {number} sourceTokens - Tokens in the source messages.
 * @returns {number} The ratio, or 0 when `sourceTokens` is zero or negative.
 */
function compressionRatio(distilledTokens, sourceTokens) {
  return sourceTokens <= 0 ? 0 : distilledTokens / Math.sqrt(sourceTokens);
}
|
|
27600
27837
|
/**
 * Partition messages into segments for distillation.
 *
 * @param {Array} messages - Messages to partition.
 * @param {number} maxSegment - Segment size above which the input is split.
 * @returns {Array<Array>} The input wrapped as a single segment when it is
 *   within `maxSegment`, otherwise whatever splitSegments produces.
 */
function detectSegments(messages, maxSegment) {
  return messages.length <= maxSegment
    ? [messages]
    : splitSegments(messages, maxSegment);
}
|
|
27841
|
+
// Smallest segment size used when splitting: findSplitIndex only accepts a gap that leaves at least this many messages on each side, and splitSegments merges a shorter trailing remainder into the previous segment.
var MIN_SEGMENT = 3;
|
|
27842
|
+
// findSplitIndex splits at the largest time gap only when that gap is at least this many times the median gap between consecutive messages.
var GAP_THRESHOLD_MULTIPLIER = 3;
|
|
27843
|
+
/**
 * Recursively split a message list into segments.
 *
 * Input that already fits within `maxSegment` is returned as one segment.
 * Otherwise the list is cut at the index chosen by findSplitIndex, the head is
 * split recursively, and the tail is either split recursively or — when it
 * holds fewer than MIN_SEGMENT messages — appended to the last head segment.
 *
 * @param {Array} messages - Messages to split.
 * @param {number} maxSegment - Segment size above which a list is split.
 * @returns {Array<Array>} Ordered list of segments.
 */
function splitSegments(messages, maxSegment) {
  if (messages.length <= maxSegment) return [messages];

  const cut = findSplitIndex(messages, maxSegment);
  const head = messages.slice(0, cut);
  const tail = messages.slice(cut);
  const segments = splitSegments(head, maxSegment);

  if (tail.length >= MIN_SEGMENT) {
    segments.push(...splitSegments(tail, maxSegment));
    return segments;
  }
  // Too few messages left to stand alone: fold them into the last segment.
  segments[segments.length - 1].push(...tail);
  return segments;
}
|
|
27856
|
+
/**
 * Choose the index at which to cut a message list.
 *
 * The largest `created_at` gap between consecutive messages is preferred,
 * provided it leaves at least MIN_SEGMENT messages on both sides and is at
 * least GAP_THRESHOLD_MULTIPLIER times the median gap. When no gap qualifies
 * (or there are fewer than two messages) the cut falls at `maxSegment`.
 *
 * @param {Array<{created_at: number}>} messages - Messages in order.
 * @param {number} maxSegment - Fallback split index.
 * @returns {number} Index of the first message of the right-hand part.
 */
function findSplitIndex(messages, maxSegment) {
  // Fewer than two messages means there are no gaps to examine.
  if (messages.length < 2) return maxSegment;

  const gaps = Array.from({ length: messages.length - 1 }, (_, k) => ({
    index: k + 1,
    gap: messages[k + 1].created_at - messages[k].created_at
  }));

  const ascending = gaps.map((entry) => entry.gap).sort((a, b) => a - b);
  const medianGap = ascending[Math.floor(ascending.length / 2)];

  // Keep the first strictly-largest gap that leaves enough messages on both sides.
  const widest = gaps.reduce(
    (best, entry) =>
      entry.gap > best.gap &&
      entry.index >= MIN_SEGMENT &&
      messages.length - entry.index >= MIN_SEGMENT
        ? entry
        : best,
    { index: -1, gap: 0 }
  );

  const isSignificant =
    widest.index > 0 && widest.gap >= medianGap * GAP_THRESHOLD_MULTIPLIER;
  return isSignificant ? widest.index : maxSegment;
}
|
|
27620
27878
|
function formatTime(ms) {
|
|
27621
27879
|
const d = new Date(ms);
|
|
@@ -27694,7 +27952,7 @@ function parseSourceIds(raw) {
|
|
|
27694
27952
|
}
|
|
27695
27953
|
function loadForSession(projectPath, sessionID, includeArchived = false) {
|
|
27696
27954
|
const pid = ensureProject(projectPath);
|
|
27697
|
-
const sql = includeArchived ? "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? ORDER BY created_at ASC" : "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? AND archived = 0 ORDER BY created_at ASC";
|
|
27955
|
+
const sql = includeArchived ? "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at, r_compression, c_norm FROM distillations WHERE project_id = ? AND session_id = ? ORDER BY created_at ASC" : "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at, r_compression, c_norm FROM distillations WHERE project_id = ? AND session_id = ? AND archived = 0 ORDER BY created_at ASC";
|
|
27698
27956
|
const rows = db().query(sql).all(pid, sessionID);
|
|
27699
27957
|
return rows.map((r) => ({
|
|
27700
27958
|
...r,
|
|
@@ -27707,8 +27965,8 @@ function storeDistillation(input) {
|
|
|
27707
27965
|
const sourceJson = JSON.stringify(input.sourceIDs);
|
|
27708
27966
|
const tokens = Math.ceil(input.observations.length / 3);
|
|
27709
27967
|
db().query(
|
|
27710
|
-
`INSERT INTO distillations (id, project_id, session_id, narrative, facts, observations, source_ids, generation, token_count, created_at)
|
|
27711
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
27968
|
+
`INSERT INTO distillations (id, project_id, session_id, narrative, facts, observations, source_ids, generation, token_count, created_at, r_compression, c_norm)
|
|
27969
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
27712
27970
|
).run(
|
|
27713
27971
|
id,
|
|
27714
27972
|
pid,
|
|
@@ -27721,7 +27979,9 @@ function storeDistillation(input) {
|
|
|
27721
27979
|
sourceJson,
|
|
27722
27980
|
input.generation,
|
|
27723
27981
|
tokens,
|
|
27724
|
-
Date.now()
|
|
27982
|
+
Date.now(),
|
|
27983
|
+
input.rCompression ?? null,
|
|
27984
|
+
input.cNorm ?? null
|
|
27725
27985
|
);
|
|
27726
27986
|
return id;
|
|
27727
27987
|
}
|
|
@@ -27734,7 +27994,7 @@ function gen0Count(projectPath, sessionID) {
|
|
|
27734
27994
|
function loadGen0(projectPath, sessionID) {
|
|
27735
27995
|
const pid = ensureProject(projectPath);
|
|
27736
27996
|
const rows = db().query(
|
|
27737
|
-
"SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? AND generation = 0 AND archived = 0 ORDER BY created_at ASC"
|
|
27997
|
+
"SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at, r_compression, c_norm FROM distillations WHERE project_id = ? AND session_id = ? AND generation = 0 AND archived = 0 ORDER BY created_at ASC"
|
|
27738
27998
|
).all(pid, sessionID);
|
|
27739
27999
|
return rows.map((r) => ({
|
|
27740
28000
|
...r,
|
|
@@ -27801,7 +28061,8 @@ async function run(input) {
|
|
|
27801
28061
|
projectPath: input.projectPath,
|
|
27802
28062
|
sessionID: input.sessionID,
|
|
27803
28063
|
messages: segment,
|
|
27804
|
-
model: input.model
|
|
28064
|
+
model: input.model,
|
|
28065
|
+
urgent: input.urgent
|
|
27805
28066
|
});
|
|
27806
28067
|
if (result) {
|
|
27807
28068
|
distilled += segment.length;
|
|
@@ -27809,12 +28070,13 @@ async function run(input) {
|
|
|
27809
28070
|
}
|
|
27810
28071
|
}
|
|
27811
28072
|
}
|
|
27812
|
-
if (gen0Count(input.projectPath, input.sessionID) >= cfg.distillation.metaThreshold) {
|
|
28073
|
+
if (!input.skipMeta && gen0Count(input.projectPath, input.sessionID) >= cfg.distillation.metaThreshold) {
|
|
27813
28074
|
await metaDistill({
|
|
27814
28075
|
llm: input.llm,
|
|
27815
28076
|
projectPath: input.projectPath,
|
|
27816
28077
|
sessionID: input.sessionID,
|
|
27817
|
-
model: input.model
|
|
28078
|
+
model: input.model,
|
|
28079
|
+
urgent: input.urgent
|
|
27818
28080
|
});
|
|
27819
28081
|
rounds++;
|
|
27820
28082
|
}
|
|
@@ -27840,22 +28102,46 @@ async function distillSegment(input) {
|
|
|
27840
28102
|
const responseText = await input.llm.prompt(
|
|
27841
28103
|
DISTILLATION_SYSTEM,
|
|
27842
28104
|
userContent,
|
|
27843
|
-
{ model, workerID: "lore-distill" }
|
|
28105
|
+
{ model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID }
|
|
27844
28106
|
);
|
|
27845
28107
|
if (!responseText) return null;
|
|
27846
28108
|
const result = parseDistillationResult(responseText);
|
|
27847
28109
|
if (!result) return null;
|
|
28110
|
+
const distilledTokens = Math.ceil(result.observations.length / 3);
|
|
28111
|
+
const sourceTokens = input.messages.reduce((sum, m) => sum + m.tokens, 0);
|
|
28112
|
+
const rComp = compressionRatio(distilledTokens, sourceTokens);
|
|
28113
|
+
const cNorm = temporalCnorm(input.messages.map((m) => m.created_at));
|
|
27848
28114
|
const distillId = storeDistillation({
|
|
27849
28115
|
projectPath: input.projectPath,
|
|
27850
28116
|
sessionID: input.sessionID,
|
|
27851
28117
|
observations: result.observations,
|
|
27852
28118
|
sourceIDs: input.messages.map((m) => m.id),
|
|
27853
|
-
generation: 0
|
|
28119
|
+
generation: 0,
|
|
28120
|
+
rCompression: rComp,
|
|
28121
|
+
cNorm
|
|
27854
28122
|
});
|
|
27855
28123
|
markDistilled(input.messages.map((m) => m.id));
|
|
28124
|
+
info(
|
|
28125
|
+
`distill segment: ${input.messages.length} msgs, ${sourceTokens}\u2192${distilledTokens} tokens, R=${rComp.toFixed(2)}, C_norm=${cNorm.toFixed(3)}`
|
|
28126
|
+
);
|
|
27856
28127
|
if (isAvailable()) {
|
|
27857
28128
|
embedDistillation(distillId, result.observations);
|
|
27858
28129
|
}
|
|
28130
|
+
if (config2().knowledge.enabled) {
|
|
28131
|
+
for (const pat of extractPatterns(result.observations)) {
|
|
28132
|
+
try {
|
|
28133
|
+
create({
|
|
28134
|
+
projectPath: input.projectPath,
|
|
28135
|
+
category: pat.category,
|
|
28136
|
+
title: pat.title,
|
|
28137
|
+
content: pat.content,
|
|
28138
|
+
session: input.sessionID,
|
|
28139
|
+
scope: "project"
|
|
28140
|
+
});
|
|
28141
|
+
} catch {
|
|
28142
|
+
}
|
|
28143
|
+
}
|
|
28144
|
+
}
|
|
27859
28145
|
return result;
|
|
27860
28146
|
}
|
|
27861
28147
|
async function metaDistill(input) {
|
|
@@ -27871,7 +28157,7 @@ async function metaDistill(input) {
|
|
|
27871
28157
|
const responseText = await input.llm.prompt(
|
|
27872
28158
|
RECURSIVE_SYSTEM,
|
|
27873
28159
|
userContent,
|
|
27874
|
-
{ model, workerID: "lore-distill" }
|
|
28160
|
+
{ model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID }
|
|
27875
28161
|
);
|
|
27876
28162
|
if (!responseText) return null;
|
|
27877
28163
|
const result = parseDistillationResult(responseText);
|
|
@@ -27900,8 +28186,54 @@ async function metaDistill(input) {
|
|
|
27900
28186
|
if (isAvailable()) {
|
|
27901
28187
|
embedDistillation(metaId, result.observations);
|
|
27902
28188
|
}
|
|
28189
|
+
if (config2().knowledge.enabled) {
|
|
28190
|
+
for (const pat of extractPatterns(result.observations)) {
|
|
28191
|
+
try {
|
|
28192
|
+
create({
|
|
28193
|
+
projectPath: input.projectPath,
|
|
28194
|
+
category: pat.category,
|
|
28195
|
+
title: pat.title,
|
|
28196
|
+
content: pat.content,
|
|
28197
|
+
session: input.sessionID,
|
|
28198
|
+
scope: "project"
|
|
28199
|
+
});
|
|
28200
|
+
} catch {
|
|
28201
|
+
}
|
|
28202
|
+
}
|
|
28203
|
+
}
|
|
27903
28204
|
return result;
|
|
27904
28205
|
}
|
|
28206
|
+
/**
 * Fill in `r_compression` and `c_norm` for distillation rows that lack them.
 *
 * For every distillation whose `r_compression` is NULL, the source messages
 * listed in `source_ids` are looked up in `temporal_messages`; their token
 * counts and timestamps feed compressionRatio and temporalCnorm, and the
 * results are written back. Rows with no source ids, or whose sources cannot
 * be found, are skipped.
 *
 * @returns {number} Number of distillation rows updated.
 */
function backfillMetrics() {
  const pending = db().query(
    "SELECT id, source_ids, token_count FROM distillations WHERE r_compression IS NULL"
  ).all();
  if (!pending.length) return 0;

  const writeMetrics = db().prepare(
    "UPDATE distillations SET r_compression = ?, c_norm = ? WHERE id = ?"
  );

  let updated = 0;
  for (const row of pending) {
    const sourceIds = parseSourceIds(row.source_ids);
    if (!sourceIds.length) continue;

    // One bound parameter per source id.
    const placeholders = Array.from(sourceIds, () => "?").join(",");
    const sources = db().query(
      `SELECT tokens, created_at FROM temporal_messages WHERE id IN (${placeholders})`
    ).all(...sourceIds);
    if (!sources.length) continue;

    let sourceTokens = 0;
    const timestamps = [];
    for (const source of sources) {
      sourceTokens += source.tokens;
      timestamps.push(source.created_at);
    }

    const rComp = compressionRatio(row.token_count, sourceTokens);
    const cNorm = temporalCnorm(timestamps);
    writeMetrics.run(rComp, cNorm, row.id);
    updated += 1;
  }

  if (updated > 0) {
    info(
      `backfilled metrics for ${updated} distillations (${pending.length - updated} skipped \u2014 missing sources)`
    );
  }
  return updated;
}
|
|
27905
28237
|
|
|
27906
28238
|
// src/curator.ts
|
|
27907
28239
|
var curator_exports = {};
|
|
@@ -27947,7 +28279,7 @@ async function run2(input) {
|
|
|
27947
28279
|
const responseText = await input.llm.prompt(
|
|
27948
28280
|
CURATOR_SYSTEM,
|
|
27949
28281
|
userContent,
|
|
27950
|
-
{ model, workerID: "lore-curator" }
|
|
28282
|
+
{ model, workerID: "lore-curator", thinking: false, sessionID: input.sessionID }
|
|
27951
28283
|
);
|
|
27952
28284
|
if (!responseText) return { created: 0, updated: 0, deleted: 0 };
|
|
27953
28285
|
const ops = parseOps(responseText);
|
|
@@ -28017,7 +28349,7 @@ async function consolidate(input) {
|
|
|
28017
28349
|
const responseText = await input.llm.prompt(
|
|
28018
28350
|
CONSOLIDATION_SYSTEM,
|
|
28019
28351
|
userContent,
|
|
28020
|
-
{ model, workerID: "lore-curator" }
|
|
28352
|
+
{ model, workerID: "lore-curator", thinking: false, sessionID: input.sessionID }
|
|
28021
28353
|
);
|
|
28022
28354
|
if (!responseText) return { updated: 0, deleted: 0 };
|
|
28023
28355
|
const ops = parseOps(responseText);
|
|
@@ -28043,12 +28375,39 @@ async function consolidate(input) {
|
|
|
28043
28375
|
}
|
|
28044
28376
|
|
|
28045
28377
|
// src/recall.ts
|
|
28378
|
+
/**
 * Returns the text of a source-tagged recall result.
 *
 * - "knowledge" / "cross-knowledge": title and content joined by a space
 * - "distillation": the item's `observations`
 * - "temporal": the item's `content`
 * - "lat-section": heading and content joined by a space
 *
 * @param {{ source: string, item: object }} tagged Result wrapper carrying a `source` tag.
 * @returns {*} The extracted text, or `undefined` for an unrecognised source.
 */
function getTaggedText(tagged) {
  const { source } = tagged;
  if (source === "knowledge" || source === "cross-knowledge") {
    const { title, content } = tagged.item;
    return `${title} ${content}`;
  }
  if (source === "distillation") {
    return tagged.item.observations;
  }
  if (source === "temporal") {
    return tagged.item.content;
  }
  if (source === "lat-section") {
    const { heading, content } = tagged.item;
    return `${heading} ${content}`;
  }
  return undefined;
}
|
|
28391
|
+
/**
 * Builds the key for a source-tagged recall result: a short prefix for the
 * source kind, a colon, then the item's id
 * (`k:`, `xk:`, `d:`, `t:`, `lat:`).
 *
 * @param {{ source: string, item: { id: * } }} r Tagged result.
 * @returns {string|undefined} The key, or `undefined` for an unrecognised source.
 */
function taggedResultKey(r) {
  const { source } = r;
  let prefix;
  if (source === "knowledge") {
    prefix = "k";
  } else if (source === "cross-knowledge") {
    prefix = "xk";
  } else if (source === "distillation") {
    prefix = "d";
  } else if (source === "temporal") {
    prefix = "t";
  } else if (source === "lat-section") {
    prefix = "lat";
  } else {
    return undefined;
  }
  return `${prefix}:${r.item.id}`;
}
|
|
28046
28405
|
/**
 * Substring search over `distillations.observations` using `LIKE`.
 *
 * The query is lower-cased and split on whitespace; terms of one character or
 * less are dropped. Every remaining term must occur in the observations (AND).
 * Results are ordered newest first and capped at `input.limit`.
 *
 * `%`, `_` and `\` inside a term are escaped so they match literally rather
 * than acting as LIKE wildcards (e.g. "100%" or "snake_case").
 *
 * @param {object} input
 * @param {string} input.query      Free-text query.
 * @param {*}      input.pid        Value bound to `project_id`.
 * @param {*}      [input.sessionID] When truthy, also filters on `session_id`.
 * @param {number} input.limit      Value bound to `LIMIT`.
 * @returns {Array<object>} Rows with id, observations, generation, created_at,
 *   session_id and c_norm; an empty array when no usable term remains.
 */
function searchDistillationsLike(input) {
  const terms = input.query.toLowerCase().split(/\s+/).filter((term) => term.length > 1);
  if (!terms.length) return [];

  // SQLite has no default LIKE escape character, so declare one per condition.
  const conditions = terms.map(() => "LOWER(observations) LIKE ? ESCAPE '\\'").join(" AND ");
  const likeParams = terms.map(
    (term) => `%${term.replace(/[\\%_]/g, (ch) => `\\${ch}`)}%`
  );

  // Build the statement once; only the session filter differs between variants.
  const sessionFilter = input.sessionID ? "AND session_id = ? " : "";
  const sql = `SELECT id, observations, generation, created_at, session_id, c_norm FROM distillations WHERE project_id = ? ${sessionFilter}AND ${conditions} ORDER BY created_at DESC LIMIT ?`;
  const allParams = input.sessionID
    ? [input.pid, input.sessionID, ...likeParams, input.limit]
    : [input.pid, ...likeParams, input.limit];
  return db().query(sql).all(...allParams);
}
|
|
@@ -28057,12 +28416,12 @@ function searchDistillationsScored(input) {
|
|
|
28057
28416
|
const limit = input.limit ?? 10;
|
|
28058
28417
|
const q = ftsQuery(input.query);
|
|
28059
28418
|
if (q === EMPTY_QUERY) return [];
|
|
28060
|
-
const ftsSQL = input.sessionID ? `SELECT d.id, d.observations, d.generation, d.created_at, d.session_id, rank
|
|
28419
|
+
const ftsSQL = input.sessionID ? `SELECT d.id, d.observations, d.generation, d.created_at, d.session_id, d.c_norm, rank
|
|
28061
28420
|
FROM distillation_fts f
|
|
28062
28421
|
CROSS JOIN distillations d ON d.rowid = f.rowid
|
|
28063
28422
|
WHERE distillation_fts MATCH ?
|
|
28064
28423
|
AND d.project_id = ? AND d.session_id = ?
|
|
28065
|
-
ORDER BY rank LIMIT ?` : `SELECT d.id, d.observations, d.generation, d.created_at, d.session_id, rank
|
|
28424
|
+
ORDER BY rank LIMIT ?` : `SELECT d.id, d.observations, d.generation, d.created_at, d.session_id, d.c_norm, rank
|
|
28066
28425
|
FROM distillation_fts f
|
|
28067
28426
|
CROSS JOIN distillations d ON d.rowid = f.rowid
|
|
28068
28427
|
WHERE distillation_fts MATCH ?
|
|
@@ -28147,7 +28506,7 @@ async function runRecall(input) {
|
|
|
28147
28506
|
let queries = [query];
|
|
28148
28507
|
if (searchConfig?.queryExpansion && llm) {
|
|
28149
28508
|
try {
|
|
28150
|
-
queries = await expandQuery(llm, query);
|
|
28509
|
+
queries = await expandQuery(llm, query, void 0, sessionID);
|
|
28151
28510
|
} catch (err) {
|
|
28152
28511
|
info("recall: query expansion failed, using original:", err);
|
|
28153
28512
|
}
|
|
@@ -28217,6 +28576,18 @@ async function runRecall(input) {
|
|
|
28217
28576
|
key: (r) => `t:${r.item.id}`
|
|
28218
28577
|
}
|
|
28219
28578
|
);
|
|
28579
|
+
if (temporalResults.length > 0) {
|
|
28580
|
+
const recencySorted = [...temporalResults].sort(
|
|
28581
|
+
(a, b) => b.created_at - a.created_at
|
|
28582
|
+
);
|
|
28583
|
+
allRrfLists.push({
|
|
28584
|
+
items: recencySorted.map((item) => ({
|
|
28585
|
+
source: "temporal",
|
|
28586
|
+
item
|
|
28587
|
+
})),
|
|
28588
|
+
key: (r) => `t:${r.item.id}`
|
|
28589
|
+
});
|
|
28590
|
+
}
|
|
28220
28591
|
}
|
|
28221
28592
|
if (isAvailable() && scope !== "session") {
|
|
28222
28593
|
try {
|
|
@@ -28244,7 +28615,7 @@ async function runRecall(input) {
|
|
|
28244
28615
|
const distVectorHits = vectorSearchDistillations(queryVec, limit);
|
|
28245
28616
|
const distVectorTagged = distVectorHits.map((hit) => {
|
|
28246
28617
|
const row = db().query(
|
|
28247
|
-
"SELECT id, observations, generation, created_at, session_id FROM distillations WHERE id = ?"
|
|
28618
|
+
"SELECT id, observations, generation, created_at, session_id, c_norm FROM distillations WHERE id = ?"
|
|
28248
28619
|
).get(hit.id);
|
|
28249
28620
|
if (!row) return null;
|
|
28250
28621
|
return {
|
|
@@ -28307,6 +28678,57 @@ async function runRecall(input) {
|
|
|
28307
28678
|
info("recall: cross-project knowledge search failed:", err);
|
|
28308
28679
|
}
|
|
28309
28680
|
}
|
|
28681
|
+
{
|
|
28682
|
+
const distillationCandidates = [];
|
|
28683
|
+
for (const list4 of allRrfLists) {
|
|
28684
|
+
for (const item of list4.items) {
|
|
28685
|
+
if (item.source !== "distillation") continue;
|
|
28686
|
+
const key = `d:${item.item.id}`;
|
|
28687
|
+
const d = item.item;
|
|
28688
|
+
const cNorm = d.c_norm ?? 0;
|
|
28689
|
+
const ageDays = Math.min(
|
|
28690
|
+
(Date.now() - d.created_at) / 864e5,
|
|
28691
|
+
90
|
|
28692
|
+
);
|
|
28693
|
+
const score = cNorm + ageDays / 90 * 0.1;
|
|
28694
|
+
distillationCandidates.push({ tagged: item, key, qualityScore: score });
|
|
28695
|
+
}
|
|
28696
|
+
}
|
|
28697
|
+
if (distillationCandidates.length > 1) {
|
|
28698
|
+
const seen = /* @__PURE__ */ new Set();
|
|
28699
|
+
const unique = distillationCandidates.filter((c) => {
|
|
28700
|
+
if (seen.has(c.key)) return false;
|
|
28701
|
+
seen.add(c.key);
|
|
28702
|
+
return true;
|
|
28703
|
+
});
|
|
28704
|
+
unique.sort((a, b) => a.qualityScore - b.qualityScore);
|
|
28705
|
+
allRrfLists.push({
|
|
28706
|
+
items: unique.map((c) => c.tagged),
|
|
28707
|
+
key: (r) => `d:${r.item.id}`
|
|
28708
|
+
});
|
|
28709
|
+
}
|
|
28710
|
+
}
|
|
28711
|
+
if (filterTerms(query).length > 0 && allRrfLists.length > 0) {
|
|
28712
|
+
const allCandidates = /* @__PURE__ */ new Map();
|
|
28713
|
+
for (const list4 of allRrfLists) {
|
|
28714
|
+
for (const item of list4.items) {
|
|
28715
|
+
const key = list4.key(item);
|
|
28716
|
+
if (!allCandidates.has(key)) allCandidates.set(key, item);
|
|
28717
|
+
}
|
|
28718
|
+
}
|
|
28719
|
+
const candidateEntries = [...allCandidates.entries()];
|
|
28720
|
+
const exactRanked = exactTermMatchRank(
|
|
28721
|
+
candidateEntries,
|
|
28722
|
+
([, tagged]) => getTaggedText(tagged),
|
|
28723
|
+
query
|
|
28724
|
+
);
|
|
28725
|
+
if (exactRanked.length) {
|
|
28726
|
+
allRrfLists.push({
|
|
28727
|
+
items: exactRanked.map(([, item]) => item),
|
|
28728
|
+
key: taggedResultKey
|
|
28729
|
+
});
|
|
28730
|
+
}
|
|
28731
|
+
}
|
|
28310
28732
|
const fused = reciprocalRankFusion(allRrfLists);
|
|
28311
28733
|
return formatFusedResults(fused, 20);
|
|
28312
28734
|
}
|
|
@@ -28318,7 +28740,7 @@ var RECALL_PARAM_DESCRIPTIONS = {
|
|
|
28318
28740
|
|
|
28319
28741
|
// src/agents-file.ts
|
|
28320
28742
|
import { existsSync as existsSync3, readFileSync as readFileSync3, writeFileSync, mkdirSync as mkdirSync2 } from "fs";
|
|
28321
|
-
import { dirname as dirname2 } from "path";
|
|
28743
|
+
import { dirname as dirname2, join as join5 } from "path";
|
|
28322
28744
|
var LORE_SECTION_START = "<!-- This section is maintained by the coding agent via lore (https://github.com/BYK/loreai) -->";
|
|
28323
28745
|
var LORE_SECTION_END = "<!-- End lore-managed section -->";
|
|
28324
28746
|
var ALL_START_MARKERS = [
|
|
@@ -28327,6 +28749,8 @@ var ALL_START_MARKERS = [
|
|
|
28327
28749
|
"<!-- This section is maintained by the coding agent via lore (https://github.com/BYK/opencode-lore) -->",
|
|
28328
28750
|
"<!-- This section is auto-maintained by lore (https://github.com/BYK/opencode-lore) -->"
|
|
28329
28751
|
];
|
|
28752
|
+
var LORE_FILE = ".lore.md";
|
|
28753
|
+
var LORE_FILE_HEADER = "<!-- Managed by lore (https://github.com/BYK/loreai) \u2014 manual edits are imported on next session. -->";
|
|
28330
28754
|
var UUID_RE2 = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/;
|
|
28331
28755
|
var MARKER_RE = /^<!--\s*lore:([0-9a-f-]+)\s*-->$/;
|
|
28332
28756
|
function splitFile(fileContent) {
|
|
@@ -28436,8 +28860,9 @@ function buildSection(projectPath) {
|
|
|
28436
28860
|
return out.join("\n");
|
|
28437
28861
|
}
|
|
28438
28862
|
function exportToFile(input) {
|
|
28439
|
-
|
|
28440
|
-
const
|
|
28863
|
+
exportLoreFile(input.projectPath);
|
|
28864
|
+
const pointerBody = "\n## Long-term Knowledge\n\nFor long-term knowledge entries managed by [lore](https://github.com/BYK/loreai) (gotchas, patterns, decisions, architecture), see [`.lore.md`](.lore.md) in the project root.\n";
|
|
28865
|
+
const newSection = LORE_SECTION_START + pointerBody + LORE_SECTION_END + "\n";
|
|
28441
28866
|
let fileContent = "";
|
|
28442
28867
|
if (existsSync3(input.filePath)) {
|
|
28443
28868
|
fileContent = readFileSync3(input.filePath, "utf8");
|
|
@@ -28461,15 +28886,9 @@ function shouldImport(input) {
|
|
|
28461
28886
|
const expected = buildSection(input.projectPath);
|
|
28462
28887
|
return hashSection(section) !== hashSection(expected);
|
|
28463
28888
|
}
|
|
28464
|
-
function
|
|
28465
|
-
if (!existsSync3(input.filePath)) return;
|
|
28466
|
-
const fileContent = readFileSync3(input.filePath, "utf8");
|
|
28467
|
-
const { section, before } = splitFile(fileContent);
|
|
28468
|
-
const textToParse = section ?? fileContent;
|
|
28469
|
-
const fileEntries = parseEntriesFromSection(textToParse);
|
|
28470
|
-
if (!fileEntries.length) return;
|
|
28889
|
+
function _importEntries(entries, projectPath) {
|
|
28471
28890
|
const seenIds = /* @__PURE__ */ new Set();
|
|
28472
|
-
for (const entry of
|
|
28891
|
+
for (const entry of entries) {
|
|
28473
28892
|
if (entry.id !== null) {
|
|
28474
28893
|
if (seenIds.has(entry.id)) continue;
|
|
28475
28894
|
seenIds.add(entry.id);
|
|
@@ -28480,7 +28899,7 @@ function importFromFile(input) {
|
|
|
28480
28899
|
}
|
|
28481
28900
|
} else {
|
|
28482
28901
|
create({
|
|
28483
|
-
projectPath
|
|
28902
|
+
projectPath,
|
|
28484
28903
|
category: entry.category,
|
|
28485
28904
|
title: entry.title,
|
|
28486
28905
|
content: entry.content,
|
|
@@ -28490,13 +28909,13 @@ function importFromFile(input) {
|
|
|
28490
28909
|
});
|
|
28491
28910
|
}
|
|
28492
28911
|
} else {
|
|
28493
|
-
const existing = forProject(
|
|
28912
|
+
const existing = forProject(projectPath, true);
|
|
28494
28913
|
const titleMatch = existing.find(
|
|
28495
28914
|
(e) => e.title.toLowerCase() === entry.title.toLowerCase()
|
|
28496
28915
|
);
|
|
28497
28916
|
if (!titleMatch) {
|
|
28498
28917
|
create({
|
|
28499
|
-
projectPath
|
|
28918
|
+
projectPath,
|
|
28500
28919
|
category: entry.category,
|
|
28501
28920
|
title: entry.title,
|
|
28502
28921
|
content: entry.content,
|
|
@@ -28507,16 +28926,50 @@ function importFromFile(input) {
|
|
|
28507
28926
|
}
|
|
28508
28927
|
}
|
|
28509
28928
|
}
|
|
28929
|
+
/**
 * Imports entries found in the file at `input.filePath`.
 *
 * If `splitFile` finds a section in the file, only that section is parsed;
 * otherwise the whole file is parsed. Does nothing when the file is missing
 * or no entries are parsed.
 *
 * @param {object} input
 * @param {string} input.filePath    File to read.
 * @param {string} input.projectPath Passed through to `_importEntries`.
 * @returns {void}
 */
function importFromFile(input) {
  if (existsSync3(input.filePath)) {
    const raw = readFileSync3(input.filePath, "utf8");
    const managed = splitFile(raw).section;
    const parsed = parseEntriesFromSection(managed ?? raw);
    if (parsed.length) {
      _importEntries(parsed, input.projectPath);
    }
  }
}
|
|
28938
|
+
/**
 * Reports whether the lore file (`LORE_FILE`) exists directly under the given directory.
 *
 * @param {string} projectPath Directory to look in.
 * @returns {boolean} `true` when the file exists.
 */
function loreFileExists(projectPath) {
  const lorePath = join5(projectPath, LORE_FILE);
  return existsSync3(lorePath);
}
|
|
28941
|
+
/**
 * Writes the lore file under `projectPath`: the fixed header line followed by
 * the section produced by `buildSection`. Any existing file is overwritten.
 *
 * @param {string} projectPath Directory that receives the file.
 * @returns {void}
 */
function exportLoreFile(projectPath) {
  const body = buildSection(projectPath);
  const target = join5(projectPath, LORE_FILE);
  writeFileSync(target, `${LORE_FILE_HEADER}\n${body}`, "utf8");
}
|
|
28946
|
+
/**
 * Decides whether the lore file on disk differs from what would be generated now.
 *
 * The file's content and the freshly built content (header + `buildSection`)
 * are each passed through `hashSection` and the results compared.
 *
 * @param {string} projectPath Directory containing the lore file.
 * @returns {boolean} `false` when the file is missing or the hashes match; `true` otherwise.
 */
function shouldImportLoreFile(projectPath) {
  const lorePath = join5(projectPath, LORE_FILE);
  if (existsSync3(lorePath)) {
    const onDisk = readFileSync3(lorePath, "utf8");
    const generated = `${LORE_FILE_HEADER}\n${buildSection(projectPath)}`;
    return hashSection(onDisk) !== hashSection(generated);
  }
  return false;
}
|
|
28953
|
+
/**
 * Imports entries from the lore file under `projectPath`.
 *
 * The whole file is handed to `parseEntriesFromSection`. Does nothing when the
 * file is missing or no entries are parsed.
 *
 * @param {string} projectPath Directory containing the lore file; also passed to `_importEntries`.
 * @returns {void}
 */
function importLoreFile(projectPath) {
  const lorePath = join5(projectPath, LORE_FILE);
  if (existsSync3(lorePath)) {
    const parsed = parseEntriesFromSection(readFileSync3(lorePath, "utf8"));
    if (parsed.length) {
      _importEntries(parsed, projectPath);
    }
  }
}
|
|
28510
28961
|
|
|
28511
28962
|
// src/worker-model.ts
|
|
28512
28963
|
var worker_model_exports = {};
|
|
28513
28964
|
__export(worker_model_exports, {
|
|
28514
28965
|
WORKER_JUDGE_SYSTEM: () => WORKER_JUDGE_SYSTEM,
|
|
28966
|
+
clearValidatedWorkerModel: () => clearValidatedWorkerModel,
|
|
28515
28967
|
computeModelFingerprint: () => computeModelFingerprint,
|
|
28516
28968
|
getValidatedWorkerModel: () => getValidatedWorkerModel,
|
|
28517
28969
|
isValidationStale: () => isValidationStale,
|
|
28518
28970
|
parseJudgeScore: () => parseJudgeScore,
|
|
28519
28971
|
resolveWorkerModel: () => resolveWorkerModel,
|
|
28972
|
+
runValidation: () => runValidation,
|
|
28520
28973
|
selectWorkerCandidates: () => selectWorkerCandidates,
|
|
28521
28974
|
storeValidatedWorkerModel: () => storeValidatedWorkerModel,
|
|
28522
28975
|
structuralCheck: () => structuralCheck,
|
|
@@ -28528,7 +28981,13 @@ function selectWorkerCandidates(sessionModel, providerModels) {
|
|
|
28528
28981
|
(m) => m.providerID === sessionModel.providerID && m.status === "active" && m.capabilities.input.text
|
|
28529
28982
|
);
|
|
28530
28983
|
if (eligible.length === 0) return [];
|
|
28531
|
-
const sorted = [...eligible].sort((a, b) =>
|
|
28984
|
+
const sorted = [...eligible].sort((a, b) => {
|
|
28985
|
+
const costDiff = a.cost.input - b.cost.input;
|
|
28986
|
+
if (costDiff !== 0) return costDiff;
|
|
28987
|
+
const aReasoning = a.capabilities.reasoning ? 1 : 0;
|
|
28988
|
+
const bReasoning = b.capabilities.reasoning ? 1 : 0;
|
|
28989
|
+
return aReasoning - bReasoning;
|
|
28990
|
+
});
|
|
28532
28991
|
const cheapest = sorted[0];
|
|
28533
28992
|
const belowSession = sorted.filter((m) => m.cost.input < sessionModel.cost.input).pop();
|
|
28534
28993
|
const candidates = /* @__PURE__ */ new Map();
|
|
@@ -28563,6 +29022,9 @@ function storeValidatedWorkerModel(result) {
|
|
|
28563
29022
|
"INSERT INTO kv_meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = ?"
|
|
28564
29023
|
).run(key, value, value);
|
|
28565
29024
|
}
|
|
29025
|
+
/**
 * Deletes the `kv_meta` row keyed by `KV_PREFIX` + `providerID`.
 *
 * @param {string} providerID Provider whose entry is removed.
 * @returns {void}
 */
function clearValidatedWorkerModel(providerID) {
  const removeEntry = db().query("DELETE FROM kv_meta WHERE key = ?");
  const entryKey = `${KV_PREFIX}${providerID}`;
  removeEntry.run(entryKey);
}
|
|
28566
29028
|
function isValidationStale(stored, currentFingerprint) {
|
|
28567
29029
|
if (!stored) return true;
|
|
28568
29030
|
return stored.fingerprint !== currentFingerprint;
|
|
@@ -28621,10 +29083,85 @@ function parseJudgeScore(response) {
|
|
|
28621
29083
|
if (!match) return null;
|
|
28622
29084
|
return parseInt(match[1], 10);
|
|
28623
29085
|
}
|
|
29086
|
+
/**
 * Tries each candidate model in order and stores the first one that passes validation.
 *
 * For every candidate other than the session model:
 *  1. the candidate distils `sourceMessagesText`; a failing call skips the candidate;
 *  2. the output must pass `structuralCheck` against `referenceObservations`;
 *  3. a judge prompt (sent without a model override) scores the output; a parsed
 *     score below 3 rejects the candidate. If the judge call throws or yields
 *     no score, the candidate is still accepted with `judgeScore` null.
 *
 * The first accepted candidate is persisted via `storeValidatedWorkerModel`.
 * When none is accepted, the stored entry for `input.providerID` is cleared.
 *
 * @param {object} input
 * @param {object} input.llm                   Client exposing `prompt(system, user, options)`.
 * @param {Array<{id: string, providerID: string}>} input.candidates Models to try, in order.
 * @param {string} input.referenceObservations Reference output to compare against.
 * @param {string} input.sourceMessagesText    Text handed to the distillation prompt.
 * @param {*}      input.date                  Passed to `distillationUser`.
 * @param {string} input.sessionModelID        Candidate id to skip.
 * @param {string} input.providerID            Provider used for the fingerprint and for clearing.
 * @returns {Promise<object|null>} The stored validation result, or `null` when no candidate passed.
 */
async function runValidation(input) {
  const { llm, candidates, referenceObservations, sourceMessagesText, date: date5 } = input;
  const distillPrompt = distillationUser({ messages: sourceMessagesText, date: date5 });
  const observationsTag = /<observations>([\s\S]*?)<\/observations>/;

  for (const candidate of candidates) {
    const { id: modelID, providerID } = candidate;
    if (modelID === input.sessionModelID) continue;

    // Step 1: let the candidate produce its own distillation.
    let observed = null;
    try {
      const reply = await llm.prompt(DISTILLATION_SYSTEM, distillPrompt, {
        model: { providerID, modelID },
        workerID: "lore-distill",
        thinking: false
      });
      if (reply) {
        // Prefer the tagged payload; fall back to the whole reply when untagged.
        observed = (reply.match(observationsTag)?.[1] ?? reply).trim();
      }
    } catch (e) {
      warn(`worker model validation: candidate ${modelID} failed:`, e);
      continue;
    }

    // Step 2: structural comparison against the reference output.
    const { passed, reason } = structuralCheck(observed, referenceObservations);
    if (!passed) {
      info(
        `worker model validation: ${modelID} failed structural check: ${reason}`
      );
      continue;
    }

    // Step 3: judge scoring; no model override is passed for this call.
    let judgeScore = null;
    try {
      const verdict = await llm.prompt(
        WORKER_JUDGE_SYSTEM,
        workerJudgeUser(referenceObservations, observed),
        { workerID: "lore-distill", thinking: false }
      );
      if (verdict) {
        judgeScore = parseJudgeScore(verdict);
      }
    } catch (e) {
      warn(`worker model validation: judge call failed for ${modelID}:`, e);
    }
    const rejectedByJudge = judgeScore !== null && judgeScore < 3;
    if (rejectedByJudge) {
      info(
        `worker model validation: ${modelID} failed judge (score=${judgeScore})`
      );
      continue;
    }

    const result = {
      modelID,
      providerID,
      fingerprint: computeModelFingerprint(
        input.providerID,
        input.sessionModelID,
        candidates.map((c) => c.id)
      ),
      validatedAt: Date.now(),
      judgeScore
    };
    storeValidatedWorkerModel(result);
    info(
      `worker model validated: ${modelID} (judge=${judgeScore}) for provider ${input.providerID}`
    );
    return result;
  }

  clearValidatedWorkerModel(input.providerID);
  info(
    `worker model validation: no candidate passed for ${input.providerID} \u2014 cleared stale entry`
  );
  return null;
}
|
|
28624
29160
|
function resolveWorkerModel(providerID, configWorkerModel, configModel) {
|
|
28625
29161
|
if (configWorkerModel) return configWorkerModel;
|
|
28626
29162
|
const validated = getValidatedWorkerModel(providerID);
|
|
28627
|
-
|
|
29163
|
+
const MAX_AGE_MS = 24 * 60 * 60 * 1e3;
|
|
29164
|
+
if (validated && Date.now() - validated.validatedAt <= MAX_AGE_MS) {
|
|
28628
29165
|
return { providerID: validated.providerID, modelID: validated.modelID };
|
|
28629
29166
|
}
|
|
28630
29167
|
return configModel;
|
|
@@ -28635,11 +29172,11 @@ export {
|
|
|
28635
29172
|
CURATOR_SYSTEM,
|
|
28636
29173
|
DISTILLATION_SYSTEM,
|
|
28637
29174
|
EMPTY_QUERY,
|
|
29175
|
+
LORE_FILE,
|
|
28638
29176
|
QUERY_EXPANSION_SYSTEM,
|
|
28639
29177
|
RECALL_PARAM_DESCRIPTIONS,
|
|
28640
29178
|
RECALL_TOOL_DESCRIPTION,
|
|
28641
29179
|
RECURSIVE_SYSTEM,
|
|
28642
|
-
WORKER_JUDGE_SYSTEM,
|
|
28643
29180
|
buildCompactPrompt,
|
|
28644
29181
|
calibrate,
|
|
28645
29182
|
close,
|
|
@@ -28654,7 +29191,9 @@ export {
|
|
|
28654
29191
|
distillationUser,
|
|
28655
29192
|
embedding_exports as embedding,
|
|
28656
29193
|
ensureProject,
|
|
29194
|
+
exactTermMatchRank,
|
|
28657
29195
|
expandQuery,
|
|
29196
|
+
exportLoreFile,
|
|
28658
29197
|
exportToFile,
|
|
28659
29198
|
extractTopTerms,
|
|
28660
29199
|
formatDistillations,
|
|
@@ -28663,10 +29202,12 @@ export {
|
|
|
28663
29202
|
ftsQueryOr,
|
|
28664
29203
|
getLastTransformEstimate,
|
|
28665
29204
|
getLastTransformedCount,
|
|
29205
|
+
getLastTurnAt,
|
|
28666
29206
|
getLtmBudget,
|
|
28667
29207
|
getLtmTokens,
|
|
28668
29208
|
h,
|
|
28669
29209
|
importFromFile,
|
|
29210
|
+
importLoreFile,
|
|
28670
29211
|
inline,
|
|
28671
29212
|
inspectSessionState,
|
|
28672
29213
|
isFirstRun,
|
|
@@ -28680,11 +29221,13 @@ export {
|
|
|
28680
29221
|
load,
|
|
28681
29222
|
loadForceMinLayer,
|
|
28682
29223
|
log_exports as log,
|
|
29224
|
+
loreFileExists,
|
|
28683
29225
|
ltm_exports as ltm,
|
|
28684
29226
|
needsUrgentDistillation,
|
|
28685
29227
|
normalize,
|
|
28686
29228
|
onIdleResume,
|
|
28687
29229
|
p,
|
|
29230
|
+
pattern_extract_exports as patternExtract,
|
|
28688
29231
|
projectId,
|
|
28689
29232
|
projectName,
|
|
28690
29233
|
reciprocalRankFusion,
|
|
@@ -28700,6 +29243,7 @@ export {
|
|
|
28700
29243
|
setMaxLayer0Tokens,
|
|
28701
29244
|
setModelLimits,
|
|
28702
29245
|
shouldImport,
|
|
29246
|
+
shouldImportLoreFile,
|
|
28703
29247
|
strong2 as strong,
|
|
28704
29248
|
t,
|
|
28705
29249
|
temporal_exports as temporal,
|
|
@@ -28707,7 +29251,6 @@ export {
|
|
|
28707
29251
|
transform2 as transform,
|
|
28708
29252
|
ul,
|
|
28709
29253
|
unescapeMarkdown,
|
|
28710
|
-
workerJudgeUser,
|
|
28711
29254
|
worker_model_exports as workerModel,
|
|
28712
29255
|
workerSessionIDs
|
|
28713
29256
|
};
|