@loreai/core 0.11.1 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bun/agents-file.d.ts +29 -8
- package/dist/bun/agents-file.d.ts.map +1 -1
- package/dist/bun/config.d.ts +1 -0
- package/dist/bun/config.d.ts.map +1 -1
- package/dist/bun/db.d.ts.map +1 -1
- package/dist/bun/distillation.d.ts +55 -0
- package/dist/bun/distillation.d.ts.map +1 -1
- package/dist/bun/embedding.d.ts +15 -1
- package/dist/bun/embedding.d.ts.map +1 -1
- package/dist/bun/gradient.d.ts +53 -5
- package/dist/bun/gradient.d.ts.map +1 -1
- package/dist/bun/index.d.ts +4 -4
- package/dist/bun/index.d.ts.map +1 -1
- package/dist/bun/index.js +799 -256
- package/dist/bun/index.js.map +4 -4
- package/dist/bun/pattern-extract.d.ts +36 -0
- package/dist/bun/pattern-extract.d.ts.map +1 -0
- package/dist/bun/recall.d.ts +1 -0
- package/dist/bun/recall.d.ts.map +1 -1
- package/dist/bun/search.d.ts +13 -1
- package/dist/bun/search.d.ts.map +1 -1
- package/dist/bun/temporal.d.ts +15 -0
- package/dist/bun/temporal.d.ts.map +1 -1
- package/dist/bun/types.d.ts +41 -1
- package/dist/bun/types.d.ts.map +1 -1
- package/dist/bun/worker-model.d.ts +22 -0
- package/dist/bun/worker-model.d.ts.map +1 -1
- package/dist/node/agents-file.d.ts +29 -8
- package/dist/node/agents-file.d.ts.map +1 -1
- package/dist/node/config.d.ts +1 -0
- package/dist/node/config.d.ts.map +1 -1
- package/dist/node/db.d.ts.map +1 -1
- package/dist/node/distillation.d.ts +55 -0
- package/dist/node/distillation.d.ts.map +1 -1
- package/dist/node/embedding.d.ts +15 -1
- package/dist/node/embedding.d.ts.map +1 -1
- package/dist/node/gradient.d.ts +53 -5
- package/dist/node/gradient.d.ts.map +1 -1
- package/dist/node/index.d.ts +4 -4
- package/dist/node/index.d.ts.map +1 -1
- package/dist/node/index.js +799 -256
- package/dist/node/index.js.map +4 -4
- package/dist/node/pattern-extract.d.ts +36 -0
- package/dist/node/pattern-extract.d.ts.map +1 -0
- package/dist/node/recall.d.ts +1 -0
- package/dist/node/recall.d.ts.map +1 -1
- package/dist/node/search.d.ts +13 -1
- package/dist/node/search.d.ts.map +1 -1
- package/dist/node/temporal.d.ts +15 -0
- package/dist/node/temporal.d.ts.map +1 -1
- package/dist/node/types.d.ts +41 -1
- package/dist/node/types.d.ts.map +1 -1
- package/dist/node/worker-model.d.ts +22 -0
- package/dist/node/worker-model.d.ts.map +1 -1
- package/dist/types/agents-file.d.ts +29 -8
- package/dist/types/agents-file.d.ts.map +1 -1
- package/dist/types/config.d.ts +1 -0
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/db.d.ts.map +1 -1
- package/dist/types/distillation.d.ts +55 -0
- package/dist/types/distillation.d.ts.map +1 -1
- package/dist/types/embedding.d.ts +15 -1
- package/dist/types/embedding.d.ts.map +1 -1
- package/dist/types/gradient.d.ts +53 -5
- package/dist/types/gradient.d.ts.map +1 -1
- package/dist/types/index.d.ts +4 -4
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/pattern-extract.d.ts +36 -0
- package/dist/types/pattern-extract.d.ts.map +1 -0
- package/dist/types/recall.d.ts +1 -0
- package/dist/types/recall.d.ts.map +1 -1
- package/dist/types/search.d.ts +13 -1
- package/dist/types/search.d.ts.map +1 -1
- package/dist/types/temporal.d.ts +15 -0
- package/dist/types/temporal.d.ts.map +1 -1
- package/dist/types/types.d.ts +41 -1
- package/dist/types/types.d.ts.map +1 -1
- package/dist/types/worker-model.d.ts +22 -0
- package/dist/types/worker-model.d.ts.map +1 -1
- package/package.json +3 -2
- package/src/agents-file.ts +111 -28
- package/src/config.ts +25 -18
- package/src/curator.ts +2 -2
- package/src/db.ts +83 -4
- package/src/distillation.ts +270 -27
- package/src/embedding.ts +158 -14
- package/src/gradient.ts +398 -227
- package/src/index.ts +13 -5
- package/src/pattern-extract.ts +108 -0
- package/src/recall.ts +142 -6
- package/src/search.ts +37 -1
- package/src/temporal.ts +39 -0
- package/src/types.ts +41 -1
- package/src/worker-model.ts +142 -5
package/dist/node/index.js
CHANGED
|
@@ -131,6 +131,7 @@ __export(temporal_exports, {
|
|
|
131
131
|
search: () => search2,
|
|
132
132
|
searchScored: () => searchScored,
|
|
133
133
|
store: () => store,
|
|
134
|
+
temporalCnorm: () => temporalCnorm,
|
|
134
135
|
undistilled: () => undistilled,
|
|
135
136
|
undistilledCount: () => undistilledCount
|
|
136
137
|
});
|
|
@@ -162,6 +163,7 @@ function sha256(input) {
|
|
|
162
163
|
// src/db.ts
|
|
163
164
|
import { join, dirname } from "path";
|
|
164
165
|
import { mkdirSync } from "fs";
|
|
166
|
+
import { homedir } from "os";
|
|
165
167
|
var MIGRATIONS = [
|
|
166
168
|
`
|
|
167
169
|
-- Version 1: Initial schema
|
|
@@ -490,11 +492,27 @@ var MIGRATIONS = [
|
|
|
490
492
|
)
|
|
491
493
|
WHERE content LIKE '%' || char(10) || '[tool:%'
|
|
492
494
|
OR content LIKE '%' || char(10) || '[reasoning] %';
|
|
495
|
+
`,
|
|
496
|
+
`
|
|
497
|
+
-- Version 12: Context health diagnostic columns on distillations.
|
|
498
|
+
--
|
|
499
|
+
-- r_compression: k/\u221AN where k = distilled token count, N = source token
|
|
500
|
+
-- count. Values < 1.0 signal likely lossy compression. NULL for rows
|
|
501
|
+
-- created before this migration or for meta-distillations (gen > 0)
|
|
502
|
+
-- where the metric is not computed.
|
|
503
|
+
--
|
|
504
|
+
-- c_norm: normalized variance of relative-existence weights over source
|
|
505
|
+
-- message timestamps. Range [0, 1]; 0 = uniform distribution, 1 = attention
|
|
506
|
+
-- dominated by distant past. NULL for pre-migration rows or meta-distillations.
|
|
507
|
+
--
|
|
508
|
+
-- Both columns are nullable REALs \u2014 cheap to add, no backfill needed.
|
|
509
|
+
ALTER TABLE distillations ADD COLUMN r_compression REAL;
|
|
510
|
+
ALTER TABLE distillations ADD COLUMN c_norm REAL;
|
|
493
511
|
`
|
|
494
512
|
];
|
|
495
513
|
function dataDir() {
|
|
496
514
|
const xdg = process.env.XDG_DATA_HOME;
|
|
497
|
-
const base = xdg || join(
|
|
515
|
+
const base = xdg || join(homedir(), ".local", "share");
|
|
498
516
|
return join(base, "opencode-lore");
|
|
499
517
|
}
|
|
500
518
|
var instance;
|
|
@@ -525,16 +543,47 @@ function migrate(database) {
|
|
|
525
543
|
"SELECT name FROM sqlite_master WHERE type='table' AND name='schema_version'"
|
|
526
544
|
).get();
|
|
527
545
|
const current2 = row ? database.query("SELECT version FROM schema_version").get()?.version ?? 0 : 0;
|
|
528
|
-
if (current2 >= MIGRATIONS.length)
|
|
546
|
+
if (current2 >= MIGRATIONS.length) {
|
|
547
|
+
recoverMissingObjects(database);
|
|
548
|
+
return;
|
|
549
|
+
}
|
|
529
550
|
for (let i = current2; i < MIGRATIONS.length; i++) {
|
|
530
551
|
if (i === VACUUM_MIGRATION_INDEX) {
|
|
531
552
|
database.exec("PRAGMA auto_vacuum = INCREMENTAL");
|
|
532
553
|
database.exec("VACUUM");
|
|
533
554
|
} else {
|
|
534
|
-
|
|
555
|
+
try {
|
|
556
|
+
database.exec(MIGRATIONS[i]);
|
|
557
|
+
} catch (e) {
|
|
558
|
+
if (e instanceof Error && /duplicate column name/i.test(e.message)) {
|
|
559
|
+
const stripped = stripAppliedAlters(MIGRATIONS[i], database);
|
|
560
|
+
if (stripped.trim()) database.exec(stripped);
|
|
561
|
+
} else {
|
|
562
|
+
throw e;
|
|
563
|
+
}
|
|
564
|
+
}
|
|
535
565
|
}
|
|
536
566
|
}
|
|
537
567
|
database.exec(`UPDATE schema_version SET version = ${MIGRATIONS.length}`);
|
|
568
|
+
recoverMissingObjects(database);
|
|
569
|
+
}
|
|
570
|
+
/**
 * Remove `ALTER TABLE … ADD COLUMN …;` statements whose column already exists.
 *
 * Called after a migration fails with "duplicate column name": statements that
 * already took effect are stripped so the remaining SQL can be executed.
 *
 * @param {string} migration - SQL text of one migration; may hold several statements.
 * @param {object} database - handle exposing `query(sql).all()`; the rows returned
 *   for `PRAGMA table_info(<table>)` are read for their `name` field.
 * @returns {string} The migration text with already-applied ALTER statements removed.
 */
function stripAppliedAlters(migration, database) {
  // Look each table up once, however many ALTERs target it.
  const columnsByTable = new Map();
  const existingColumns = (table) => {
    let known = columnsByTable.get(table);
    if (!known) {
      const rows = database.query(`PRAGMA table_info(${table})`).all();
      // SQLite treats column names case-insensitively, so compare lowercased.
      known = new Set(rows.map((row) => String(row.name).toLowerCase()));
      columnsByTable.set(table, known);
    }
    return known;
  };
  return migration.replace(
    /ALTER\s+TABLE\s+(\w+)\s+ADD\s+COLUMN\s+(\w+)\b[^;]*;/gi,
    (statement, table, column) =>
      existingColumns(table).has(column.toLowerCase()) ? "" : statement
  );
}
|
|
580
|
+
/**
 * Ensure the `kv_meta` table exists.
 *
 * The statement uses `IF NOT EXISTS`, so running it against a database that
 * already has the table leaves it unchanged.
 *
 * @param {object} database - handle exposing `exec(sql)`.
 */
function recoverMissingObjects(database) {
  const ensureKvMeta = `
CREATE TABLE IF NOT EXISTS kv_meta (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
);
`;
  database.exec(ensureKvMeta);
}
|
|
539
588
|
function close() {
|
|
540
589
|
if (instance) {
|
|
@@ -11259,14 +11308,24 @@ function reciprocalRankFusion(lists, k = 60) {
|
|
|
11259
11308
|
}
|
|
11260
11309
|
return [...scores.values()].sort((a, b) => b.score - a.score);
|
|
11261
11310
|
}
|
|
11262
|
-
|
|
11311
|
+
/**
 * Rank items by how many query terms occur verbatim in their text.
 *
 * Terms come from `filterTerms(query)` and are matched case-insensitively as
 * substrings. Items matching no term are dropped; the rest are ordered by
 * descending match count.
 *
 * @param {Array} items - candidates to rank.
 * @param {Function} getText - maps an item to the string that is searched.
 * @param {string} query - raw query text.
 * @returns {Array} Matching items, best first; empty when no terms remain.
 */
function exactTermMatchRank(items, getText, query) {
  const needles = filterTerms(query).map((term) => term.toLowerCase());
  if (needles.length === 0) return [];
  const hits = [];
  for (const item of items) {
    const haystack = getText(item).toLowerCase();
    let matches = 0;
    for (const needle of needles) {
      if (haystack.includes(needle)) matches += 1;
    }
    if (matches > 0) hits.push({ item, matches });
  }
  hits.sort((left, right) => right.matches - left.matches);
  return hits.map((hit) => hit.item);
}
|
|
11321
|
+
async function expandQuery(llm, query, model, sessionID) {
|
|
11263
11322
|
const TIMEOUT_MS = 3e3;
|
|
11264
11323
|
try {
|
|
11265
11324
|
const responseText = await Promise.race([
|
|
11266
11325
|
llm.prompt(
|
|
11267
11326
|
QUERY_EXPANSION_SYSTEM,
|
|
11268
11327
|
`Input: "${query}"`,
|
|
11269
|
-
{ model, workerID: "lore-query-expand" }
|
|
11328
|
+
{ model, workerID: "lore-query-expand", thinking: false, urgent: true, sessionID }
|
|
11270
11329
|
),
|
|
11271
11330
|
new Promise((resolve) => setTimeout(() => resolve(null), TIMEOUT_MS))
|
|
11272
11331
|
]);
|
|
@@ -11440,6 +11499,18 @@ function searchScored(input) {
|
|
|
11440
11499
|
return [];
|
|
11441
11500
|
}
|
|
11442
11501
|
}
|
|
11502
|
+
/**
 * Normalized variance of age-based weights over a set of timestamps.
 *
 * Each timestamp gets weight `age / totalAge`, where `age = now - timestamp`.
 * The variance of those weights around the uniform weight `1/n` is divided by
 * its maximum `(n - 1) / n²` (all weight on a single entry), giving a value in
 * [0, 1]: 0 when all ages are equal, 1 when one entry carries all the age.
 *
 * @param {number[]} timestamps - timestamps in the same unit as `now`.
 * @param {number} [now=Date.now()] - reference time.
 * @returns {number} Value in [0, 1]; 0 for fewer than two timestamps or when
 *   no timestamp lies in the past.
 */
function temporalCnorm(timestamps, now = Date.now()) {
  const n = timestamps.length;
  if (n < 2) return 0;
  // Future or non-finite timestamps count as age 0: a negative age would yield
  // negative weights and push the result outside [0, 1]; NaN would poison it.
  const ages = timestamps.map((t) => {
    const age = now - t;
    return Number.isFinite(age) && age > 0 ? age : 0;
  });
  const totalAge = ages.reduce((sum, age) => sum + age, 0);
  if (totalAge <= 0) return 0;
  const uniform = 1 / n;
  const variance =
    ages.reduce((sum, age) => sum + (age / totalAge - uniform) ** 2, 0) / n;
  // n >= 2 here, so the maximum variance is strictly positive.
  const maxVariance = (n - 1) / (n * n);
  // Clamp to absorb floating-point overshoot.
  return Math.min(1, variance / maxVariance);
}
|
|
11443
11514
|
function count(projectPath, sessionID) {
|
|
11444
11515
|
const pid = ensureProject(projectPath);
|
|
11445
11516
|
const query = sessionID ? "SELECT COUNT(*) as count FROM temporal_messages WHERE project_id = ? AND session_id = ?" : "SELECT COUNT(*) as count FROM temporal_messages WHERE project_id = ?";
|
|
@@ -25664,11 +25735,15 @@ var LoreConfig = external_exports.object({
|
|
|
25664
25735
|
* Anthropic's April 23 postmortem identified dropping reasoning blocks as
|
|
25665
25736
|
* the root cause of forgetfulness/repetition.
|
|
25666
25737
|
*
|
|
25667
|
-
* `idleResumeMinutes` is the threshold in minutes. Default
|
|
25668
|
-
* Anthropic's
|
|
25738
|
+
* `idleResumeMinutes` is the threshold in minutes. Default 5 — matches
|
|
25739
|
+
* Anthropic's default-tier prompt cache TTL. After 5 min of inactivity the
|
|
25740
|
+
* upstream cache is cold, so preserving byte-identity wastes cache-write cost
|
|
25741
|
+
* for no benefit. Refreshing the caches on resume produces a better-fitting
|
|
25742
|
+
* window at the same cold-write price. Users on Anthropic's extended-cache
|
|
25743
|
+
* tier (1 h TTL) should set this to 60 in `.lore.json`.
|
|
25669
25744
|
* Set to 0 to disable the feature.
|
|
25670
25745
|
*/
|
|
25671
|
-
idleResumeMinutes: external_exports.number().min(0).max(24 * 60).default(
|
|
25746
|
+
idleResumeMinutes: external_exports.number().min(0).max(24 * 60).default(5),
|
|
25672
25747
|
distillation: external_exports.object({
|
|
25673
25748
|
minMessages: external_exports.number().min(3).default(5),
|
|
25674
25749
|
maxSegment: external_exports.number().min(5).default(30),
|
|
@@ -25719,34 +25794,37 @@ var LoreConfig = external_exports.object({
|
|
|
25719
25794
|
* before search, improving recall for ambiguous queries. */
|
|
25720
25795
|
queryExpansion: external_exports.boolean().default(false),
|
|
25721
25796
|
/** Vector embedding search.
|
|
25722
|
-
* Supports multiple providers:
|
|
25723
|
-
* "
|
|
25724
|
-
*
|
|
25725
|
-
*
|
|
25797
|
+
* Supports multiple providers:
|
|
25798
|
+
* - "local" (default): fastembed + ONNX Runtime, no API key needed.
|
|
25799
|
+
* Uses bge-small-en-v1.5 (384 dims). Model downloaded on first use (~33MB),
|
|
25800
|
+
* cached in ~/.cache/fastembed. ~150ms per query embed.
|
|
25801
|
+
* - "voyage": Voyage AI (VOYAGE_API_KEY, voyage-code-3, 1024 dims)
|
|
25802
|
+
* - "openai": OpenAI (OPENAI_API_KEY, text-embedding-3-small, 1536 dims)
|
|
25803
|
+
* Set enabled: false to explicitly disable even with a provider available. */
|
|
25726
25804
|
embeddings: external_exports.object({
|
|
25727
25805
|
/** Enable/disable vector embedding search. Default: true.
|
|
25728
|
-
* Set to false to explicitly disable
|
|
25806
|
+
* Set to false to explicitly disable. */
|
|
25729
25807
|
enabled: external_exports.boolean().default(true),
|
|
25730
|
-
/** Embedding provider. Default: "
|
|
25731
|
-
*
|
|
25808
|
+
/** Embedding provider. Default: "local".
|
|
25809
|
+
* - "local": fastembed + ONNX Runtime, no API key (default model: bge-small-en-v1.5, 384 dims)
|
|
25732
25810
|
* - "voyage": VOYAGE_API_KEY (default model: voyage-code-3, 1024 dims)
|
|
25733
25811
|
* - "openai": OPENAI_API_KEY (default model: text-embedding-3-small, 1536 dims) */
|
|
25734
|
-
provider: external_exports.enum(["voyage", "openai"]).default("
|
|
25812
|
+
provider: external_exports.enum(["local", "voyage", "openai"]).default("local"),
|
|
25735
25813
|
/** Model ID for the embedding provider. Default depends on provider. */
|
|
25736
|
-
model: external_exports.string().default("
|
|
25737
|
-
/** Embedding dimensions. Default: 1024. */
|
|
25738
|
-
dimensions: external_exports.number().min(
|
|
25814
|
+
model: external_exports.string().default("BGESmallENV15"),
|
|
25815
|
+
/** Embedding dimensions. Default: 384 (local) / 1024 (voyage) / 1536 (openai). */
|
|
25816
|
+
dimensions: external_exports.number().min(64).max(2048).default(384)
|
|
25739
25817
|
}).default({
|
|
25740
25818
|
enabled: true,
|
|
25741
|
-
provider: "
|
|
25742
|
-
model: "
|
|
25743
|
-
dimensions:
|
|
25819
|
+
provider: "local",
|
|
25820
|
+
model: "BGESmallENV15",
|
|
25821
|
+
dimensions: 384
|
|
25744
25822
|
})
|
|
25745
25823
|
}).default({
|
|
25746
25824
|
ftsWeights: { title: 6, content: 2, category: 3 },
|
|
25747
25825
|
recallLimit: 10,
|
|
25748
25826
|
queryExpansion: false,
|
|
25749
|
-
embeddings: { enabled: true, provider: "
|
|
25827
|
+
embeddings: { enabled: true, provider: "local", model: "BGESmallENV15", dimensions: 384 }
|
|
25750
25828
|
}),
|
|
25751
25829
|
crossProject: external_exports.boolean().default(false),
|
|
25752
25830
|
agentsFile: external_exports.object({
|
|
@@ -25784,6 +25862,7 @@ __export(embedding_exports, {
|
|
|
25784
25862
|
fromBlob: () => fromBlob,
|
|
25785
25863
|
isAvailable: () => isAvailable,
|
|
25786
25864
|
resetProvider: () => resetProvider,
|
|
25865
|
+
runStartupBackfill: () => runStartupBackfill,
|
|
25787
25866
|
toBlob: () => toBlob,
|
|
25788
25867
|
vectorSearch: () => vectorSearch,
|
|
25789
25868
|
vectorSearchDistillations: () => vectorSearchDistillations
|
|
@@ -25861,9 +25940,43 @@ var OpenAIProvider = class {
|
|
|
25861
25940
|
return sorted.map((d) => new Float32Array(d.embedding));
|
|
25862
25941
|
}
|
|
25863
25942
|
};
|
|
25864
|
-
var
|
|
25865
|
-
|
|
25866
|
-
|
|
25943
|
+
/**
 * Embedding provider backed by the optional `fastembed` package.
 *
 * The model is loaded lazily on the first call that needs it and reused
 * afterwards. Vectors are returned as `Float32Array`s.
 */
var LocalProvider = class {
  maxBatchSize = 256;
  /** Loaded model instance, or null until the first successful init. */
  model = null;
  /** In-flight init promise, shared by concurrent callers. */
  initPromise = null;
  modelName;
  /**
   * @param {string} modelName - key of fastembed's `EmbeddingModel` enum, or a
   *   raw model identifier passed through when no enum entry matches.
   */
  constructor(modelName) {
    this.modelName = modelName;
  }
  /**
   * Load the model once and return it.
   *
   * @returns {Promise<object>} The initialized model.
   * @throws Whatever `import("fastembed")` or `FlagEmbedding.init` rejects with.
   */
  async getModel() {
    if (this.model) return this.model;
    if (!this.initPromise) {
      this.initPromise = (async () => {
        const { EmbeddingModel, FlagEmbedding } = await import("fastembed");
        const enumValue = EmbeddingModel[this.modelName];
        const loaded = await FlagEmbedding.init({
          model: enumValue ?? this.modelName
        });
        this.model = loaded;
        return loaded;
      })().catch((err) => {
        // Do not cache the failure: a later call may succeed.
        this.initPromise = null;
        throw err;
      });
    }
    return this.initPromise;
  }
  /**
   * Embed a list of texts.
   *
   * @param {string[]} texts - inputs to embed.
   * @param {string} inputType - `"query"` uses the model's query path when
   *   exactly one text is given; anything else uses the passage path.
   * @returns {Promise<Float32Array[]>} One vector per input text.
   */
  async embed(texts, inputType) {
    // Nothing to embed: avoid loading the model at all.
    if (texts.length === 0) return [];
    const model = await this.getModel();
    if (inputType === "query" && texts.length === 1) {
      const vec = await model.queryEmbed(texts[0]);
      return [new Float32Array(vec)];
    }
    const results = [];
    for await (const batch of model.passageEmbed(texts)) {
      for (const vec of batch) {
        results.push(new Float32Array(vec));
      }
    }
    return results;
  }
};
|
|
25868
25981
|
var PROVIDER_ENV_KEYS = {
|
|
25869
25982
|
voyage: "VOYAGE_API_KEY",
|
|
@@ -25882,21 +25995,35 @@ function getProvider() {
|
|
|
25882
25995
|
return null;
|
|
25883
25996
|
}
|
|
25884
25997
|
const providerName = cfg.provider;
|
|
25885
|
-
const
|
|
25886
|
-
if (!apiKey) {
|
|
25887
|
-
cachedProvider = null;
|
|
25888
|
-
return null;
|
|
25889
|
-
}
|
|
25890
|
-
const defaults = PROVIDER_DEFAULTS[providerName];
|
|
25891
|
-
const model = cfg.model === defaults?.model ? cfg.model : cfg.model;
|
|
25892
|
-
const dimensions = cfg.dimensions;
|
|
25998
|
+
const model = cfg.model;
|
|
25893
25999
|
switch (providerName) {
|
|
25894
|
-
case "
|
|
25895
|
-
|
|
26000
|
+
case "local": {
|
|
26001
|
+
try {
|
|
26002
|
+
cachedProvider = new LocalProvider(model);
|
|
26003
|
+
} catch {
|
|
26004
|
+
info("local embedding provider unavailable (fastembed not installed)");
|
|
26005
|
+
cachedProvider = null;
|
|
26006
|
+
}
|
|
26007
|
+
break;
|
|
26008
|
+
}
|
|
26009
|
+
case "voyage": {
|
|
26010
|
+
const apiKey = getProviderApiKey(providerName);
|
|
26011
|
+
if (!apiKey) {
|
|
26012
|
+
cachedProvider = null;
|
|
26013
|
+
return null;
|
|
26014
|
+
}
|
|
26015
|
+
cachedProvider = new VoyageProvider(apiKey, model, cfg.dimensions);
|
|
25896
26016
|
break;
|
|
25897
|
-
|
|
25898
|
-
|
|
26017
|
+
}
|
|
26018
|
+
case "openai": {
|
|
26019
|
+
const apiKey = getProviderApiKey(providerName);
|
|
26020
|
+
if (!apiKey) {
|
|
26021
|
+
cachedProvider = null;
|
|
26022
|
+
return null;
|
|
26023
|
+
}
|
|
26024
|
+
cachedProvider = new OpenAIProvider(apiKey, model, cfg.dimensions);
|
|
25899
26025
|
break;
|
|
26026
|
+
}
|
|
25900
26027
|
default:
|
|
25901
26028
|
info(`unknown embedding provider: ${providerName}`);
|
|
25902
26029
|
cachedProvider = null;
|
|
@@ -26001,6 +26128,29 @@ function checkConfigChange() {
|
|
|
26001
26128
|
).run(EMBEDDING_CONFIG_KEY, current2, current2);
|
|
26002
26129
|
return true;
|
|
26003
26130
|
}
|
|
26131
|
+
/**
 * Backfill missing embeddings at startup and log a coverage summary.
 *
 * Does nothing when `isAvailable()` is false. Otherwise runs the knowledge
 * backfill, then the distillation backfill, then counts rows with and without
 * embeddings and writes one `info` line.
 *
 * @returns {Promise<void>}
 */
async function runStartupBackfill() {
  if (!isAvailable()) return;
  const knowledgeEmbedded = await backfillEmbeddings();
  const distillationEmbedded = await backfillDistillationEmbeddings();
  // Each query yields a single row with the count in column `n`.
  const countRows = (sql) => db().query(sql).get().n;
  const kTotal = countRows("SELECT COUNT(*) as n FROM knowledge WHERE confidence > 0.2");
  const kWithEmb = countRows(
    "SELECT COUNT(*) as n FROM knowledge WHERE embedding IS NOT NULL AND confidence > 0.2"
  );
  const dTotal = countRows(
    "SELECT COUNT(*) as n FROM distillations WHERE archived = 0 AND observations != ''"
  );
  const dWithEmb = countRows(
    "SELECT COUNT(*) as n FROM distillations WHERE embedding IS NOT NULL AND archived = 0"
  );
  const didBackfill = knowledgeEmbedded > 0 || distillationEmbedded > 0;
  const summary = [
    // Only mention the backfill when it actually embedded something.
    ...(didBackfill
      ? [`backfilled ${knowledgeEmbedded} knowledge + ${distillationEmbedded} distillations`]
      : []),
    `coverage: knowledge ${kWithEmb}/${kTotal}, distillations ${dWithEmb}/${dTotal}`
  ].join("; ");
  info(`embedding startup: ${summary}`);
}
|
|
26004
26154
|
async function backfillEmbeddings() {
|
|
26005
26155
|
checkConfigChange();
|
|
26006
26156
|
const provider = getProvider();
|
|
@@ -26757,6 +26907,9 @@ function check2(projectPath) {
|
|
|
26757
26907
|
// src/distillation.ts
|
|
26758
26908
|
var distillation_exports = {};
|
|
26759
26909
|
__export(distillation_exports, {
|
|
26910
|
+
backfillMetrics: () => backfillMetrics,
|
|
26911
|
+
compressionRatio: () => compressionRatio,
|
|
26912
|
+
detectSegments: () => detectSegments,
|
|
26760
26913
|
latestMetaObservations: () => latestMetaObservations,
|
|
26761
26914
|
loadForSession: () => loadForSession,
|
|
26762
26915
|
messagesToText: () => messagesToText,
|
|
@@ -26767,6 +26920,72 @@ __export(distillation_exports, {
|
|
|
26767
26920
|
workerSessionIDs: () => workerSessionIDs
|
|
26768
26921
|
});
|
|
26769
26922
|
|
|
26923
|
+
// src/pattern-extract.ts
|
|
26924
|
+
var pattern_extract_exports = {};
|
|
26925
|
+
__export(pattern_extract_exports, {
|
|
26926
|
+
extractPatterns: () => extractPatterns
|
|
26927
|
+
});
|
|
26928
|
+
/**
 * Phrase patterns used to pull decisions and preferences out of free text.
 *
 * Every pattern ends with `(?:\.|,|$)`. The `m` flag lets `$` match at the end
 * of each line, so a statement on a line without trailing punctuation is still
 * recognised when the input spans several lines.
 */
var PATTERNS = [
  // Decision patterns
  {
    regex: /decided to (?:use |switch to |go with |adopt )(.+?)(?:\.|,|$)/gim,
    category: "decision",
    titleFn: (m) => `Decided to use ${m[1].trim()}`
  },
  {
    regex: /chose (.+?) over (.+?)(?:\.|,|$)/gim,
    category: "decision",
    titleFn: (m) => `Chose ${m[1].trim()} over ${m[2].trim()}`
  },
  {
    regex: /switched from (.+?) to (.+?)(?:\.|,|$)/gim,
    category: "decision",
    titleFn: (m) => `Switched from ${m[1].trim()} to ${m[2].trim()}`
  },
  {
    regex: /going with (.+?) (?:because|for|due to)(.+?)(?:\.|,|$)/gim,
    category: "decision",
    titleFn: (m) => `Going with ${m[1].trim()}`
  },
  {
    regex: /migrat(?:ed|ing) (?:from .+? )?to (.+?)(?:\.|,|$)/gim,
    category: "decision",
    titleFn: (m) => `Migrated to ${m[1].trim()}`
  },
  {
    regex: /adopted (.+?) (?:for|as|instead)(.+?)(?:\.|,|$)/gim,
    category: "decision",
    titleFn: (m) => `Adopted ${m[1].trim()}`
  },
  // Preference patterns
  {
    regex: /prefers? (.+?) (?:over|to|instead of|rather than) (.+?)(?:\.|,|$)/gim,
    category: "preference",
    titleFn: (m) => `Prefers ${m[1].trim()} over ${m[2].trim()}`
  },
  {
    regex: /(?:user |team |we )(?:always |usually |typically )(?:use|prefer|go with) (.+?)(?:\.|,|$)/gim,
    category: "preference",
    titleFn: (m) => `Typically uses ${m[1].trim()}`
  }
];
/**
 * Extract decision and preference entries from observation text.
 *
 * Patterns are applied in declaration order. Entries whose title has already
 * been produced (compared case-insensitively) are skipped.
 *
 * @param {string} observations - text to scan; presumably multi-line
 *   distillation output (NOTE(review): confirm against callers).
 * @returns {{category: string, title: string, content: string}[]} One entry per
 *   distinct title; `content` is the trimmed matched text.
 */
function extractPatterns(observations) {
  const results = [];
  const seen = /* @__PURE__ */ new Set();
  for (const { regex, category, titleFn } of PATTERNS) {
    // matchAll works on a copy of the regex, so no lastIndex state is shared
    // between calls.
    for (const match of observations.matchAll(regex)) {
      const title = titleFn(match);
      const key = title.toLowerCase();
      if (seen.has(key)) continue;
      seen.add(key);
      results.push({ category, title, content: match[0].trim() });
    }
  }
  return results;
}
|
|
26988
|
+
|
|
26770
26989
|
// src/gradient.ts
|
|
26771
26990
|
function estimate2(text4) {
|
|
26772
26991
|
return Math.ceil(text4.length / 3);
|
|
@@ -26802,12 +27021,17 @@ function makeSessionState() {
|
|
|
26802
27021
|
lastWindowMessageIDs: /* @__PURE__ */ new Set(),
|
|
26803
27022
|
forceMinLayer: 0,
|
|
26804
27023
|
lastTransformEstimate: 0,
|
|
27024
|
+
ltmTokens: 0,
|
|
26805
27025
|
prefixCache: null,
|
|
26806
27026
|
rawWindowCache: null,
|
|
26807
27027
|
lastTurnAt: 0,
|
|
26808
27028
|
cameOutOfIdle: false,
|
|
27029
|
+
postIdleCompact: false,
|
|
26809
27030
|
consecutiveHighLayer: 0,
|
|
26810
|
-
lastPrefixHash: ""
|
|
27031
|
+
lastPrefixHash: "",
|
|
27032
|
+
bustCount: 0,
|
|
27033
|
+
transformCount: 0,
|
|
27034
|
+
distillationSnapshot: null
|
|
26811
27035
|
};
|
|
26812
27036
|
}
|
|
26813
27037
|
var sessionStates = /* @__PURE__ */ new Map();
|
|
@@ -26828,16 +27052,21 @@ function onIdleResume(sessionID, thresholdMs, now = Date.now()) {
|
|
|
26828
27052
|
if (idleMs < thresholdMs) return { triggered: false };
|
|
26829
27053
|
state.prefixCache = null;
|
|
26830
27054
|
state.rawWindowCache = null;
|
|
27055
|
+
state.distillationSnapshot = null;
|
|
26831
27056
|
state.cameOutOfIdle = true;
|
|
27057
|
+
state.postIdleCompact = true;
|
|
26832
27058
|
return { triggered: true, idleMs };
|
|
26833
27059
|
}
|
|
27060
|
+
/**
 * Read the `lastTurnAt` value stored for a session.
 *
 * @param {string} sessionID - key into `sessionStates`.
 * @returns {number} The stored value, or 0 when the session has no state or
 *   the value is nullish.
 */
function getLastTurnAt(sessionID) {
  const state = sessionStates.get(sessionID);
  if (state == null) return 0;
  return state.lastTurnAt ?? 0;
}
|
|
26834
27063
|
function consumeCameOutOfIdle(sessionID) {
|
|
26835
27064
|
const state = sessionStates.get(sessionID);
|
|
26836
27065
|
if (!state || !state.cameOutOfIdle) return false;
|
|
26837
27066
|
state.cameOutOfIdle = false;
|
|
26838
27067
|
return true;
|
|
26839
27068
|
}
|
|
26840
|
-
var
|
|
27069
|
+
var ltmTokensFallback = 0;
|
|
26841
27070
|
function setModelLimits(limits) {
|
|
26842
27071
|
contextLimit = limits.context || 2e5;
|
|
26843
27072
|
outputReserved = Math.min(limits.output || 32e3, 32e3);
|
|
@@ -26850,11 +27079,18 @@ function computeLayer0Cap(targetCostPerTurn, cacheReadCostPerToken) {
|
|
|
26850
27079
|
const rawCap = Math.floor(targetCostPerTurn / cacheReadCostPerToken);
|
|
26851
27080
|
return Math.max(rawCap, MIN_LAYER0_FLOOR);
|
|
26852
27081
|
}
|
|
26853
|
-
function setLtmTokens(tokens) {
|
|
26854
|
-
|
|
27082
|
+
/**
 * Record the LTM token count.
 *
 * With a `sessionID`, the value is stored on that session's state. The
 * module-level fallback is updated in every case.
 *
 * @param {number} tokens - token count to store.
 * @param {string} [sessionID] - session to attribute the count to.
 */
function setLtmTokens(tokens, sessionID) {
  if (sessionID) getSessionState(sessionID).ltmTokens = tokens;
  ltmTokensFallback = tokens;
}
|
|
26856
|
-
function getLtmTokens() {
|
|
26857
|
-
|
|
27088
|
+
/**
 * Read the LTM token count.
 *
 * @param {string} [sessionID] - session whose value is wanted.
 * @returns {number} The session's `ltmTokens` when that session has state;
 *   otherwise the module-level fallback.
 */
function getLtmTokens(sessionID) {
  const state = sessionID ? sessionStates.get(sessionID) : void 0;
  return state ? state.ltmTokens : ltmTokensFallback;
}
|
|
26859
27095
|
function getLtmBudget(ltmFraction) {
|
|
26860
27096
|
const overhead = calibratedOverhead ?? FIRST_TURN_OVERHEAD;
|
|
@@ -26870,7 +27106,7 @@ function calibrate(actualInput, sessionID, messageCount) {
|
|
|
26870
27106
|
if (sessionID !== void 0) {
|
|
26871
27107
|
const state = getSessionState(sessionID);
|
|
26872
27108
|
state.lastKnownInput = actualInput;
|
|
26873
|
-
state.lastKnownLtm = ltmTokens;
|
|
27109
|
+
state.lastKnownLtm = state.ltmTokens;
|
|
26874
27110
|
if (messageCount !== void 0) state.lastKnownMessageCount = messageCount;
|
|
26875
27111
|
}
|
|
26876
27112
|
}
|
|
@@ -26901,7 +27137,9 @@ function inspectSessionState(sessionID) {
|
|
|
26901
27137
|
hasPrefixCache: state.prefixCache !== null,
|
|
26902
27138
|
hasRawWindowCache: state.rawWindowCache !== null,
|
|
26903
27139
|
cameOutOfIdle: state.cameOutOfIdle,
|
|
26904
|
-
|
|
27140
|
+
postIdleCompact: state.postIdleCompact,
|
|
27141
|
+
lastTurnAt: state.lastTurnAt,
|
|
27142
|
+
distillationSnapshot: state.distillationSnapshot
|
|
26905
27143
|
};
|
|
26906
27144
|
}
|
|
26907
27145
|
function setLastTurnAtForTest(sessionID, ms) {
|
|
@@ -26913,6 +27151,25 @@ function loadDistillations(projectPath, sessionID) {
|
|
|
26913
27151
|
const params = sessionID ? [pid, sessionID] : [pid];
|
|
26914
27152
|
return db().query(query).all(...params);
|
|
26915
27153
|
}
|
|
27154
|
+
/**
 * Load distillation rows, reusing the session's snapshot while the most
 * recent user message is unchanged.
 *
 * @param {string} projectPath - forwarded to `loadDistillations`.
 * @param {string} sessionID - forwarded to `loadDistillations`.
 * @param {Array} messages - conversation; each entry exposes `info.role` and `info.id`.
 * @param {object} sessState - session state; its `distillationSnapshot` is read
 *   and, on a refresh, replaced.
 * @returns {Array} Distillation rows, either cached or freshly loaded.
 */
function loadDistillationsCached(projectPath, sessionID, messages, sessState) {
  // Walk backwards to the most recent user message, if any.
  let idx = messages.length - 1;
  while (idx >= 0 && messages[idx].info.role !== "user") idx -= 1;
  const lastUserMsgId = idx >= 0 ? messages[idx].info.id : null;

  const cached = sessState.distillationSnapshot;
  if (cached && cached.lastUserMsgId === lastUserMsgId) return cached.rows;

  const rows = loadDistillations(projectPath, sessionID);
  sessState.distillationSnapshot = { rows, lastUserMsgId };
  const shortId = lastUserMsgId?.substring(0, 16) ?? "none";
  info(`distillation refresh: ${rows.length} rows (user msg ${shortId})`);
  return rows;
}
|
|
26916
27173
|
function stripSystemReminders(text4) {
|
|
26917
27174
|
return text4.replace(/<system-reminder>[\s\S]*?<\/system-reminder>\n?/g, (match) => {
|
|
26918
27175
|
const inner = match.match(
|
|
@@ -26965,24 +27222,51 @@ function simpleHash(str) {
|
|
|
26965
27222
|
}
|
|
26966
27223
|
return hash2;
|
|
26967
27224
|
}
|
|
26968
|
-
function
|
|
27225
|
+
function extractReadRange(input) {
|
|
26969
27226
|
try {
|
|
26970
27227
|
const parsed = JSON.parse(input);
|
|
26971
|
-
|
|
27228
|
+
const path = parsed.path || parsed.filePath || parsed.file;
|
|
27229
|
+
if (!path) return void 0;
|
|
27230
|
+
const offset = typeof parsed.offset === "number" ? parsed.offset : void 0;
|
|
27231
|
+
const limit = typeof parsed.limit === "number" ? parsed.limit : void 0;
|
|
27232
|
+
return { path, offset, limit };
|
|
26972
27233
|
} catch {
|
|
26973
27234
|
const match = input.match(/(?:[\w.-]+\/)+[\w.-]+\.\w{1,5}/);
|
|
26974
|
-
return
|
|
27235
|
+
if (!match) return void 0;
|
|
27236
|
+
return { path: match[0], offset: void 0, limit: void 0 };
|
|
26975
27237
|
}
|
|
26976
27238
|
}
|
|
26977
|
-
function
|
|
27239
|
+
/**
 * Decide whether a later file read includes everything an earlier read returned.
 *
 * A range with neither `offset` nor `limit` is a whole-file read. A missing
 * `offset` means the read starts at line 1; a missing `limit` means it runs to
 * the end of the file.
 *
 * @param {{path: string, offset?: number, limit?: number}} later - the newer read.
 * @param {{path: string, offset?: number, limit?: number}} earlier - the older read.
 * @returns {boolean} True when both reads target the same path and the later
 *   range contains the earlier one.
 */
function laterReadCovers(later, earlier) {
  if (later.path !== earlier.path) return false;
  // A later whole-file read covers anything.
  if (later.offset === void 0 && later.limit === void 0) return true;
  const laterStart = later.offset ?? 1;
  const earlierStart = earlier.offset ?? 1;
  // Later read runs to end of file: it covers the earlier read when it starts
  // no later. This includes an earlier whole-file read when the later read
  // starts at the first line.
  if (later.limit === void 0) return laterStart <= earlierStart;
  // Later read is bounded, earlier read is not: cannot be covered.
  if (earlier.limit === void 0) return false;
  const laterEnd = laterStart + later.limit;
  const earlierEnd = earlierStart + earlier.limit;
  return laterStart <= earlierStart && laterEnd >= earlierEnd;
}
|
|
27251
|
+
/**
 * Build a human-readable suffix describing which lines a read covered.
 *
 * @param {{offset?: number, limit?: number}} range - read range.
 * @returns {string} `" lines A-B"` when the end is known, `" from line A"` when
 *   only the start is known, or `""` for a whole-file read. The result starts
 *   with a space so it can be appended directly after a file path.
 */
function rangeLabel(range) {
  const { offset, limit } = range;
  if (limit !== void 0) {
    // A read with a limit but no offset starts at the first line.
    const start = offset ?? 1;
    return ` lines ${start}-${start + limit - 1}`;
  }
  if (offset !== void 0) {
    return ` from line ${offset}`;
  }
  return "";
}
|
|
27260
|
+
function dedupAnnotation(toolName, filePath, range) {
|
|
26978
27261
|
if (filePath) {
|
|
26979
|
-
|
|
27262
|
+
const rl = range ? rangeLabel(range) : "";
|
|
27263
|
+
return `[earlier read of ${filePath}${rl} \u2014 see latest read below for current content]`;
|
|
26980
27264
|
}
|
|
26981
27265
|
return `[duplicate output \u2014 same content as later ${toolName} in this session \u2014 use recall for details]`;
|
|
26982
27266
|
}
|
|
26983
27267
|
function deduplicateToolOutputs(messages, currentTurnIdx) {
|
|
26984
27268
|
const contentLatest = /* @__PURE__ */ new Map();
|
|
26985
|
-
const
|
|
27269
|
+
const fileReads = /* @__PURE__ */ new Map();
|
|
26986
27270
|
for (let i = 0; i < messages.length; i++) {
|
|
26987
27271
|
for (const part of messages[i].parts) {
|
|
26988
27272
|
if (!isToolPart(part) || part.state.status !== "completed") continue;
|
|
@@ -26992,8 +27276,15 @@ function deduplicateToolOutputs(messages, currentTurnIdx) {
|
|
|
26992
27276
|
contentLatest.set(key, i);
|
|
26993
27277
|
if (part.tool === "read_file" || part.tool === "read") {
|
|
26994
27278
|
const inputStr = typeof part.state.input === "string" ? part.state.input : JSON.stringify(part.state.input);
|
|
26995
|
-
const
|
|
26996
|
-
if (
|
|
27279
|
+
const range = extractReadRange(inputStr);
|
|
27280
|
+
if (range) {
|
|
27281
|
+
let entries = fileReads.get(range.path);
|
|
27282
|
+
if (!entries) {
|
|
27283
|
+
entries = [];
|
|
27284
|
+
fileReads.set(range.path, entries);
|
|
27285
|
+
}
|
|
27286
|
+
entries.push({ range, msgIdx: i });
|
|
27287
|
+
}
|
|
26997
27288
|
}
|
|
26998
27289
|
}
|
|
26999
27290
|
}
|
|
@@ -27007,20 +27298,30 @@ function deduplicateToolOutputs(messages, currentTurnIdx) {
|
|
|
27007
27298
|
if (!output || output.length < DEDUP_MIN_CHARS) return part;
|
|
27008
27299
|
const contentKey = `${part.tool}:${simpleHash(output)}`;
|
|
27009
27300
|
const isLatestContent = contentLatest.get(contentKey) === msgIdx;
|
|
27010
|
-
let
|
|
27011
|
-
let
|
|
27301
|
+
let readRange;
|
|
27302
|
+
let coveredByLater = false;
|
|
27012
27303
|
if (part.tool === "read_file" || part.tool === "read") {
|
|
27013
27304
|
const inputStr = typeof part.state.input === "string" ? part.state.input : JSON.stringify(part.state.input);
|
|
27014
|
-
|
|
27015
|
-
if (
|
|
27305
|
+
readRange = extractReadRange(inputStr);
|
|
27306
|
+
if (readRange) {
|
|
27307
|
+
const entries = fileReads.get(readRange.path);
|
|
27308
|
+
if (entries) {
|
|
27309
|
+
for (const entry of entries) {
|
|
27310
|
+
if (entry.msgIdx > msgIdx && laterReadCovers(entry.range, readRange)) {
|
|
27311
|
+
coveredByLater = true;
|
|
27312
|
+
break;
|
|
27313
|
+
}
|
|
27314
|
+
}
|
|
27315
|
+
}
|
|
27316
|
+
}
|
|
27016
27317
|
}
|
|
27017
|
-
if (isLatestContent &&
|
|
27318
|
+
if (isLatestContent && !coveredByLater) return part;
|
|
27018
27319
|
partsChanged = true;
|
|
27019
27320
|
return {
|
|
27020
27321
|
...part,
|
|
27021
27322
|
state: {
|
|
27022
27323
|
...part.state,
|
|
27023
|
-
output: dedupAnnotation(part.tool,
|
|
27324
|
+
output: dedupAnnotation(part.tool, readRange?.path, readRange)
|
|
27024
27325
|
}
|
|
27025
27326
|
};
|
|
27026
27327
|
});
|
|
@@ -27040,7 +27341,7 @@ function sanitizeToolParts(messages) {
|
|
|
27040
27341
|
const { status } = part.state;
|
|
27041
27342
|
if (status === "completed" || status === "error") return part;
|
|
27042
27343
|
partsChanged = true;
|
|
27043
|
-
const
|
|
27344
|
+
const existingStart = "time" in part.state ? part.state.time.start : 0;
|
|
27044
27345
|
return {
|
|
27045
27346
|
...part,
|
|
27046
27347
|
state: {
|
|
@@ -27049,8 +27350,8 @@ function sanitizeToolParts(messages) {
|
|
|
27049
27350
|
error: "[tool execution interrupted \u2014 session recovered]",
|
|
27050
27351
|
metadata: "metadata" in part.state ? part.state.metadata : void 0,
|
|
27051
27352
|
time: {
|
|
27052
|
-
start:
|
|
27053
|
-
end:
|
|
27353
|
+
start: existingStart,
|
|
27354
|
+
end: existingStart
|
|
27054
27355
|
}
|
|
27055
27356
|
}
|
|
27056
27357
|
};
|
|
@@ -27074,97 +27375,6 @@ function stripToolOutputs(parts) {
|
|
|
27074
27375
|
};
|
|
27075
27376
|
});
|
|
27076
27377
|
}
|
|
27077
|
-
/**
 * Describe the distance between `date5` and `now` in coarse, human-friendly units.
 *
 * Past dates produce "today", "yesterday", "N days ago", "1 week ago",
 * "N weeks ago", "1 month ago", "N months ago" or "N year(s) ago".
 * Future dates mirror those buckets as "tomorrow", "in N days", "in 1 week", ...
 * instead of the nonsensical negative counts (e.g. "-3 days ago") that a plain
 * floor of the signed difference would yield.
 *
 * @param {Date} date5 - The date being described.
 * @param {Date} now - The reference point.
 * @returns {string} Relative-time phrase.
 */
function formatRelativeTime(date5, now) {
  const MS_PER_DAY = 1e3 * 60 * 60 * 24;
  const diffMs = now.getTime() - date5.getTime();
  const isFuture = diffMs < 0;
  // Whole days of separation, regardless of direction.
  const days = Math.floor(Math.abs(diffMs) / MS_PER_DAY);

  if (days === 0) return "today";
  if (days === 1) return isFuture ? "tomorrow" : "yesterday";

  let span;
  if (days < 7) {
    span = `${days} days`;
  } else if (days < 14) {
    span = "1 week";
  } else if (days < 30) {
    span = `${Math.floor(days / 7)} weeks`;
  } else if (days < 60) {
    span = "1 month";
  } else if (days < 365) {
    span = `${Math.floor(days / 30)} months`;
  } else {
    const years = Math.floor(days / 365);
    span = `${years} year${years > 1 ? "s" : ""}`;
  }
  return isFuture ? `in ${span}` : `${span} ago`;
}
|
|
27089
|
-
/**
 * Extract a calendar date from free-form text.
 *
 * Three shapes are tried in order, and the first that yields a valid Date wins:
 *   1. "Month D, YYYY" (comma optional)
 *   2. "Month D-D, YYYY" — the first day of the range is used
 *   3. "early|mid|late Month YYYY" — mapped to day 7, 15 or 23 respectively
 *
 * @param {string} s - Text that may contain a date.
 * @returns {Date|null} The parsed date, or null when nothing usable was found.
 */
function parseDateFromContent(s) {
  // Build a Date from textual parts; null when the engine cannot parse it.
  const build = (month, day, year) => {
    const candidate = new Date(`${month} ${day}, ${year}`);
    return isNaN(candidate.getTime()) ? null : candidate;
  };

  const exact = s.match(/([A-Z][a-z]+)\s+(\d{1,2}),?\s+(\d{4})/);
  if (exact) {
    const parsed = build(exact[1], exact[2], exact[3]);
    if (parsed) return parsed;
  }

  const span = s.match(/([A-Z][a-z]+)\s+(\d{1,2})-\d{1,2},?\s+(\d{4})/);
  if (span) {
    const parsed = build(span[1], span[2], span[3]);
    if (parsed) return parsed;
  }

  const vague = s.match(/(late|early|mid)[- ]?([A-Z][a-z]+)\s+(\d{4})/i);
  if (vague) {
    const qualifier = vague[1].toLowerCase();
    let day = 15;
    if (qualifier === "early") {
      day = 7;
    } else if (qualifier === "late") {
      day = 23;
    }
    const parsed = build(vague[2], day, vague[3]);
    if (parsed) return parsed;
  }

  return null;
}
|
|
27108
|
-
/**
 * Append a relative-time note to inline "(meaning <date>)" / "(estimated <date>)"
 * parentheticals.
 *
 * For each parenthetical whose content parses as a date, the closing paren is
 * replaced by " — <relative time>)". When the date is already in the past and
 * the text earlier on the same line expresses future intent ("will", "plans to",
 * "going to", ...), ", likely already happened" is added as well.
 *
 * The line prefix is located with the offset supplied by `String.prototype.replace`
 * rather than `indexOf(match)`, so a parenthetical that appears several times
 * is judged against its own line instead of the first occurrence's line.
 *
 * @param {string} text4 - Observation text to annotate.
 * @param {Date} now - Reference time for relative phrasing.
 * @returns {string} The annotated text; unparseable parentheticals are left as-is.
 */
function expandInlineEstimatedDates(text4, now) {
  // No `g` flag, so `.test` carries no state between matches.
  const futureIntentRe = /\b(?:will|plans?\s+to|planning\s+to|going\s+to|intends?\s+to)\b/i;
  return text4.replace(
    /\(((?:meaning|estimated)\s+)([^)]+\d{4})\)/gi,
    (match, prefix, dateContent, offset) => {
      const d = parseDateFromContent(dateContent);
      if (!d) return match;
      const rel = formatRelativeTime(d, now);
      // Text between the start of this match's own line and the match itself.
      const lineStart = text4.lastIndexOf("\n", offset) + 1;
      const linePrefix = text4.slice(lineStart, offset);
      const isFutureIntent = futureIntentRe.test(linePrefix);
      if (d < now && isFutureIntent) {
        return `(${prefix}${dateContent} \u2014 ${rel}, likely already happened)`;
      }
      return `(${prefix}${dateContent} \u2014 ${rel})`;
    }
  );
}
|
|
27127
|
-
/**
 * Annotate observation text with relative-time hints.
 *
 * Inline estimated dates are expanded first. Then every line of the form
 * "Date: Month D, YYYY" gets " (<relative time>)" appended, and a bracketed
 * "[N days later]"-style marker is inserted before a header that lies more
 * than one day after the previous header.
 *
 * @param {string} text4 - Observation text.
 * @param {Date} now - Reference time for relative phrasing.
 * @returns {string} The annotated text.
 */
function addRelativeTimeToObservations(text4, now) {
  const expanded = expandInlineEstimatedDates(text4, now);

  // Collect every parseable "Date:" header together with its position.
  const headers = [];
  for (const hit of expanded.matchAll(/^(Date:\s*)([A-Z][a-z]+ \d{1,2}, \d{4})$/gm)) {
    const [full, prefix, ds] = hit;
    const date = new Date(ds);
    if (isNaN(date.getTime())) continue;
    headers.push({ index: hit.index, date, full, prefix, ds });
  }
  if (!headers.length) return expanded;

  // Bucket a day gap into the same coarse units used elsewhere.
  const describeGap = (gapDays) => {
    if (gapDays < 7) return `[${gapDays} days later]`;
    if (gapDays < 14) return "[1 week later]";
    if (gapDays < 30) return `[${Math.floor(gapDays / 7)} weeks later]`;
    if (gapDays < 60) return "[1 month later]";
    return `[${Math.floor(gapDays / 30)} months later]`;
  };

  const pieces = [];
  let cursor = 0;
  headers.forEach((curr, i) => {
    // Untouched text between the previous header and this one.
    pieces.push(expanded.slice(cursor, curr.index));
    if (i > 0) {
      const prev = headers[i - 1];
      const gapDays = Math.floor(
        (curr.date.getTime() - prev.date.getTime()) / 864e5
      );
      if (gapDays > 1) {
        pieces.push(`\n${describeGap(gapDays)}\n\n`);
      }
    }
    pieces.push(`${curr.prefix}${curr.ds} (${formatRelativeTime(curr.date, now)})`);
    cursor = curr.index + curr.full.length;
  });
  pieces.push(expanded.slice(cursor));
  return pieces.join("");
}
|
|
27168
27378
|
function buildPrefixMessages(formatted) {
|
|
27169
27379
|
return [
|
|
27170
27380
|
{
|
|
@@ -27221,12 +27431,7 @@ function buildPrefixMessages(formatted) {
|
|
|
27221
27431
|
}
|
|
27222
27432
|
/**
 * Build the prefix messages for a list of distillations.
 *
 * @param {Array} distillations - Distillation rows to render.
 * @returns {Array} Prefix messages, or an empty array when there is nothing
 *   to render (no rows, or formatting produced no text).
 */
function distilledPrefix(distillations) {
  if (!distillations.length) {
    return [];
  }
  const formatted = formatDistillations(distillations);
  return formatted ? buildPrefixMessages(formatted) : [];
}
|
|
@@ -27246,12 +27451,7 @@ function distilledPrefixCached(distillations, sessionID, sessState) {
|
|
|
27246
27451
|
};
|
|
27247
27452
|
}
|
|
27248
27453
|
const newRows = distillations.slice(prefixCache.rowCount);
|
|
27249
|
-
const
|
|
27250
|
-
const annotated2 = newRows.map((d) => ({
|
|
27251
|
-
...d,
|
|
27252
|
-
observations: addRelativeTimeToObservations(d.observations, now2)
|
|
27253
|
-
}));
|
|
27254
|
-
const deltaText = formatDistillations(annotated2);
|
|
27454
|
+
const deltaText = formatDistillations(newRows);
|
|
27255
27455
|
if (deltaText) {
|
|
27256
27456
|
const fullText2 = prefixCache.cachedText + "\n\n" + deltaText;
|
|
27257
27457
|
const messages2 = buildPrefixMessages(fullText2);
|
|
@@ -27267,12 +27467,7 @@ function distilledPrefixCached(distillations, sessionID, sessState) {
|
|
|
27267
27467
|
return { messages: messages2, tokens: tokens2 };
|
|
27268
27468
|
}
|
|
27269
27469
|
}
|
|
27270
|
-
const
|
|
27271
|
-
const annotated = distillations.map((d) => ({
|
|
27272
|
-
...d,
|
|
27273
|
-
observations: addRelativeTimeToObservations(d.observations, now)
|
|
27274
|
-
}));
|
|
27275
|
-
const fullText = formatDistillations(annotated);
|
|
27470
|
+
const fullText = formatDistillations(distillations);
|
|
27276
27471
|
if (!fullText) {
|
|
27277
27472
|
sessState.prefixCache = null;
|
|
27278
27473
|
return { messages: [], tokens: 0 };
|
|
@@ -27295,29 +27490,40 @@ function tryFitStable(input) {
|
|
|
27295
27490
|
const rawWindowCache = input.sessState.rawWindowCache;
|
|
27296
27491
|
const cacheValid = rawWindowCache !== null && rawWindowCache.sessionID === input.sessionID;
|
|
27297
27492
|
if (cacheValid) {
|
|
27298
|
-
const
|
|
27299
|
-
|
|
27493
|
+
const newMessages = Math.max(0, input.messages.length - rawWindowCache.pinnedTotalCount);
|
|
27494
|
+
const windowSize = rawWindowCache.pinnedRawCount + newMessages;
|
|
27495
|
+
const pinnedIdx = Math.max(0, input.messages.length - windowSize);
|
|
27496
|
+
const pinnedWindow = input.messages.slice(pinnedIdx);
|
|
27497
|
+
const pinnedTokens = pinnedWindow.reduce(
|
|
27498
|
+
(sum, m) => sum + estimateMessage(m),
|
|
27499
|
+
0
|
|
27300
27500
|
);
|
|
27301
|
-
|
|
27302
|
-
|
|
27303
|
-
|
|
27304
|
-
|
|
27305
|
-
|
|
27306
|
-
|
|
27307
|
-
|
|
27308
|
-
|
|
27309
|
-
|
|
27310
|
-
return parts !== msg.parts ? { info: msg.info, parts } : msg;
|
|
27311
|
-
});
|
|
27312
|
-
const total = input.prefixTokens + pinnedTokens;
|
|
27313
|
-
return {
|
|
27314
|
-
messages: [...input.prefix, ...processed],
|
|
27315
|
-
distilledTokens: input.prefixTokens,
|
|
27316
|
-
rawTokens: pinnedTokens,
|
|
27317
|
-
totalTokens: total
|
|
27501
|
+
const highWaterBudget = Math.max(rawWindowCache.pinnedBudget, input.rawBudget);
|
|
27502
|
+
const effectiveBudget = highWaterBudget * 1.15;
|
|
27503
|
+
if (pinnedTokens <= effectiveBudget) {
|
|
27504
|
+
if (pinnedTokens > rawWindowCache.pinnedBudget * 1.15) {
|
|
27505
|
+
input.sessState.rawWindowCache = {
|
|
27506
|
+
...rawWindowCache,
|
|
27507
|
+
pinnedRawCount: pinnedWindow.length,
|
|
27508
|
+
pinnedTotalCount: input.messages.length,
|
|
27509
|
+
pinnedBudget: input.rawBudget
|
|
27318
27510
|
};
|
|
27319
27511
|
}
|
|
27512
|
+
const processed = pinnedWindow.map((msg) => {
|
|
27513
|
+
const parts = cleanParts(msg.parts);
|
|
27514
|
+
return parts !== msg.parts ? { info: msg.info, parts } : msg;
|
|
27515
|
+
});
|
|
27516
|
+
const total = input.prefixTokens + pinnedTokens;
|
|
27517
|
+
return {
|
|
27518
|
+
messages: [...input.prefix, ...processed],
|
|
27519
|
+
distilledTokens: input.prefixTokens,
|
|
27520
|
+
rawTokens: pinnedTokens,
|
|
27521
|
+
totalTokens: total
|
|
27522
|
+
};
|
|
27320
27523
|
}
|
|
27524
|
+
info(
|
|
27525
|
+
`pin-overflow: session=${input.sessionID} pinnedTokens=${pinnedTokens} pinnedBudget=${rawWindowCache.pinnedBudget} effectiveBudget=${Math.round(effectiveBudget)} currentRawBudget=${input.rawBudget} windowSize=${pinnedWindow.length}`
|
|
27526
|
+
);
|
|
27321
27527
|
}
|
|
27322
27528
|
const result = tryFit({
|
|
27323
27529
|
messages: input.messages,
|
|
@@ -27328,11 +27534,13 @@ function tryFitStable(input) {
|
|
|
27328
27534
|
strip: "none"
|
|
27329
27535
|
});
|
|
27330
27536
|
if (result) {
|
|
27331
|
-
const
|
|
27332
|
-
if (
|
|
27537
|
+
const rawMessageCount = result.messages.length - input.prefix.length;
|
|
27538
|
+
if (rawMessageCount > 0) {
|
|
27333
27539
|
input.sessState.rawWindowCache = {
|
|
27334
27540
|
sessionID: input.sessionID,
|
|
27335
|
-
|
|
27541
|
+
pinnedRawCount: rawMessageCount,
|
|
27542
|
+
pinnedTotalCount: input.messages.length,
|
|
27543
|
+
pinnedBudget: input.rawBudget
|
|
27336
27544
|
};
|
|
27337
27545
|
}
|
|
27338
27546
|
}
|
|
@@ -27347,14 +27555,15 @@ function needsUrgentDistillation() {
|
|
|
27347
27555
|
function transformInner(input) {
|
|
27348
27556
|
const cfg = config2();
|
|
27349
27557
|
const overhead = getOverhead();
|
|
27558
|
+
const sid = input.sessionID ?? input.messages[0]?.info.sessionID;
|
|
27559
|
+
const sessState = sid ? getSessionState(sid) : makeSessionState();
|
|
27560
|
+
const sessLtmTokens = sid ? sessState.ltmTokens : ltmTokensFallback;
|
|
27350
27561
|
const usable = Math.max(
|
|
27351
27562
|
0,
|
|
27352
|
-
contextLimit - outputReserved - overhead -
|
|
27563
|
+
contextLimit - outputReserved - overhead - sessLtmTokens
|
|
27353
27564
|
);
|
|
27354
27565
|
const distilledBudget = Math.floor(usable * cfg.budget.distilled);
|
|
27355
|
-
|
|
27356
|
-
const sid = input.sessionID ?? input.messages[0]?.info.sessionID;
|
|
27357
|
-
const sessState = sid ? getSessionState(sid) : makeSessionState();
|
|
27566
|
+
let rawBudget = Math.floor(usable * cfg.budget.raw);
|
|
27358
27567
|
let effectiveMinLayer = sessState.forceMinLayer;
|
|
27359
27568
|
sessState.forceMinLayer = 0;
|
|
27360
27569
|
if (sid && effectiveMinLayer > 0) saveForceMinLayer(sid, 0);
|
|
@@ -27367,17 +27576,26 @@ function transformInner(input) {
|
|
|
27367
27576
|
return result.totalTokens * UNCALIBRATED_SAFETY <= maxInput;
|
|
27368
27577
|
}
|
|
27369
27578
|
if (calibrated && sessState.lastLayer >= 1 && input.messages.length >= sessState.lastKnownMessageCount) {
|
|
27579
|
+
effectiveMinLayer = Math.max(effectiveMinLayer, sessState.lastLayer);
|
|
27580
|
+
}
|
|
27581
|
+
const postIdleCompact = sessState.postIdleCompact;
|
|
27582
|
+
if (postIdleCompact) {
|
|
27583
|
+
sessState.postIdleCompact = false;
|
|
27370
27584
|
effectiveMinLayer = Math.max(effectiveMinLayer, 1);
|
|
27585
|
+
rawBudget = Math.floor(usable * 0.2);
|
|
27586
|
+
info(
|
|
27587
|
+
`post-idle compact: session=${sid} rawBudget=${rawBudget} (${Math.floor(usable * cfg.budget.raw)}\u2192${rawBudget})`
|
|
27588
|
+
);
|
|
27371
27589
|
}
|
|
27372
27590
|
let expectedInput;
|
|
27373
27591
|
if (calibrated) {
|
|
27374
27592
|
const newMessages = sessState.lastWindowMessageIDs.size > 0 ? input.messages.filter((m) => !sessState.lastWindowMessageIDs.has(m.info.id)) : input.messages.slice(-Math.max(0, input.messages.length - sessState.lastKnownMessageCount));
|
|
27375
27593
|
const newMsgTokens = newMessages.reduce((s, m) => s + estimateMessage(m), 0);
|
|
27376
|
-
const ltmDelta =
|
|
27594
|
+
const ltmDelta = sessLtmTokens - sessState.lastKnownLtm;
|
|
27377
27595
|
expectedInput = sessState.lastKnownInput + newMsgTokens + ltmDelta;
|
|
27378
27596
|
} else {
|
|
27379
27597
|
const messageTokens = input.messages.reduce((s, m) => s + estimateMessage(m), 0);
|
|
27380
|
-
expectedInput = messageTokens + overhead +
|
|
27598
|
+
expectedInput = messageTokens + overhead + sessLtmTokens;
|
|
27381
27599
|
}
|
|
27382
27600
|
const layer0Input = calibrated ? expectedInput : expectedInput * UNCALIBRATED_SAFETY;
|
|
27383
27601
|
let layer0Ceiling = maxLayer0Tokens > 0 ? Math.min(maxInput, maxLayer0Tokens) : maxInput;
|
|
@@ -27385,7 +27603,7 @@ function transformInner(input) {
|
|
|
27385
27603
|
layer0Ceiling = Math.floor(layer0Ceiling * 0.7);
|
|
27386
27604
|
}
|
|
27387
27605
|
if (effectiveMinLayer === 0 && layer0Input <= layer0Ceiling) {
|
|
27388
|
-
const messageTokens = calibrated ? expectedInput - (
|
|
27606
|
+
const messageTokens = calibrated ? expectedInput - (sessLtmTokens - sessState.lastKnownLtm) : expectedInput - overhead - sessLtmTokens;
|
|
27389
27607
|
return {
|
|
27390
27608
|
messages: input.messages,
|
|
27391
27609
|
layer: 0,
|
|
@@ -27399,7 +27617,7 @@ function transformInner(input) {
|
|
|
27399
27617
|
}
|
|
27400
27618
|
const turnStart = currentTurnStart(input.messages);
|
|
27401
27619
|
const dedupMessages = deduplicateToolOutputs(input.messages, turnStart);
|
|
27402
|
-
const distillations = sid ?
|
|
27620
|
+
const distillations = sid ? loadDistillationsCached(input.projectPath, sid, input.messages, sessState) : [];
|
|
27403
27621
|
const cached2 = sid ? distilledPrefixCached(distillations, sid, sessState) : (() => {
|
|
27404
27622
|
const msgs = distilledPrefix(distillations);
|
|
27405
27623
|
return { messages: msgs, tokens: msgs.reduce((sum, m) => sum + estimateMessage(m), 0) };
|
|
@@ -27512,12 +27730,27 @@ function transform2(input) {
|
|
|
27512
27730
|
state.lastLayer = result.layer;
|
|
27513
27731
|
state.lastWindowMessageIDs = new Set(result.messages.map((m) => m.info.id));
|
|
27514
27732
|
state.lastTurnAt = Date.now();
|
|
27515
|
-
const
|
|
27516
|
-
|
|
27733
|
+
const prefixFingerprint = result.messages.slice(0, 5).map((m) => {
|
|
27734
|
+
const text4 = m.parts.map((p3) => {
|
|
27735
|
+
if (isTextPart(p3)) return p3.text?.slice(0, 40) ?? "";
|
|
27736
|
+
if (isReasoningPart(p3)) return p3.text?.slice(0, 40) ?? "";
|
|
27737
|
+
return p3.type;
|
|
27738
|
+
}).join("|");
|
|
27739
|
+
return `${m.info.role}:${text4.slice(0, 60)}`;
|
|
27740
|
+
}).join(",");
|
|
27741
|
+
const prefixHash = `${result.layer}:${prefixFingerprint}`;
|
|
27742
|
+
state.transformCount++;
|
|
27517
27743
|
if (state.lastPrefixHash && state.lastPrefixHash !== prefixHash) {
|
|
27744
|
+
state.bustCount++;
|
|
27745
|
+
const rate = state.bustCount / state.transformCount;
|
|
27518
27746
|
info(
|
|
27519
|
-
`cache-bust
|
|
27747
|
+
`cache-bust #${state.bustCount} (${(rate * 100).toFixed(0)}%): session=${sid} layer=${state.lastLayer}\u2192${result.layer} msgs=${state.lastTransformedCount}\u2192${result.messages.length} prefix=${state.lastPrefixHash.slice(0, 30)}\u2192${prefixHash.slice(0, 30)}`
|
|
27520
27748
|
);
|
|
27749
|
+
if (state.transformCount >= 20 && rate > 0.5) {
|
|
27750
|
+
warn(
|
|
27751
|
+
`HIGH BUST RATE: session ${sid} has ${(rate * 100).toFixed(0)}% bust rate (${state.bustCount}/${state.transformCount} transforms)`
|
|
27752
|
+
);
|
|
27753
|
+
}
|
|
27521
27754
|
}
|
|
27522
27755
|
state.lastPrefixHash = prefixHash;
|
|
27523
27756
|
if (result.layer >= 2) {
|
|
@@ -27614,25 +27847,50 @@ function isWorkerSession(sessionID) {
|
|
|
27614
27847
|
}
|
|
27615
27848
|
|
|
27616
27849
|
// src/distillation.ts
|
|
27850
|
+
/**
 * Compression metric for a distillation: distilled token count divided by the
 * square root of the source token count.
 *
 * @param {number} distilledTokens - Tokens in the distilled output.
 * @param {number} sourceTokens - Tokens in the source messages.
 * @returns {number} The ratio, or 0 when `sourceTokens` is zero or negative.
 */
function compressionRatio(distilledTokens, sourceTokens) {
  return sourceTokens <= 0 ? 0 : distilledTokens / Math.sqrt(sourceTokens);
}
|
|
27617
27854
|
/**
 * Partition a message list into segments of roughly `maxSegment` messages.
 *
 * @param {Array} messages - Messages to partition.
 * @param {number} maxSegment - Target maximum segment length.
 * @returns {Array<Array>} A single-element list holding `messages` itself when
 *   it already fits; otherwise whatever `splitSegments` returns.
 */
function detectSegments(messages, maxSegment) {
  return messages.length <= maxSegment
    ? [messages]
    : splitSegments(messages, maxSegment);
}
|
|
27858
|
+
/** Smallest number of messages allowed on either side of a gap-based split. */
const MIN_SEGMENT = 3;
/** A gap qualifies as a split point only if it is at least this many times the median gap. */
const GAP_THRESHOLD_MULTIPLIER = 3;

/**
 * Recursively split `messages` into segments.
 *
 * Each level picks a split index via `findSplitIndex`, recurses on the left
 * half, and either recurses on the right half or — when the right half has
 * fewer than MIN_SEGMENT messages — appends it to the last left segment, so a
 * segment may exceed `maxSegment` by up to MIN_SEGMENT - 1 messages.
 *
 * A `maxSegment` below 1 (or NaN) is treated as 1. Without that clamp the
 * fallback split index could be 0, leaving the left half empty and the right
 * half identical to the input, which recursed without end.
 *
 * @param {Array<{created_at: number}>} messages - Messages to partition.
 * @param {number} maxSegment - Target maximum segment length.
 * @returns {Array<Array>} Segments that, concatenated, reproduce `messages`.
 */
function splitSegments(messages, maxSegment) {
  const limit = maxSegment >= 1 ? maxSegment : 1;
  if (messages.length <= limit) return [messages];

  const splitIdx = findSplitIndex(messages, limit);
  const head = messages.slice(0, splitIdx);
  const tail = messages.slice(splitIdx);

  const segments = splitSegments(head, limit);
  if (tail.length < MIN_SEGMENT) {
    // Too short to stand alone: fold into the preceding segment.
    segments[segments.length - 1].push(...tail);
  } else {
    segments.push(...splitSegments(tail, limit));
  }
  return segments;
}

/**
 * Choose where to cut `messages`.
 *
 * Looks for the largest `created_at` gap between adjacent messages that leaves
 * at least MIN_SEGMENT messages on both sides. That position is returned only
 * if the gap is at least GAP_THRESHOLD_MULTIPLIER times the median gap;
 * otherwise the fixed position `maxSegment` is returned.
 *
 * @param {Array<{created_at: number}>} messages - Messages under consideration.
 * @param {number} maxSegment - Fallback split index.
 * @returns {number} Index of the first message of the right-hand part.
 */
function findSplitIndex(messages, maxSegment) {
  const gaps = [];
  for (let i = 1; i < messages.length; i++) {
    gaps.push({
      index: i,
      gap: messages[i].created_at - messages[i - 1].created_at
    });
  }
  if (gaps.length === 0) return maxSegment;

  // Upper median of the adjacent gaps.
  const ordered = gaps.map((g) => g.gap).sort((a, b) => a - b);
  const medianGap = ordered[Math.floor(ordered.length / 2)];

  let best = { index: -1, gap: 0 };
  for (const candidate of gaps) {
    const leavesRoom =
      candidate.index >= MIN_SEGMENT &&
      messages.length - candidate.index >= MIN_SEGMENT;
    if (leavesRoom && candidate.gap > best.gap) {
      best = candidate;
    }
  }

  if (best.index > 0 && best.gap >= medianGap * GAP_THRESHOLD_MULTIPLIER) {
    return best.index;
  }
  return maxSegment;
}
|
|
27637
27895
|
function formatTime(ms) {
|
|
27638
27896
|
const d = new Date(ms);
|
|
@@ -27711,7 +27969,7 @@ function parseSourceIds(raw) {
|
|
|
27711
27969
|
}
|
|
27712
27970
|
function loadForSession(projectPath, sessionID, includeArchived = false) {
|
|
27713
27971
|
const pid = ensureProject(projectPath);
|
|
27714
|
-
const sql = includeArchived ? "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? ORDER BY created_at ASC" : "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? AND archived = 0 ORDER BY created_at ASC";
|
|
27972
|
+
const sql = includeArchived ? "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at, r_compression, c_norm FROM distillations WHERE project_id = ? AND session_id = ? ORDER BY created_at ASC" : "SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at, r_compression, c_norm FROM distillations WHERE project_id = ? AND session_id = ? AND archived = 0 ORDER BY created_at ASC";
|
|
27715
27973
|
const rows = db().query(sql).all(pid, sessionID);
|
|
27716
27974
|
return rows.map((r) => ({
|
|
27717
27975
|
...r,
|
|
@@ -27724,8 +27982,8 @@ function storeDistillation(input) {
|
|
|
27724
27982
|
const sourceJson = JSON.stringify(input.sourceIDs);
|
|
27725
27983
|
const tokens = Math.ceil(input.observations.length / 3);
|
|
27726
27984
|
db().query(
|
|
27727
|
-
`INSERT INTO distillations (id, project_id, session_id, narrative, facts, observations, source_ids, generation, token_count, created_at)
|
|
27728
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
27985
|
+
`INSERT INTO distillations (id, project_id, session_id, narrative, facts, observations, source_ids, generation, token_count, created_at, r_compression, c_norm)
|
|
27986
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`
|
|
27729
27987
|
).run(
|
|
27730
27988
|
id,
|
|
27731
27989
|
pid,
|
|
@@ -27738,7 +27996,9 @@ function storeDistillation(input) {
|
|
|
27738
27996
|
sourceJson,
|
|
27739
27997
|
input.generation,
|
|
27740
27998
|
tokens,
|
|
27741
|
-
Date.now()
|
|
27999
|
+
Date.now(),
|
|
28000
|
+
input.rCompression ?? null,
|
|
28001
|
+
input.cNorm ?? null
|
|
27742
28002
|
);
|
|
27743
28003
|
return id;
|
|
27744
28004
|
}
|
|
@@ -27751,7 +28011,7 @@ function gen0Count(projectPath, sessionID) {
|
|
|
27751
28011
|
function loadGen0(projectPath, sessionID) {
|
|
27752
28012
|
const pid = ensureProject(projectPath);
|
|
27753
28013
|
const rows = db().query(
|
|
27754
|
-
"SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at FROM distillations WHERE project_id = ? AND session_id = ? AND generation = 0 AND archived = 0 ORDER BY created_at ASC"
|
|
28014
|
+
"SELECT id, project_id, session_id, observations, source_ids, generation, token_count, created_at, r_compression, c_norm FROM distillations WHERE project_id = ? AND session_id = ? AND generation = 0 AND archived = 0 ORDER BY created_at ASC"
|
|
27755
28015
|
).all(pid, sessionID);
|
|
27756
28016
|
return rows.map((r) => ({
|
|
27757
28017
|
...r,
|
|
@@ -27818,7 +28078,8 @@ async function run(input) {
|
|
|
27818
28078
|
projectPath: input.projectPath,
|
|
27819
28079
|
sessionID: input.sessionID,
|
|
27820
28080
|
messages: segment,
|
|
27821
|
-
model: input.model
|
|
28081
|
+
model: input.model,
|
|
28082
|
+
urgent: input.urgent
|
|
27822
28083
|
});
|
|
27823
28084
|
if (result) {
|
|
27824
28085
|
distilled += segment.length;
|
|
@@ -27826,12 +28087,13 @@ async function run(input) {
|
|
|
27826
28087
|
}
|
|
27827
28088
|
}
|
|
27828
28089
|
}
|
|
27829
|
-
if (gen0Count(input.projectPath, input.sessionID) >= cfg.distillation.metaThreshold) {
|
|
28090
|
+
if (!input.skipMeta && gen0Count(input.projectPath, input.sessionID) >= cfg.distillation.metaThreshold) {
|
|
27830
28091
|
await metaDistill({
|
|
27831
28092
|
llm: input.llm,
|
|
27832
28093
|
projectPath: input.projectPath,
|
|
27833
28094
|
sessionID: input.sessionID,
|
|
27834
|
-
model: input.model
|
|
28095
|
+
model: input.model,
|
|
28096
|
+
urgent: input.urgent
|
|
27835
28097
|
});
|
|
27836
28098
|
rounds++;
|
|
27837
28099
|
}
|
|
@@ -27857,22 +28119,46 @@ async function distillSegment(input) {
|
|
|
27857
28119
|
const responseText = await input.llm.prompt(
|
|
27858
28120
|
DISTILLATION_SYSTEM,
|
|
27859
28121
|
userContent,
|
|
27860
|
-
{ model, workerID: "lore-distill" }
|
|
28122
|
+
{ model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID }
|
|
27861
28123
|
);
|
|
27862
28124
|
if (!responseText) return null;
|
|
27863
28125
|
const result = parseDistillationResult(responseText);
|
|
27864
28126
|
if (!result) return null;
|
|
28127
|
+
const distilledTokens = Math.ceil(result.observations.length / 3);
|
|
28128
|
+
const sourceTokens = input.messages.reduce((sum, m) => sum + m.tokens, 0);
|
|
28129
|
+
const rComp = compressionRatio(distilledTokens, sourceTokens);
|
|
28130
|
+
const cNorm = temporalCnorm(input.messages.map((m) => m.created_at));
|
|
27865
28131
|
const distillId = storeDistillation({
|
|
27866
28132
|
projectPath: input.projectPath,
|
|
27867
28133
|
sessionID: input.sessionID,
|
|
27868
28134
|
observations: result.observations,
|
|
27869
28135
|
sourceIDs: input.messages.map((m) => m.id),
|
|
27870
|
-
generation: 0
|
|
28136
|
+
generation: 0,
|
|
28137
|
+
rCompression: rComp,
|
|
28138
|
+
cNorm
|
|
27871
28139
|
});
|
|
27872
28140
|
markDistilled(input.messages.map((m) => m.id));
|
|
28141
|
+
info(
|
|
28142
|
+
`distill segment: ${input.messages.length} msgs, ${sourceTokens}\u2192${distilledTokens} tokens, R=${rComp.toFixed(2)}, C_norm=${cNorm.toFixed(3)}`
|
|
28143
|
+
);
|
|
27873
28144
|
if (isAvailable()) {
|
|
27874
28145
|
embedDistillation(distillId, result.observations);
|
|
27875
28146
|
}
|
|
28147
|
+
if (config2().knowledge.enabled) {
|
|
28148
|
+
for (const pat of extractPatterns(result.observations)) {
|
|
28149
|
+
try {
|
|
28150
|
+
create({
|
|
28151
|
+
projectPath: input.projectPath,
|
|
28152
|
+
category: pat.category,
|
|
28153
|
+
title: pat.title,
|
|
28154
|
+
content: pat.content,
|
|
28155
|
+
session: input.sessionID,
|
|
28156
|
+
scope: "project"
|
|
28157
|
+
});
|
|
28158
|
+
} catch {
|
|
28159
|
+
}
|
|
28160
|
+
}
|
|
28161
|
+
}
|
|
27876
28162
|
return result;
|
|
27877
28163
|
}
|
|
27878
28164
|
async function metaDistill(input) {
|
|
@@ -27888,7 +28174,7 @@ async function metaDistill(input) {
|
|
|
27888
28174
|
const responseText = await input.llm.prompt(
|
|
27889
28175
|
RECURSIVE_SYSTEM,
|
|
27890
28176
|
userContent,
|
|
27891
|
-
{ model, workerID: "lore-distill" }
|
|
28177
|
+
{ model, workerID: "lore-distill", thinking: false, urgent: input.urgent, sessionID: input.sessionID }
|
|
27892
28178
|
);
|
|
27893
28179
|
if (!responseText) return null;
|
|
27894
28180
|
const result = parseDistillationResult(responseText);
|
|
@@ -27917,8 +28203,54 @@ async function metaDistill(input) {
|
|
|
27917
28203
|
if (isAvailable()) {
|
|
27918
28204
|
embedDistillation(metaId, result.observations);
|
|
27919
28205
|
}
|
|
28206
|
+
if (config2().knowledge.enabled) {
|
|
28207
|
+
for (const pat of extractPatterns(result.observations)) {
|
|
28208
|
+
try {
|
|
28209
|
+
create({
|
|
28210
|
+
projectPath: input.projectPath,
|
|
28211
|
+
category: pat.category,
|
|
28212
|
+
title: pat.title,
|
|
28213
|
+
content: pat.content,
|
|
28214
|
+
session: input.sessionID,
|
|
28215
|
+
scope: "project"
|
|
28216
|
+
});
|
|
28217
|
+
} catch {
|
|
28218
|
+
}
|
|
28219
|
+
}
|
|
28220
|
+
}
|
|
27920
28221
|
return result;
|
|
27921
28222
|
}
|
|
28223
|
+
/**
 * Fill in `r_compression` and `c_norm` for distillation rows that lack them.
 *
 * For every row whose `r_compression` is NULL, the source messages listed in
 * `source_ids` are looked up in `temporal_messages`; their token counts and
 * timestamps feed `compressionRatio` and `temporalCnorm`. Rows with no source
 * ids, or whose sources are no longer present, are skipped.
 *
 * @returns {number} How many rows were updated.
 */
function backfillMetrics() {
  const pending = db().query(
    "SELECT id, source_ids, token_count FROM distillations WHERE r_compression IS NULL"
  ).all();
  if (!pending.length) return 0;

  const writeMetrics = db().prepare(
    "UPDATE distillations SET r_compression = ?, c_norm = ? WHERE id = ?"
  );

  let updated = 0;
  for (const row of pending) {
    const sourceIds = parseSourceIds(row.source_ids);
    if (!sourceIds.length) continue;

    // One bound parameter per source id.
    const placeholders = sourceIds.map(() => "?").join(",");
    const sources = db().query(
      `SELECT tokens, created_at FROM temporal_messages WHERE id IN (${placeholders})`
    ).all(...sourceIds);
    if (!sources.length) continue;

    let sourceTokens = 0;
    const timestamps = [];
    for (const source of sources) {
      sourceTokens += source.tokens;
      timestamps.push(source.created_at);
    }

    const rComp = compressionRatio(row.token_count, sourceTokens);
    const cNorm = temporalCnorm(timestamps);
    writeMetrics.run(rComp, cNorm, row.id);
    updated += 1;
  }

  if (updated > 0) {
    info(
      `backfilled metrics for ${updated} distillations (${pending.length - updated} skipped \u2014 missing sources)`
    );
  }
  return updated;
}
|
|
27922
28254
|
|
|
27923
28255
|
// src/curator.ts
|
|
27924
28256
|
var curator_exports = {};
|
|
@@ -27964,7 +28296,7 @@ async function run2(input) {
|
|
|
27964
28296
|
const responseText = await input.llm.prompt(
|
|
27965
28297
|
CURATOR_SYSTEM,
|
|
27966
28298
|
userContent,
|
|
27967
|
-
{ model, workerID: "lore-curator" }
|
|
28299
|
+
{ model, workerID: "lore-curator", thinking: false, sessionID: input.sessionID }
|
|
27968
28300
|
);
|
|
27969
28301
|
if (!responseText) return { created: 0, updated: 0, deleted: 0 };
|
|
27970
28302
|
const ops = parseOps(responseText);
|
|
@@ -28034,7 +28366,7 @@ async function consolidate(input) {
|
|
|
28034
28366
|
const responseText = await input.llm.prompt(
|
|
28035
28367
|
CONSOLIDATION_SYSTEM,
|
|
28036
28368
|
userContent,
|
|
28037
|
-
{ model, workerID: "lore-curator" }
|
|
28369
|
+
{ model, workerID: "lore-curator", thinking: false, sessionID: input.sessionID }
|
|
28038
28370
|
);
|
|
28039
28371
|
if (!responseText) return { updated: 0, deleted: 0 };
|
|
28040
28372
|
const ops = parseOps(responseText);
|
|
@@ -28060,12 +28392,39 @@ async function consolidate(input) {
|
|
|
28060
28392
|
}
|
|
28061
28393
|
|
|
28062
28394
|
// src/recall.ts
|
|
28395
|
+
/**
 * Return the searchable text of a tagged recall result.
 *
 * Knowledge entries (local and cross-project) yield "title content",
 * distillations yield their observations, temporal messages yield their
 * content, and lat sections yield "heading content".
 *
 * @param {{source: string, item: object}} tagged Tagged result.
 * @returns {string|undefined} The text, or undefined for an unrecognised source.
 */
function getTaggedText(tagged) {
  const kind = tagged.source;
  if (kind === "knowledge" || kind === "cross-knowledge") {
    return `${tagged.item.title} ${tagged.item.content}`;
  }
  if (kind === "distillation") {
    return tagged.item.observations;
  }
  if (kind === "temporal") {
    return tagged.item.content;
  }
  if (kind === "lat-section") {
    return `${tagged.item.heading} ${tagged.item.content}`;
  }
  return undefined;
}
|
|
28408
|
+
/**
 * Build the de-duplication key for a tagged recall result: a short prefix
 * identifying the source kind, a colon, and the item's id.
 *
 * @param {{source: string, item: {id: *}}} r Tagged result.
 * @returns {string|undefined} The key, or undefined for an unrecognised source.
 */
function taggedResultKey(r) {
  let prefix;
  switch (r.source) {
    case "knowledge":
      prefix = "k";
      break;
    case "cross-knowledge":
      prefix = "xk";
      break;
    case "distillation":
      prefix = "d";
      break;
    case "temporal":
      prefix = "t";
      break;
    case "lat-section":
      prefix = "lat";
      break;
    default:
      return undefined;
  }
  return `${prefix}:${r.item.id}`;
}
|
|
28063
28422
|
/**
 * Substring (LIKE) search over distillation observations.
 *
 * The query is lower-cased and split on whitespace; words of a single
 * character are dropped. Every remaining word must occur in the lower-cased
 * observations. Results are restricted to the project and, when
 * `input.sessionID` is truthy, to that session, newest first.
 *
 * @param {{query: string, pid: *, sessionID?: *, limit: number}} input
 * @returns {Array<object>} Matching rows (id, observations, generation,
 *   created_at, session_id, c_norm); empty when no usable word remains.
 */
function searchDistillationsLike(input) {
  const words = [];
  for (const word of input.query.toLowerCase().split(/\s+/)) {
    if (word.length > 1) words.push(word);
  }
  if (!words.length) return [];

  // Clauses and their bound values are collected side by side so the
  // placeholder order always matches the parameter order.
  const clauses = ["project_id = ?"];
  const params = [input.pid];
  if (input.sessionID) {
    clauses.push("session_id = ?");
    params.push(input.sessionID);
  }
  for (const word of words) {
    clauses.push("LOWER(observations) LIKE ?");
    params.push(`%${word}%`);
  }
  params.push(input.limit);

  const sql = `SELECT id, observations, generation, created_at, session_id, c_norm FROM distillations WHERE ${clauses.join(" AND ")} ORDER BY created_at DESC LIMIT ?`;
  return db().query(sql).all(...params);
}
|
|
@@ -28074,12 +28433,12 @@ function searchDistillationsScored(input) {
|
|
|
28074
28433
|
const limit = input.limit ?? 10;
|
|
28075
28434
|
const q = ftsQuery(input.query);
|
|
28076
28435
|
if (q === EMPTY_QUERY) return [];
|
|
28077
|
-
const ftsSQL = input.sessionID ? `SELECT d.id, d.observations, d.generation, d.created_at, d.session_id, rank
|
|
28436
|
+
const ftsSQL = input.sessionID ? `SELECT d.id, d.observations, d.generation, d.created_at, d.session_id, d.c_norm, rank
|
|
28078
28437
|
FROM distillation_fts f
|
|
28079
28438
|
CROSS JOIN distillations d ON d.rowid = f.rowid
|
|
28080
28439
|
WHERE distillation_fts MATCH ?
|
|
28081
28440
|
AND d.project_id = ? AND d.session_id = ?
|
|
28082
|
-
ORDER BY rank LIMIT ?` : `SELECT d.id, d.observations, d.generation, d.created_at, d.session_id, rank
|
|
28441
|
+
ORDER BY rank LIMIT ?` : `SELECT d.id, d.observations, d.generation, d.created_at, d.session_id, d.c_norm, rank
|
|
28083
28442
|
FROM distillation_fts f
|
|
28084
28443
|
CROSS JOIN distillations d ON d.rowid = f.rowid
|
|
28085
28444
|
WHERE distillation_fts MATCH ?
|
|
@@ -28164,7 +28523,7 @@ async function runRecall(input) {
|
|
|
28164
28523
|
let queries = [query];
|
|
28165
28524
|
if (searchConfig?.queryExpansion && llm) {
|
|
28166
28525
|
try {
|
|
28167
|
-
queries = await expandQuery(llm, query);
|
|
28526
|
+
queries = await expandQuery(llm, query, void 0, sessionID);
|
|
28168
28527
|
} catch (err) {
|
|
28169
28528
|
info("recall: query expansion failed, using original:", err);
|
|
28170
28529
|
}
|
|
@@ -28234,6 +28593,18 @@ async function runRecall(input) {
|
|
|
28234
28593
|
key: (r) => `t:${r.item.id}`
|
|
28235
28594
|
}
|
|
28236
28595
|
);
|
|
28596
|
+
if (temporalResults.length > 0) {
|
|
28597
|
+
const recencySorted = [...temporalResults].sort(
|
|
28598
|
+
(a, b) => b.created_at - a.created_at
|
|
28599
|
+
);
|
|
28600
|
+
allRrfLists.push({
|
|
28601
|
+
items: recencySorted.map((item) => ({
|
|
28602
|
+
source: "temporal",
|
|
28603
|
+
item
|
|
28604
|
+
})),
|
|
28605
|
+
key: (r) => `t:${r.item.id}`
|
|
28606
|
+
});
|
|
28607
|
+
}
|
|
28237
28608
|
}
|
|
28238
28609
|
if (isAvailable() && scope !== "session") {
|
|
28239
28610
|
try {
|
|
@@ -28261,7 +28632,7 @@ async function runRecall(input) {
|
|
|
28261
28632
|
const distVectorHits = vectorSearchDistillations(queryVec, limit);
|
|
28262
28633
|
const distVectorTagged = distVectorHits.map((hit) => {
|
|
28263
28634
|
const row = db().query(
|
|
28264
|
-
"SELECT id, observations, generation, created_at, session_id FROM distillations WHERE id = ?"
|
|
28635
|
+
"SELECT id, observations, generation, created_at, session_id, c_norm FROM distillations WHERE id = ?"
|
|
28265
28636
|
).get(hit.id);
|
|
28266
28637
|
if (!row) return null;
|
|
28267
28638
|
return {
|
|
@@ -28324,6 +28695,57 @@ async function runRecall(input) {
|
|
|
28324
28695
|
info("recall: cross-project knowledge search failed:", err);
|
|
28325
28696
|
}
|
|
28326
28697
|
}
|
|
28698
|
+
{
|
|
28699
|
+
const distillationCandidates = [];
|
|
28700
|
+
for (const list4 of allRrfLists) {
|
|
28701
|
+
for (const item of list4.items) {
|
|
28702
|
+
if (item.source !== "distillation") continue;
|
|
28703
|
+
const key = `d:${item.item.id}`;
|
|
28704
|
+
const d = item.item;
|
|
28705
|
+
const cNorm = d.c_norm ?? 0;
|
|
28706
|
+
const ageDays = Math.min(
|
|
28707
|
+
(Date.now() - d.created_at) / 864e5,
|
|
28708
|
+
90
|
|
28709
|
+
);
|
|
28710
|
+
const score = cNorm + ageDays / 90 * 0.1;
|
|
28711
|
+
distillationCandidates.push({ tagged: item, key, qualityScore: score });
|
|
28712
|
+
}
|
|
28713
|
+
}
|
|
28714
|
+
if (distillationCandidates.length > 1) {
|
|
28715
|
+
const seen = /* @__PURE__ */ new Set();
|
|
28716
|
+
const unique = distillationCandidates.filter((c) => {
|
|
28717
|
+
if (seen.has(c.key)) return false;
|
|
28718
|
+
seen.add(c.key);
|
|
28719
|
+
return true;
|
|
28720
|
+
});
|
|
28721
|
+
unique.sort((a, b) => a.qualityScore - b.qualityScore);
|
|
28722
|
+
allRrfLists.push({
|
|
28723
|
+
items: unique.map((c) => c.tagged),
|
|
28724
|
+
key: (r) => `d:${r.item.id}`
|
|
28725
|
+
});
|
|
28726
|
+
}
|
|
28727
|
+
}
|
|
28728
|
+
if (filterTerms(query).length > 0 && allRrfLists.length > 0) {
|
|
28729
|
+
const allCandidates = /* @__PURE__ */ new Map();
|
|
28730
|
+
for (const list4 of allRrfLists) {
|
|
28731
|
+
for (const item of list4.items) {
|
|
28732
|
+
const key = list4.key(item);
|
|
28733
|
+
if (!allCandidates.has(key)) allCandidates.set(key, item);
|
|
28734
|
+
}
|
|
28735
|
+
}
|
|
28736
|
+
const candidateEntries = [...allCandidates.entries()];
|
|
28737
|
+
const exactRanked = exactTermMatchRank(
|
|
28738
|
+
candidateEntries,
|
|
28739
|
+
([, tagged]) => getTaggedText(tagged),
|
|
28740
|
+
query
|
|
28741
|
+
);
|
|
28742
|
+
if (exactRanked.length) {
|
|
28743
|
+
allRrfLists.push({
|
|
28744
|
+
items: exactRanked.map(([, item]) => item),
|
|
28745
|
+
key: taggedResultKey
|
|
28746
|
+
});
|
|
28747
|
+
}
|
|
28748
|
+
}
|
|
28327
28749
|
const fused = reciprocalRankFusion(allRrfLists);
|
|
28328
28750
|
return formatFusedResults(fused, 20);
|
|
28329
28751
|
}
|
|
@@ -28335,7 +28757,7 @@ var RECALL_PARAM_DESCRIPTIONS = {
|
|
|
28335
28757
|
|
|
28336
28758
|
// src/agents-file.ts
|
|
28337
28759
|
import { existsSync as existsSync3, readFileSync as readFileSync3, writeFileSync, mkdirSync as mkdirSync2 } from "fs";
|
|
28338
|
-
import { dirname as dirname2 } from "path";
|
|
28760
|
+
import { dirname as dirname2, join as join5 } from "path";
|
|
28339
28761
|
var LORE_SECTION_START = "<!-- This section is maintained by the coding agent via lore (https://github.com/BYK/loreai) -->";
|
|
28340
28762
|
var LORE_SECTION_END = "<!-- End lore-managed section -->";
|
|
28341
28763
|
var ALL_START_MARKERS = [
|
|
@@ -28344,6 +28766,8 @@ var ALL_START_MARKERS = [
|
|
|
28344
28766
|
"<!-- This section is maintained by the coding agent via lore (https://github.com/BYK/opencode-lore) -->",
|
|
28345
28767
|
"<!-- This section is auto-maintained by lore (https://github.com/BYK/opencode-lore) -->"
|
|
28346
28768
|
];
|
|
28769
|
+
var LORE_FILE = ".lore.md";
|
|
28770
|
+
var LORE_FILE_HEADER = "<!-- Managed by lore (https://github.com/BYK/loreai) \u2014 manual edits are imported on next session. -->";
|
|
28347
28771
|
var UUID_RE2 = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/;
|
|
28348
28772
|
var MARKER_RE = /^<!--\s*lore:([0-9a-f-]+)\s*-->$/;
|
|
28349
28773
|
function splitFile(fileContent) {
|
|
@@ -28453,8 +28877,9 @@ function buildSection(projectPath) {
|
|
|
28453
28877
|
return out.join("\n");
|
|
28454
28878
|
}
|
|
28455
28879
|
function exportToFile(input) {
|
|
28456
|
-
|
|
28457
|
-
const
|
|
28880
|
+
exportLoreFile(input.projectPath);
|
|
28881
|
+
const pointerBody = "\n## Long-term Knowledge\n\nFor long-term knowledge entries managed by [lore](https://github.com/BYK/loreai) (gotchas, patterns, decisions, architecture), see [`.lore.md`](.lore.md) in the project root.\n";
|
|
28882
|
+
const newSection = LORE_SECTION_START + pointerBody + LORE_SECTION_END + "\n";
|
|
28458
28883
|
let fileContent = "";
|
|
28459
28884
|
if (existsSync3(input.filePath)) {
|
|
28460
28885
|
fileContent = readFileSync3(input.filePath, "utf8");
|
|
@@ -28478,15 +28903,9 @@ function shouldImport(input) {
|
|
|
28478
28903
|
const expected = buildSection(input.projectPath);
|
|
28479
28904
|
return hashSection(section) !== hashSection(expected);
|
|
28480
28905
|
}
|
|
28481
|
-
function
|
|
28482
|
-
if (!existsSync3(input.filePath)) return;
|
|
28483
|
-
const fileContent = readFileSync3(input.filePath, "utf8");
|
|
28484
|
-
const { section, before } = splitFile(fileContent);
|
|
28485
|
-
const textToParse = section ?? fileContent;
|
|
28486
|
-
const fileEntries = parseEntriesFromSection(textToParse);
|
|
28487
|
-
if (!fileEntries.length) return;
|
|
28906
|
+
function _importEntries(entries, projectPath) {
|
|
28488
28907
|
const seenIds = /* @__PURE__ */ new Set();
|
|
28489
|
-
for (const entry of
|
|
28908
|
+
for (const entry of entries) {
|
|
28490
28909
|
if (entry.id !== null) {
|
|
28491
28910
|
if (seenIds.has(entry.id)) continue;
|
|
28492
28911
|
seenIds.add(entry.id);
|
|
@@ -28497,7 +28916,7 @@ function importFromFile(input) {
|
|
|
28497
28916
|
}
|
|
28498
28917
|
} else {
|
|
28499
28918
|
create({
|
|
28500
|
-
projectPath
|
|
28919
|
+
projectPath,
|
|
28501
28920
|
category: entry.category,
|
|
28502
28921
|
title: entry.title,
|
|
28503
28922
|
content: entry.content,
|
|
@@ -28507,13 +28926,13 @@ function importFromFile(input) {
|
|
|
28507
28926
|
});
|
|
28508
28927
|
}
|
|
28509
28928
|
} else {
|
|
28510
|
-
const existing = forProject(
|
|
28929
|
+
const existing = forProject(projectPath, true);
|
|
28511
28930
|
const titleMatch = existing.find(
|
|
28512
28931
|
(e) => e.title.toLowerCase() === entry.title.toLowerCase()
|
|
28513
28932
|
);
|
|
28514
28933
|
if (!titleMatch) {
|
|
28515
28934
|
create({
|
|
28516
|
-
projectPath
|
|
28935
|
+
projectPath,
|
|
28517
28936
|
category: entry.category,
|
|
28518
28937
|
title: entry.title,
|
|
28519
28938
|
content: entry.content,
|
|
@@ -28524,16 +28943,50 @@ function importFromFile(input) {
|
|
|
28524
28943
|
}
|
|
28525
28944
|
}
|
|
28526
28945
|
}
|
|
28946
|
+
/**
 * Import knowledge entries from an agents file into the project.
 *
 * Does nothing when the file is missing or contains no parsable entries.
 * When the file has a lore-managed section only that section is parsed;
 * otherwise the whole file is parsed.
 *
 * @param {{filePath: string, projectPath: string}} input
 * @returns {void}
 */
function importFromFile(input) {
  if (existsSync3(input.filePath)) {
    const raw = readFileSync3(input.filePath, "utf8");
    const managed = splitFile(raw).section;
    const parsed = parseEntriesFromSection(managed ?? raw);
    if (parsed.length) {
      _importEntries(parsed, input.projectPath);
    }
  }
}
|
|
28955
|
+
/**
 * Report whether the project's lore file (LORE_FILE under the project root)
 * is present on disk.
 *
 * @param {string} projectPath Project root directory.
 * @returns {boolean} True when the file exists.
 */
function loreFileExists(projectPath) {
  const lorePath = join5(projectPath, LORE_FILE);
  return existsSync3(lorePath);
}
|
|
28958
|
+
/**
 * Write the project's lore file: the fixed header line followed by the
 * section produced by `buildSection`, overwriting any existing file.
 *
 * @param {string} projectPath Project root directory.
 * @returns {void}
 */
function exportLoreFile(projectPath) {
  const body = buildSection(projectPath);
  const text = `${LORE_FILE_HEADER}\n${body}`;
  const target = join5(projectPath, LORE_FILE);
  writeFileSync(target, text, "utf8");
}
|
|
28963
|
+
/**
 * Decide whether the on-disk lore file differs from what an export would
 * currently produce, by comparing `hashSection` of the file contents with
 * `hashSection` of the freshly generated text.
 *
 * @param {string} projectPath Project root directory.
 * @returns {boolean} False when the file is missing or the hashes match;
 *   true when the hashes differ.
 */
function shouldImportLoreFile(projectPath) {
  const lorePath = join5(projectPath, LORE_FILE);
  if (!existsSync3(lorePath)) {
    return false;
  }
  const onDisk = readFileSync3(lorePath, "utf8");
  const generated = `${LORE_FILE_HEADER}\n${buildSection(projectPath)}`;
  const unchanged = hashSection(onDisk) === hashSection(generated);
  return !unchanged;
}
|
|
28970
|
+
/**
 * Import knowledge entries from the project's lore file.
 *
 * The whole file is handed to `parseEntriesFromSection`. Nothing happens
 * when the file is missing or yields no entries.
 *
 * @param {string} projectPath Project root directory.
 * @returns {void}
 */
function importLoreFile(projectPath) {
  const lorePath = join5(projectPath, LORE_FILE);
  if (existsSync3(lorePath)) {
    const parsed = parseEntriesFromSection(readFileSync3(lorePath, "utf8"));
    if (parsed.length) {
      _importEntries(parsed, projectPath);
    }
  }
}
|
|
28527
28978
|
|
|
28528
28979
|
// src/worker-model.ts
|
|
28529
28980
|
var worker_model_exports = {};
|
|
28530
28981
|
__export(worker_model_exports, {
|
|
28531
28982
|
WORKER_JUDGE_SYSTEM: () => WORKER_JUDGE_SYSTEM,
|
|
28983
|
+
clearValidatedWorkerModel: () => clearValidatedWorkerModel,
|
|
28532
28984
|
computeModelFingerprint: () => computeModelFingerprint,
|
|
28533
28985
|
getValidatedWorkerModel: () => getValidatedWorkerModel,
|
|
28534
28986
|
isValidationStale: () => isValidationStale,
|
|
28535
28987
|
parseJudgeScore: () => parseJudgeScore,
|
|
28536
28988
|
resolveWorkerModel: () => resolveWorkerModel,
|
|
28989
|
+
runValidation: () => runValidation,
|
|
28537
28990
|
selectWorkerCandidates: () => selectWorkerCandidates,
|
|
28538
28991
|
storeValidatedWorkerModel: () => storeValidatedWorkerModel,
|
|
28539
28992
|
structuralCheck: () => structuralCheck,
|
|
@@ -28545,7 +28998,13 @@ function selectWorkerCandidates(sessionModel, providerModels) {
|
|
|
28545
28998
|
(m) => m.providerID === sessionModel.providerID && m.status === "active" && m.capabilities.input.text
|
|
28546
28999
|
);
|
|
28547
29000
|
if (eligible.length === 0) return [];
|
|
28548
|
-
const sorted = [...eligible].sort((a, b) =>
|
|
29001
|
+
const sorted = [...eligible].sort((a, b) => {
|
|
29002
|
+
const costDiff = a.cost.input - b.cost.input;
|
|
29003
|
+
if (costDiff !== 0) return costDiff;
|
|
29004
|
+
const aReasoning = a.capabilities.reasoning ? 1 : 0;
|
|
29005
|
+
const bReasoning = b.capabilities.reasoning ? 1 : 0;
|
|
29006
|
+
return aReasoning - bReasoning;
|
|
29007
|
+
});
|
|
28549
29008
|
const cheapest = sorted[0];
|
|
28550
29009
|
const belowSession = sorted.filter((m) => m.cost.input < sessionModel.cost.input).pop();
|
|
28551
29010
|
const candidates = /* @__PURE__ */ new Map();
|
|
@@ -28580,6 +29039,9 @@ function storeValidatedWorkerModel(result) {
|
|
|
28580
29039
|
"INSERT INTO kv_meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = ?"
|
|
28581
29040
|
).run(key, value, value);
|
|
28582
29041
|
}
|
|
29042
|
+
/**
 * Delete the stored worker-model validation entry for a provider from the
 * `kv_meta` table (key = KV_PREFIX + providerID).
 *
 * @param {string} providerID Provider whose entry is removed.
 * @returns {void}
 */
function clearValidatedWorkerModel(providerID) {
  const removeEntry = db().query("DELETE FROM kv_meta WHERE key = ?");
  const key = `${KV_PREFIX}${providerID}`;
  removeEntry.run(key);
}
|
|
28583
29045
|
function isValidationStale(stored, currentFingerprint) {
|
|
28584
29046
|
if (!stored) return true;
|
|
28585
29047
|
return stored.fingerprint !== currentFingerprint;
|
|
@@ -28638,10 +29100,85 @@ function parseJudgeScore(response) {
|
|
|
28638
29100
|
if (!match) return null;
|
|
28639
29101
|
return parseInt(match[1], 10);
|
|
28640
29102
|
}
|
|
29103
|
+
/**
 * Try each candidate worker model in order and store the first one that
 * produces an acceptable distillation.
 *
 * Per candidate (the session model itself is skipped):
 *   1. Ask the candidate to distil the source messages; a thrown error
 *      rejects the candidate.
 *   2. Run `structuralCheck` against the reference observations; a failed
 *      check rejects the candidate.
 *   3. Ask the judge (no model override) to score the candidate; a parsed
 *      score below 3 rejects the candidate. If the judge call throws or
 *      yields no score, the candidate is NOT rejected.
 * The first surviving candidate is persisted via `storeValidatedWorkerModel`
 * and returned. If none survives, the provider's stored entry is cleared.
 *
 * @param {object} input Reads: llm, candidates, referenceObservations,
 *   sourceMessagesText, date, sessionModelID, providerID.
 * @returns {Promise<object|null>} The stored validation result
 *   ({modelID, providerID, fingerprint, validatedAt, judgeScore}) or null.
 */
async function runValidation(input) {
  const { llm, candidates, referenceObservations } = input;
  const distillPrompt = distillationUser({
    messages: input.sourceMessagesText,
    date: input.date
  });

  for (const model of candidates) {
    if (model.id === input.sessionModelID) continue;

    // Step 1: candidate distillation.
    let observations = null;
    try {
      const reply = await llm.prompt(DISTILLATION_SYSTEM, distillPrompt, {
        model: { providerID: model.providerID, modelID: model.id },
        workerID: "lore-distill",
        thinking: false
      });
      if (reply) {
        // Prefer the tagged payload; fall back to the whole reply.
        const tagged = reply.match(/<observations>([\s\S]*?)<\/observations>/);
        observations = tagged ? tagged[1].trim() : reply.trim();
      }
    } catch (e) {
      warn(`worker model validation: candidate ${model.id} failed:`, e);
      continue;
    }

    // Step 2: structural comparison with the reference.
    const shape = structuralCheck(observations, referenceObservations);
    if (!shape.passed) {
      info(
        `worker model validation: ${model.id} failed structural check: ${shape.reason}`
      );
      continue;
    }

    // Step 3: judge scoring; failures here leave the score as null.
    let judgeScore = null;
    try {
      const verdict = await llm.prompt(
        WORKER_JUDGE_SYSTEM,
        workerJudgeUser(referenceObservations, observations),
        { workerID: "lore-distill", thinking: false }
        // use session model (no model override)
      );
      if (verdict) {
        judgeScore = parseJudgeScore(verdict);
      }
    } catch (e) {
      warn(`worker model validation: judge call failed for ${model.id}:`, e);
    }
    const rejectedByJudge = judgeScore !== null && judgeScore < 3;
    if (rejectedByJudge) {
      info(
        `worker model validation: ${model.id} failed judge (score=${judgeScore})`
      );
      continue;
    }

    // Candidate accepted: persist and stop.
    const fingerprint = computeModelFingerprint(
      input.providerID,
      input.sessionModelID,
      candidates.map((c) => c.id)
    );
    const accepted = {
      modelID: model.id,
      providerID: model.providerID,
      fingerprint,
      validatedAt: Date.now(),
      judgeScore
    };
    storeValidatedWorkerModel(accepted);
    info(
      `worker model validated: ${model.id} (judge=${judgeScore}) for provider ${input.providerID}`
    );
    return accepted;
  }

  clearValidatedWorkerModel(input.providerID);
  info(
    `worker model validation: no candidate passed for ${input.providerID} \u2014 cleared stale entry`
  );
  return null;
}
|
|
28641
29177
|
function resolveWorkerModel(providerID, configWorkerModel, configModel) {
|
|
28642
29178
|
if (configWorkerModel) return configWorkerModel;
|
|
28643
29179
|
const validated = getValidatedWorkerModel(providerID);
|
|
28644
|
-
|
|
29180
|
+
const MAX_AGE_MS = 24 * 60 * 60 * 1e3;
|
|
29181
|
+
if (validated && Date.now() - validated.validatedAt <= MAX_AGE_MS) {
|
|
28645
29182
|
return { providerID: validated.providerID, modelID: validated.modelID };
|
|
28646
29183
|
}
|
|
28647
29184
|
return configModel;
|
|
@@ -28652,11 +29189,11 @@ export {
|
|
|
28652
29189
|
CURATOR_SYSTEM,
|
|
28653
29190
|
DISTILLATION_SYSTEM,
|
|
28654
29191
|
EMPTY_QUERY,
|
|
29192
|
+
LORE_FILE,
|
|
28655
29193
|
QUERY_EXPANSION_SYSTEM,
|
|
28656
29194
|
RECALL_PARAM_DESCRIPTIONS,
|
|
28657
29195
|
RECALL_TOOL_DESCRIPTION,
|
|
28658
29196
|
RECURSIVE_SYSTEM,
|
|
28659
|
-
WORKER_JUDGE_SYSTEM,
|
|
28660
29197
|
buildCompactPrompt,
|
|
28661
29198
|
calibrate,
|
|
28662
29199
|
close,
|
|
@@ -28671,7 +29208,9 @@ export {
|
|
|
28671
29208
|
distillationUser,
|
|
28672
29209
|
embedding_exports as embedding,
|
|
28673
29210
|
ensureProject,
|
|
29211
|
+
exactTermMatchRank,
|
|
28674
29212
|
expandQuery,
|
|
29213
|
+
exportLoreFile,
|
|
28675
29214
|
exportToFile,
|
|
28676
29215
|
extractTopTerms,
|
|
28677
29216
|
formatDistillations,
|
|
@@ -28680,10 +29219,12 @@ export {
|
|
|
28680
29219
|
ftsQueryOr,
|
|
28681
29220
|
getLastTransformEstimate,
|
|
28682
29221
|
getLastTransformedCount,
|
|
29222
|
+
getLastTurnAt,
|
|
28683
29223
|
getLtmBudget,
|
|
28684
29224
|
getLtmTokens,
|
|
28685
29225
|
h,
|
|
28686
29226
|
importFromFile,
|
|
29227
|
+
importLoreFile,
|
|
28687
29228
|
inline,
|
|
28688
29229
|
inspectSessionState,
|
|
28689
29230
|
isFirstRun,
|
|
@@ -28697,11 +29238,13 @@ export {
|
|
|
28697
29238
|
load,
|
|
28698
29239
|
loadForceMinLayer,
|
|
28699
29240
|
log_exports as log,
|
|
29241
|
+
loreFileExists,
|
|
28700
29242
|
ltm_exports as ltm,
|
|
28701
29243
|
needsUrgentDistillation,
|
|
28702
29244
|
normalize,
|
|
28703
29245
|
onIdleResume,
|
|
28704
29246
|
p,
|
|
29247
|
+
pattern_extract_exports as patternExtract,
|
|
28705
29248
|
projectId,
|
|
28706
29249
|
projectName,
|
|
28707
29250
|
reciprocalRankFusion,
|
|
@@ -28717,6 +29260,7 @@ export {
|
|
|
28717
29260
|
setMaxLayer0Tokens,
|
|
28718
29261
|
setModelLimits,
|
|
28719
29262
|
shouldImport,
|
|
29263
|
+
shouldImportLoreFile,
|
|
28720
29264
|
strong2 as strong,
|
|
28721
29265
|
t,
|
|
28722
29266
|
temporal_exports as temporal,
|
|
@@ -28724,7 +29268,6 @@ export {
|
|
|
28724
29268
|
transform2 as transform,
|
|
28725
29269
|
ul,
|
|
28726
29270
|
unescapeMarkdown,
|
|
28727
|
-
workerJudgeUser,
|
|
28728
29271
|
worker_model_exports as workerModel,
|
|
28729
29272
|
workerSessionIDs
|
|
28730
29273
|
};
|