@chendpoc/pi-memory 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +180 -0
- package/dist/adapters/piComplete.d.ts +17 -0
- package/dist/adapters/piComplete.d.ts.map +1 -0
- package/dist/adapters/piComplete.js +169 -0
- package/dist/adapters/piComplete.js.map +1 -0
- package/dist/bundle/install.d.ts +34 -0
- package/dist/bundle/install.d.ts.map +1 -0
- package/dist/bundle/install.js +183 -0
- package/dist/bundle/install.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +245 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +27 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +49 -0
- package/dist/config.js.map +1 -0
- package/dist/errclass.d.ts +7 -0
- package/dist/errclass.d.ts.map +1 -0
- package/dist/errclass.js +32 -0
- package/dist/errclass.js.map +1 -0
- package/dist/extension.d.ts +24 -0
- package/dist/extension.d.ts.map +1 -0
- package/dist/extension.js +7 -0
- package/dist/extension.js.map +1 -0
- package/dist/fallback/index.d.ts +11 -0
- package/dist/fallback/index.d.ts.map +1 -0
- package/dist/fallback/index.js +16 -0
- package/dist/fallback/index.js.map +1 -0
- package/dist/fallback/llmRerank.d.ts +19 -0
- package/dist/fallback/llmRerank.d.ts.map +1 -0
- package/dist/fallback/llmRerank.js +60 -0
- package/dist/fallback/llmRerank.js.map +1 -0
- package/dist/fallback/memoryMd.d.ts +6 -0
- package/dist/fallback/memoryMd.d.ts.map +1 -0
- package/dist/fallback/memoryMd.js +35 -0
- package/dist/fallback/memoryMd.js.map +1 -0
- package/dist/fallback/sessionIndex.d.ts +35 -0
- package/dist/fallback/sessionIndex.d.ts.map +1 -0
- package/dist/fallback/sessionIndex.js +222 -0
- package/dist/fallback/sessionIndex.js.map +1 -0
- package/dist/fallback/sessionSearch.d.ts +18 -0
- package/dist/fallback/sessionSearch.d.ts.map +1 -0
- package/dist/fallback/sessionSearch.js +161 -0
- package/dist/fallback/sessionSearch.js.map +1 -0
- package/dist/index.d.ts +25 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +24 -0
- package/dist/index.js.map +1 -0
- package/dist/paths.d.ts +7 -0
- package/dist/paths.d.ts.map +1 -0
- package/dist/paths.js +26 -0
- package/dist/paths.js.map +1 -0
- package/dist/pi-extension.d.ts +6 -0
- package/dist/pi-extension.d.ts.map +1 -0
- package/dist/pi-extension.js +224 -0
- package/dist/pi-extension.js.map +1 -0
- package/dist/preflight/detectIntents.d.ts +102 -0
- package/dist/preflight/detectIntents.d.ts.map +1 -0
- package/dist/preflight/detectIntents.js +624 -0
- package/dist/preflight/detectIntents.js.map +1 -0
- package/dist/preflight/hook.d.ts +58 -0
- package/dist/preflight/hook.d.ts.map +1 -0
- package/dist/preflight/hook.js +77 -0
- package/dist/preflight/hook.js.map +1 -0
- package/dist/preflight/render.d.ts +21 -0
- package/dist/preflight/render.d.ts.map +1 -0
- package/dist/preflight/render.js +132 -0
- package/dist/preflight/render.js.map +1 -0
- package/dist/preflight/strip.d.ts +11 -0
- package/dist/preflight/strip.d.ts.map +1 -0
- package/dist/preflight/strip.js +46 -0
- package/dist/preflight/strip.js.map +1 -0
- package/dist/service.d.ts +56 -0
- package/dist/service.d.ts.map +1 -0
- package/dist/service.js +158 -0
- package/dist/service.js.map +1 -0
- package/dist/sidecar/bundle.d.ts +19 -0
- package/dist/sidecar/bundle.d.ts.map +1 -0
- package/dist/sidecar/bundle.js +39 -0
- package/dist/sidecar/bundle.js.map +1 -0
- package/dist/sidecar/client.d.ts +17 -0
- package/dist/sidecar/client.d.ts.map +1 -0
- package/dist/sidecar/client.js +107 -0
- package/dist/sidecar/client.js.map +1 -0
- package/dist/sidecar/process.d.ts +14 -0
- package/dist/sidecar/process.d.ts.map +1 -0
- package/dist/sidecar/process.js +126 -0
- package/dist/sidecar/process.js.map +1 -0
- package/dist/tools/memoryAppend.d.ts +37 -0
- package/dist/tools/memoryAppend.d.ts.map +1 -0
- package/dist/tools/memoryAppend.js +99 -0
- package/dist/tools/memoryAppend.js.map +1 -0
- package/dist/tools/memoryRecall.d.ts +113 -0
- package/dist/tools/memoryRecall.d.ts.map +1 -0
- package/dist/tools/memoryRecall.js +325 -0
- package/dist/tools/memoryRecall.js.map +1 -0
- package/dist/trainer/bundleBuilder.d.ts +30 -0
- package/dist/trainer/bundleBuilder.d.ts.map +1 -0
- package/dist/trainer/bundleBuilder.js +106 -0
- package/dist/trainer/bundleBuilder.js.map +1 -0
- package/dist/trainer/bundleLoader.d.ts +12 -0
- package/dist/trainer/bundleLoader.d.ts.map +1 -0
- package/dist/trainer/bundleLoader.js +59 -0
- package/dist/trainer/bundleLoader.js.map +1 -0
- package/dist/trainer/deltaMerge.d.ts +38 -0
- package/dist/trainer/deltaMerge.d.ts.map +1 -0
- package/dist/trainer/deltaMerge.js +183 -0
- package/dist/trainer/deltaMerge.js.map +1 -0
- package/dist/trainer/entityResolver.d.ts +27 -0
- package/dist/trainer/entityResolver.d.ts.map +1 -0
- package/dist/trainer/entityResolver.js +92 -0
- package/dist/trainer/entityResolver.js.map +1 -0
- package/dist/trainer/extractFacts.d.ts +67 -0
- package/dist/trainer/extractFacts.d.ts.map +1 -0
- package/dist/trainer/extractFacts.js +213 -0
- package/dist/trainer/extractFacts.js.map +1 -0
- package/dist/trainer/index.d.ts +54 -0
- package/dist/trainer/index.d.ts.map +1 -0
- package/dist/trainer/index.js +82 -0
- package/dist/trainer/index.js.map +1 -0
- package/dist/trainer/llmExtractor.d.ts +16 -0
- package/dist/trainer/llmExtractor.d.ts.map +1 -0
- package/dist/trainer/llmExtractor.js +146 -0
- package/dist/trainer/llmExtractor.js.map +1 -0
- package/dist/trainer/marker.d.ts +10 -0
- package/dist/trainer/marker.d.ts.map +1 -0
- package/dist/trainer/marker.js +28 -0
- package/dist/trainer/marker.js.map +1 -0
- package/dist/trainer/scheduler.d.ts +31 -0
- package/dist/trainer/scheduler.d.ts.map +1 -0
- package/dist/trainer/scheduler.js +72 -0
- package/dist/trainer/scheduler.js.map +1 -0
- package/dist/trainer/sessionLoader.d.ts +23 -0
- package/dist/trainer/sessionLoader.d.ts.map +1 -0
- package/dist/trainer/sessionLoader.js +106 -0
- package/dist/trainer/sessionLoader.js.map +1 -0
- package/dist/types.d.ts +135 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +8 -0
- package/dist/types.js.map +1 -0
- package/package.json +78 -0
- package/src/adapters/piComplete.ts +233 -0
- package/src/bundle/install.ts +206 -0
- package/src/cli.ts +254 -0
- package/src/config.ts +92 -0
- package/src/errclass.ts +37 -0
- package/src/extension.ts +23 -0
- package/src/fallback/index.ts +24 -0
- package/src/fallback/llmRerank.ts +90 -0
- package/src/fallback/memoryMd.ts +36 -0
- package/src/fallback/sessionIndex.ts +289 -0
- package/src/fallback/sessionSearch.ts +181 -0
- package/src/index.ts +213 -0
- package/src/paths.ts +28 -0
- package/src/pi-extension.ts +276 -0
- package/src/preflight/detectIntents.ts +654 -0
- package/src/preflight/hook.ts +136 -0
- package/src/preflight/render.ts +185 -0
- package/src/preflight/strip.ts +50 -0
- package/src/service.ts +202 -0
- package/src/sidecar/bundle.ts +52 -0
- package/src/sidecar/client.ts +166 -0
- package/src/sidecar/process.ts +145 -0
- package/src/tools/memoryAppend.ts +113 -0
- package/src/tools/memoryRecall.ts +364 -0
- package/src/trainer/bundleBuilder.ts +192 -0
- package/src/trainer/bundleLoader.ts +105 -0
- package/src/trainer/deltaMerge.ts +221 -0
- package/src/trainer/entityResolver.ts +140 -0
- package/src/trainer/extractFacts.ts +312 -0
- package/src/trainer/index.ts +147 -0
- package/src/trainer/llmExtractor.ts +206 -0
- package/src/trainer/marker.ts +30 -0
- package/src/trainer/scheduler.ts +104 -0
- package/src/trainer/sessionLoader.ts +139 -0
- package/src/types.ts +168 -0
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
import { installBundle } from "../bundle/install.js";
|
|
2
|
+
import { defaultBundleRoot, defaultSessionsDir } from "../paths.js";
|
|
3
|
+
|
|
4
|
+
import { loadSessions } from "./sessionLoader.js";
|
|
5
|
+
import { extractFactsFromSessions, type ExtractFactsOptions } from "./extractFacts.js";
|
|
6
|
+
import { resolveEntities } from "./entityResolver.js";
|
|
7
|
+
import { buildBundle, type BuildBundleResult } from "./bundleBuilder.js";
|
|
8
|
+
import { readMarker, writeMarker } from "./marker.js";
|
|
9
|
+
import { loadExistingBundle } from "./bundleLoader.js";
|
|
10
|
+
import { deltaMerge, type DeltaLog } from "./deltaMerge.js";
|
|
11
|
+
|
|
12
|
+
/** Options accepted by `trainBundle`; every field is optional. */
export interface TrainBundleConfig {
  /** Directory scanned for session files (falls back to `defaultSessionsDir()`). */
  sessionsDir?: string;

  /** Bundle root directory (falls back to `defaultBundleRoot()`). */
  bundleRoot?: string;

  /** Ignore marker and rebuild from all sessions. */
  full?: boolean;

  /** Show what would be extracted without writing. */
  dryRun?: boolean;

  /** Bundle version to stamp (default "0.6.0"). */
  bundleVersion?: string;

  /** Extraction options (optional LLM extractor). */
  extractOpts?: ExtractFactsOptions;

  /** Skip delta merge — full rebuild even if existing bundle present (default false). */
  noMerge?: boolean;
}
|
|
26
|
+
|
|
27
|
+
/** Summary returned by `trainBundle`. */
export interface TrainBundleResult {
  /** Number of session files that went through extraction. */
  sessionsProcessed: number;
  /** Entities in the final graph. */
  entityCount: number;
  /** Relations (edges) in the final graph. */
  relationCount: number;
  /** Events in the final event list. */
  eventCount: number;

  /** Output of `buildBundle`; absent when nothing was written. */
  bundleResult?: BuildBundleResult;

  /** Output of `installBundle`; absent when nothing was written. */
  installResult?: {
    bundle_ts: string;
    bundle_version: string;
    installed_dir: string;
    files_copied: number;
  };

  /** True when the run was a dry run. */
  dryRun: boolean;

  /** Delta operation log when merge was performed. */
  delta?: DeltaLog;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Full training pipeline:
 * load sessions → extract facts → resolve entities → delta merge with the
 * existing bundle → build → install → update marker.
 *
 * Runs incrementally by default: only sessions modified after the stored
 * marker are loaded (`full: true` ignores the marker).
 *
 * When there is nothing to merge into — `noMerge` is true, or no existing
 * bundle could be loaded — the bundle is rebuilt from ALL sessions. The built
 * bundle contains only the graph handed to `buildBundle`, so building it from
 * just the newest sessions would presumably discard earlier facts
 * (NOTE(review): confirm against `buildBundle`/`installBundle`).
 *
 * @param config - Optional overrides for directories, mode and extraction.
 * @returns Counts for the resulting graph. When no new sessions are found, or
 *   in dry-run mode, nothing is written and `bundleResult` / `installResult`
 *   are omitted.
 */
export async function trainBundle(
  config: TrainBundleConfig = {},
): Promise<TrainBundleResult> {
  const sessionsDir = config.sessionsDir ?? defaultSessionsDir();
  const bundleRoot = config.bundleRoot ?? defaultBundleRoot();

  // The marker limits the scan to sessions touched since the last run.
  const modifiedAfter = config.full ? null : await readMarker(bundleRoot);

  let sessions = await loadSessions({ sessionsDir, modifiedAfter });

  if (sessions.length === 0) {
    // Nothing new: leave the bundle and the marker untouched.
    return {
      sessionsProcessed: 0,
      entityCount: 0,
      relationCount: 0,
      eventCount: 0,
      dryRun: config.dryRun ?? false,
    };
  }

  const existingBundle = config.noMerge
    ? null
    : await loadExistingBundle(bundleRoot);

  // Without a merge base, an incremental session set is not enough for a
  // "full rebuild" — widen the scan to every session.
  if (!existingBundle && modifiedAfter) {
    const allSessions = await loadSessions({ sessionsDir, modifiedAfter: null });
    if (allSessions.length > 0) sessions = allSessions;
  }

  const extracted = await extractFactsFromSessions(sessions, config.extractOpts);
  const newGraph = resolveEntities(extracted.entities, extracted.relations);

  let finalGraph = newGraph;
  let finalEvents = extracted.events;
  let deltaLog: DeltaLog | undefined;

  if (existingBundle) {
    const merged = deltaMerge(
      existingBundle,
      { graph: newGraph, events: extracted.events },
    );
    finalGraph = merged.graph;
    finalEvents = merged.events;
    deltaLog = merged.delta;
  }

  if (config.dryRun) {
    return {
      sessionsProcessed: sessions.length,
      entityCount: finalGraph.entities.length,
      relationCount: finalGraph.relations.length,
      eventCount: finalEvents.length,
      dryRun: true,
      delta: deltaLog,
    };
  }

  const bundleResult = await buildBundle(
    { graph: finalGraph, events: finalEvents },
    { outputDir: bundleRoot, bundleVersion: config.bundleVersion },
  );

  const installResult = await installBundle({
    bundleRoot,
    sourceDir: bundleResult.bundleDir,
  });

  // Advance the marker to the newest session that was actually processed.
  const latestMtime = sessions.reduce(
    (max, s) => (s.modifiedAt > max ? s.modifiedAt : max),
    sessions[0]!.modifiedAt,
  );
  await writeMarker(bundleRoot, latestMtime);

  return {
    sessionsProcessed: sessions.length,
    entityCount: bundleResult.stats.entityCount,
    relationCount: bundleResult.stats.edgeCount,
    eventCount: bundleResult.stats.eventCount,
    bundleResult,
    installResult,
    dryRun: false,
    delta: deltaLog,
  };
}
|
|
131
|
+
|
|
132
|
+
export { loadSessions } from "./sessionLoader.js";
|
|
133
|
+
export type { LoadedSession, SessionTurn, SessionLoaderOptions } from "./sessionLoader.js";
|
|
134
|
+
export { extractFacts, extractFactsFromSessions, RELATION_CATALOG, ALL_RELATIONS } from "./extractFacts.js";
|
|
135
|
+
export type {
|
|
136
|
+
ExtractedEntity, ExtractedRelation, ExtractedEvent,
|
|
137
|
+
ExtractionResult, EntityType, LLMFactExtractor, ExtractFactsOptions,
|
|
138
|
+
} from "./extractFacts.js";
|
|
139
|
+
export { resolveEntities } from "./entityResolver.js";
|
|
140
|
+
export type { ResolvedEntity, ResolvedRelation, ResolvedGraph } from "./entityResolver.js";
|
|
141
|
+
export { buildBundle } from "./bundleBuilder.js";
|
|
142
|
+
export type { BundleData, BuildBundleOptions, BuildBundleResult } from "./bundleBuilder.js";
|
|
143
|
+
export { readMarker, writeMarker } from "./marker.js";
|
|
144
|
+
export { loadExistingBundle } from "./bundleLoader.js";
|
|
145
|
+
export type { ExistingBundle } from "./bundleLoader.js";
|
|
146
|
+
export { deltaMerge } from "./deltaMerge.js";
|
|
147
|
+
export type { DeltaOp, DeltaLogEntry, DeltaLog, MergeResult } from "./deltaMerge.js";
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
import type { SessionTurn } from "./sessionLoader.js";
|
|
2
|
+
import type {
|
|
3
|
+
ExtractedEntity,
|
|
4
|
+
ExtractedRelation,
|
|
5
|
+
ExtractedEvent,
|
|
6
|
+
LLMFactExtractor,
|
|
7
|
+
EntityType,
|
|
8
|
+
} from "./extractFacts.js";
|
|
9
|
+
import { extractFacts } from "./extractFacts.js";
|
|
10
|
+
import type { LoadedSession } from "./sessionLoader.js";
|
|
11
|
+
import { ALL_RELATIONS } from "./extractFacts.js";
|
|
12
|
+
|
|
13
|
+
/**
 * Provider-agnostic LLM client — any backend that can turn a prompt string
 * into a completion string.
 */
export interface LLMClient {
  /** Send `prompt` to the model and resolve with its raw text response. */
  complete(prompt: string): Promise<string>;
}
|
|
17
|
+
|
|
18
|
+
/** Options for `createLLMFactExtractor`. */
export interface LLMExtractorOptions {
  /** Backend used to complete extraction prompts. */
  client: LLMClient;

  /** How many turns to send per LLM call (default 10). */
  batchSize?: number;
}
|
|
23
|
+
|
|
24
|
+
/** Core entity types. */
const VALID_ENTITY_TYPES = new Set<string>([
  "Person",
  "Project",
  "Tool",
  "Company",
  "Organization",
  "Location",
  "Document",
]);

/** Core entity types plus the additional types accepted when parsing. */
const VALID_ENTITY_TYPES_FULL = new Set<string>(VALID_ENTITY_TYPES);
for (const extraType of [
  "Language",
  "Concept",
  "File",
  "Product",
  "Website",
  "Platform",
  "Unknown",
]) {
  VALID_ENTITY_TYPES_FULL.add(extraType);
}
|
|
33
|
+
|
|
34
|
+
/** Set view of `ALL_RELATIONS` for constant-time membership checks. */
const RELATION_SET = new Set<string>(ALL_RELATIONS);
|
|
35
|
+
|
|
36
|
+
/**
 * Render the extraction prompt for one batch of turns.
 *
 * Each turn is shown as `[role #turnIndex]: content`, turns are separated by a
 * blank line, and the allowed relation names come from `ALL_RELATIONS`.
 */
function buildPrompt(turns: SessionTurn[], sessionId: string): string {
  const renderedTurns: string[] = [];
  for (const turn of turns) {
    renderedTurns.push(`[${turn.role} #${turn.turnIndex}]: ${turn.content}`);
  }
  const turnsText = renderedTurns.join("\n\n");

  return `You are a structured fact extractor. Analyze the following conversation turns and extract:

1. **Entities** — people, projects, tools, companies, organizations, locations, documents mentioned.
Each entity: { "name": string, "type": one of "Person"|"Project"|"Tool"|"Company"|"Organization"|"Location"|"Document" }

2. **Relations** — connections between entities. Use ONLY these relation types:
${ALL_RELATIONS.join(", ")}
Each relation: { "head": string, "relation": string, "tail": string, "turn_index": number, "evidence": string, "negated": boolean }
Set negated=true only when the text explicitly says something is no longer true (e.g. "no longer uses X").

3. **Events** — decisions, milestones, deadlines, deployments.
Each event: { "description": string, "turn_index": number }

Session ID: ${sessionId}

Conversation:
${turnsText}

Respond with ONLY a JSON object (no markdown fences, no explanation):
{
"entities": [...],
"relations": [...],
"events": [...]
}`;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Map a raw type string from the LLM onto an `EntityType`.
 * Anything not listed in `VALID_ENTITY_TYPES_FULL` (the match is
 * case-sensitive) becomes "Unknown".
 */
function parseEntityType(raw: string): EntityType {
  return VALID_ENTITY_TYPES_FULL.has(raw) ? (raw as EntityType) : "Unknown";
}
|
|
71
|
+
|
|
72
|
+
/** Entity object as it appears in the LLM's JSON; nothing is guaranteed. */
interface RawLLMEntity {
  name?: string;
  type?: string;
}

/** Relation object as it appears in the LLM's JSON; nothing is guaranteed. */
interface RawLLMRelation {
  head?: string;
  relation?: string;
  tail?: string;
  turn_index?: number;
  evidence?: string;
  negated?: boolean;
}

/** Event object as it appears in the LLM's JSON; nothing is guaranteed. */
interface RawLLMEvent {
  description?: string;
  turn_index?: number;
}

/** Top-level shape requested from the LLM by the extraction prompt. */
interface RawLLMResponse {
  entities?: RawLLMEntity[];
  relations?: RawLLMRelation[];
  events?: RawLLMEvent[];
}
|
|
96
|
+
|
|
97
|
+
/**
 * Parse one LLM completion into validated entities, relations and events.
 *
 * The model output is untrusted: every field is type-checked, malformed list
 * items are skipped individually, and turn indices that do not belong to the
 * batch are replaced by the batch's first turn index.
 *
 * @param raw - Raw completion text, optionally wrapped in a markdown fence or
 *   surrounded by prose.
 * @param sessionId - Session the batch belongs to; copied onto every fact.
 * @param turns - The turns that were sent in the prompt for this completion.
 * @returns The facts that survived validation.
 * @throws SyntaxError when no valid JSON can be parsed, or Error when the
 *   JSON value is not an object.
 */
function parseLLMResponse(
  raw: string,
  sessionId: string,
  turns: SessionTurn[],
): { entities: ExtractedEntity[]; relations: ExtractedRelation[]; events: ExtractedEvent[] } {
  // Strip a fence only at the very start/end of the text (no `m` flag, so a
  // ``` in the middle of the payload is left alone).
  let cleaned = raw
    .trim()
    .replace(/^```(?:json)?\s*/, "")
    .replace(/\s*```$/, "")
    .trim();

  // Models sometimes add prose around the JSON; keep the outermost {...}.
  if (!cleaned.startsWith("{")) {
    const open = cleaned.indexOf("{");
    const close = cleaned.lastIndexOf("}");
    if (open !== -1 && close > open) cleaned = cleaned.slice(open, close + 1);
  }

  const parsed: unknown = JSON.parse(cleaned);
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
    throw new Error("LLM response is not a JSON object");
  }
  const payload = parsed as RawLLMResponse;

  const isObject = (value: unknown): boolean =>
    value !== null && typeof value === "object";

  // Turn indices are only meaningful if they refer to a turn in this batch.
  const firstTurnIndex = turns[0]?.turnIndex ?? 0;
  const batchIndices = new Set(turns.map((t) => t.turnIndex));
  const resolveTurnIndex = (value: unknown): number =>
    typeof value === "number" && batchIndices.has(value) ? value : firstTurnIndex;

  // Lower-cased once so entity mentions can be located case-insensitively.
  const loweredTurns = turns.map((t) => ({
    turnIndex: t.turnIndex,
    text: t.content.toLowerCase(),
  }));

  const entities: ExtractedEntity[] = [];
  if (Array.isArray(payload.entities)) {
    for (const e of payload.entities) {
      if (!isObject(e)) continue;
      const name = typeof e.name === "string" ? e.name.trim() : "";
      if (!name || name.length > 100) continue;
      const needle = name.toLowerCase();
      // Attribute the mention to the first turn that contains the name.
      const mention = loweredTurns.find((t) => t.text.includes(needle));
      entities.push({
        name,
        type: parseEntityType(typeof e.type === "string" ? e.type : "Unknown"),
        mentions: [{
          sessionId,
          turnIndex: mention?.turnIndex ?? firstTurnIndex,
          snippet: name,
        }],
      });
    }
  }

  const relations: ExtractedRelation[] = [];
  if (Array.isArray(payload.relations)) {
    for (const r of payload.relations) {
      if (!isObject(r)) continue;
      const head = typeof r.head === "string" ? r.head.trim() : "";
      const tail = typeof r.tail === "string" ? r.tail.trim() : "";
      const relation = typeof r.relation === "string" ? r.relation.trim() : "";
      if (!head || !tail || !relation) continue;
      // Only relation names from the catalogue are accepted.
      if (!RELATION_SET.has(relation)) continue;
      relations.push({
        headName: head,
        relation,
        tailName: tail,
        sessionId,
        turnIndex: resolveTurnIndex(r.turn_index),
        evidence: typeof r.evidence === "string" ? r.evidence.slice(0, 200) : "",
        negated: r.negated === true ? true : undefined,
      });
    }
  }

  // Turns carry no timestamp, so events are stamped with the extraction time.
  const extractedAt = new Date().toISOString();

  const events: ExtractedEvent[] = [];
  if (Array.isArray(payload.events)) {
    for (const ev of payload.events) {
      if (!isObject(ev)) continue;
      const desc = typeof ev.description === "string" ? ev.description.trim() : "";
      if (!desc) continue;
      events.push({
        description: desc.slice(0, 300),
        sessionId,
        timestamp: extractedAt,
        turnIndex: resolveTurnIndex(ev.turn_index),
      });
    }
  }

  return { entities, relations, events };
}
|
|
161
|
+
|
|
162
|
+
/**
 * LLM-backed fact extractor. Sends the turns to the LLM in batches and parses
 * the structured JSON reply. If the LLM call or the parsing of a batch throws,
 * that batch alone is re-processed with the regex-based `extractFacts`.
 *
 * @param opts - LLM client plus optional batch size. A batch size that is not
 *   a finite number >= 1 falls back to the default of 10 (a zero or negative
 *   step would otherwise never advance the batch loop); fractional values are
 *   rounded down.
 * @returns An extractor whose `extractFacts` concatenates the facts of all
 *   batches in turn order.
 */
export function createLLMFactExtractor(opts: LLMExtractorOptions): LLMFactExtractor {
  const DEFAULT_BATCH_SIZE = 10;
  const requested = opts.batchSize ?? DEFAULT_BATCH_SIZE;
  const batchSize =
    Number.isFinite(requested) && requested >= 1
      ? Math.floor(requested)
      : DEFAULT_BATCH_SIZE;

  return {
    async extractFacts(
      turns: SessionTurn[],
      sessionId: string,
    ): Promise<{ entities: ExtractedEntity[]; relations: ExtractedRelation[]; events: ExtractedEvent[] }> {
      const allEntities: ExtractedEntity[] = [];
      const allRelations: ExtractedRelation[] = [];
      const allEvents: ExtractedEvent[] = [];

      for (let i = 0; i < turns.length; i += batchSize) {
        const batch = turns.slice(i, i + batchSize);
        try {
          const prompt = buildPrompt(batch, sessionId);
          const response = await opts.client.complete(prompt);
          const parsed = parseLLMResponse(response, sessionId, batch);
          allEntities.push(...parsed.entities);
          allRelations.push(...parsed.relations);
          allEvents.push(...parsed.events);
        } catch {
          // Deliberate best-effort: any LLM or parse failure degrades this
          // batch to regex extraction instead of failing the whole session.
          // `extractFacts` takes a session, so wrap the batch in a stand-in.
          const fallbackSession: LoadedSession = {
            id: sessionId,
            title: "",
            createdAt: new Date().toISOString(),
            filePath: "",
            modifiedAt: new Date(),
            turns: batch,
          };
          const fallback = await extractFacts(fallbackSession);
          allEntities.push(...fallback.entities);
          allRelations.push(...fallback.relations);
          allEvents.push(...fallback.events);
        }
      }

      return { entities: allEntities, relations: allRelations, events: allEvents };
    },
  };
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import fs from "node:fs/promises";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
|
|
4
|
+
/**
 * Load the last-trained timestamp stored in `<memoryDir>/.train_marker`.
 *
 * @returns The parsed date, or null when the file cannot be read, is empty,
 *   or does not contain a parseable date.
 */
export async function readMarker(memoryDir: string): Promise<Date | null> {
  let contents: string;
  try {
    contents = await fs.readFile(path.join(memoryDir, ".train_marker"), "utf8");
  } catch {
    // Missing or unreadable marker is treated as "never trained".
    return null;
  }

  const text = contents.trim();
  if (text === "") return null;

  const parsed = new Date(text);
  return Number.isNaN(parsed.getTime()) ? null : parsed;
}
|
|
19
|
+
|
|
20
|
+
/**
 * Persist the last-trained timestamp to `<memoryDir>/.train_marker` as an ISO
 * string followed by a newline, creating `memoryDir` first if needed.
 */
export async function writeMarker(
  memoryDir: string,
  ts: Date,
): Promise<void> {
  const target = path.join(memoryDir, ".train_marker");
  await fs.mkdir(memoryDir, { recursive: true });
  const payload = ts.toISOString() + "\n";
  await fs.writeFile(target, payload, "utf8");
}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import { trainBundle, type TrainBundleConfig, type TrainBundleResult } from "./index.js";
|
|
2
|
+
|
|
3
|
+
/** Configuration for `createTrainScheduler`. */
export interface SchedulerConfig {
  /** Interval string: "1h", "6h", "12h", "24h", or null to disable. */
  interval: string | null;

  /** Passed through to trainBundle (the scheduler sets `full` and `dryRun` itself). */
  trainConfig?: Omit<TrainBundleConfig, "full" | "dryRun">;
}

/** Handle returned by `createTrainScheduler`. */
export interface TrainScheduler {
  /** Stop the scheduler. Safe to call multiple times. */
  stop(): void;

  /** Whether the scheduler is currently running. */
  running(): boolean;
}

/** One record per scheduler tick, handed to the optional logger. */
export interface SchedulerLog {
  /** ISO timestamp taken when the tick finished. */
  timestamp: string;
  sessionsProcessed: number;
  entityCount: number;
  relationCount: number;
  eventCount: number;
  /** Wall-clock duration of the tick in milliseconds. */
  durationMs: number;
  /** Error message when the training run threw; otherwise undefined. */
  error?: string;
}
|
|
26
|
+
|
|
27
|
+
/** Supported interval strings and their length in milliseconds. */
const INTERVAL_MAP: Record<string, number> = {
  "1h": 3_600_000,
  "6h": 21_600_000,
  "12h": 43_200_000,
  "24h": 86_400_000,
};

/**
 * Parse interval string to milliseconds.
 *
 * Matching ignores surrounding whitespace and letter case.
 *
 * @param interval - One of "1h", "6h", "12h", "24h"; null/undefined/empty
 *   disables scheduling.
 * @returns The interval in milliseconds, or null if disabled/invalid.
 */
export function parseInterval(interval: string | null | undefined): number | null {
  if (!interval) return null;
  const key = interval.trim().toLowerCase();
  // Own-property check: a bare `INTERVAL_MAP[key]` would also return members
  // inherited from Object.prototype for keys like "constructor".
  if (!Object.prototype.hasOwnProperty.call(INTERVAL_MAP, key)) return null;
  return INTERVAL_MAP[key] ?? null;
}
|
|
40
|
+
|
|
41
|
+
/** Callback that receives one `SchedulerLog` record per tick. */
export type SchedulerLogger = (log: SchedulerLog) => void;

/**
 * Create an interval-based trainer that runs `trainBundle()` periodically.
 * Runs one tick immediately on start, then repeats at the configured interval.
 *
 * Ticks never overlap: a tick that fires while the previous one is still
 * running is skipped. Errors from `trainBundle` are reported through the
 * logger's `error` field; errors thrown by the logger itself are ignored so
 * they cannot stall the scheduler.
 *
 * @param config - Interval plus options forwarded to `trainBundle`.
 * @param logger - Optional sink called once after every tick.
 * @returns A handle to stop the scheduler. When the interval is disabled or
 *   invalid, an inert handle is returned and nothing is scheduled.
 */
export function createTrainScheduler(
  config: SchedulerConfig,
  logger?: SchedulerLogger,
): TrainScheduler {
  const intervalMs = parseInterval(config.interval);
  if (intervalMs == null) {
    return { stop() {}, running() { return false; } };
  }

  let timer: ReturnType<typeof setInterval> | null = null;
  let stopped = false;
  let tickInProgress = false;

  async function tick(): Promise<void> {
    if (stopped || tickInProgress) return;
    tickInProgress = true;
    const start = Date.now();
    let result: TrainBundleResult | null = null;
    let error: string | undefined;

    try {
      // Scheduled runs are always incremental and always write.
      result = await trainBundle({
        ...config.trainConfig,
        full: false,
        dryRun: false,
      });
    } catch (err) {
      error = err instanceof Error ? err.message : String(err);
    }

    try {
      logger?.({
        timestamp: new Date().toISOString(),
        sessionsProcessed: result?.sessionsProcessed ?? 0,
        entityCount: result?.entityCount ?? 0,
        relationCount: result?.relationCount ?? 0,
        eventCount: result?.eventCount ?? 0,
        durationMs: Date.now() - start,
        error,
      });
    } catch {
      // A faulty logger must not reject the fire-and-forget tick.
    } finally {
      // Always release the guard, otherwise every later tick would be skipped.
      tickInProgress = false;
    }
  }

  // Fire first tick asynchronously, then schedule repeating
  void tick();
  timer = setInterval(() => void tick(), intervalMs);

  return {
    stop() {
      stopped = true;
      if (timer != null) {
        clearInterval(timer);
        timer = null;
      }
    },
    running() {
      return !stopped && timer != null;
    },
  };
}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import fs from "node:fs/promises";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
|
|
5
|
+
/** One non-empty message of a session, flattened to plain text. */
export interface SessionTurn {
  /** Message role as stored in the session file ("unknown" when absent). */
  role: string;
  /** Text content of the message. */
  content: string;
  /** Position of the message in the session file's `messages` array. */
  turnIndex: number;
}

/** A parsed session file together with its file metadata. */
export interface LoadedSession {
  /** Session id from the file, or the file name without ".json". */
  id: string;
  title: string;
  /** `created_at` value from the file; empty string when absent. */
  createdAt: string;
  /** Path of the session file the data was read from. */
  filePath: string;
  /** Modification time of the session file. */
  modifiedAt: Date;
  turns: SessionTurn[];
}

/** Message entry as found in a session file; all fields are unvalidated. */
interface PiSessionMessage {
  role?: string;
  content?: unknown;
}

/** Top-level JSON shape of a session file; all fields are unvalidated. */
interface PiSessionFile {
  id?: string;
  title?: string;
  created_at?: string;
  messages?: PiSessionMessage[];
}
|
|
31
|
+
|
|
32
|
+
/**
 * Flatten a message's `content` into plain text.
 *
 * A string is returned unchanged. For an array, each element contributes
 * either itself (string element), its string `text` property, or — only when
 * `text` is not a string — its string `content` property; the pieces are
 * joined with newlines. Any other input yields an empty string.
 */
function messageText(content: unknown): string {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  const pieces = content.flatMap((block: unknown): string[] => {
    if (typeof block === "string") return [block];
    if (block === null || typeof block !== "object") return [];

    const { text, content: inner } = block as Record<string, unknown>;
    if (typeof text === "string") return [text];
    return typeof inner === "string" ? [inner] : [];
  });

  return pieces.join("\n");
}
|
|
49
|
+
|
|
50
|
+
/** Options for `loadSessions`. */
export interface SessionLoaderOptions {
  /** Directory whose `.json` files are scanned. */
  sessionsDir: string;

  /** When set, files whose mtime is not strictly later than this are skipped. */
  modifiedAfter?: Date | null;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Scan session JSON files, parse Pi session format, optionally filter by
 * modified-after timestamp for incremental training.
 *
 * Only direct children of `sessionsDir` ending in ".json" are considered.
 * Files that cannot be stat'ed, read or parsed, whose JSON is not an object,
 * or that contain no non-empty messages are skipped — one bad file never
 * aborts the scan. An unreadable or blank `sessionsDir` yields an empty list.
 *
 * @param opts - Directory to scan and optional mtime cut-off.
 * @returns Sessions sorted by modification time (oldest first) with
 *   content-identical sessions removed.
 */
export async function loadSessions(
  opts: SessionLoaderOptions,
): Promise<LoadedSession[]> {
  const { sessionsDir, modifiedAfter } = opts;
  if (!sessionsDir.trim()) return [];

  let entries: string[];
  try {
    entries = await fs.readdir(sessionsDir);
  } catch {
    return [];
  }

  const sessions: LoadedSession[] = [];

  for (const name of entries) {
    if (!name.endsWith(".json")) continue;
    const filePath = path.join(sessionsDir, name);

    let st: Awaited<ReturnType<typeof fs.stat>>;
    try {
      st = await fs.stat(filePath);
    } catch {
      continue;
    }
    if (!st.isFile()) continue;

    if (modifiedAfter && st.mtime <= modifiedAfter) {
      continue;
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(await fs.readFile(filePath, "utf8"));
    } catch {
      continue;
    }

    // Valid JSON is not necessarily a session object (e.g. `null`, `[]`, `3`).
    if (parsed === null || typeof parsed !== "object") continue;
    const session = parsed as PiSessionFile;

    const messages: unknown = session.messages;
    if (!Array.isArray(messages) || messages.length === 0) continue;

    const turns: SessionTurn[] = [];
    for (let i = 0; i < messages.length; i++) {
      const entry: unknown = messages[i];
      if (entry === null || typeof entry !== "object") continue;
      const msg = entry as PiSessionMessage;
      const text = messageText(msg.content);
      if (!text.trim()) continue;
      turns.push({
        role: typeof msg.role === "string" ? msg.role : "unknown",
        content: text,
        // Index in the original array, so skipped messages leave gaps.
        turnIndex: i,
      });
    }

    if (turns.length === 0) continue;

    sessions.push({
      id: typeof session.id === "string" ? session.id : path.basename(name, ".json"),
      title: typeof session.title === "string" ? session.title : "",
      createdAt: typeof session.created_at === "string" ? session.created_at : "",
      filePath,
      modifiedAt: st.mtime,
      turns,
    });
  }

  sessions.sort((a, b) => a.modifiedAt.getTime() - b.modifiedAt.getTime());
  return deduplicateSessions(sessions);
}
|
|
127
|
+
|
|
128
|
+
/**
 * Drop sessions whose turn contents duplicate an earlier session in the list.
 *
 * Two sessions count as duplicates when the first 16 hex characters of the
 * SHA-256 of their newline-joined turn contents match; roles and metadata are
 * not part of the fingerprint. The first occurrence is kept and input order
 * is preserved.
 */
function deduplicateSessions(sessions: LoadedSession[]): LoadedSession[] {
  const fingerprints = new Set<string>();
  const unique: LoadedSession[] = [];

  for (const session of sessions) {
    const joined = session.turns.map(({ content }) => content).join("\n");
    const key = createHash("sha256").update(joined).digest("hex").slice(0, 16);
    if (fingerprints.has(key)) continue;
    fingerprints.add(key);
    unique.push(session);
  }

  return unique;
}
|