@eeshans/howiprompt 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +148 -0
- package/bin/bootstrap-db.mjs +166 -0
- package/bin/cli-helpers.mjs +86 -0
- package/bin/cli.mjs +205 -0
- package/config/ml.json +47 -0
- package/data/claude_code/.gitkeep +3 -0
- package/data/codex/.gitkeep +0 -0
- package/data/reference_clusters.json +314 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.js +194 -0
- package/dist/index.js.map +1 -0
- package/dist/pipeline/backends.d.ts +39 -0
- package/dist/pipeline/backends.js +411 -0
- package/dist/pipeline/backends.js.map +1 -0
- package/dist/pipeline/classifiers.d.ts +17 -0
- package/dist/pipeline/classifiers.js +181 -0
- package/dist/pipeline/classifiers.js.map +1 -0
- package/dist/pipeline/config.d.ts +21 -0
- package/dist/pipeline/config.js +79 -0
- package/dist/pipeline/config.js.map +1 -0
- package/dist/pipeline/db.d.ts +41 -0
- package/dist/pipeline/db.js +130 -0
- package/dist/pipeline/db.js.map +1 -0
- package/dist/pipeline/embeddings.d.ts +15 -0
- package/dist/pipeline/embeddings.js +82 -0
- package/dist/pipeline/embeddings.js.map +1 -0
- package/dist/pipeline/exclusions.d.ts +86 -0
- package/dist/pipeline/exclusions.js +320 -0
- package/dist/pipeline/exclusions.js.map +1 -0
- package/dist/pipeline/metrics.d.ts +12 -0
- package/dist/pipeline/metrics.js +278 -0
- package/dist/pipeline/metrics.js.map +1 -0
- package/dist/pipeline/ml-config.d.ts +23 -0
- package/dist/pipeline/ml-config.js +54 -0
- package/dist/pipeline/ml-config.js.map +1 -0
- package/dist/pipeline/models.d.ts +23 -0
- package/dist/pipeline/models.js +21 -0
- package/dist/pipeline/models.js.map +1 -0
- package/dist/pipeline/nlp.d.ts +20 -0
- package/dist/pipeline/nlp.js +200 -0
- package/dist/pipeline/nlp.js.map +1 -0
- package/dist/pipeline/parsers.d.ts +11 -0
- package/dist/pipeline/parsers.js +492 -0
- package/dist/pipeline/parsers.js.map +1 -0
- package/dist/pipeline/registry.d.ts +21 -0
- package/dist/pipeline/registry.js +45 -0
- package/dist/pipeline/registry.js.map +1 -0
- package/dist/pipeline/style.d.ts +37 -0
- package/dist/pipeline/style.js +204 -0
- package/dist/pipeline/style.js.map +1 -0
- package/dist/pipeline/sync.d.ts +8 -0
- package/dist/pipeline/sync.js +52 -0
- package/dist/pipeline/sync.js.map +1 -0
- package/dist/pipeline/trends.d.ts +8 -0
- package/dist/pipeline/trends.js +226 -0
- package/dist/pipeline/trends.js.map +1 -0
- package/dist/server.d.ts +7 -0
- package/dist/server.js +216 -0
- package/dist/server.js.map +1 -0
- package/frontend/dist/_astro/MethodologyModal.astro_astro_type_script_index_0_lang.jiHwSrn-.js +34 -0
- package/frontend/dist/_astro/index.Ck1ZXjve.css +1 -0
- package/frontend/dist/_astro/index.astro_astro_type_script_index_0_lang.PuBlxVje.js +37 -0
- package/frontend/dist/_astro/index.astro_astro_type_script_index_1_lang.DmQY6kFx.js +1 -0
- package/frontend/dist/_astro/theme.CbYAaQI4.js +1 -0
- package/frontend/dist/_astro/wrapped.CpzRcLjf.css +1 -0
- package/frontend/dist/_astro/wrapped.astro_astro_type_script_index_0_lang.D4GeWu2-.js +11 -0
- package/frontend/dist/_astro/wrapped.astro_astro_type_script_index_1_lang.CPAAJDh5.js +1 -0
- package/frontend/dist/favicon.svg +4 -0
- package/frontend/dist/images/card_architect.png +0 -0
- package/frontend/dist/images/card_commander.png +0 -0
- package/frontend/dist/images/card_delegator.png +0 -0
- package/frontend/dist/images/card_explorer.png +0 -0
- package/frontend/dist/images/card_partner.png +0 -0
- package/frontend/dist/images/char_architect.png +0 -0
- package/frontend/dist/images/char_commander.png +0 -0
- package/frontend/dist/images/char_delegator.png +0 -0
- package/frontend/dist/images/char_explorer.png +0 -0
- package/frontend/dist/images/char_partner.png +0 -0
- package/frontend/dist/index.html +9 -0
- package/frontend/dist/wrapped/index.html +9 -0
- package/package.json +66 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/** Per-backend settings stored under `Config.backends`, keyed by backend id. */
export interface BackendToggle {
    /** Whether this backend is switched on. */
    enabled: boolean;
    /** Exclusion entries that apply to this backend. */
    exclusions: Array<string>;
}
|
|
5
|
+
/** Runtime configuration object produced by `loadConfig`. */
export interface Config {
    /** Root data directory (`~/.howiprompt` when `loadConfig` is called without an argument). */
    dataDir: string;

    // --- Source locations -------------------------------------------------
    /** `<dataDir>/raw/claude_code` */
    claudeCodeSource: string;
    /** `<dataDir>/raw/codex/history.jsonl` */
    codexHistorySource: string;
    /** `~/.codex/sessions` (under the user's home directory, not `dataDir`). */
    codexSessionsSource: string;
    /** `<dataDir>/raw/copilot_chat` */
    copilotChatSource: string;
    /** `<dataDir>/raw/cursor` */
    cursorSource: string;
    /** `<dataDir>/raw/lmstudio` */
    lmStudioSource: string;

    // --- Thresholds (fixed values set by `loadConfig`) --------------------
    engagementThreshold: number;
    politenessThreshold: number;

    // --- User settings ----------------------------------------------------
    /** Legacy exclusion list; used to seed the default `claude_code` exclusions. */
    agentCwds: Array<string>;
    /** Per-backend toggles keyed by backend id (e.g. `claude_code`, `codex`). */
    backends: Record<string, BackendToggle>;
    /** `false` unless the stored config says otherwise. */
    hasCompletedSetup: boolean;
}
|
|
19
|
+
/** Builds the runtime configuration for `dataDir` (default `~/.howiprompt`), layering `<dataDir>/config.json` over built-in defaults when that file can be read. */
export declare function loadConfig(dataDir?: string): Config;
|
|
20
|
+
/** Merges `updates` over the existing `<dataDir>/config.json` (if readable), normalizes `backends`, and writes the result back, creating `dataDir` if needed. */
export declare function saveConfig(dataDir: string, updates: Record<string, any>): void;
|
|
21
|
+
/** Reads and parses `branding.json` from `projectRoot` (default: current working directory); returns `null` when it cannot be read or parsed. */
export declare function loadBranding(projectRoot?: string): Record<string, string> | null;
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import os from "node:os";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
function defaultBackends(agentCwds) {
|
|
5
|
+
return {
|
|
6
|
+
claude_code: { enabled: true, exclusions: agentCwds },
|
|
7
|
+
codex: { enabled: true, exclusions: [] },
|
|
8
|
+
copilot_chat: { enabled: false, exclusions: [] },
|
|
9
|
+
cursor: { enabled: false, exclusions: [] },
|
|
10
|
+
lmstudio: { enabled: false, exclusions: [] },
|
|
11
|
+
};
|
|
12
|
+
}
|
|
13
|
+
function mergeBackends(defaults, configured = {}) {
|
|
14
|
+
const merged = {};
|
|
15
|
+
const ids = new Set([...Object.keys(defaults), ...Object.keys(configured)]);
|
|
16
|
+
for (const id of ids) {
|
|
17
|
+
merged[id] = {
|
|
18
|
+
enabled: configured[id]?.enabled ?? defaults[id]?.enabled ?? false,
|
|
19
|
+
exclusions: Array.isArray(configured[id]?.exclusions)
|
|
20
|
+
? configured[id].exclusions
|
|
21
|
+
: (defaults[id]?.exclusions ?? []),
|
|
22
|
+
};
|
|
23
|
+
}
|
|
24
|
+
return merged;
|
|
25
|
+
}
|
|
26
|
+
/**
 * Build the runtime configuration.
 *
 * Reads `<dataDir>/config.json` when it exists and layers it over the
 * built-in defaults. A missing, unreadable or malformed file is not an error:
 * the defaults are used instead.
 *
 * @param dataDir Data directory; defaults to `~/.howiprompt`.
 * @returns The resolved configuration (source paths, thresholds, backends).
 */
export function loadConfig(dataDir) {
    const dd = dataDir ?? path.join(os.homedir(), ".howiprompt");
    // Load user config if it exists
    let userConfig = {};
    const configPath = path.join(dd, "config.json");
    try {
        const parsed = JSON.parse(fs.readFileSync(configPath, "utf-8"));
        // JSON.parse also succeeds for `null`, arrays and primitives. Treat
        // those like an invalid file; `null` in particular would otherwise
        // throw on the property reads below, outside this try/catch.
        if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
            userConfig = parsed;
        }
    }
    catch {
        // No config file or invalid — that's fine
    }
    const agentCwds = Array.isArray(userConfig.agentCwds) ? userConfig.agentCwds : [];
    // Migrate legacy agentCwds → backends.claude_code.exclusions
    const backends = mergeBackends(defaultBackends(agentCwds), userConfig.backends);
    return {
        dataDir: dd,
        claudeCodeSource: path.join(dd, "raw", "claude_code"),
        codexHistorySource: path.join(dd, "raw", "codex", "history.jsonl"),
        // Unlike the other sources, this one lives under the home directory.
        codexSessionsSource: path.join(os.homedir(), ".codex", "sessions"),
        copilotChatSource: path.join(dd, "raw", "copilot_chat"),
        cursorSource: path.join(dd, "raw", "cursor"),
        lmStudioSource: path.join(dd, "raw", "lmstudio"),
        engagementThreshold: 12.0,
        politenessThreshold: 4.5,
        agentCwds,
        backends,
        hasCompletedSetup: userConfig.hasCompletedSetup ?? false,
    };
}
|
|
55
|
+
/**
 * Persist configuration changes to `<dataDir>/config.json`.
 *
 * The stored file (if it can be read and parsed) is shallow-merged with
 * `updates`, the `backends` entry is normalized against the defaults, and the
 * result is written back as pretty-printed JSON. `dataDir` is created when
 * missing.
 *
 * @param dataDir Directory that holds `config.json`.
 * @param updates Top-level keys to overwrite.
 */
export function saveConfig(dataDir, updates) {
    const target = path.join(dataDir, "config.json");
    let onDisk;
    try {
        onDisk = JSON.parse(fs.readFileSync(target, "utf-8"));
    }
    catch {
        // Nothing usable on disk: start fresh
        onDisk = {};
    }
    const next = { ...onDisk, ...updates };
    const legacyCwds = Array.isArray(next.agentCwds) ? next.agentCwds : [];
    next.backends = mergeBackends(defaultBackends(legacyCwds), next.backends);
    fs.mkdirSync(dataDir, { recursive: true });
    const serialized = JSON.stringify(next, null, 2);
    fs.writeFileSync(target, serialized);
}
|
|
70
|
+
/**
 * Load optional branding overrides.
 *
 * @param projectRoot Directory containing `branding.json`; defaults to the
 *   current working directory.
 * @returns The parsed JSON, or `null` when the file cannot be read or parsed.
 */
export function loadBranding(projectRoot) {
    const root = projectRoot ?? process.cwd();
    const brandingFile = path.join(root, "branding.json");
    let parsed = null;
    try {
        const raw = fs.readFileSync(brandingFile, "utf-8");
        parsed = JSON.parse(raw);
    }
    catch {
        parsed = null;
    }
    return parsed;
}
|
|
79
|
+
//# sourceMappingURL=config.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../../src/pipeline/config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,EAAE,MAAM,SAAS,CAAC;AACzB,OAAO,IAAI,MAAM,WAAW,CAAC;AAsB7B,SAAS,eAAe,CAAC,SAAmB;IAC1C,OAAO;QACL,WAAW,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,SAAS,EAAE;QACrD,KAAK,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,UAAU,EAAE,EAAE,EAAE;QACxC,YAAY,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,EAAE,EAAE;QAChD,MAAM,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,EAAE,EAAE;QAC1C,QAAQ,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,EAAE,EAAE;KAC7C,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CACpB,QAAuC,EACvC,aAA4C,EAAE;IAE9C,MAAM,MAAM,GAAkC,EAAE,CAAC;IACjD,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,GAAG,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;IAE5E,KAAK,MAAM,EAAE,IAAI,GAAG,EAAE,CAAC;QACrB,MAAM,CAAC,EAAE,CAAC,GAAG;YACX,OAAO,EAAE,UAAU,CAAC,EAAE,CAAC,EAAE,OAAO,IAAI,QAAQ,CAAC,EAAE,CAAC,EAAE,OAAO,IAAI,KAAK;YAClE,UAAU,EAAE,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC,EAAE,UAAU,CAAC;gBACnD,CAAC,CAAC,UAAU,CAAC,EAAE,CAAC,CAAC,UAAU;gBAC3B,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,EAAE,UAAU,IAAI,EAAE,CAAC;SACrC,CAAC;IACJ,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,OAAgB;IACzC,MAAM,EAAE,GAAG,OAAO,IAAI,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,EAAE,aAAa,CAAC,CAAC;IAE7D,gCAAgC;IAChC,IAAI,UAAU,GAAwB,EAAE,CAAC;IACzC,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,aAAa,CAAC,CAAC;IAChD,IAAI,CAAC;QACH,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC,CAAC;IAChE,CAAC;IAAC,MAAM,CAAC;QACP,0CAA0C;IAC5C,CAAC;IAED,MAAM,SAAS,GAAa,KAAK,CAAC,OAAO,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;IAE5F,6DAA6D;IAC7D,MAAM,QAAQ,GAAG,aAAa,CAAC,eAAe,CAAC,SAAS,CAAC,EAAE,UAAU,CAAC,QAAQ,CAAC,CAAC;IAEhF,OAAO;QACL,OAAO,EAAE,EAAE;QACX,gBAAgB,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,KAAK,EAAE,aAAa,CAAC;QACrD,kBAAkB,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,KAAK,EAAE,OAAO,EAAE,eAAe,CAAC;QAClE,mBAAmB,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,OAAO,EAA
E,EAAE,QAAQ,EAAE,UAAU,CAAC;QAClE,iBAAiB,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,KAAK,EAAE,cAAc,CAAC;QACvD,YAAY,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,KAAK,EAAE,QAAQ,CAAC;QAC5C,cAAc,EAAE,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,KAAK,EAAE,UAAU,CAAC;QAChD,mBAAmB,EAAE,IAAI;QACzB,mBAAmB,EAAE,GAAG;QACxB,SAAS;QACT,QAAQ;QACR,iBAAiB,EAAE,UAAU,CAAC,iBAAiB,IAAI,KAAK;KACzD,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,UAAU,CAAC,OAAe,EAAE,OAA4B;IACtE,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,aAAa,CAAC,CAAC;IACrD,IAAI,QAAQ,GAAwB,EAAE,CAAC;IACvC,IAAI,CAAC;QACH,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,UAAU,EAAE,OAAO,CAAC,CAAC,CAAC;IAC9D,CAAC;IAAC,MAAM,CAAC;QACP,cAAc;IAChB,CAAC;IACD,MAAM,MAAM,GAAG,EAAE,GAAG,QAAQ,EAAE,GAAG,OAAO,EAAE,CAAC;IAC3C,MAAM,SAAS,GAAa,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;IACpF,MAAM,CAAC,QAAQ,GAAG,aAAa,CAAC,eAAe,CAAC,SAAS,CAAC,EAAE,MAAM,CAAC,QAAQ,CAAC,CAAC;IAC7E,EAAE,CAAC,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC3C,EAAE,CAAC,aAAa,CAAC,UAAU,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;AAChE,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,WAAoB;IAC/C,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,WAAW,IAAI,OAAO,CAAC,GAAG,EAAE,EAAE,eAAe,CAAC,CAAC;IAC9E,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,YAAY,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC,CAAC;IAC5D,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
import { type Client } from "@libsql/client";
|
|
2
|
+
import { Message, Platform, Role } from "./models.js";
|
|
3
|
+
/** Creates a libSQL client for the local database file at `dbPath` (URL `file:<dbPath>`). */
export declare function createDbClient(dbPath: string): Client;
|
|
4
|
+
/** SHA-256 hex digest over the message's platform, conversation id, ISO timestamp and the first 200 characters of its content. */
export declare function messageHash(m: Message): string;
|
|
5
|
+
/**
 * Inserts `messages` into the `messages` table with `INSERT OR IGNORE`.
 *
 * @returns `inserted`: statements that changed at least one row;
 *   `skipped`: statements that changed none.
 */
export declare function insertMessages(
    client: Client,
    messages: Message[],
): Promise<{ inserted: number; skipped: number }>;
|
|
9
|
+
/**
 * Inserts NLP enrichment rows into `nlp_enrichments` with `INSERT OR IGNORE`,
 * one row per entry of `enrichments`.
 */
export declare function insertNlpEnrichments(
    client: Client,
    enrichments: {
        messageId: number;
        intent: string;
        intentConfidence: number;
        complexityScore: number;
        complexityConfidence: number;
        iterationScore: number;
        iterationConfidence: number;
    }[],
): Promise<void>;
|
|
18
|
+
/** One row of the `messages` table as returned by `queryMessages`. */
export interface MessageRow {
    id: number;
    /** Timestamp column, stringified as stored. */
    timestamp: string;
    platform: string;
    role: string;
    content: string;
    /** From the `conversation_id` column. */
    conversationId: string;
    /** From the `word_count` column. */
    wordCount: number;
    /** From the `model_id` column; `null` when the column is empty. */
    modelId: string | null;
    /** From the `model_provider` column; `null` when the column is empty. */
    modelProvider: string | null;
}
|
|
29
|
+
/**
 * Reads messages ordered by timestamp, optionally filtered by role and/or
 * platform. Filtering on role `"human"` additionally requires
 * `is_excluded = 0`.
 */
export declare function queryMessages(
    client: Client,
    opts?: { role?: Role; platform?: Platform },
): Promise<MessageRow[]>;
|
|
33
|
+
/**
 * Builds an optional SQL fragment restricting a query to one platform.
 *
 * @returns `" AND platform = ?"` plus its bound argument when `platform` is
 *   given; an empty clause and no arguments otherwise.
 */
export declare function platformFilter(platform?: Platform): { clause: string; args: any[] };
|
|
37
|
+
/**
 * Looks up the most recent `sync_log` row for `source` (by `synced_at`).
 * Both fields are `null` when no row exists or the column is empty.
 */
export declare function getLastSync(
    client: Client,
    source: string,
): Promise<{ lastTimestamp: string | null; lastFile: string | null }>;
|
|
41
|
+
/** Appends a `sync_log` row for `source` with the given file, timestamp and message count; `synced_at` is set by the database (`datetime('now')`). */
export declare function logSync(client: Client, source: string, lastFile: string | null, lastTimestamp: string | null, messageCount: number): Promise<void>;
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { createClient } from "@libsql/client";
|
|
3
|
+
/**
 * Open a libSQL client backed by a local database file.
 *
 * @param dbPath Filesystem path of the database; it is passed to libSQL as
 *   the URL `file:<dbPath>`.
 * @returns The client created by `createClient`.
 */
export function createDbClient(dbPath) {
    const url = `file:${dbPath}`;
    return createClient({ url });
}
|
|
6
|
+
export function messageHash(m) {
|
|
7
|
+
const data = `${m.platform}|${m.conversationId}|${m.timestamp.toISOString()}|${m.content.slice(0, 200)}`;
|
|
8
|
+
return createHash("sha256").update(data).digest("hex");
|
|
9
|
+
}
|
|
10
|
+
/**
 * Insert messages into the `messages` table in write batches of 500.
 *
 * Every row is written with `INSERT OR IGNORE`, so a statement that changes
 * no rows is counted as skipped rather than failing. The local hour, weekday
 * and date columns come from `localTime(m.timestamp)`; `synced_at` is set by
 * the database.
 *
 * @param client libSQL client exposing `batch(statements, mode)`.
 * @param messages Messages to store.
 * @returns `{ inserted, skipped }` statement counts.
 */
export async function insertMessages(client, messages) {
    const CHUNK = 500;
    const counts = { inserted: 0, skipped: 0 };
    for (let start = 0; start < messages.length; start += CHUNK) {
        const statements = [];
        for (const msg of messages.slice(start, start + CHUNK)) {
            const when = localTime(msg.timestamp);
            statements.push({
                sql: `INSERT OR IGNORE INTO messages
(hash, timestamp, platform, role, content, conversation_id,
word_count, model_id, model_provider,
local_hour, local_weekday, local_date, source_file, synced_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, datetime('now'))`,
                args: [
                    messageHash(msg),
                    msg.timestamp.toISOString(),
                    msg.platform,
                    msg.role,
                    msg.content,
                    msg.conversationId,
                    msg.wordCount,
                    msg.modelId ?? null,
                    msg.modelProvider ?? null,
                    when.hour,
                    when.weekday,
                    when.dateStr,
                    msg.sourceFile ?? null,
                ],
            });
        }
        const outcomes = await client.batch(statements, "write");
        for (const outcome of outcomes) {
            if (outcome.rowsAffected > 0) {
                counts.inserted += 1;
            }
            else {
                counts.skipped += 1;
            }
        }
    }
    return counts;
}
|
|
51
|
+
/**
 * Break a Date into the local-time fields stored with each message.
 *
 * @param d Date to inspect (interpreted in the process's local time zone).
 * @returns `hour` (0-23), `weekday` (0 = Monday ... 6 = Sunday) and
 *   `dateStr` formatted as `YYYY-MM-DD` (month and day zero-padded).
 */
function localTime(d) {
    const pad2 = (n) => String(n).padStart(2, "0");
    // JS getDay() is 0 = Sunday; shift so that 0 = Monday.
    const mondayFirst = (d.getDay() + 6) % 7;
    const isoDate = [d.getFullYear(), pad2(d.getMonth() + 1), pad2(d.getDate())].join("-");
    return {
        hour: d.getHours(),
        weekday: mondayFirst,
        dateStr: isoDate,
    };
}
|
|
58
|
+
export async function insertNlpEnrichments(client, enrichments) {
|
|
59
|
+
const batchSize = 500;
|
|
60
|
+
for (let i = 0; i < enrichments.length; i += batchSize) {
|
|
61
|
+
const batch = enrichments.slice(i, i + batchSize);
|
|
62
|
+
await client.batch(batch.map((e) => ({
|
|
63
|
+
sql: `INSERT OR IGNORE INTO nlp_enrichments
|
|
64
|
+
(message_id, intent, intent_confidence, complexity_score,
|
|
65
|
+
complexity_confidence, iteration_score, iteration_confidence)
|
|
66
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)`,
|
|
67
|
+
args: [
|
|
68
|
+
e.messageId,
|
|
69
|
+
e.intent,
|
|
70
|
+
e.intentConfidence,
|
|
71
|
+
e.complexityScore,
|
|
72
|
+
e.complexityConfidence,
|
|
73
|
+
e.iterationScore,
|
|
74
|
+
e.iterationConfidence,
|
|
75
|
+
],
|
|
76
|
+
})), "write");
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
export async function queryMessages(client, opts) {
|
|
80
|
+
let sql = "SELECT id, timestamp, platform, role, content, conversation_id, word_count, model_id, model_provider FROM messages WHERE 1=1";
|
|
81
|
+
const args = [];
|
|
82
|
+
if (opts?.role) {
|
|
83
|
+
sql += " AND role = ?";
|
|
84
|
+
args.push(opts.role);
|
|
85
|
+
if (opts.role === "human")
|
|
86
|
+
sql += " AND is_excluded = 0";
|
|
87
|
+
}
|
|
88
|
+
if (opts?.platform) {
|
|
89
|
+
sql += " AND platform = ?";
|
|
90
|
+
args.push(opts.platform);
|
|
91
|
+
}
|
|
92
|
+
sql += " ORDER BY timestamp";
|
|
93
|
+
const result = await client.execute({ sql, args });
|
|
94
|
+
return result.rows.map((r) => ({
|
|
95
|
+
id: Number(r.id),
|
|
96
|
+
timestamp: String(r.timestamp),
|
|
97
|
+
platform: String(r.platform),
|
|
98
|
+
role: String(r.role),
|
|
99
|
+
content: String(r.content),
|
|
100
|
+
conversationId: String(r.conversation_id),
|
|
101
|
+
wordCount: Number(r.word_count),
|
|
102
|
+
modelId: r.model_id ? String(r.model_id) : null,
|
|
103
|
+
modelProvider: r.model_provider ? String(r.model_provider) : null,
|
|
104
|
+
}));
|
|
105
|
+
}
|
|
106
|
+
export function platformFilter(platform) {
|
|
107
|
+
if (platform)
|
|
108
|
+
return { clause: " AND platform = ?", args: [platform] };
|
|
109
|
+
return { clause: "", args: [] };
|
|
110
|
+
}
|
|
111
|
+
export async function getLastSync(client, source) {
|
|
112
|
+
const result = await client.execute({
|
|
113
|
+
sql: "SELECT last_timestamp, last_file FROM sync_log WHERE source = ? ORDER BY synced_at DESC LIMIT 1",
|
|
114
|
+
args: [source],
|
|
115
|
+
});
|
|
116
|
+
if (result.rows.length === 0)
|
|
117
|
+
return { lastTimestamp: null, lastFile: null };
|
|
118
|
+
const row = result.rows[0];
|
|
119
|
+
return {
|
|
120
|
+
lastTimestamp: row.last_timestamp ? String(row.last_timestamp) : null,
|
|
121
|
+
lastFile: row.last_file ? String(row.last_file) : null,
|
|
122
|
+
};
|
|
123
|
+
}
|
|
124
|
+
export async function logSync(client, source, lastFile, lastTimestamp, messageCount) {
|
|
125
|
+
await client.execute({
|
|
126
|
+
sql: "INSERT INTO sync_log (source, last_file, last_timestamp, message_count, synced_at) VALUES (?, ?, ?, ?, datetime('now'))",
|
|
127
|
+
args: [source, lastFile, lastTimestamp, messageCount],
|
|
128
|
+
});
|
|
129
|
+
}
|
|
130
|
+
//# sourceMappingURL=db.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"db.js","sourceRoot":"","sources":["../../src/pipeline/db.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,YAAY,EAAiC,MAAM,gBAAgB,CAAC;AAG7E,MAAM,UAAU,cAAc,CAAC,MAAc;IAC3C,OAAO,YAAY,CAAC,EAAE,GAAG,EAAE,QAAQ,MAAM,EAAE,EAAE,CAAC,CAAC;AACjD,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,CAAU;IACpC,MAAM,IAAI,GAAG,GAAG,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,cAAc,IAAI,CAAC,CAAC,SAAS,CAAC,WAAW,EAAE,IAAI,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;IACzG,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AACzD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,MAAc,EACd,QAAmB;IAEnB,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,IAAI,OAAO,GAAG,CAAC,CAAC;IAChB,MAAM,SAAS,GAAG,GAAG,CAAC;IAEtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;QACpD,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;QAC/C,MAAM,KAAK,GAAkB,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;YAC3C,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;YACrC,OAAO;gBACL,GAAG,EAAE;;;;8EAIiE;gBACtE,IAAI,EAAE;oBACJ,WAAW,CAAC,CAAC,CAAC;oBACd,CAAC,CAAC,SAAS,CAAC,WAAW,EAAE;oBACzB,CAAC,CAAC,QAAQ;oBACV,CAAC,CAAC,IAAI;oBACN,CAAC,CAAC,OAAO;oBACT,CAAC,CAAC,cAAc;oBAChB,CAAC,CAAC,SAAS;oBACX,CAAC,CAAC,OAAO,IAAI,IAAI;oBACjB,CAAC,CAAC,aAAa,IAAI,IAAI;oBACvB,KAAK,CAAC,IAAI;oBACV,KAAK,CAAC,OAAO;oBACb,KAAK,CAAC,OAAO;oBACb,CAAC,CAAC,UAAU,IAAI,IAAI;iBACrB;aACF,CAAC;QACJ,CAAC,CAAC,CAAC;QAEH,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,KAAK,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QACnD,KAAK,MAAM,CAAC,IAAI,OAAO,EAAE,CAAC;YACxB,IAAI,CAAC,CAAC,YAAY,GAAG,CAAC;gBAAE,QAAQ,EAAE,CAAC;;gBAC9B,OAAO,EAAE,CAAC;QACjB,CAAC;IACH,CAAC;IAED,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,CAAC;AAC/B,CAAC;AAED,SAAS,SAAS,CAAC,CAAO;IACxB,OAAO;QACL,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE;QAClB,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,iCAAiC;QAClE,OAAO,EAAE,GAAG,CAAC,CAAC,WAAW,EAAE,IAAI,MAAM,CAAC,CAAC,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,CAA
C,CAAC,OAAO,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE;KACnH,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,oBAAoB,CACxC,MAAc,EACd,WAQE;IAEF,MAAM,SAAS,GAAG,GAAG,CAAC;IACtB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;QACvD,MAAM,KAAK,GAAG,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;QAClD,MAAM,MAAM,CAAC,KAAK,CAChB,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YAChB,GAAG,EAAE;;;2CAG8B;YACnC,IAAI,EAAE;gBACJ,CAAC,CAAC,SAAS;gBACX,CAAC,CAAC,MAAM;gBACR,CAAC,CAAC,gBAAgB;gBAClB,CAAC,CAAC,eAAe;gBACjB,CAAC,CAAC,oBAAoB;gBACtB,CAAC,CAAC,cAAc;gBAChB,CAAC,CAAC,mBAAmB;aACtB;SACF,CAAC,CAAC,EACH,OAAO,CACR,CAAC;IACJ,CAAC;AACH,CAAC;AAcD,MAAM,CAAC,KAAK,UAAU,aAAa,CACjC,MAAc,EACd,IAA2C;IAE3C,IAAI,GAAG,GACL,8HAA8H,CAAC;IACjI,MAAM,IAAI,GAAU,EAAE,CAAC;IACvB,IAAI,IAAI,EAAE,IAAI,EAAE,CAAC;QACf,GAAG,IAAI,eAAe,CAAC;QACvB,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACrB,IAAI,IAAI,CAAC,IAAI,KAAK,OAAO;YAAE,GAAG,IAAI,sBAAsB,CAAC;IAC3D,CAAC;IACD,IAAI,IAAI,EAAE,QAAQ,EAAE,CAAC;QACnB,GAAG,IAAI,mBAAmB,CAAC;QAC3B,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAC3B,CAAC;IACD,GAAG,IAAI,qBAAqB,CAAC;IAE7B,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,EAAE,GAAG,EAAE,IAAI,EAAE,CAAC,CAAC;IACnD,OAAO,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAC7B,EAAE,EAAE,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC;QAChB,SAAS,EAAE,MAAM,CAAC,CAAC,CAAC,SAAS,CAAC;QAC9B,QAAQ,EAAE,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC;QAC5B,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC;QACpB,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC;QAC1B,cAAc,EAAE,MAAM,CAAC,CAAC,CAAC,eAAe,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC,CAAC,CAAC,UAAU,CAAC;QAC/B,OAAO,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI;QAC/C,aAAa,EAAE,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI;KAClE,CAAC,CAAC,CAAC;AACN,CAAC;AAED,MAAM,UAAU,cAAc,CAAC,QAAmB;IAChD,IAAI,QAAQ;QAAE,OAAO,EAAE,MAAM,EAAE,mBAAmB,EAAE,IAAI,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC;IACvE,OAAO,EAAE,MAAM,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE,
CAAC;AAClC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,MAAc,EACd,MAAc;IAEd,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC;QAClC,GAAG,EAAE,iGAAiG;QACtG,IAAI,EAAE,CAAC,MAAM,CAAC;KACf,CAAC,CAAC;IACH,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,aAAa,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC;IAC7E,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC3B,OAAO;QACL,aAAa,EAAE,GAAG,CAAC,cAAc,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,cAAc,CAAC,CAAC,CAAC,CAAC,IAAI;QACrE,QAAQ,EAAE,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI;KACvD,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,OAAO,CAC3B,MAAc,EACd,MAAc,EACd,QAAuB,EACvB,aAA4B,EAC5B,YAAoB;IAEpB,MAAM,MAAM,CAAC,OAAO,CAAC;QACnB,GAAG,EAAE,yHAAyH;QAC9H,IAAI,EAAE,CAAC,MAAM,EAAE,QAAQ,EAAE,aAAa,EAAE,YAAY,CAAC;KACtD,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import type { Client } from "@libsql/client";
|
|
2
|
+
import type { MlConfig } from "./ml-config.js";
|
|
3
|
+
/** Clears the cached embedding pipeline so the next `initEmbedder` call builds a new one. */
export declare function resetEmbedderForTests(): void;
|
|
4
|
+
/**
 * Loads the feature-extraction pipeline named by `mlConfig.embedding` and
 * caches it for later `embedTexts` calls. Does nothing when a pipeline is
 * already cached.
 *
 * @param cacheDir Directory used as the model cache.
 * @param onProgress Forwarded to the pipeline loader as its progress callback.
 */
export declare function initEmbedder(
    mlConfig: MlConfig,
    cacheDir: string,
    onProgress?: (progress: { status: string; file?: string; progress?: number }) => void,
): Promise<void>;
|
|
9
|
+
/** Embeds `texts` in batches of `mlConfig.embedding.batchSize`, returning one vector per input in order; throws if `initEmbedder` has not been called first. */
export declare function embedTexts(texts: string[], mlConfig: MlConfig): Promise<Float32Array[]>;
|
|
10
|
+
/** Embeds a single text via `embedTexts` and returns its vector. */
export declare function embedSingle(text: string, mlConfig: MlConfig): Promise<Float32Array>;
|
|
11
|
+
/** Cosine similarity of `a` and `b`, computed over the first `a.length` components. */
export declare function cosineSimilarity(a: Float32Array, b: Float32Array): number;
|
|
12
|
+
/**
 * Embeds every non-excluded human message whose `embedding` column is NULL
 * and stores the vectors back on the `messages` rows.
 *
 * @param dataDir Data directory; models are cached under `<dataDir>/models`.
 * @param onProgress Model-loading progress callback.
 * @param onBatchProgress Called after each stored batch with the running and
 *   total message counts.
 * @returns Number of messages embedded (0 when nothing was pending).
 */
export declare function enrichEmbeddings(
    client: Client,
    mlConfig: MlConfig,
    dataDir: string,
    onProgress?: (progress: { status: string; progress?: number }) => void,
    onBatchProgress?: (embedded: number, total: number) => void,
): Promise<number>;
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
let extractor = null;
|
|
3
|
+
export function resetEmbedderForTests() {
|
|
4
|
+
extractor = null;
|
|
5
|
+
}
|
|
6
|
+
/**
 * Load the embedding pipeline once and cache it in `extractor`.
 *
 * Subsequent calls are no-ops while a pipeline is cached.
 *
 * @param mlConfig ML settings; `embedding.model` and `embedding.dtype` select
 *   the model.
 * @param cacheDir Directory assigned to the transformers.js `env.cacheDir`.
 * @param onProgress Optional callback passed as `progress_callback`.
 */
export async function initEmbedder(mlConfig, cacheDir, onProgress) {
    if (!extractor) {
        // Imported lazily so the library is only loaded when embeddings are needed.
        const { env: hfEnv, pipeline: buildPipeline } = await import("@huggingface/transformers");
        hfEnv.cacheDir = cacheDir;
        // Permit loading model files remotely (transformers.js `env.allowRemoteModels`).
        hfEnv.allowRemoteModels = true;
        const pipelineOptions = {
            dtype: mlConfig.embedding.dtype,
            revision: "main",
            progress_callback: onProgress,
        };
        extractor = await buildPipeline("feature-extraction", mlConfig.embedding.model, pipelineOptions);
    }
}
|
|
19
|
+
/**
 * Embed a list of texts with the cached pipeline.
 *
 * Texts are sent to the extractor in batches of
 * `mlConfig.embedding.batchSize`, using CLS pooling with normalization.
 *
 * @param texts Texts to embed.
 * @param mlConfig ML settings; `embedding.batchSize` must be greater than 0
 *   whenever `texts` is non-empty.
 * @returns One `Float32Array` per input text, in input order.
 * @throws Error if initEmbedder() has not been called.
 * @throws RangeError if there is work to do and `batchSize` is not a
 *   positive number.
 */
export async function embedTexts(texts, mlConfig) {
    if (!extractor)
        throw new Error("Embedder not initialized — call initEmbedder() first");
    const results = [];
    const batchSize = mlConfig.embedding.batchSize;
    // A batch size of 0 (or negative) would never advance the loop below, and
    // undefined/NaN would silently return no vectors. Fail loudly instead.
    if (texts.length > 0 && !(batchSize > 0)) {
        throw new RangeError(`embedding.batchSize must be a positive number, got ${batchSize}`);
    }
    for (let i = 0; i < texts.length; i += batchSize) {
        const batch = texts.slice(i, i + batchSize);
        const output = await extractor(batch, { pooling: "cls", normalize: true });
        for (let j = 0; j < batch.length; j++) {
            // Copy each row into its own Float32Array.
            const vec = output[j].data;
            results.push(new Float32Array(vec));
        }
    }
    return results;
}
|
|
34
|
+
/**
 * Embed one text.
 *
 * @param text Text to embed.
 * @param mlConfig ML settings forwarded to embedTexts().
 * @returns The vector for `text`.
 */
export async function embedSingle(text, mlConfig) {
    const vectors = await embedTexts([text], mlConfig);
    const first = vectors[0];
    return first;
}
|
|
38
|
+
/**
 * Cosine similarity between two vectors.
 *
 * @param a First vector; its length determines how many components are read.
 * @param b Second vector; expected to be at least as long as `a` (a shorter
 *   `b` yields NaN because missing components read as undefined).
 * @returns A value in [-1, 1], or 0 when either vector has zero norm
 *   (including empty vectors), where the cosine is undefined.
 */
export function cosineSimilarity(a, b) {
    let dot = 0;
    let normA = 0;
    let normB = 0;
    for (let i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        normA += a[i] * a[i];
        normB += b[i] * b[i];
    }
    const denominator = Math.sqrt(normA) * Math.sqrt(normB);
    // Avoid 0/0 = NaN, which breaks threshold checks and sort comparators
    // downstream; treat "no direction" as "no similarity".
    if (denominator === 0) {
        return 0;
    }
    return dot / denominator;
}
|
|
49
|
+
/**
 * Compute and store embeddings for messages that do not have one yet.
 *
 * Selects non-excluded human messages whose `embedding` is NULL, loads the
 * model if necessary (cache directory `<dataDir>/models`), then embeds and
 * writes them in batches of at most 32, pausing 10 ms between batches.
 *
 * @param client libSQL client exposing `execute` and `batch`.
 * @param mlConfig ML settings (`embedding.batchSize` caps the batch size).
 * @param dataDir Data directory that holds the model cache.
 * @param onProgress Model-loading progress callback, forwarded to initEmbedder().
 * @param onBatchProgress Called after each stored batch with
 *   `(embeddedSoFar, total)`.
 * @returns Number of messages embedded; 0 when nothing was pending.
 */
export async function enrichEmbeddings(client, mlConfig, dataDir, onProgress, onBatchProgress) {
    // Human messages still lacking an embedding
    const pending = await client.execute("SELECT id, content FROM messages WHERE role = 'human' AND is_excluded = 0 AND embedding IS NULL");
    if (pending.rows.length === 0) {
        return 0;
    }
    // Model is only initialised when there is work to do
    await initEmbedder(mlConfig, path.join(dataDir, "models"), onProgress);
    const ids = [];
    const texts = [];
    for (const row of pending.rows) {
        ids.push(Number(row.id));
        texts.push(String(row.content));
    }
    // Small batches with short pauses, to avoid a sustained CPU spike
    const step = Math.min(mlConfig.embedding.batchSize, 32);
    const total = texts.length;
    let done = 0;
    for (let start = 0; start < total; start += step) {
        const end = start + step;
        const chunkTexts = texts.slice(start, end);
        const chunkIds = ids.slice(start, end);
        const vectors = await embedTexts(chunkTexts, mlConfig);
        // Persist this chunk's vectors
        const updates = chunkIds.map((id, k) => {
            const serialized = JSON.stringify(Array.from(vectors[k]));
            return {
                sql: "UPDATE messages SET embedding = vector32(?) WHERE id = ?",
                args: [serialized, id],
            };
        });
        await client.batch(updates, "write");
        done += chunkTexts.length;
        onBatchProgress?.(done, total);
        // Yield briefly between batches (not after the last one)
        if (end < total) {
            await new Promise((resolve) => setTimeout(resolve, 10));
        }
    }
    return done;
}
|
|
82
|
+
//# sourceMappingURL=embeddings.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embeddings.js","sourceRoot":"","sources":["../../src/pipeline/embeddings.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,WAAW,CAAC;AAI7B,IAAI,SAAS,GAAQ,IAAI,CAAC;AAE1B,MAAM,UAAU,qBAAqB;IACnC,SAAS,GAAG,IAAI,CAAC;AACnB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAChC,QAAkB,EAClB,QAAgB,EAChB,UAAqF;IAErF,IAAI,SAAS;QAAE,OAAO;IAEtB,MAAM,EAAE,GAAG,EAAE,QAAQ,EAAE,cAAc,EAAE,GAAG,MAAM,MAAM,CAAC,2BAA2B,CAAC,CAAC;IAEpF,GAAG,CAAC,QAAQ,GAAG,QAAQ,CAAC;IACxB,2BAA2B;IAC3B,GAAG,CAAC,iBAAiB,GAAG,IAAI,CAAC;IAE7B,SAAS,GAAG,MAAM,cAAc,CAAC,oBAAoB,EAAE,QAAQ,CAAC,SAAS,CAAC,KAAK,EAAE;QAC/E,KAAK,EAAE,QAAQ,CAAC,SAAS,CAAC,KAAY;QACtC,QAAQ,EAAE,MAAM;QAChB,iBAAiB,EAAE,UAAU;KAC9B,CAAC,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAC9B,KAAe,EACf,QAAkB;IAElB,IAAI,CAAC,SAAS;QAAE,MAAM,IAAI,KAAK,CAAC,sDAAsD,CAAC,CAAC;IAExF,MAAM,OAAO,GAAmB,EAAE,CAAC;IACnC,MAAM,SAAS,GAAG,QAAQ,CAAC,SAAS,CAAC,SAAS,CAAC;IAE/C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;QACjD,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;QAC5C,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,KAAK,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAE3E,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YAC3B,OAAO,CAAC,IAAI,CAAC,IAAI,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,IAAY,EACZ,QAAkB;IAElB,MAAM,OAAO,GAAG,MAAM,UAAU,CAAC,CAAC,IAAI,CAAC,EAAE,QAAQ,CAAC,CAAC;IACnD,OAAO,OAAO,CAAC,CAAC,CAAC,CAAC;AACpB,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,CAAe,EAAE,CAAe;IAC/D,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACnB,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACv
B,CAAC;IACD,OAAO,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;AACrD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,gBAAgB,CACpC,MAAc,EACd,QAAkB,EAClB,OAAe,EACf,UAAsE,EACtE,eAA2D;IAE3D,yCAAyC;IACzC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,OAAO,CACjC,iGAAiG,CAClG,CAAC;IAEF,IAAI,MAAM,CAAC,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,CAAC,CAAC;IAEvC,sCAAsC;IACtC,MAAM,aAAa,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,CAAC,CAAC;IACnD,MAAM,YAAY,CAAC,QAAQ,EAAE,aAAa,EAAE,UAAU,CAAC,CAAC;IAExD,MAAM,GAAG,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACjD,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;IAExD,wEAAwE;IACxE,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,SAAS,EAAE,EAAE,CAAC,CAAC;IAC7D,IAAI,QAAQ,GAAG,CAAC,CAAC;IACjB,MAAM,KAAK,GAAG,KAAK,CAAC,MAAM,CAAC;IAE3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;QAC1C,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;QACjD,MAAM,QAAQ,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;QAC7C,MAAM,OAAO,GAAG,MAAM,UAAU,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;QAEvD,sBAAsB;QACtB,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;YACrC,GAAG,EAAE,0DAA0D;YAC/D,IAAI,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;SACnD,CAAC,CAAC,CAAC;QAEJ,MAAM,MAAM,CAAC,KAAK,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QACnC,QAAQ,IAAI,UAAU,CAAC,MAAM,CAAC;QAC9B,eAAe,EAAE,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QAEnC,qDAAqD;QACrD,IAAI,CAAC,GAAG,SAAS,GAAG,KAAK,EAAE,CAAC;YAC1B,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;QAC9C,CAAC;IACH,CAAC;IAED,OAAO,QAAQ,CAAC;AAClB,CAAC"}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified exclusion rules engine.
|
|
3
|
+
*
|
|
4
|
+
* All message filtering rules live in the `exclusion_rules` DB table.
|
|
5
|
+
* The parser and flagging system read from this table — no hardcoded filters.
|
|
6
|
+
*
|
|
7
|
+
* Rule types:
|
|
8
|
+
* - content_prefix: message content starts with pattern
|
|
9
|
+
* - cwd_pattern: JSONL entry cwd field contains pattern
|
|
10
|
+
* - dir_name: source file directory name matches pattern
|
|
11
|
+
* - skill_invocation: message matches a skill invocation syntax (regex)
|
|
12
|
+
*
|
|
13
|
+
* Match modes:
|
|
14
|
+
* - starts_with: content.startsWith(pattern)
|
|
15
|
+
* - contains: content.includes(pattern)
|
|
16
|
+
* - exact: content === pattern
|
|
17
|
+
* - regex: new RegExp(pattern).test(content)
|
|
18
|
+
*/
|
|
19
|
+
import type { Client } from "@libsql/client";
|
|
20
|
+
import type { Backend } from "./backends.js";
|
|
21
|
+
/** One exclusion rule record; the rule types and match modes it can carry are listed in the module header comment. */
export interface ExclusionRule {
|
|
22
|
+
id: number;
|
|
23
|
+
platform: string;
|
|
24
|
+
ruleType: string; // content_prefix | cwd_pattern | dir_name | skill_invocation (per the module header)
|
|
25
|
+
pattern: string;
|
|
26
|
+
matchMode: string; // starts_with | contains | exact | regex (per the module header)
|
|
27
|
+
description: string;
|
|
28
|
+
source: string;
|
|
29
|
+
templateContent: string;
|
|
30
|
+
isActive: boolean;
|
|
31
|
+
}
|
|
32
|
+
/** Exclusion rules paired with ready-to-call test predicates, held in four lists: contentPrefix, cwdPattern, dirName and skillInvocation. */
export interface CompiledRules {
|
|
33
|
+
contentPrefix: Array<{
|
|
34
|
+
rule: ExclusionRule;
|
|
35
|
+
test: (content: string) => boolean;
|
|
36
|
+
}>;
|
|
37
|
+
cwdPattern: Array<{
|
|
38
|
+
rule: ExclusionRule;
|
|
39
|
+
test: (cwd: string) => boolean;
|
|
40
|
+
}>;
|
|
41
|
+
dirName: Array<{
|
|
42
|
+
rule: ExclusionRule;
|
|
43
|
+
test: (dirName: string) => boolean;
|
|
44
|
+
}>;
|
|
45
|
+
skillInvocation: Array<{
|
|
46
|
+
rule: ExclusionRule;
|
|
47
|
+
test: (content: string, platform: string) => boolean; // the only predicate that also receives the platform
|
|
48
|
+
}>;
|
|
49
|
+
}
|
|
50
|
+
/** Load exclusion rules through the given libSQL client (the module header places them in the `exclusion_rules` table). NOTE(review): whether inactive rules are returned is not visible here — confirm. */
export declare function loadExclusionRules(client: Client): Promise<ExclusionRule[]>;
|
|
51
|
+
/** Turn a list of rules into a CompiledRules structure of rule/predicate pairs. NOTE(review): presumably rules are bucketed by `ruleType` and predicates built from `matchMode` — confirm in the implementation. */
export declare function compileRules(rules: ExclusionRule[]): CompiledRules;
|
|
52
|
+
/** Check whether a message's content should be excluded (content_prefix rules). */
|
|
53
|
+
// NOTE(review): how `platform` narrows the applicable rules is not visible in this declaration — confirm in the implementation.
export declare function shouldExcludeContent(compiled: CompiledRules, content: string, platform: string): {
|
|
54
|
+
excluded: boolean;
|
|
55
|
+
ruleId: number | null; // NOTE(review): presumably the id of the matching rule, null when nothing matched — confirm
|
|
56
|
+
};
|
|
57
|
+
/** Check if a CWD indicates a programmatic (non-human) session. */
|
|
58
|
+
export declare function shouldExcludeCwd(compiled: CompiledRules, cwd: string): {
|
|
59
|
+
excluded: boolean;
|
|
60
|
+
ruleId: number | null; // NOTE(review): presumably the id of the matching rule, null when nothing matched — confirm
|
|
61
|
+
};
|
|
62
|
+
/** Check if a source directory name should be excluded. */
|
|
63
|
+
export declare function shouldExcludeDir(compiled: CompiledRules, dirName: string): {
|
|
64
|
+
excluded: boolean;
|
|
65
|
+
ruleId: number | null; // NOTE(review): presumably the id of the matching rule, null when nothing matched — confirm
|
|
66
|
+
};
|
|
67
|
+
/**
|
|
68
|
+
* Flag existing messages that match exclusion rules.
|
|
69
|
+
* Checks skill_invocation and content_prefix rules against unflagged messages.
|
|
70
|
+
* Also checks template_content via hash comparison.
|
|
71
|
+
*/
|
|
72
|
+
export declare function flagExcludedMessages(client: Client): Promise<number>; // NOTE(review): resolved number is presumably the count of messages flagged — confirm
|
|
73
|
+
/**
|
|
74
|
+
* Discover skills from all backends and upsert as exclusion rules.
|
|
75
|
+
* Also upserts into skills table for template content storage.
|
|
76
|
+
*/
|
|
77
|
+
export declare function discoverAndSyncRules(client: Client, backends: Backend[]): Promise<{
|
|
78
|
+
skillsFound: number; // NOTE(review): presumably the number of skills discovered across `backends` — confirm
|
|
79
|
+
rulesUpserted: number; // NOTE(review): presumably the number of exclusion rules upserted from those skills — confirm
|
|
80
|
+
}>;
|
|
81
|
+
/**
|
|
82
|
+
* Seed system-level exclusion rules.
|
|
83
|
+
* These are platform-inherent rules that don't come from skill discovery.
|
|
84
|
+
* Uses INSERT OR IGNORE so they're only added once.
|
|
85
|
+
*/
|
|
86
|
+
export declare function seedSystemRules(client: Client): Promise<number>; // NOTE(review): resolved number is presumably the count of rules seeded — confirm
|