@voidwire/lore 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.ts +142 -9
- package/lib/age.ts +162 -0
- package/lib/config.ts +24 -0
- package/lib/contradiction.ts +18 -4
- package/lib/db.ts +2 -9
- package/lib/embed.ts +8 -12
- package/lib/indexer.ts +70 -25
- package/lib/indexers/blogs.ts +14 -2
- package/lib/indexers/captures.ts +14 -2
- package/lib/indexers/commits.ts +14 -2
- package/lib/indexers/development.ts +14 -2
- package/lib/indexers/explorations.ts +14 -2
- package/lib/indexers/flux.ts +14 -2
- package/lib/indexers/index.ts +28 -28
- package/lib/indexers/observations.ts +14 -2
- package/lib/indexers/obsidian.ts +14 -2
- package/lib/indexers/personal.ts +14 -2
- package/lib/indexers/readmes.ts +14 -2
- package/lib/indexers/sessions.ts +14 -2
- package/lib/indexers/teachings.ts +14 -2
- package/lib/info.ts +2 -3
- package/lib/init.ts +24 -28
- package/lib/list.ts +3 -10
- package/lib/projects.ts +2 -3
- package/lib/realtime.ts +2 -7
- package/lib/search.ts +64 -17
- package/lib/semantic.ts +7 -1
- package/package.json +4 -2
package/cli.ts
CHANGED
|
@@ -61,8 +61,10 @@ import {
|
|
|
61
61
|
type PurgeableSource,
|
|
62
62
|
type ContradictionDecision,
|
|
63
63
|
} from "./index";
|
|
64
|
+
import { findStaleEntries, applyAging, type StaleEntry } from "./lib/age.js";
|
|
64
65
|
import { isValidLoreType, LORE_TYPES } from "./lib/types";
|
|
65
|
-
import { runIndexer } from "./lib/indexer";
|
|
66
|
+
import { runIndexer, getNestedValue } from "./lib/indexer";
|
|
67
|
+
import { getConfig } from "./lib/config";
|
|
66
68
|
import { indexers } from "./lib/indexers/index";
|
|
67
69
|
import { runInit } from "./lib/init";
|
|
68
70
|
import { runEmbed } from "./lib/embed";
|
|
@@ -155,6 +157,8 @@ const BOOLEAN_FLAGS = new Set([
|
|
|
155
157
|
"list",
|
|
156
158
|
"rebuild",
|
|
157
159
|
"force",
|
|
160
|
+
"dry-run",
|
|
161
|
+
"apply",
|
|
158
162
|
]);
|
|
159
163
|
|
|
160
164
|
function getPositionalArgs(args: string[]): string[] {
|
|
@@ -819,6 +823,113 @@ Examples:
|
|
|
819
823
|
process.exit(0);
|
|
820
824
|
}
|
|
821
825
|
|
|
826
|
+
// ============================================================================
|
|
827
|
+
// Age Command
|
|
828
|
+
// ============================================================================
|
|
829
|
+
|
|
830
|
+
/**
 * `lore age` command: report purgeable entries that have outlived their TTL
 * and, only when --apply is given without --dry-run, delete them.
 *
 * The machine-readable result is emitted through output(); the human-readable
 * summary is written to stderr. Success paths end in process.exit(0); any
 * thrown error is reported through fail(message, 2).
 *
 * @param args - Command arguments following the `age` sub-command.
 */
async function handleAge(args: string[]): Promise<void> {
  // showAgeHelp() prints the usage text and exits the process itself.
  if (hasFlag(args, "help")) showAgeHelp();

  // Deleting is opt-in: --dry-run always wins, and a missing --apply means preview.
  const shouldDelete = !hasFlag(args, "dry-run") && hasFlag(args, "apply");

  try {
    const staleEntries = findStaleEntries(getConfig());
    const total = staleEntries.length;

    if (total === 0) {
      output({ success: true, stale: 0, message: "Nothing to age" });
      console.error("Nothing to age — no entries exceed their TTL.");
      process.exit(0);
    }

    // Tally stale entries per source, then per type within that source.
    const bySource: Record<string, Record<string, number>> = {};
    for (const { source, type } of staleEntries) {
      let counts = bySource[source];
      if (!counts) {
        counts = {};
        bySource[source] = counts;
      }
      const label = type || "(no type)";
      counts[label] = (counts[label] || 0) + 1;
    }

    // Human-readable breakdown goes to stderr.
    console.error(`\nStale entries (${total} total):\n`);
    for (const [source, counts] of Object.entries(bySource)) {
      let sourceTotal = 0;
      for (const n of Object.values(counts)) sourceTotal += n;
      console.error(` ${source} (${sourceTotal}):`);
      for (const [label, n] of Object.entries(counts)) {
        console.error(` ${label}: ${n}`);
      }
    }
    console.error("");

    if (!shouldDelete) {
      output({ success: true, dryRun: true, stale: total, bySource });
      console.error("Dry run — no entries deleted. Use --apply to execute.");
      process.exit(0);
    }

    // Apply aging
    const deleted = applyAging(staleEntries);
    output({ success: true, dryRun: false, deleted });
    console.error(`Aged ${deleted} entries from search + embeddings.`);
    process.exit(0);
  } catch (error) {
    fail(error instanceof Error ? error.message : "Unknown error", 2);
  }
}
|
|
892
|
+
|
|
893
|
+
/**
 * Print the usage text for `lore age` to stdout, then terminate the process
 * with exit code 0. This function does not return to its caller.
 *
 * NOTE(review): the help text below is whitespace-sensitive; confirm column
 * alignment against the published file before changing any line of it.
 */
function showAgeHelp(): void {
  console.log(`
lore age - Remove stale purgeable entries based on TTL

Usage:
lore age Preview stale entries (dry run, default)
lore age --dry-run Preview stale entries without deleting
lore age --apply Delete stale entries

Purgeable Sources:
captures Quick captures (knowledge, decisions, gotchas, etc.)
observations Model observations about user patterns
teachings Teaching moments

Non-purgeable sources (blogs, commits, obsidian, etc.) are never affected.

TTL Defaults (days):
observations: inferred=180, stated=365, verified=365
captures: gotcha=365, learning=365, decision=730,
preference=730, pattern=365, context=180, general=365
teachings: 365

Override defaults in config.toml:
[aging]
observations.inferred = 180
captures.decision = 730
teachings = 365

Options:
--dry-run Preview only (default if neither flag given)
--apply Execute deletions
--help Show this help

Examples:
lore age # Preview stale entries
lore age --apply # Delete stale entries
`);
  process.exit(0);
}
|
|
932
|
+
|
|
822
933
|
// ============================================================================
|
|
823
934
|
// Capture Command
|
|
824
935
|
// ============================================================================
|
|
@@ -1059,13 +1170,31 @@ async function handleIndex(args: string[]): Promise<void> {
|
|
|
1059
1170
|
}
|
|
1060
1171
|
|
|
1061
1172
|
if (hasFlag(args, "list")) {
|
|
1062
|
-
|
|
1063
|
-
const
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1173
|
+
const config = getConfig();
|
|
1174
|
+
const pluginList = Object.values(indexers).map((plugin) => {
|
|
1175
|
+
const available = plugin.canRun(config);
|
|
1176
|
+
const missingKeys = available
|
|
1177
|
+
? null
|
|
1178
|
+
: plugin.manifest.requiredConfig
|
|
1179
|
+
.filter(
|
|
1180
|
+
(k) =>
|
|
1181
|
+
!getNestedValue(
|
|
1182
|
+
config as unknown as Record<string, unknown>,
|
|
1183
|
+
k,
|
|
1184
|
+
),
|
|
1185
|
+
)
|
|
1186
|
+
.join(", ");
|
|
1187
|
+
return {
|
|
1188
|
+
name: plugin.manifest.name,
|
|
1189
|
+
description: plugin.manifest.description,
|
|
1190
|
+
requiredConfig: plugin.manifest.requiredConfig,
|
|
1191
|
+
optionalConfig: plugin.manifest.optionalConfig,
|
|
1192
|
+
rebuildExcluded: plugin.manifest.rebuildExcluded,
|
|
1193
|
+
available,
|
|
1194
|
+
reason: missingKeys ? `missing: ${missingKeys}` : null,
|
|
1195
|
+
};
|
|
1196
|
+
});
|
|
1197
|
+
output({ success: true, indexers: pluginList });
|
|
1069
1198
|
process.exit(0);
|
|
1070
1199
|
}
|
|
1071
1200
|
|
|
@@ -1130,6 +1259,7 @@ Usage:
|
|
|
1130
1259
|
lore about <project> --brief Compact project summary
|
|
1131
1260
|
lore capture task|knowledge|note|teaching Capture knowledge
|
|
1132
1261
|
lore purge --match "content" Delete matching entries
|
|
1262
|
+
lore age [--dry-run] [--apply] Age out stale entries by TTL
|
|
1133
1263
|
lore index [source] [--rebuild] [--list] Run indexers
|
|
1134
1264
|
|
|
1135
1265
|
Search Options:
|
|
@@ -1520,6 +1650,9 @@ async function main(): Promise<void> {
|
|
|
1520
1650
|
case "purge":
|
|
1521
1651
|
await handlePurge(commandArgs);
|
|
1522
1652
|
break;
|
|
1653
|
+
case "age":
|
|
1654
|
+
await handleAge(commandArgs);
|
|
1655
|
+
break;
|
|
1523
1656
|
case "init":
|
|
1524
1657
|
await runInit();
|
|
1525
1658
|
break;
|
|
@@ -1534,7 +1667,7 @@ async function main(): Promise<void> {
|
|
|
1534
1667
|
break;
|
|
1535
1668
|
default:
|
|
1536
1669
|
fail(
|
|
1537
|
-
`Unknown command: ${command}. Use: search, list, sources, info, projects, about, capture, purge, index, init, embed, or import`,
|
|
1670
|
+
`Unknown command: ${command}. Use: search, list, sources, info, projects, about, capture, purge, age, index, init, embed, or import`,
|
|
1538
1671
|
);
|
|
1539
1672
|
}
|
|
1540
1673
|
}
|
package/lib/age.ts
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/age.ts - TTL-based aging for purgeable knowledge entries
|
|
3
|
+
*
|
|
4
|
+
* Finds and deletes stale entries from purgeable sources (captures, observations, teachings)
|
|
5
|
+
* based on per-source/subtype TTL defaults with config.toml overrides.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* const stale = findStaleEntries(config);
|
|
9
|
+
* const deleted = applyAging(stale);
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { openDatabaseBasic } from "./db.js";
|
|
13
|
+
import { deleteEntries } from "./purge.js";
|
|
14
|
+
import { indexers } from "./indexers/index.js";
|
|
15
|
+
import type { LoreConfig } from "./config.js";
|
|
16
|
+
|
|
17
|
+
// Names of the sources eligible for aging: every registered indexer whose
// manifest sets rebuildExcluded to true, in registry order.
const PURGEABLE_SOURCES: string[] = [];
for (const [name, plugin] of Object.entries(indexers)) {
  if (plugin.manifest.rebuildExcluded) PURGEABLE_SOURCES.push(name);
}
|
|
21
|
+
|
|
22
|
+
// Default TTLs (days) — conservative, override via config.toml [aging].
// Keyed by source, then by entry type within that source.
const DEFAULT_TTLS: Record<string, Record<string, number>> = {
  observations: { inferred: 180, stated: 365, verified: 365 },
  captures: {
    gotcha: 365,
    learning: 365,
    decision: 730,
    preference: 730,
    pattern: 365,
    context: 180,
    general: 365,
  },
  teachings: { teaching: 365 },
};

/** A row of the search table that has outlived its TTL. */
export interface StaleEntry {
  /** rowid of the entry in the search table; used to delete it. */
  rowid: number;
  source: string;
  type: string;
  topic: string;
  /** Timestamp string exactly as stored on the row. */
  timestamp: string;
  /** Whole days elapsed since `timestamp` (rounded down). */
  ageDays: number;
  /** TTL in days that the entry exceeded. */
  ttlDays: number;
}

/**
 * Read `key` from `table` only if it is an own property, so inherited members
 * such as `constructor` or `toString` are never mistaken for configuration.
 */
function ownValue(table: object, key: string): unknown {
  return Object.prototype.hasOwnProperty.call(table, key)
    ? (table as Record<string, unknown>)[key]
    : undefined;
}

/**
 * Accept a TTL only if it is a finite, strictly positive number of days.
 * Everything else (0, negatives, NaN, Infinity, non-numbers) means "never age".
 */
function normalizeTtl(value: unknown): number | null {
  return typeof value === "number" && Number.isFinite(value) && value > 0
    ? value
    : null;
}

/**
 * Resolve the TTL (in days) for a source/type pair, preferring a numeric
 * override from `config.aging` and falling back to DEFAULT_TTLS.
 *
 * Aging deletes data, so every ambiguous case resolves to "do not age".
 * Returns null when:
 *   - the source has no entry in DEFAULT_TTLS (safety guard — such sources are skipped)
 *   - the resolved TTL is 0 (never-age sentinel)
 *   - the resolved TTL is negative or not finite (invalid config must never
 *     mark every entry as stale)
 *   - neither the config nor DEFAULT_TTLS defines a TTL for the type
 *
 * @param source - Source name of the entry (e.g. "captures").
 * @param type - Entry type within the source (e.g. "decision").
 * @param config - Loaded configuration; `config.aging` is optional.
 * @returns TTL in days, or null if the entry must not be aged.
 */
function resolveTtl(
  source: string,
  type: string,
  config: LoreConfig,
): number | null {
  // Safety guard: only sources listed in DEFAULT_TTLS are ever aged.
  const defaults = ownValue(DEFAULT_TTLS, source);
  if (!defaults || typeof defaults !== "object") return null;

  const aging = config.aging;
  if (aging) {
    let override: unknown;
    if (source === "teachings") {
      // teachings config is a flat number, not nested; it applies to every type.
      override = aging.teachings;
    } else {
      // observations and captures are nested objects keyed by type.
      const sourceConfig = ownValue(aging, source);
      if (sourceConfig && typeof sourceConfig === "object") {
        override = ownValue(sourceConfig, type);
      }
    }
    // A numeric override is authoritative, even when it resolves to "never age".
    if (typeof override === "number") return normalizeTtl(override);
  }

  // Fall back to DEFAULT_TTLS.
  return normalizeTtl(ownValue(defaults, type));
}
|
|
88
|
+
|
|
89
|
+
/**
 * Collect every entry from the purgeable sources whose age exceeds its TTL.
 *
 * Only rows whose source is in PURGEABLE_SOURCES and whose timestamp is
 * non-empty are read. A row is skipped when its timestamp cannot be parsed,
 * lies in the future, or when resolveTtl() yields null for its source/type.
 * The database handle is always closed before returning.
 *
 * NOTE(review): timestamps are parsed with `new Date(string)`; confirm the
 * stored format carries an explicit timezone.
 *
 * @param config - Loaded configuration, forwarded to resolveTtl().
 * @returns Stale entries, oldest timestamp first.
 */
export function findStaleEntries(config: LoreConfig): StaleEntry[] {
  type CandidateRow = Pick<
    StaleEntry,
    "rowid" | "source" | "type" | "topic" | "timestamp"
  >;

  const MS_PER_DAY = 86400000;
  // openDatabaseBasic(true) — presumably a read-only handle; verify in db.ts.
  const db = openDatabaseBasic(true);

  try {
    // One "?" placeholder per purgeable source keeps the query parameterized.
    const placeholders = PURGEABLE_SOURCES.map(() => "?").join(", ");
    const candidates = db
      .prepare(
        `
      SELECT rowid, source, type, topic, timestamp
      FROM search
      WHERE source IN (${placeholders})
      AND timestamp IS NOT NULL
      AND timestamp != ''
      ORDER BY timestamp ASC
    `,
      )
      .all(...PURGEABLE_SOURCES) as CandidateRow[];

    const now = Date.now();

    return candidates.flatMap((row): StaleEntry[] => {
      const createdAt = new Date(row.timestamp).getTime();
      if (isNaN(createdAt)) return []; // Malformed timestamp — skip

      const ageDays = (now - createdAt) / MS_PER_DAY;
      if (ageDays <= 0) return []; // Future timestamp — skip

      const ttlDays = resolveTtl(row.source, row.type, config);
      if (ttlDays === null) return []; // No TTL or non-purgeable — skip

      return ageDays > ttlDays
        ? [{ ...row, ageDays: Math.floor(ageDays), ttlDays }]
        : [];
    });
  } finally {
    db.close();
  }
}
|
|
148
|
+
|
|
149
|
+
/**
 * Delete the given stale entries by rowid through deleteEntries() from purge.ts.
 *
 * No matchContents are passed — log.jsonl cleanup is skipped for TTL-based aging.
 * An empty list short-circuits without calling deleteEntries().
 *
 * @param staleEntries - Entries previously returned by findStaleEntries().
 * @returns Count of deleted entries, as reported by deleteEntries().
 */
export function applyAging(staleEntries: StaleEntry[]): number {
  if (staleEntries.length === 0) {
    return 0;
  }

  const { deleted } = deleteEntries(staleEntries.map(({ rowid }) => rowid));
  return deleted;
}
|
package/lib/config.ts
CHANGED
|
@@ -15,6 +15,24 @@ import { readFileSync } from "fs";
|
|
|
15
15
|
import { homedir } from "os";
|
|
16
16
|
import { parse as parseToml } from "@iarna/toml";
|
|
17
17
|
|
|
18
|
+
/**
 * Shape of the optional `[aging]` section of config.toml: TTL overrides, in
 * days, for the sources that `lore age` can purge. Every key is optional;
 * anything omitted falls back to the defaults in lib/age.ts. A value of 0
 * means "never age".
 */
export interface AgingConfig {
  /** Per-type TTL overrides for the `observations` source. */
  observations?: Partial<Record<"inferred" | "stated" | "verified", number>>;
  /** Per-type TTL overrides for the `captures` source. */
  captures?: Partial<
    Record<
      | "gotcha"
      | "learning"
      | "decision"
      | "preference"
      | "pattern"
      | "context"
      | "general",
      number
    >
  >;
  /** Single TTL override for the `teachings` source (flat number, not nested). */
  teachings?: number;
}
|
|
35
|
+
|
|
18
36
|
export interface LoreConfig {
|
|
19
37
|
paths: {
|
|
20
38
|
data: string;
|
|
@@ -38,6 +56,7 @@ export interface LoreConfig {
|
|
|
38
56
|
model: string;
|
|
39
57
|
dimensions: number;
|
|
40
58
|
};
|
|
59
|
+
aging?: AgingConfig;
|
|
41
60
|
}
|
|
42
61
|
|
|
43
62
|
let cachedConfig: LoreConfig | null = null;
|
|
@@ -176,5 +195,10 @@ export function getConfig(): LoreConfig {
|
|
|
176
195
|
},
|
|
177
196
|
};
|
|
178
197
|
|
|
198
|
+
// Aging config (optional section) — missing [aging] uses DEFAULT_TTLS in age.ts
|
|
199
|
+
if (parsed.aging && typeof parsed.aging === "object") {
|
|
200
|
+
cachedConfig.aging = parsed.aging as AgingConfig;
|
|
201
|
+
}
|
|
202
|
+
|
|
179
203
|
return cachedConfig!;
|
|
180
204
|
}
|
package/lib/contradiction.ts
CHANGED
|
@@ -14,7 +14,8 @@
|
|
|
14
14
|
* // result.deleteRowid: number (only set for DELETE+ADD)
|
|
15
15
|
*/
|
|
16
16
|
|
|
17
|
-
import {
|
|
17
|
+
import type { HybridResult } from "./semantic.js";
|
|
18
|
+
import { rawSearch } from "./search.js";
|
|
18
19
|
import { PURGEABLE_SOURCES } from "./purge.js";
|
|
19
20
|
import { complete } from "@voidwire/llm-core";
|
|
20
21
|
import type { CaptureEvent } from "./capture.js";
|
|
@@ -57,9 +58,9 @@ export function isContradictionCheckable(source: string): boolean {
|
|
|
57
58
|
|
|
58
59
|
/**
|
|
59
60
|
* Find existing entries that may contradict or duplicate the new event.
|
|
60
|
-
* Uses
|
|
61
|
+
* Uses raw FTS5 search scoped by source for precision.
|
|
61
62
|
*
|
|
62
|
-
* Opens its own read connection (
|
|
63
|
+
* Opens its own read connection (rawSearch uses openDatabaseBasic(true)).
|
|
63
64
|
* Safe in WAL mode — concurrent reads with the write connection in indexAndEmbed.
|
|
64
65
|
*
|
|
65
66
|
* Note: entries inserted in the current indexAndEmbed batch are not visible
|
|
@@ -76,10 +77,23 @@ export async function findCandidates(
|
|
|
76
77
|
|
|
77
78
|
if (!content) return [];
|
|
78
79
|
|
|
79
|
-
|
|
80
|
+
const raw = rawSearch(content, {
|
|
80
81
|
source,
|
|
81
82
|
limit: CANDIDATE_LIMIT,
|
|
82
83
|
});
|
|
84
|
+
|
|
85
|
+
return raw.map((r) => ({
|
|
86
|
+
rowid: r.rowid,
|
|
87
|
+
source: r.source,
|
|
88
|
+
title: r.title,
|
|
89
|
+
content: r.content,
|
|
90
|
+
metadata: r.metadata,
|
|
91
|
+
topic: r.topic,
|
|
92
|
+
type: r.type,
|
|
93
|
+
score: 0,
|
|
94
|
+
vectorScore: 0,
|
|
95
|
+
textScore: 0,
|
|
96
|
+
}));
|
|
83
97
|
}
|
|
84
98
|
|
|
85
99
|
/**
|
package/lib/db.ts
CHANGED
|
@@ -7,6 +7,7 @@
|
|
|
7
7
|
|
|
8
8
|
import { Database } from "bun:sqlite";
|
|
9
9
|
import { existsSync } from "fs";
|
|
10
|
+
import { load } from "sqlite-vec";
|
|
10
11
|
import { getConfig } from "./config";
|
|
11
12
|
|
|
12
13
|
// Lazy initialization — deferred until first database open
|
|
@@ -48,15 +49,7 @@ export function openDatabase(readonly = false): Database {
|
|
|
48
49
|
? new Database(dbPath, { readonly: true })
|
|
49
50
|
: new Database(dbPath);
|
|
50
51
|
|
|
51
|
-
//
|
|
52
|
-
const vecPath = getConfig().database.sqlite_vec;
|
|
53
|
-
if (!vecPath) {
|
|
54
|
-
throw new Error(
|
|
55
|
-
"sqlite-vec path not configured. Run lore init to detect and configure it.",
|
|
56
|
-
);
|
|
57
|
-
}
|
|
58
|
-
|
|
59
|
-
db.loadExtension(vecPath);
|
|
52
|
+
load(db); // loads vec0 from npm package
|
|
60
53
|
|
|
61
54
|
return db;
|
|
62
55
|
}
|
package/lib/embed.ts
CHANGED
|
@@ -108,18 +108,14 @@ async function processBatch(db: Database, batch: FTSEntry[]): Promise<void> {
|
|
|
108
108
|
}
|
|
109
109
|
}
|
|
110
110
|
|
|
111
|
-
// Embed cache misses
|
|
112
|
-
|
|
113
|
-
const
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
const { idx, hash } = toEmbed[i];
|
|
120
|
-
embeddings[idx] = results[i].embedding;
|
|
121
|
-
cacheEmbedding(db, hash, results[i].embedding, MODEL_NAME);
|
|
122
|
-
}
|
|
111
|
+
// Embed cache misses sequentially (server is single-threaded)
|
|
112
|
+
for (const { idx, contentString, hash } of toEmbed) {
|
|
113
|
+
const result = await embed({
|
|
114
|
+
text: contentString,
|
|
115
|
+
prefix: "search_document",
|
|
116
|
+
});
|
|
117
|
+
embeddings[idx] = result.embedding;
|
|
118
|
+
cacheEmbedding(db, hash, result.embedding, MODEL_NAME);
|
|
123
119
|
}
|
|
124
120
|
|
|
125
121
|
// Insert all embeddings
|
package/lib/indexer.ts
CHANGED
|
@@ -10,13 +10,12 @@
|
|
|
10
10
|
* - Orchestration (runIndexer)
|
|
11
11
|
*
|
|
12
12
|
* Usage:
|
|
13
|
-
* import { runIndexer, type
|
|
14
|
-
*
|
|
15
|
-
* ctx.insert({ source: "mySource", title: "...", content: "...", topic: "..." });
|
|
16
|
-
* };
|
|
13
|
+
* import { runIndexer, type IndexerPlugin } from "./indexer";
|
|
14
|
+
* // Register plugins via lib/indexers/index.ts barrel file
|
|
17
15
|
*/
|
|
18
16
|
|
|
19
|
-
import { Database } from "bun:sqlite";
|
|
17
|
+
import type { Database } from "bun:sqlite";
|
|
18
|
+
import { openDatabaseBasic } from "./db.js";
|
|
20
19
|
import { createHash } from "crypto";
|
|
21
20
|
import { existsSync } from "fs";
|
|
22
21
|
import { getConfig, type LoreConfig } from "./config";
|
|
@@ -40,17 +39,34 @@ export interface IndexerContext {
|
|
|
40
39
|
|
|
41
40
|
export type IndexerFunction = (ctx: IndexerContext) => Promise<void>;
|
|
42
41
|
|
|
42
|
+
/** Static, declarative description of an indexer plugin. */
export interface IndexerManifest {
  /** Source name of the indexer (e.g. "blogs"). */
  name: string;
  /** Human-readable summary of what the indexer indexes. */
  description: string;
  /**
   * Dot-notation config paths (e.g. "paths.blogs"). runIndexer resolves them
   * with getNestedValue to report which keys are missing when canRun fails.
   */
  requiredConfig: string[];
  /** Config paths the indexer can use but does not need — TODO confirm; not read by any code visible here. */
  optionalConfig: string[];
  /**
   * When true, runIndexer skips this source during a rebuild (the DB, not
   * log.jsonl, is the source of truth), and lib/age.ts treats the source as
   * purgeable.
   */
  rebuildExcluded: boolean;
}
|
|
49
|
+
|
|
50
|
+
/** An indexer as registered in the plugin registry passed to runIndexer. */
export interface IndexerPlugin {
  /** Declarative metadata about the indexer. */
  manifest: IndexerManifest;
  /** Gate checked by runIndexer before `run`; returning false makes the indexer be skipped. */
  canRun: (config: LoreConfig) => boolean;
  /** The indexing routine; runIndexer awaits it with the shared indexer context. */
  run: IndexerFunction;
}
|
|
55
|
+
|
|
43
56
|
/**
|
|
44
|
-
*
|
|
45
|
-
*
|
|
46
|
-
* resolution. Rebuilding from log.jsonl would resurrect purged/superseded
|
|
47
|
-
* entries, breaking the contradiction resolution invariant.
|
|
57
|
+
* Resolve a dot-notation config path against a nested object.
|
|
58
|
+
* Used to derive human-readable missing config keys from manifest.requiredConfig.
|
|
48
59
|
*/
|
|
49
|
-
export
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
60
|
+
export function getNestedValue(
  obj: Record<string, unknown>,
  path: string,
): unknown {
  // Walk the path one dot-separated segment at a time.
  let current: unknown = obj;
  for (const segment of path.split(".")) {
    // Cannot descend into null/undefined or a primitive: the path does not resolve.
    if (!current || typeof current !== "object") return undefined;
    current = (current as Record<string, unknown>)[segment];
  }
  return current;
}
|
|
54
70
|
|
|
55
71
|
/**
|
|
56
72
|
* Check if a path is configured and exists on disk.
|
|
@@ -216,14 +232,15 @@ export function createIndexerContext(
|
|
|
216
232
|
/**
|
|
217
233
|
* Main indexing orchestrator.
|
|
218
234
|
* Runs registered indexers for the given source (or all).
|
|
235
|
+
* Reports per-indexer status to stderr: checkmark ran, skip skipped, cross failed.
|
|
219
236
|
*/
|
|
220
237
|
export async function runIndexer(
|
|
221
238
|
source: string | "all",
|
|
222
239
|
rebuild: boolean,
|
|
223
|
-
registry: Record<string,
|
|
240
|
+
registry: Record<string, IndexerPlugin>,
|
|
224
241
|
): Promise<void> {
|
|
225
242
|
const config = getConfig();
|
|
226
|
-
const db =
|
|
243
|
+
const db = openDatabaseBasic(false);
|
|
227
244
|
|
|
228
245
|
try {
|
|
229
246
|
db.run("PRAGMA busy_timeout = 5000");
|
|
@@ -237,31 +254,59 @@ export async function runIndexer(
|
|
|
237
254
|
const toRun = source === "all" ? Object.keys(registry) : [source];
|
|
238
255
|
|
|
239
256
|
for (const src of toRun) {
|
|
240
|
-
const
|
|
241
|
-
if (!
|
|
257
|
+
const plugin = registry[src];
|
|
258
|
+
if (!plugin) {
|
|
242
259
|
console.error(`Unknown source: ${src}`);
|
|
243
260
|
continue;
|
|
244
261
|
}
|
|
245
262
|
|
|
263
|
+
// Check canRun gate — skip indexers with missing config
|
|
264
|
+
if (!plugin.canRun(config)) {
|
|
265
|
+
const missing = plugin.manifest.requiredConfig
|
|
266
|
+
.filter(
|
|
267
|
+
(k) =>
|
|
268
|
+
!getNestedValue(config as unknown as Record<string, unknown>, k),
|
|
269
|
+
)
|
|
270
|
+
.join(", ");
|
|
271
|
+
console.error(`\u23ED\uFE0F ${src}: skipped (missing: ${missing})`);
|
|
272
|
+
continue;
|
|
273
|
+
}
|
|
274
|
+
|
|
246
275
|
// Skip rebuild-excluded sources — DB is source of truth, not log.jsonl
|
|
247
|
-
if (rebuild &&
|
|
248
|
-
console.
|
|
249
|
-
|
|
276
|
+
if (rebuild && plugin.manifest.rebuildExcluded) {
|
|
277
|
+
console.error(
|
|
278
|
+
`\u23ED\uFE0F ${src}: skipped (DB is source of truth \u2014 not rebuilt from log)`,
|
|
250
279
|
);
|
|
251
280
|
continue;
|
|
252
281
|
}
|
|
253
282
|
|
|
254
|
-
console.log(`Indexing ${src}...`);
|
|
255
|
-
|
|
256
283
|
// Clear source if rebuilding
|
|
257
284
|
if (rebuild) {
|
|
258
285
|
db.run("DELETE FROM search WHERE source = ?", [src]);
|
|
259
286
|
}
|
|
260
287
|
|
|
261
|
-
|
|
288
|
+
const countBefore = (
|
|
289
|
+
db
|
|
290
|
+
.query("SELECT COUNT(*) as n FROM search WHERE source = ?")
|
|
291
|
+
.get(src) as { n: number }
|
|
292
|
+
).n;
|
|
293
|
+
|
|
294
|
+
try {
|
|
295
|
+
await plugin.run(ctx);
|
|
296
|
+
const countAfter = (
|
|
297
|
+
db
|
|
298
|
+
.query("SELECT COUNT(*) as n FROM search WHERE source = ?")
|
|
299
|
+
.get(src) as { n: number }
|
|
300
|
+
).n;
|
|
301
|
+
console.error(`\u2705 ${src}: ${countAfter - countBefore} entries`);
|
|
302
|
+
} catch (err) {
|
|
303
|
+
console.error(
|
|
304
|
+
`\u274C ${src}: ${err instanceof Error ? err.message : String(err)}`,
|
|
305
|
+
);
|
|
306
|
+
}
|
|
262
307
|
}
|
|
263
308
|
|
|
264
|
-
console.
|
|
309
|
+
console.error("Indexing complete");
|
|
265
310
|
} finally {
|
|
266
311
|
db.close();
|
|
267
312
|
}
|
package/lib/indexers/blogs.ts
CHANGED
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
|
|
14
14
|
import { readdirSync, readFileSync, statSync, existsSync } from "fs";
|
|
15
15
|
import { join, basename } from "path";
|
|
16
|
-
import { checkPath, type IndexerContext } from "../indexer";
|
|
16
|
+
import { checkPath, type IndexerContext, type IndexerPlugin } from "../indexer";
|
|
17
17
|
|
|
18
18
|
function walkMarkdownFiles(dir: string, files: string[] = []): string[] {
|
|
19
19
|
if (!existsSync(dir)) return files;
|
|
@@ -33,7 +33,7 @@ function walkMarkdownFiles(dir: string, files: string[] = []): string[] {
|
|
|
33
33
|
return files;
|
|
34
34
|
}
|
|
35
35
|
|
|
36
|
-
|
|
36
|
+
async function indexBlogs(ctx: IndexerContext): Promise<void> {
|
|
37
37
|
const blogsDir = ctx.config.paths.blogs;
|
|
38
38
|
if (!checkPath("blogs", "paths.blogs", blogsDir)) return;
|
|
39
39
|
|
|
@@ -149,3 +149,15 @@ export async function indexBlogs(ctx: IndexerContext): Promise<void> {
|
|
|
149
149
|
}
|
|
150
150
|
}
|
|
151
151
|
}
|
|
152
|
+
|
|
153
|
+
/**
 * Plugin registration for the blogs indexer: runnable only when `paths.blogs`
 * is configured, and rebuilt normally (not rebuild-excluded).
 */
export const blogsPlugin: IndexerPlugin = {
  manifest: {
    name: "blogs",
    description: "Indexes Hugo blog posts from the content directory",
    requiredConfig: ["paths.blogs"],
    optionalConfig: [],
    rebuildExcluded: false,
  },
  // Runnable only when a non-empty blogs path is present in the config.
  canRun(config) {
    return Boolean(config.paths?.blogs);
  },
  run: indexBlogs,
};
|