@gmickel/gno 1.5.2 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/package.json +5 -2
- package/src/cli/commands/doctor.ts +179 -1
- package/src/cli/commands/embed.ts +217 -242
- package/src/embed/backlog.ts +92 -45
- package/src/embed/fingerprint.ts +37 -0
- package/src/embed/retry.ts +137 -0
- package/src/llm/nodeLlamaCpp/embedding.ts +81 -19
- package/src/sdk/embed.ts +134 -59
- package/src/store/migrations/008-vector-fingerprints.ts +25 -0
- package/src/store/migrations/index.ts +2 -1
- package/src/store/sqlite/adapter.ts +20 -6
- package/src/store/types.ts +1 -0
- package/src/store/vector/freshness.ts +34 -0
- package/src/store/vector/sqlite-vec.ts +5 -2
- package/src/store/vector/stats.ts +20 -2
- package/src/store/vector/types.ts +3 -0
|
@@ -53,6 +53,7 @@ import { analyzeGraphCommunities } from "../../core/graph-analysis";
|
|
|
53
53
|
import { normalizeWikiName, stripWikiMdExt } from "../../core/links";
|
|
54
54
|
import { migrations, runMigrations } from "../migrations";
|
|
55
55
|
import { err, ok } from "../types";
|
|
56
|
+
import { getStoredEmbeddingFingerprint } from "../vector/freshness";
|
|
56
57
|
import { modelTableName } from "../vector/sqlite-vec";
|
|
57
58
|
import { loadFts5Snowball } from "./fts5-snowball";
|
|
58
59
|
|
|
@@ -3065,10 +3066,14 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
|
|
|
3065
3066
|
|
|
3066
3067
|
async getStatus(options?: {
|
|
3067
3068
|
embedModel?: string;
|
|
3069
|
+
embedFingerprint?: string;
|
|
3068
3070
|
}): Promise<StoreResult<IndexStatus>> {
|
|
3069
3071
|
try {
|
|
3070
3072
|
const db = this.ensureOpen();
|
|
3071
3073
|
const embedModel = options?.embedModel ?? null;
|
|
3074
|
+
const embedFingerprint =
|
|
3075
|
+
options?.embedFingerprint ??
|
|
3076
|
+
(embedModel ? getStoredEmbeddingFingerprint(db, embedModel) : null);
|
|
3072
3077
|
|
|
3073
3078
|
// Get version
|
|
3074
3079
|
const versionRow = db
|
|
@@ -3097,7 +3102,7 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
|
|
|
3097
3102
|
}
|
|
3098
3103
|
|
|
3099
3104
|
const collectionStats = db
|
|
3100
|
-
.query<CollectionStat, [string | null, string | null]>(
|
|
3105
|
+
.query<CollectionStat, [string | null, string | null, string | null]>(
|
|
3101
3106
|
`
|
|
3102
3107
|
SELECT
|
|
3103
3108
|
c.name,
|
|
@@ -3120,7 +3125,10 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
|
|
|
3120
3125
|
SELECT 1 FROM content_vectors cv
|
|
3121
3126
|
WHERE cv.mirror_hash = cc.mirror_hash
|
|
3122
3127
|
AND cv.seq = cc.seq
|
|
3123
|
-
AND (? IS NULL OR
|
|
3128
|
+
AND (? IS NULL OR (
|
|
3129
|
+
cv.model = ?
|
|
3130
|
+
AND cv.embed_fingerprint = ?
|
|
3131
|
+
))
|
|
3124
3132
|
AND cv.embedded_at >= cc.created_at
|
|
3125
3133
|
)) as embedded_count
|
|
3126
3134
|
FROM collections c
|
|
@@ -3128,7 +3136,7 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
|
|
|
3128
3136
|
GROUP BY c.name, c.path
|
|
3129
3137
|
`
|
|
3130
3138
|
)
|
|
3131
|
-
.all(embedModel, embedModel);
|
|
3139
|
+
.all(embedModel, embedModel, embedFingerprint);
|
|
3132
3140
|
|
|
3133
3141
|
// Get totals
|
|
3134
3142
|
const totalsRow = db
|
|
@@ -3152,7 +3160,10 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
|
|
|
3152
3160
|
// Embedding backlog: chunks from active docs without vectors
|
|
3153
3161
|
// Uses EXISTS to avoid duplicates when multiple docs share mirror_hash
|
|
3154
3162
|
const backlogRow = db
|
|
3155
|
-
.query<
|
|
3163
|
+
.query<
|
|
3164
|
+
{ count: number },
|
|
3165
|
+
[string | null, string | null, string | null]
|
|
3166
|
+
>(
|
|
3156
3167
|
`
|
|
3157
3168
|
SELECT COUNT(*) as count FROM content_chunks c
|
|
3158
3169
|
WHERE EXISTS (
|
|
@@ -3163,12 +3174,15 @@ export class SqliteAdapter implements StorePort, SqliteDbProvider {
|
|
|
3163
3174
|
SELECT 1 FROM content_vectors v
|
|
3164
3175
|
WHERE v.mirror_hash = c.mirror_hash
|
|
3165
3176
|
AND v.seq = c.seq
|
|
3166
|
-
AND (? IS NULL OR
|
|
3177
|
+
AND (? IS NULL OR (
|
|
3178
|
+
v.model = ?
|
|
3179
|
+
AND v.embed_fingerprint = ?
|
|
3180
|
+
))
|
|
3167
3181
|
AND v.embedded_at >= c.created_at
|
|
3168
3182
|
)
|
|
3169
3183
|
`
|
|
3170
3184
|
)
|
|
3171
|
-
.get(embedModel, embedModel);
|
|
3185
|
+
.get(embedModel, embedModel, embedFingerprint);
|
|
3172
3186
|
|
|
3173
3187
|
// Recent errors (last 24h)
|
|
3174
3188
|
const recentErrorsRow = db
|
package/src/store/types.ts
CHANGED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vector freshness helpers.
|
|
3
|
+
*
|
|
4
|
+
* @module src/store/vector/freshness
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import type { Database } from "bun:sqlite";
|
|
8
|
+
|
|
9
|
+
import { getEmbeddingFingerprint } from "../../embed/fingerprint";
|
|
10
|
+
|
|
11
|
+
export function getStoredEmbeddingDimensions(
|
|
12
|
+
db: Database,
|
|
13
|
+
model: string
|
|
14
|
+
): number | undefined {
|
|
15
|
+
const row = db
|
|
16
|
+
.prepare("SELECT embedding FROM content_vectors WHERE model = ? LIMIT 1")
|
|
17
|
+
.get(model) as { embedding: Uint8Array } | undefined;
|
|
18
|
+
|
|
19
|
+
if (!row?.embedding) {
|
|
20
|
+
return undefined;
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
return row.embedding.byteLength / Float32Array.BYTES_PER_ELEMENT;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
export function getStoredEmbeddingFingerprint(
|
|
27
|
+
db: Database,
|
|
28
|
+
modelUri: string
|
|
29
|
+
): string {
|
|
30
|
+
return getEmbeddingFingerprint({
|
|
31
|
+
modelUri,
|
|
32
|
+
dimensions: getStoredEmbeddingDimensions(db, modelUri),
|
|
33
|
+
});
|
|
34
|
+
}
|
|
@@ -116,8 +116,10 @@ export async function createVectorIndexPort(
|
|
|
116
116
|
|
|
117
117
|
// Prepared statements for content_vectors table
|
|
118
118
|
const upsertVectorStmt = db.prepare(`
|
|
119
|
-
INSERT OR REPLACE INTO content_vectors (
|
|
120
|
-
|
|
119
|
+
INSERT OR REPLACE INTO content_vectors (
|
|
120
|
+
mirror_hash, seq, model, embed_fingerprint, embedding, embedded_at
|
|
121
|
+
)
|
|
122
|
+
VALUES (?, ?, ?, ?, ?, datetime('now'))
|
|
121
123
|
`);
|
|
122
124
|
|
|
123
125
|
const deleteVectorStmt = db.prepare(`
|
|
@@ -172,6 +174,7 @@ export async function createVectorIndexPort(
|
|
|
172
174
|
row.mirrorHash,
|
|
173
175
|
row.seq,
|
|
174
176
|
row.model,
|
|
177
|
+
row.embedFingerprint,
|
|
175
178
|
encodeEmbedding(row.embedding)
|
|
176
179
|
);
|
|
177
180
|
}
|
|
@@ -65,6 +65,7 @@ export function createVectorStatsPort(db: Database): VectorStatsPort {
|
|
|
65
65
|
|
|
66
66
|
countBacklog(
|
|
67
67
|
model: string,
|
|
68
|
+
embedFingerprint: string,
|
|
68
69
|
options?: { collection?: string }
|
|
69
70
|
): Promise<StoreResult<number>> {
|
|
70
71
|
try {
|
|
@@ -80,10 +81,13 @@ export function createVectorStatsPort(db: Database): VectorStatsPort {
|
|
|
80
81
|
WHERE v.mirror_hash = c.mirror_hash
|
|
81
82
|
AND v.seq = c.seq
|
|
82
83
|
AND v.model = ?
|
|
84
|
+
AND v.embed_fingerprint = ?
|
|
83
85
|
AND v.embedded_at >= c.created_at
|
|
84
86
|
)
|
|
85
87
|
`;
|
|
86
|
-
const result = db
|
|
88
|
+
const result = db
|
|
89
|
+
.prepare(sql)
|
|
90
|
+
.get(...activeDoc.params, model, embedFingerprint) as {
|
|
87
91
|
count: number;
|
|
88
92
|
};
|
|
89
93
|
return Promise.resolve(ok(result.count));
|
|
@@ -99,6 +103,7 @@ export function createVectorStatsPort(db: Database): VectorStatsPort {
|
|
|
99
103
|
|
|
100
104
|
getBacklog(
|
|
101
105
|
model: string,
|
|
106
|
+
embedFingerprint: string,
|
|
102
107
|
options?: {
|
|
103
108
|
limit?: number;
|
|
104
109
|
after?: { mirrorHash: string; seq: number };
|
|
@@ -123,6 +128,7 @@ export function createVectorStatsPort(db: Database): VectorStatsPort {
|
|
|
123
128
|
WHERE v.mirror_hash = c.mirror_hash
|
|
124
129
|
AND v.seq = c.seq
|
|
125
130
|
AND v.model = ?
|
|
131
|
+
AND v.embed_fingerprint = ?
|
|
126
132
|
) THEN 'new'
|
|
127
133
|
ELSE 'changed'
|
|
128
134
|
END as reason
|
|
@@ -133,6 +139,7 @@ export function createVectorStatsPort(db: Database): VectorStatsPort {
|
|
|
133
139
|
WHERE v.mirror_hash = c.mirror_hash
|
|
134
140
|
AND v.seq = c.seq
|
|
135
141
|
AND v.model = ?
|
|
142
|
+
AND v.embed_fingerprint = ?
|
|
136
143
|
AND v.embedded_at >= c.created_at
|
|
137
144
|
)
|
|
138
145
|
AND (c.mirror_hash > ? OR (c.mirror_hash = ? AND c.seq > ?))
|
|
@@ -148,6 +155,7 @@ export function createVectorStatsPort(db: Database): VectorStatsPort {
|
|
|
148
155
|
WHERE v.mirror_hash = c.mirror_hash
|
|
149
156
|
AND v.seq = c.seq
|
|
150
157
|
AND v.model = ?
|
|
158
|
+
AND v.embed_fingerprint = ?
|
|
151
159
|
) THEN 'new'
|
|
152
160
|
ELSE 'changed'
|
|
153
161
|
END as reason
|
|
@@ -158,6 +166,7 @@ export function createVectorStatsPort(db: Database): VectorStatsPort {
|
|
|
158
166
|
WHERE v.mirror_hash = c.mirror_hash
|
|
159
167
|
AND v.seq = c.seq
|
|
160
168
|
AND v.model = ?
|
|
169
|
+
AND v.embed_fingerprint = ?
|
|
161
170
|
AND v.embedded_at >= c.created_at
|
|
162
171
|
)
|
|
163
172
|
ORDER BY c.mirror_hash, c.seq
|
|
@@ -167,14 +176,23 @@ export function createVectorStatsPort(db: Database): VectorStatsPort {
|
|
|
167
176
|
const params = after
|
|
168
177
|
? [
|
|
169
178
|
model,
|
|
179
|
+
embedFingerprint,
|
|
170
180
|
...activeDoc.params,
|
|
171
181
|
model,
|
|
182
|
+
embedFingerprint,
|
|
172
183
|
after.mirrorHash,
|
|
173
184
|
after.mirrorHash,
|
|
174
185
|
after.seq,
|
|
175
186
|
limit,
|
|
176
187
|
]
|
|
177
|
-
: [
|
|
188
|
+
: [
|
|
189
|
+
model,
|
|
190
|
+
embedFingerprint,
|
|
191
|
+
...activeDoc.params,
|
|
192
|
+
model,
|
|
193
|
+
embedFingerprint,
|
|
194
|
+
limit,
|
|
195
|
+
];
|
|
178
196
|
|
|
179
197
|
const results = db.prepare(sql).all(...params) as BacklogItem[];
|
|
180
198
|
return Promise.resolve(ok(results));
|
|
@@ -16,6 +16,7 @@ export interface VectorRow {
|
|
|
16
16
|
mirrorHash: string;
|
|
17
17
|
seq: number;
|
|
18
18
|
model: string;
|
|
19
|
+
embedFingerprint: string;
|
|
19
20
|
embedding: Float32Array;
|
|
20
21
|
// embeddedAt is set by DB via datetime('now')
|
|
21
22
|
}
|
|
@@ -112,12 +113,14 @@ export interface VectorStatsPort {
|
|
|
112
113
|
/** Count chunks needing embedding for a model */
|
|
113
114
|
countBacklog(
|
|
114
115
|
model: string,
|
|
116
|
+
embedFingerprint: string,
|
|
115
117
|
options?: { collection?: string }
|
|
116
118
|
): Promise<StoreResult<number>>;
|
|
117
119
|
|
|
118
120
|
/** Get chunks needing embedding for a model (seek pagination) */
|
|
119
121
|
getBacklog(
|
|
120
122
|
model: string,
|
|
123
|
+
embedFingerprint: string,
|
|
121
124
|
options?: { limit?: number; after?: BacklogCursor; collection?: string }
|
|
122
125
|
): Promise<StoreResult<BacklogItem[]>>;
|
|
123
126
|
}
|