@hasna/knowledge 0.2.12 → 0.2.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/outbox-consume.ts CHANGED
@@ -30,6 +30,7 @@ export interface OutboxConsumeResult {
30
30
  deleted_sources: number;
31
31
  moved_sources: number;
32
32
  permission_updates: number;
33
+ vector_entries_deleted: number;
33
34
  }
34
35
 
35
36
  interface NormalizedOutboxEvent {
@@ -289,12 +290,16 @@ function revisionIdsForEvent(db: Database, sourceId: string, event: NormalizedOu
289
290
  ).all(sourceId).map((row) => row.id);
290
291
  }
291
292
 
292
- function invalidateRevision(db: Database, revisionId: string): { chunksDeleted: number; embeddingsDeleted: number } {
293
+ function invalidateRevision(db: Database, revisionId: string): { chunksDeleted: number; embeddingsDeleted: number; vectorEntriesDeleted: number } {
293
294
  const chunks = db.query<{ id: string }, [string]>('SELECT id FROM chunks WHERE source_revision_id = ?').all(revisionId);
294
295
  let embeddingsDeleted = 0;
296
+ let vectorEntriesDeleted = 0;
295
297
  for (const chunk of chunks) {
296
298
  const row = db.query<{ n: number }, [string]>('SELECT COUNT(*) AS n FROM chunk_embeddings WHERE chunk_id = ?').get(chunk.id);
297
299
  embeddingsDeleted += row?.n ?? 0;
300
+ const vectorRow = db.query<{ n: number }, [string]>('SELECT COUNT(*) AS n FROM vector_index_entries WHERE chunk_id = ?').get(chunk.id);
301
+ vectorEntriesDeleted += vectorRow?.n ?? 0;
302
+ db.run('DELETE FROM vector_index_entries WHERE chunk_id = ?', [chunk.id]);
298
303
  db.run('DELETE FROM chunk_embeddings WHERE chunk_id = ?', [chunk.id]);
299
304
  db.run('DELETE FROM chunks_fts WHERE chunk_id = ?', [chunk.id]);
300
305
  }
@@ -304,7 +309,7 @@ function invalidateRevision(db: Database, revisionId: string): { chunksDeleted:
304
309
  'UPDATE source_revisions SET metadata_json = ? WHERE id = ?',
305
310
  [mergeJson(revision?.metadata_json, { reindex_required: true, invalidated_at: new Date().toISOString() }), revisionId],
306
311
  );
307
- return { chunksDeleted: chunks.length, embeddingsDeleted };
312
+ return { chunksDeleted: chunks.length, embeddingsDeleted, vectorEntriesDeleted };
308
313
  }
309
314
 
310
315
  function isDeleteEvent(eventType: string, status: string | null): boolean {
@@ -349,6 +354,7 @@ export async function consumeOpenFilesOutbox(options: OutboxConsumeOptions): Pro
349
354
  const revisionsTouched = new Set<string>();
350
355
  let chunksDeleted = 0;
351
356
  let embeddingsDeleted = 0;
357
+ let vectorEntriesDeleted = 0;
352
358
  let staleRevisions = 0;
353
359
  let deletedSources = 0;
354
360
  let movedSources = 0;
@@ -376,6 +382,7 @@ export async function consumeOpenFilesOutbox(options: OutboxConsumeOptions): Pro
376
382
  const invalidation = invalidateRevision(db, revisionId);
377
383
  chunksDeleted += invalidation.chunksDeleted;
378
384
  embeddingsDeleted += invalidation.embeddingsDeleted;
385
+ vectorEntriesDeleted += invalidation.vectorEntriesDeleted;
379
386
  staleRevisions += 1;
380
387
  }
381
388
 
@@ -429,6 +436,7 @@ export async function consumeOpenFilesOutbox(options: OutboxConsumeOptions): Pro
429
436
  revisions: revisionsTouched.size,
430
437
  chunks_deleted: chunksDeleted,
431
438
  embeddings_deleted: embeddingsDeleted,
439
+ vector_entries_deleted: vectorEntriesDeleted,
432
440
  },
433
441
  created_at: now,
434
442
  });
@@ -442,6 +450,7 @@ export async function consumeOpenFilesOutbox(options: OutboxConsumeOptions): Pro
442
450
  revisions_touched: revisionsTouched.size,
443
451
  chunks_deleted: chunksDeleted,
444
452
  embeddings_deleted: embeddingsDeleted,
453
+ vector_entries_deleted: vectorEntriesDeleted,
445
454
  stale_revisions: staleRevisions,
446
455
  deleted_sources: deletedSources,
447
456
  moved_sources: movedSources,
package/src/provenance.ts ADDED
@@ -0,0 +1,93 @@
1
+ export interface KnowledgeProvenance {
2
+ source_owner: 'open-files';
3
+ source_ref: string | null;
4
+ source_uri: string | null;
5
+ source_kind: string | null;
6
+ source_revision_id: string | null;
7
+ revision: string | null;
8
+ hash: string | null;
9
+ chunk_id: string | null;
10
+ start_offset: number | null;
11
+ end_offset: number | null;
12
+ status: string | null;
13
+ read_only: true;
14
+ citation_required: boolean;
15
+ resolver: string | null;
16
+ stale: boolean;
17
+ }
18
+
19
+ export interface GeneratedArtifactProvenance {
20
+ source_owner: 'open-files';
21
+ generated_from: string;
22
+ artifact_key: string;
23
+ source_refs: string[];
24
+ read_only_sources: true;
25
+ citation_required: boolean;
26
+ raw_source_bytes_stored_in_open_knowledge: false;
27
+ }
28
+
29
+ export interface SourceProvenanceInput {
30
+ source_ref?: string | null;
31
+ source_uri?: string | null;
32
+ source_kind?: string | null;
33
+ source_revision_id?: string | null;
34
+ revision?: string | null;
35
+ hash?: string | null;
36
+ chunk_id?: string | null;
37
+ start_offset?: number | null;
38
+ end_offset?: number | null;
39
+ status?: string | null;
40
+ resolver?: string | null;
41
+ }
42
+
43
+ export function isStaleStatus(status: string | null | undefined): boolean {
44
+ return ['deleted', 'stale', 'invalidated', 'reindex_required'].includes((status ?? '').toLowerCase());
45
+ }
46
+
47
+ export function sourceProvenance(input: SourceProvenanceInput): KnowledgeProvenance {
48
+ const status = input.status ?? null;
49
+ return {
50
+ source_owner: 'open-files',
51
+ source_ref: input.source_ref ?? null,
52
+ source_uri: input.source_uri ?? null,
53
+ source_kind: input.source_kind ?? null,
54
+ source_revision_id: input.source_revision_id ?? null,
55
+ revision: input.revision ?? null,
56
+ hash: input.hash ?? null,
57
+ chunk_id: input.chunk_id ?? null,
58
+ start_offset: input.start_offset ?? null,
59
+ end_offset: input.end_offset ?? null,
60
+ status,
61
+ read_only: true,
62
+ citation_required: true,
63
+ resolver: input.resolver ?? null,
64
+ stale: isStaleStatus(status),
65
+ };
66
+ }
67
+
68
+ export function generatedArtifactProvenance(input: {
69
+ generated_from: string;
70
+ artifact_key: string;
71
+ source_refs?: string[];
72
+ citation_required?: boolean;
73
+ }): GeneratedArtifactProvenance {
74
+ return {
75
+ source_owner: 'open-files',
76
+ generated_from: input.generated_from,
77
+ artifact_key: input.artifact_key,
78
+ source_refs: input.source_refs ?? [],
79
+ read_only_sources: true,
80
+ citation_required: input.citation_required ?? true,
81
+ raw_source_bytes_stored_in_open_knowledge: false,
82
+ };
83
+ }
84
+
85
+ export function withProvenance<T extends Record<string, unknown>>(
86
+ metadata: T,
87
+ provenance: KnowledgeProvenance | GeneratedArtifactProvenance,
88
+ ): T & { provenance: KnowledgeProvenance | GeneratedArtifactProvenance } {
89
+ return {
90
+ ...metadata,
91
+ provenance,
92
+ };
93
+ }
package/src/service.ts CHANGED
@@ -1,4 +1,11 @@
1
1
  import { createArtifactStore } from './artifact-store';
2
+ import {
3
+ embeddingIndexStatus,
4
+ indexKnowledgeEmbeddings,
5
+ searchVectorIndex,
6
+ type EmbeddingIndexOptions,
7
+ type EmbeddingSearchOptions,
8
+ } from './embeddings';
2
9
  import { consumeOpenFilesOutbox } from './outbox-consume';
3
10
  import { getKnowledgeDbStats, migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
4
11
  import { ingestOpenFilesManifest } from './manifest-ingest';
@@ -13,7 +20,7 @@ import {
13
20
  type StorageContract,
14
21
  type StorageValidationResult,
15
22
  } from './storage-contract';
16
- import { initializeWikiLayout } from './wiki-layout';
23
+ import { initializeWikiLayout, recordWikiLayoutCatalog } from './wiki-layout';
17
24
  import {
18
25
  ensureKnowledgeWorkspace,
19
26
  readKnowledgeConfig,
@@ -128,6 +135,7 @@ export class KnowledgeService {
128
135
  const db = openKnowledgeDb(workspace.knowledgeDbPath);
129
136
  try {
130
137
  recordStorageObjects(db, result.artifacts);
138
+ recordWikiLayoutCatalog(db, result.artifacts);
131
139
  } finally {
132
140
  db.close();
133
141
  }
@@ -183,6 +191,29 @@ export class KnowledgeService {
183
191
  modelRegistry(): ModelRegistryEntry[] {
184
192
  return listModelRegistry(this.config());
185
193
  }
194
+
195
+ embeddingStatus() {
196
+ const workspace = this.ensureWorkspace();
197
+ return embeddingIndexStatus(workspace.knowledgeDbPath);
198
+ }
199
+
200
+ async indexEmbeddings(options: Omit<EmbeddingIndexOptions, 'dbPath' | 'config'> = {}) {
201
+ const workspace = this.ensureWorkspace();
202
+ return indexKnowledgeEmbeddings({
203
+ ...options,
204
+ dbPath: workspace.knowledgeDbPath,
205
+ config: this.config(),
206
+ });
207
+ }
208
+
209
+ async semanticSearch(options: Omit<EmbeddingSearchOptions, 'dbPath' | 'config'>) {
210
+ const workspace = this.ensureWorkspace();
211
+ return searchVectorIndex({
212
+ ...options,
213
+ dbPath: workspace.knowledgeDbPath,
214
+ config: this.config(),
215
+ });
216
+ }
186
217
  }
187
218
 
188
219
  export function createKnowledgeService(options: KnowledgeServiceOptions = {}): KnowledgeService {
@@ -1,5 +1,6 @@
1
1
  import type { Database } from 'bun:sqlite';
2
2
  import { migrateKnowledgeDb, openKnowledgeDb } from './knowledge-db';
3
+ import { sourceProvenance, type KnowledgeProvenance } from './provenance';
3
4
  import { catalogSourceUriForRef, parseSourceRef, revisionIdForSourceRef } from './source-ref';
4
5
  import { assertWriteAllowed, recordAuditEvent, type SafetyPolicy } from './safety';
5
6
 
@@ -38,6 +39,7 @@ export interface ResolvedSourceChunk {
38
39
  end_offset: number | null;
39
40
  metadata: Record<string, unknown>;
40
41
  evidence: SourceResolverEvidence;
42
+ provenance: KnowledgeProvenance;
41
43
  }
42
44
 
43
45
  export interface ResolvedSourceCitation {
@@ -48,6 +50,7 @@ export interface ResolvedSourceCitation {
48
50
  start_offset: number | null;
49
51
  end_offset: number | null;
50
52
  evidence: SourceResolverEvidence;
53
+ provenance: KnowledgeProvenance;
51
54
  }
52
55
 
53
56
  export interface SourceResolveResult {
@@ -326,6 +329,19 @@ export async function resolveOpenFilesSource(options: SourceResolveOptions): Pro
326
329
  end_offset: row.end_offset,
327
330
  resolved_at: resolvedAt,
328
331
  };
332
+ const provenance = sourceProvenance({
333
+ source_ref: evidence.source_ref,
334
+ source_uri: evidence.source_uri,
335
+ source_kind: source.kind,
336
+ source_revision_id: evidence.source_revision_id,
337
+ revision: evidence.revision,
338
+ hash: evidence.hash,
339
+ chunk_id: row.id,
340
+ start_offset: row.start_offset,
341
+ end_offset: row.end_offset,
342
+ status: metadataString(metadata, ['status']),
343
+ resolver: evidence.resolver,
344
+ });
329
345
  return {
330
346
  id: row.id,
331
347
  kind: row.kind,
@@ -336,6 +352,7 @@ export async function resolveOpenFilesSource(options: SourceResolveOptions): Pro
336
352
  end_offset: row.end_offset,
337
353
  metadata,
338
354
  evidence,
355
+ provenance,
339
356
  };
340
357
  });
341
358
 
@@ -347,6 +364,7 @@ export async function resolveOpenFilesSource(options: SourceResolveOptions): Pro
347
364
  start_offset: chunk.start_offset,
348
365
  end_offset: chunk.end_offset,
349
366
  evidence: chunk.evidence,
367
+ provenance: chunk.provenance,
350
368
  }));
351
369
 
352
370
  recordAuditEvent(db, {
package/src/wiki-layout.ts CHANGED
@@ -1,4 +1,7 @@
1
+ import { createHash } from 'node:crypto';
2
+ import type { Database } from 'bun:sqlite';
1
3
  import type { ArtifactStore } from './artifact-store';
4
+ import { generatedArtifactProvenance, type GeneratedArtifactProvenance } from './provenance';
2
5
  import {
3
6
  artifactKindForKey,
4
7
  hashArtifactBody,
@@ -14,6 +17,13 @@ export interface WikiLayoutInitResult {
14
17
  written: string[];
15
18
  }
16
19
 
20
+ interface CatalogArtifact {
21
+ key: string;
22
+ uri: string;
23
+ hash?: string;
24
+ metadata?: Record<string, unknown>;
25
+ }
26
+
17
27
  function todayParts(now: Date): { year: string; month: string; day: string } {
18
28
  const year = String(now.getUTCFullYear());
19
29
  const month = String(now.getUTCMonth() + 1).padStart(2, '0');
@@ -21,6 +31,10 @@ function todayParts(now: Date): { year: string; month: string; day: string } {
21
31
  return { year, month, day };
22
32
  }
23
33
 
34
+ function stableId(prefix: string, value: string): string {
35
+ return `${prefix}_${createHash('sha256').update(value).digest('hex').slice(0, 20)}`;
36
+ }
37
+
24
38
  export function agentSchemaTemplate(): string {
25
39
  return `# Knowledge Agent Schema v1
26
40
 
@@ -106,6 +120,13 @@ export async function initializeWikiLayout(store: ArtifactStore, now = new Date(
106
120
  uri: result.uri,
107
121
  kind: artifactKindForKey(entry.key),
108
122
  content_type: entry.content_type,
123
+ metadata: {
124
+ provenance: generatedArtifactProvenance({
125
+ generated_from: 'wiki_layout_init',
126
+ artifact_key: entry.key,
127
+ citation_required: entry.key.startsWith('wiki/') || entry.key.startsWith('indexes/'),
128
+ }),
129
+ },
109
130
  ...hashArtifactBody(entry.body),
110
131
  };
111
132
  }));
@@ -118,3 +139,73 @@ export async function initializeWikiLayout(store: ArtifactStore, now = new Date(
118
139
  written: [schemaKey, rootIndexKey, wikiReadmeKey, logKey],
119
140
  };
120
141
  }
142
+
143
+ function provenanceFor(artifact: CatalogArtifact): GeneratedArtifactProvenance {
144
+ const existing = artifact.metadata?.provenance;
145
+ if (existing && typeof existing === 'object' && !Array.isArray(existing)) {
146
+ return existing as GeneratedArtifactProvenance;
147
+ }
148
+ return generatedArtifactProvenance({
149
+ generated_from: 'wiki_layout_init',
150
+ artifact_key: artifact.key,
151
+ });
152
+ }
153
+
154
+ export function recordWikiLayoutCatalog(db: Database, artifacts: CatalogArtifact[], now = new Date()): void {
155
+ const timestamp = now.toISOString();
156
+ const rootIndex = artifacts.find((artifact) => artifact.key.endsWith('indexes/root.md'));
157
+ const wikiReadme = artifacts.find((artifact) => artifact.key.endsWith('wiki/README.md'));
158
+
159
+ if (rootIndex) {
160
+ db.run(
161
+ `INSERT INTO knowledge_indexes (id, kind, name, artifact_uri, shard_key, metadata_json, created_at, updated_at)
162
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
163
+ ON CONFLICT(kind, name, shard_key) DO UPDATE SET
164
+ artifact_uri = excluded.artifact_uri,
165
+ metadata_json = excluded.metadata_json,
166
+ updated_at = excluded.updated_at`,
167
+ [
168
+ stableId('idx', 'root:indexes/root.md'),
169
+ 'root',
170
+ 'root',
171
+ rootIndex.uri,
172
+ 'root',
173
+ JSON.stringify({
174
+ artifact_key: rootIndex.key,
175
+ content_hash: rootIndex.hash ?? null,
176
+ provenance: provenanceFor(rootIndex),
177
+ }),
178
+ timestamp,
179
+ timestamp,
180
+ ],
181
+ );
182
+ }
183
+
184
+ if (wikiReadme) {
185
+ db.run(
186
+ `INSERT INTO wiki_pages (id, path, title, artifact_uri, content_hash, status, metadata_json, created_at, updated_at)
187
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
188
+ ON CONFLICT(path) DO UPDATE SET
189
+ title = excluded.title,
190
+ artifact_uri = excluded.artifact_uri,
191
+ content_hash = excluded.content_hash,
192
+ status = excluded.status,
193
+ metadata_json = excluded.metadata_json,
194
+ updated_at = excluded.updated_at`,
195
+ [
196
+ stableId('wiki', 'wiki/README.md'),
197
+ 'wiki/README.md',
198
+ 'Wiki',
199
+ wikiReadme.uri,
200
+ wikiReadme.hash ?? null,
201
+ 'active',
202
+ JSON.stringify({
203
+ artifact_key: wikiReadme.key,
204
+ provenance: provenanceFor(wikiReadme),
205
+ }),
206
+ timestamp,
207
+ timestamp,
208
+ ],
209
+ );
210
+ }
211
+ }
package/src/workspace.ts CHANGED
@@ -39,6 +39,12 @@ export interface KnowledgeConfig {
39
39
  preferred_ref: 'open-files';
40
40
  allowed_schemes: string[];
41
41
  };
42
+ embeddings?: {
43
+ default_model?: string;
44
+ dimensions?: number;
45
+ batch_size?: number;
46
+ max_parallel_calls?: number;
47
+ };
42
48
  providers?: {
43
49
  default_model?: string;
44
50
  aliases?: Record<string, string>;
@@ -136,6 +142,12 @@ export function defaultKnowledgeConfig(): KnowledgeConfig {
136
142
  default_model: 'deepseek-chat',
137
143
  },
138
144
  },
145
+ embeddings: {
146
+ default_model: 'openai:text-embedding-3-small',
147
+ dimensions: 1536,
148
+ batch_size: 64,
149
+ max_parallel_calls: 4,
150
+ },
139
151
  safety: {
140
152
  network: {
141
153
  web_search_enabled: false,