@hasna/knowledge 0.2.11 → 0.2.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -2
- package/bin/open-knowledge-mcp.js +370 -14
- package/bin/open-knowledge.js +57 -31
- package/docs/architecture/ai-native-knowledge-base.md +27 -0
- package/package.json +1 -1
- package/src/cli.ts +31 -4
- package/src/knowledge-db.ts +3 -0
- package/src/manifest-ingest.ts +19 -2
- package/src/mcp.js +12 -0
- package/src/provenance.ts +93 -0
- package/src/service.ts +28 -3
- package/src/source-resolver.ts +18 -0
- package/src/storage-contract.ts +265 -0
- package/src/wiki-layout.ts +113 -6
|
@@ -0,0 +1,265 @@
|
|
|
1
|
+
import { createHash, randomUUID } from 'node:crypto';
|
|
2
|
+
import type { Database } from 'bun:sqlite';
|
|
3
|
+
import type { KnowledgeConfig, KnowledgeWorkspace } from './workspace';
|
|
4
|
+
import { HASNA_KNOWLEDGE_APP_PATH } from './workspace';
|
|
5
|
+
|
|
6
|
+
/**
 * One class of generated artifact, identified by the key prefix it is stored
 * under.
 */
export interface StorageArtifactClass {
  /** Kind label reported for keys that start with `prefix` (e.g. `'schema'`). */
  kind: string;
  /** Key prefix that selects this class (e.g. `'schemas/'`). */
  prefix: string;
  /** Human-readable summary of what lives under the prefix. */
  description: string;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Resolved description of where a knowledge workspace keeps its local state
 * and generated artifacts. Values are assembled by `resolveStorageContract`
 * from a `KnowledgeConfig` and a `KnowledgeWorkspace`.
 */
export interface StorageContract {
  /** Caller-supplied scope label (`resolveStorageContract` defaults it to `'global'`). */
  scope: string;
  /** Copied from `config.mode`. */
  mode: KnowledgeConfig['mode'];
  /** Copied from `config.storage.type`. */
  storage_type: KnowledgeConfig['storage']['type'];
  /** Copied from `workspace.home`. */
  workspace_home: string;

  /** Paths of the workspace's local files and directories. */
  local_layout: {
    /** The `HASNA_KNOWLEDGE_APP_PATH` constant. */
    app_path: string;
    config_path: string;
    json_store_path: string;
    knowledge_db_path: string;
    /** Named workspace directories (artifacts, cache, exports, indexes, logs, runs, schemas, wiki). */
    directories: Record<string, string>;
  };

  /** Where generated artifacts are written. */
  artifact_store: {
    type: KnowledgeConfig['storage']['type'];
    artifacts_root: string;
    /** `s3://bucket/prefix/` when the storage type is `'s3'`, otherwise a `file://` URI of the artifacts directory. */
    uri_prefix: string;
    /** S3 settings echoed from the config, or `null` when `config.storage.s3` is absent. */
    s3: {
      bucket: string;
      /** Configured prefix with leading and trailing slashes removed. */
      prefix: string;
      region: string | null;
      profile: string | null;
      server_side_encryption: string | null;
      /** Only whether a KMS key id is set; the id itself is not exposed. */
      kms_key_configured: boolean;
    } | null;
  };

  /** Declares that source bytes are owned by open-files, not by this store. */
  source_ownership: {
    owner: 'open-files';
    preferred_ref: string;
    allowed_schemes: string[];
    raw_source_bytes_stored_in_open_knowledge: false;
    /** Descriptive list of what this store keeps. */
    stores: string[];
    /** Descriptive list of what this store deliberately does not keep. */
    does_not_store: string[];
  };

  /** The known generated-artifact classes and their key prefixes. */
  generated_artifacts: StorageArtifactClass[];

  /** Free-text notes on how each layer is expected to scale. */
  scalability: {
    catalog: string;
    indexes: string;
    logs: string;
    markdown: string;
  };

  /** Warnings produced by `validateStorageConfig` for this config/workspace. */
  warnings: string[];
}
|
|
54
|
+
|
|
55
|
+
/** Outcome of `validateStorageConfig`. */
export interface StorageValidationResult {
  /** `true` exactly when `errors` is empty. */
  ok: boolean;
  /** Problems that make the configuration invalid. */
  errors: string[];
  /** Non-fatal observations about the configuration. */
  warnings: string[];
}
|
|
60
|
+
|
|
61
|
+
/**
 * A generated artifact to be cataloged in the `storage_objects` table by
 * `recordStorageObjects`.
 */
export interface GeneratedStorageObject {
  /** Artifact URI; written to `artifact_uri`, the upsert conflict target. */
  uri: string;
  /** Storage key; persisted inside `metadata_json`. */
  key: string;
  /** Artifact kind label (see `artifactKindForKey`). */
  kind: string;
  /** Stored as SQL NULL when omitted. */
  content_type?: string;
  /** Content hash, e.g. the `sha256:<hex>` value from `hashArtifactBody`; NULL when omitted. */
  hash?: string;
  /** Body size in bytes; NULL when omitted. */
  size_bytes?: number;
  /** Extra fields merged into `metadata_json` next to `key`. */
  metadata?: Record<string, unknown>;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Known classes of generated artifacts, keyed by the prefix of their storage
 * key. `artifactKindForKey` scans this list in order and returns the first
 * entry whose prefix matches.
 */
const GENERATED_ARTIFACTS: StorageArtifactClass[] = [
  { kind: 'schema', prefix: 'schemas/', description: 'Machine-readable agent schemas and source rules.' },
  { kind: 'index', prefix: 'indexes/', description: 'Small orientation indexes and future shard manifests.' },
  { kind: 'log', prefix: 'logs/', description: 'Append-only JSONL run and wiki-maintenance log partitions.' },
  { kind: 'run', prefix: 'runs/', description: 'Prompt/tool/cost ledgers and generated output records.' },
  { kind: 'wiki_page', prefix: 'wiki/', description: 'Generated cited Markdown pages, not raw source files.' },
  { kind: 'export', prefix: 'exports/', description: 'Portable exports and snapshots of derived knowledge state.' },
];
|
|
103
|
+
|
|
104
|
+
/**
 * Computes the content hash and byte size of an artifact body.
 *
 * Strings are hashed and measured as UTF-8. Binary input is hashed as-is,
 * without first being copied into a new Buffer.
 *
 * @param body - Artifact content, as text or raw bytes.
 * @returns `hash` in the form `sha256:<hex digest>` and `size_bytes`, the
 *   number of bytes hashed.
 */
export function hashArtifactBody(body: string | Uint8Array): { hash: string; size_bytes: number } {
  // `Hash.update` accepts both strings (UTF-8 by default) and typed arrays,
  // so no intermediate Buffer is needed for either input type.
  const digest = createHash('sha256').update(body).digest('hex');
  const size_bytes = typeof body === 'string' ? Buffer.byteLength(body, 'utf8') : body.byteLength;
  return { hash: `sha256:${digest}`, size_bytes };
}
|
|
111
|
+
|
|
112
|
+
/**
 * Maps a storage key to its artifact kind.
 *
 * @param key - Artifact storage key, e.g. `wiki/README.md`.
 * @returns The kind of the first `GENERATED_ARTIFACTS` entry whose prefix the
 *   key starts with, or `'artifact'` when no prefix matches.
 */
export function artifactKindForKey(key: string): string {
  for (const { prefix, kind } of GENERATED_ARTIFACTS) {
    if (key.startsWith(prefix)) {
      return kind;
    }
  }
  return 'artifact';
}
|
|
116
|
+
|
|
117
|
+
/**
 * Builds the storage contract for a workspace: local paths, the artifact
 * store location, the source-ownership declaration, and the known generated
 * artifact classes.
 *
 * The returned object does not share array references with the config or
 * with module-level state, so callers may mutate it freely.
 *
 * NOTE(review): only `validation.warnings` is surfaced; validation errors
 * (e.g. a missing S3 bucket) are not part of the contract. Callers that need
 * them should call `validateStorageConfig` directly.
 *
 * @param config - Knowledge configuration to describe.
 * @param workspace - Resolved workspace paths.
 * @param scope - Label copied into the contract; defaults to `'global'`.
 * @returns The resolved storage contract.
 */
export function resolveStorageContract(
  config: KnowledgeConfig,
  workspace: KnowledgeWorkspace,
  scope = 'global',
): StorageContract {
  const validation = validateStorageConfig(config, workspace);
  const s3 = config.storage.s3 ?? null;
  // Strip leading/trailing slashes so the prefix joins cleanly into the URI.
  const prefix = s3?.prefix?.replace(/^\/+|\/+$/g, '') ?? '';
  const s3UriPrefix = s3 ? `s3://${s3.bucket}/${prefix ? `${prefix}/` : ''}` : '';

  return {
    scope,
    mode: config.mode,
    storage_type: config.storage.type,
    workspace_home: workspace.home,
    local_layout: {
      app_path: HASNA_KNOWLEDGE_APP_PATH,
      config_path: workspace.configPath,
      json_store_path: workspace.jsonStorePath,
      knowledge_db_path: workspace.knowledgeDbPath,
      directories: {
        artifacts: workspace.artifactsDir,
        cache: workspace.cacheDir,
        exports: workspace.exportsDir,
        indexes: workspace.indexesDir,
        logs: workspace.logsDir,
        runs: workspace.runsDir,
        schemas: workspace.schemasDir,
        wiki: workspace.wikiDir,
      },
    },
    artifact_store: {
      type: config.storage.type,
      artifacts_root: config.storage.artifacts_root,
      uri_prefix: config.storage.type === 's3' ? s3UriPrefix : `file://${workspace.artifactsDir}/`,
      s3: s3
        ? {
            bucket: s3.bucket,
            prefix,
            region: s3.region ?? null,
            profile: s3.profile ?? null,
            server_side_encryption: s3.server_side_encryption ?? null,
            // Report only whether a key is configured; never echo the key id.
            kms_key_configured: Boolean(s3.kms_key_id),
          }
        : null,
    },
    source_ownership: {
      owner: 'open-files',
      preferred_ref: config.sources.preferred_ref,
      // Copy so mutating the contract cannot alter the caller's config.
      allowed_schemes: [...config.sources.allowed_schemes],
      raw_source_bytes_stored_in_open_knowledge: false,
      stores: [
        'source refs',
        'source revisions and hashes',
        'citation spans',
        'redacted extracted chunks',
        'embeddings',
        'generated wiki artifacts',
        'indexes',
        'run ledgers',
      ],
      does_not_store: [
        'raw open-files bytes',
        'S3 object credentials',
        'connector secrets',
        'hosted tenant ownership state',
      ],
    },
    // Copy the entries so the module-level table used by artifactKindForKey
    // cannot be modified through a returned contract.
    generated_artifacts: GENERATED_ARTIFACTS.map((entry) => ({ ...entry })),
    scalability: {
      catalog: 'knowledge.db tracks sources, revisions, chunks, citations, indexes, runs, and storage_objects.',
      indexes: 'Indexes are cataloged DB rows plus sharded artifacts, not one giant index.md.',
      logs: 'Logs use dated JSONL partitions under logs/yyyy/mm/dd.jsonl.',
      markdown: 'Markdown pages are the readable wiki layer over DB/object-store state.',
    },
    warnings: validation.warnings,
  };
}
|
|
195
|
+
|
|
196
|
+
/**
 * Checks a knowledge config/workspace pair for storage-related problems.
 *
 * Errors (result is not `ok`):
 * - storage type is `s3` but no bucket is configured;
 * - `sources.allowed_schemes` does not include `open-files`.
 *
 * Warnings cover a workspace home that does not end with the app path, an
 * empty S3 prefix, S3 storage combined with `local` mode, S3 settings that
 * are ignored under `local` storage, and a `preferred_ref` other than
 * `open-files`.
 *
 * @param config - Knowledge configuration to validate.
 * @param workspace - Resolved workspace paths.
 * @returns Collected errors and warnings; `ok` is `true` when there are no errors.
 */
export function validateStorageConfig(config: KnowledgeConfig, workspace: KnowledgeWorkspace): StorageValidationResult {
  const errors: string[] = [];
  const warnings: string[] = [];

  if (!workspace.home.endsWith(HASNA_KNOWLEDGE_APP_PATH)) {
    warnings.push(`Workspace home does not end with ${HASNA_KNOWLEDGE_APP_PATH}: ${workspace.home}`);
  }

  if (config.storage.type === 's3') {
    if (!config.storage.s3?.bucket) errors.push('storage.s3.bucket is required when storage.type is s3.');
    // Normalize exactly as resolveStorageContract does, so a prefix made only
    // of slashes (e.g. "/") is recognized as empty and still warned about.
    const effectivePrefix = config.storage.s3?.prefix?.replace(/^\/+|\/+$/g, '') ?? '';
    if (!effectivePrefix) warnings.push('storage.s3.prefix is empty; generated knowledge artifacts will be written at the bucket root.');
    if (config.mode === 'local') warnings.push('storage.type is s3 while mode is local; this is valid for BYO S3, but hosted wrappers should set mode to hosted.');
  }

  if (config.storage.type === 'local' && config.storage.s3) {
    warnings.push('storage.s3 is configured but ignored while storage.type is local.');
  }

  if (config.sources.preferred_ref !== 'open-files') {
    warnings.push('sources.preferred_ref should stay open-files for durable company knowledge.');
  }

  if (!config.sources.allowed_schemes.includes('open-files')) {
    errors.push('sources.allowed_schemes must include open-files.');
  }

  return {
    ok: errors.length === 0,
    errors,
    warnings,
  };
}
|
|
228
|
+
|
|
229
|
+
/**
 * Upserts generated artifacts into the `storage_objects` catalog table.
 *
 * All rows are written inside one `db.transaction`. A row whose
 * `artifact_uri` already exists keeps its `id` and `created_at`; every other
 * column is refreshed from the new entry.
 *
 * @param db - Database handle that has a `storage_objects` table.
 * @param objects - Artifacts to catalog.
 * @param now - Clock used for `created_at` / `updated_at`; defaults to the current time.
 */
export function recordStorageObjects(db: Database, objects: GeneratedStorageObject[], now = new Date()): void {
  const timestamp = now.toISOString();
  const statement = db.prepare(`
    INSERT INTO storage_objects (
      id, artifact_uri, kind, content_type, hash, size_bytes, metadata_json, created_at, updated_at
    )
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
    ON CONFLICT(artifact_uri) DO UPDATE SET
      kind = excluded.kind,
      content_type = excluded.content_type,
      hash = excluded.hash,
      size_bytes = excluded.size_bytes,
      metadata_json = excluded.metadata_json,
      updated_at = excluded.updated_at
  `);

  const insert = db.transaction((entries: GeneratedStorageObject[]) => {
    for (const entry of entries) {
      const metadata: Record<string, unknown> = { key: entry.key, ...(entry.metadata ?? {}) };
      // The artifact's own key is authoritative: a stray `key` inside the
      // caller-supplied metadata must not replace it. Re-assigning keeps the
      // property in first position, so the JSON is unchanged for metadata
      // that has no `key` of its own.
      metadata.key = entry.key;

      statement.run(
        randomUUID(), // only used for new rows; the conflict branch keeps the existing id
        entry.uri,
        entry.kind,
        entry.content_type ?? null,
        entry.hash ?? null,
        entry.size_bytes ?? null,
        JSON.stringify(metadata),
        timestamp,
        timestamp,
      );
    }
  });

  insert(objects);
}
|
package/src/wiki-layout.ts
CHANGED
|
@@ -1,13 +1,29 @@
|
|
|
1
|
+
import { createHash } from 'node:crypto';
|
|
2
|
+
import type { Database } from 'bun:sqlite';
|
|
1
3
|
import type { ArtifactStore } from './artifact-store';
|
|
4
|
+
import { generatedArtifactProvenance, type GeneratedArtifactProvenance } from './provenance';
|
|
5
|
+
import {
|
|
6
|
+
artifactKindForKey,
|
|
7
|
+
hashArtifactBody,
|
|
8
|
+
type GeneratedStorageObject,
|
|
9
|
+
} from './storage-contract';
|
|
2
10
|
|
|
3
11
|
/** What `initializeWikiLayout` wrote: the individual keys plus catalog records. */
export interface WikiLayoutInitResult {
  /** Key of the agent schema document. */
  schema_key: string;
  /** Key of the root index document. */
  root_index_key: string;
  /** Key of the wiki README. */
  wiki_readme_key: string;
  /** Key of the JSONL log partition that received the init event. */
  log_key: string;
  /** One catalog record per written artifact (URI, kind, hash, size, provenance metadata). */
  artifacts: GeneratedStorageObject[];
  /** The schema, root index, wiki README and log keys, in that order. */
  written: string[];
}
|
|
10
19
|
|
|
20
|
+
/** The subset of a generated artifact record that `recordWikiLayoutCatalog` reads. */
interface CatalogArtifact {
  /** Storage key; matched by suffix to find the root index and the wiki README. */
  key: string;
  /** Artifact URI written to the catalog row. */
  uri: string;
  /** Content hash, stored as NULL/null when absent. */
  hash?: string;
  /** Optional metadata; a `provenance` object here is reused as-is. */
  metadata?: Record<string, unknown>;
}
|
|
26
|
+
|
|
11
27
|
function todayParts(now: Date): { year: string; month: string; day: string } {
|
|
12
28
|
const year = String(now.getUTCFullYear());
|
|
13
29
|
const month = String(now.getUTCMonth() + 1).padStart(2, '0');
|
|
@@ -15,6 +31,10 @@ function todayParts(now: Date): { year: string; month: string; day: string } {
|
|
|
15
31
|
return { year, month, day };
|
|
16
32
|
}
|
|
17
33
|
|
|
34
|
+
/**
 * Derives a deterministic identifier from a value.
 *
 * @param prefix - Label placed before the underscore.
 * @param value - Text to hash.
 * @returns `<prefix>_<first 20 hex characters of sha256(value)>`.
 */
function stableId(prefix: string, value: string): string {
  const digest = createHash('sha256').update(value).digest('hex');
  const shortDigest = digest.slice(0, 20);
  return [prefix, shortDigest].join('_');
}
|
|
37
|
+
|
|
18
38
|
export function agentSchemaTemplate(): string {
|
|
19
39
|
return `# Knowledge Agent Schema v1
|
|
20
40
|
|
|
@@ -86,19 +106,106 @@ export async function initializeWikiLayout(store: ArtifactStore, now = new Date(
|
|
|
86
106
|
wiki_readme_key: wikiReadmeKey,
|
|
87
107
|
};
|
|
88
108
|
|
|
89
|
-
const
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
109
|
+
const entries = [
|
|
110
|
+
{ key: schemaKey, body: agentSchemaTemplate(), content_type: 'text/markdown' },
|
|
111
|
+
{ key: rootIndexKey, body: rootIndexTemplate(), content_type: 'text/markdown' },
|
|
112
|
+
{ key: wikiReadmeKey, body: wikiReadmeTemplate(), content_type: 'text/markdown' },
|
|
113
|
+
{ key: logKey, body: `${JSON.stringify(event)}\n`, content_type: 'application/x-ndjson' },
|
|
94
114
|
];
|
|
95
115
|
|
|
96
|
-
await Promise.all(
|
|
116
|
+
const artifacts = await Promise.all(entries.map(async (entry) => {
|
|
117
|
+
const result = await store.put(entry);
|
|
118
|
+
return {
|
|
119
|
+
key: result.key,
|
|
120
|
+
uri: result.uri,
|
|
121
|
+
kind: artifactKindForKey(entry.key),
|
|
122
|
+
content_type: entry.content_type,
|
|
123
|
+
metadata: {
|
|
124
|
+
provenance: generatedArtifactProvenance({
|
|
125
|
+
generated_from: 'wiki_layout_init',
|
|
126
|
+
artifact_key: entry.key,
|
|
127
|
+
citation_required: entry.key.startsWith('wiki/') || entry.key.startsWith('indexes/'),
|
|
128
|
+
}),
|
|
129
|
+
},
|
|
130
|
+
...hashArtifactBody(entry.body),
|
|
131
|
+
};
|
|
132
|
+
}));
|
|
97
133
|
return {
|
|
98
134
|
schema_key: schemaKey,
|
|
99
135
|
root_index_key: rootIndexKey,
|
|
100
136
|
wiki_readme_key: wikiReadmeKey,
|
|
101
137
|
log_key: logKey,
|
|
138
|
+
artifacts,
|
|
102
139
|
written: [schemaKey, rootIndexKey, wikiReadmeKey, logKey],
|
|
103
140
|
};
|
|
104
141
|
}
|
|
142
|
+
|
|
143
|
+
/**
 * Returns the provenance to store for a cataloged artifact.
 *
 * A non-array object found at `artifact.metadata.provenance` is returned
 * unchanged (it is cast, not validated). Otherwise a fresh provenance record
 * is generated for the artifact's key.
 *
 * NOTE(review): unlike the call in `initializeWikiLayout`, the fallback does
 * not pass `citation_required` — confirm the default is what is wanted for
 * wiki and index artifacts.
 */
function provenanceFor(artifact: CatalogArtifact): GeneratedArtifactProvenance {
  const recorded = artifact.metadata?.provenance;
  const isPlainObject = Boolean(recorded) && typeof recorded === 'object' && !Array.isArray(recorded);

  return isPlainObject
    ? (recorded as GeneratedArtifactProvenance)
    : generatedArtifactProvenance({
        generated_from: 'wiki_layout_init',
        artifact_key: artifact.key,
      });
}
|
|
153
|
+
|
|
154
|
+
/**
 * Catalogs the wiki layout's root index and README in the knowledge database.
 *
 * The root index (first artifact whose key ends with `indexes/root.md`) is
 * upserted into `knowledge_indexes`, and the wiki README (first artifact
 * whose key ends with `wiki/README.md`) into `wiki_pages`. An artifact that
 * is not present in `artifacts` is skipped.
 *
 * @param db - Database handle with `knowledge_indexes` and `wiki_pages` tables.
 * @param artifacts - Artifacts produced by the wiki layout initialization.
 * @param now - Clock used for `created_at` / `updated_at`; defaults to the current time.
 */
export function recordWikiLayoutCatalog(db: Database, artifacts: CatalogArtifact[], now = new Date()): void {
  const upsertIndexSql = [
    'INSERT INTO knowledge_indexes (id, kind, name, artifact_uri, shard_key, metadata_json, created_at, updated_at)',
    'VALUES (?, ?, ?, ?, ?, ?, ?, ?)',
    'ON CONFLICT(kind, name, shard_key) DO UPDATE SET',
    'artifact_uri = excluded.artifact_uri,',
    'metadata_json = excluded.metadata_json,',
    'updated_at = excluded.updated_at',
  ].join('\n');

  const upsertWikiPageSql = [
    'INSERT INTO wiki_pages (id, path, title, artifact_uri, content_hash, status, metadata_json, created_at, updated_at)',
    'VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
    'ON CONFLICT(path) DO UPDATE SET',
    'title = excluded.title,',
    'artifact_uri = excluded.artifact_uri,',
    'content_hash = excluded.content_hash,',
    'status = excluded.status,',
    'metadata_json = excluded.metadata_json,',
    'updated_at = excluded.updated_at',
  ].join('\n');

  const timestamp = now.toISOString();
  const firstWithSuffix = (suffix: string): CatalogArtifact | undefined =>
    artifacts.find(({ key }) => key.endsWith(suffix));

  const rootIndex = firstWithSuffix('indexes/root.md');
  const wikiReadme = firstWithSuffix('wiki/README.md');

  if (rootIndex) {
    // The id is derived from a fixed string, so it is the same on every run.
    const indexId = stableId('idx', 'root:indexes/root.md');
    const indexMetadata = JSON.stringify({
      artifact_key: rootIndex.key,
      content_hash: rootIndex.hash ?? null,
      provenance: provenanceFor(rootIndex),
    });
    // Column order: id, kind, name, artifact_uri, shard_key, metadata_json, created_at, updated_at.
    db.run(upsertIndexSql, [indexId, 'root', 'root', rootIndex.uri, 'root', indexMetadata, timestamp, timestamp]);
  }

  if (wikiReadme) {
    const pageId = stableId('wiki', 'wiki/README.md');
    const pageMetadata = JSON.stringify({
      artifact_key: wikiReadme.key,
      provenance: provenanceFor(wikiReadme),
    });
    // Column order: id, path, title, artifact_uri, content_hash, status, metadata_json, created_at, updated_at.
    db.run(upsertWikiPageSql, [
      pageId,
      'wiki/README.md',
      'Wiki',
      wikiReadme.uri,
      wikiReadme.hash ?? null,
      'active',
      pageMetadata,
      timestamp,
      timestamp,
    ]);
  }
}
|