llm-wiki-kit 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,263 @@
1
+ import { basename, join, posix, relative } from 'path';
2
+ import { exists, listMarkdownFiles, readText } from './fs-utils.js';
3
+ import { normalizeForStorage } from './redaction.js';
4
+
5
/** Maximum number of lines kept from the memory page when building an excerpt. */
export const MEMORY_LINE_LIMIT = 200;

/** Maximum UTF-8 size of the memory excerpt, in bytes (25 KiB). */
export const MEMORY_BYTE_LIMIT = 25 * 1024;

/** File cap handed to `listMarkdownFiles` when the caller does not set `maxFiles`. */
export const DEFAULT_MAX_WIKI_FILES = 500;

/** Number of characters kept from each page when the caller does not set `maxChars`. */
export const DEFAULT_MAX_PAGE_CHARS = 50000;
9
+
10
/**
 * Locate the wiki directory of a project.
 *
 * @param {string} projectRoot - Project root directory.
 * @returns {string} `<projectRoot>/llm-wiki/wiki`, joined with the platform separator.
 */
export function wikiRoot(projectRoot) {
  const segments = ['llm-wiki', 'wiki'];
  return join(projectRoot, ...segments);
}
13
+
14
/**
 * Express a file path relative to the project's `llm-wiki` directory.
 *
 * @param {string} projectRoot - Project root directory.
 * @param {string} file - Path of the file to describe.
 * @returns {string} Relative path with every backslash replaced by `/`.
 */
export function wikiRel(projectRoot, file) {
  const base = join(projectRoot, 'llm-wiki');
  const fromBase = relative(base, file);
  // Backslashes are rewritten so the result uses forward slashes on every platform.
  return fromBase.replace(/\\/g, '/');
}
17
+
18
/**
 * Remove one pair of matching surrounding quotes from a value.
 *
 * Only a value that both starts and ends with the same quote character
 * (`"` or `'`) is unwrapped. Unbalanced or mismatched quotes are left alone so
 * text such as `say "hi"` or `'tis` is not truncated.
 *
 * @param {*} value - Raw value; falsy values are treated as the empty string.
 * @returns {string} The value without its surrounding quote pair.
 */
function stripQuotes(value) {
  const text = String(value || '');
  if (text.length < 2) return text;
  const first = text[0];
  const isQuote = first === '"' || first === "'";
  return isQuote && text.endsWith(first) ? text.slice(1, -1) : text;
}
21
+
22
/**
 * Split the inside of an inline `[a, b]` list on commas, keeping commas that
 * sit inside a quoted item.
 *
 * A quote only opens a quoted item when it is the first non-blank character
 * of that item, so apostrophes inside bare words (`don't`) stay literal. If a
 * quote is never closed the plain comma split is used instead.
 *
 * @param {string} inner - Text between the square brackets.
 * @returns {string[]} Raw (untrimmed, still quoted) items.
 */
function splitInlineArrayItems(inner) {
  const items = [];
  let current = '';
  let openQuote = '';
  for (const char of inner) {
    if (openQuote) {
      if (char === openQuote) openQuote = '';
      current += char;
    } else if ((char === '"' || char === "'") && current.trim() === '') {
      openQuote = char;
      current += char;
    } else if (char === ',') {
      items.push(current);
      current = '';
    } else {
      current += char;
    }
  }
  if (openQuote) return inner.split(',');
  items.push(current);
  return items;
}

/**
 * Parse a frontmatter value into a string or an array of strings.
 *
 * A value of the form `[ ... ]` becomes an array of its comma-separated items
 * (quotes stripped, empty items dropped); anything else becomes a single
 * string with surrounding quotes stripped.
 *
 * @param {*} value - Raw text after the `key:` of a frontmatter line.
 * @returns {string|string[]} Parsed scalar or inline list.
 */
function parseScalar(value) {
  const trimmed = String(value || '').trim();
  if (!/^\[.*\]$/.test(trimmed)) return stripQuotes(trimmed);
  return splitInlineArrayItems(trimmed.slice(1, -1))
    .map((item) => stripQuotes(item.trim()))
    .filter(Boolean);
}
32
+
33
/**
 * Split a page into its leading `---` frontmatter block and its body, and
 * parse the frontmatter with a small line-based reader.
 *
 * Supported frontmatter lines:
 *   - `key: value`        scalar or inline `[a, b]` list (via `parseScalar`)
 *   - `key:`              starts an empty list
 *   - `- item`            appended to the list started by the preceding key
 * Any other line is ignored and ends the current list.
 *
 * The closing fence must be a line consisting of `---` (optionally followed
 * by spaces or tabs), so a line such as `----` or `---text` no longer ends
 * the block early. An empty block (`---` directly followed by `---`) is
 * recognised as frontmatter.
 *
 * @param {string} content - Raw page text; falsy values are treated as empty.
 *   The text is first passed through `normalizeForStorage` (defined in
 *   another module; presumably normalises line endings — confirm there).
 * @returns {{data: Object, body: string, hasFrontmatter: boolean}} Parsed
 *   keys, the text after the frontmatter, and whether a block was found.
 */
export function parseFrontmatter(content) {
  const text = normalizeForStorage(content || '');
  const match = text.match(/^---[ \t]*\n(?:([\s\S]*?)\n)?---[ \t]*(?:\n|$)/);
  if (!match) return { data: {}, body: text, hasFrontmatter: false };

  const data = {};
  let currentArrayKey = null;
  // match[1] is undefined when the block is empty.
  for (const line of (match[1] ?? '').split('\n')) {
    const arrayItem = line.match(/^\s*-\s+(.+)\s*$/);
    if (arrayItem && currentArrayKey) {
      data[currentArrayKey].push(stripQuotes(arrayItem[1].trim()));
      continue;
    }

    const keyValue = line.match(/^([A-Za-z0-9_-]+):\s*(.*)$/);
    // Assigning to `__proto__` would swap the prototype of `data` instead of
    // storing a key, so that name is skipped like any unparseable line.
    if (!keyValue || keyValue[1] === '__proto__') {
      currentArrayKey = null;
      continue;
    }

    const [, key, value] = keyValue;
    if (value.trim() === '') {
      data[key] = [];
      currentArrayKey = key;
    } else {
      data[key] = parseScalar(value);
      // An inline list may still be extended by following `- item` lines.
      currentArrayKey = Array.isArray(data[key]) ? key : null;
    }
  }

  return {
    data,
    body: text.slice(match[0].length),
    hasFrontmatter: true,
  };
}
70
+
71
/**
 * Return the text of the first level-1 (`# ...`) heading in a Markdown body.
 *
 * The gap between `#` and the heading text must be spaces or tabs on the same
 * line, so an empty `#` line does not pick up the following line as its
 * title. Deeper headings (`##`, ...) are not matched.
 *
 * @param {string} [body] - Markdown text; null or undefined is treated as empty.
 * @returns {string} Trimmed heading text, or `''` when there is none.
 */
export function firstHeading(body) {
  const match = String(body ?? '').match(/^#[ \t]+(.+)$/m);
  return match ? match[1].trim() : '';
}
74
+
75
/**
 * Reduce a link target or page name to the key used for alias lookups.
 *
 * Steps, in order: pass through `normalizeForStorage`, drop everything from
 * the first `|` and from the first `#`, trim, turn backslashes into `/`,
 * remove one leading `./` or `/`, remove a trailing `.md` (any case), and
 * lower-case the result.
 *
 * @param {string} value - Raw target; falsy values are treated as empty.
 * @returns {string} Normalised key, possibly `''`.
 */
export function normalizeTarget(value) {
  const text = normalizeForStorage(value || '');
  const [beforeAlias] = text.split('|');
  const [beforeAnchor] = beforeAlias.split('#');
  const slashed = beforeAnchor.trim().replace(/\\/g, '/');
  const withoutPrefix = slashed.replace(/^\.?\//, '');
  return withoutPrefix.replace(/\.md$/i, '').toLowerCase();
}
85
+
86
/**
 * Remove code from Markdown so that links inside code are not picked up.
 *
 * Fenced blocks (three or more backticks or tildes, closed by the identical
 * fence) are removed first, keeping the newline that preceded the opening
 * fence; inline backtick spans on a single line are removed afterwards.
 *
 * @param {string} content - Markdown text; falsy values are treated as empty.
 * @returns {string} The text with code blocks and inline code spans removed.
 */
export function stripMarkdownCode(content) {
  const fencedBlock = /(^|\n)(`{3,}|~{3,})[^\n]*\n[\s\S]*?\n\2[ \t]*(?=\n|$)/g;
  const inlineSpan = /`+[^`\n]*`+/g;
  const text = normalizeForStorage(content || '');
  const withoutFences = text.replace(fencedBlock, '$1');
  return withoutFences.replace(inlineSpan, '');
}
91
+
92
/**
 * Collect `[[wikilink]]` references found outside of code.
 *
 * @param {string} content - Markdown text of a page.
 * @returns {{raw: string, target: string}[]} One entry per link, in document
 *   order: `raw` is the trimmed text between the brackets and `target` its
 *   `normalizeTarget` form. Links whose target normalises to `''` are skipped.
 */
export function extractWikilinks(content) {
  const searchable = stripMarkdownCode(content);
  const links = [];
  for (const [, inner] of searchable.matchAll(/\[\[([^\]]+)\]\]/g)) {
    const raw = inner.trim();
    const target = normalizeTarget(raw);
    if (target) links.push({ raw, target });
  }
  return links;
}
105
+
106
/**
 * Collect relative `[text](path)` links found outside of code.
 *
 * Image links (`![...](...)`) are not matched. A link is skipped when its
 * destination is empty, starts with `#` or `/`, or starts with a URI scheme
 * (`name:`). For the remaining links the destination is cut at the first
 * whitespace, and the fragment and query string are removed.
 *
 * @param {string} content - Markdown text of a page.
 * @returns {{raw: string, path: string}[]} One entry per kept link, in
 *   document order: `raw` is the trimmed text between the parentheses and
 *   `path` the cleaned destination with `/` separators.
 */
export function extractMarkdownLinks(content) {
  const searchable = stripMarkdownCode(content);
  const schemePattern = /^[a-z][a-z0-9+.-]*:/i;
  const links = [];
  for (const [, destination] of searchable.matchAll(/(?<!!)\[[^\]]+\]\(([^)]+)\)/g)) {
    const raw = destination.trim();
    const [href] = raw.replace(/^<|>$/g, '').split(/\s+/);
    const isLocal = href
      && !href.startsWith('#')
      && !href.startsWith('/')
      && !schemePattern.test(href);
    if (!isLocal) continue;
    const path = href.split('#')[0].split('?')[0].replace(/\\/g, '/');
    if (path) links.push({ raw, path });
  }
  return links;
}
124
+
125
/**
 * Build the in-memory record for one wiki page.
 *
 * The title is the frontmatter `title`, else the first `#` heading of the
 * body, else the file name without `.md`. Aliases are the de-duplicated
 * `normalizeTarget` forms of the stem, the title, and the page path with and
 * without its `wiki/` prefix and `.md` extension.
 *
 * @param {string} projectRoot - Project root directory.
 * @param {string} file - Path of the page file.
 * @param {string} content - Text of the page.
 * @returns {Object} Page record: paths (`absolutePath`, `rel`), `title`,
 *   `stem`, parsed `frontmatter`, `hasFrontmatter`, `body`, normalised
 *   `content`, selected frontmatter fields (`type`, `status`, `confidence`,
 *   `memoryType`, `sourceIds`), `aliases`, and extracted `wikilinks` and
 *   `markdownLinks`.
 */
export function parseWikiPage(projectRoot, file, content) {
  const rel = wikiRel(projectRoot, file);
  const { data: frontmatter, body, hasFrontmatter } = parseFrontmatter(content);
  const stem = basename(file, '.md');
  const title = String(frontmatter.title || firstHeading(body) || stem).trim();

  const dropExtension = (value) => value.replace(/\.md$/i, '');
  const withoutWiki = rel.replace(/^wiki\//, '');
  const aliasCandidates = [
    stem,
    title,
    rel,
    dropExtension(rel),
    withoutWiki,
    dropExtension(withoutWiki),
  ];
  const aliases = new Set(aliasCandidates.map(normalizeTarget).filter(Boolean));

  return {
    absolutePath: file,
    rel,
    title,
    stem,
    frontmatter,
    hasFrontmatter,
    body,
    content: normalizeForStorage(content || ''),
    type: frontmatter.type || '',
    status: frontmatter.status || '',
    confidence: frontmatter.confidence || '',
    memoryType: frontmatter.memory_type || '',
    aliases: [...aliases],
    wikilinks: extractWikilinks(content),
    markdownLinks: extractMarkdownLinks(content),
    // Only a list-valued `source_ids` is accepted; a scalar is ignored.
    sourceIds: Array.isArray(frontmatter.source_ids) ? frontmatter.source_ids : [],
  };
}
161
+
162
/**
 * Read and parse the Markdown pages under the project's wiki directory.
 *
 * Files are processed one at a time in sorted path order, and only the first
 * `maxChars` characters of each file are parsed.
 *
 * @param {string} projectRoot - Project root directory.
 * @param {{maxFiles?: number, maxChars?: number}} [options] - Limits; a
 *   missing or falsy value falls back to `DEFAULT_MAX_WIKI_FILES` /
 *   `DEFAULT_MAX_PAGE_CHARS`.
 * @returns {Promise<Object[]>} Page records produced by `parseWikiPage`.
 * @throws {Error} When the wiki directory does not exist.
 */
export async function collectWikiPages(projectRoot, options = {}) {
  const root = wikiRoot(projectRoot);
  const rootExists = await exists(root);
  if (!rootExists) {
    throw new Error(`llm-wiki wiki directory not found: ${root}`);
  }

  const maxFiles = options.maxFiles || DEFAULT_MAX_WIKI_FILES;
  const maxChars = options.maxChars || DEFAULT_MAX_PAGE_CHARS;
  const files = await listMarkdownFiles(root, maxFiles);
  files.sort();

  const pages = [];
  for (const file of files) {
    const text = await readText(file);
    pages.push(parseWikiPage(projectRoot, file, text.slice(0, maxChars)));
  }
  return pages;
}
177
+
178
/**
 * Index pages by alias.
 *
 * @param {{rel: string, aliases: string[]}[]} pages - Page records.
 * @returns {Map<string, string[]>} For every alias, the `rel` paths of the
 *   pages that declare it, in page order. An alias shared by several pages
 *   lists all of them.
 */
export function buildAliasMap(pages) {
  const aliases = new Map();
  for (const { rel, aliases: pageAliases } of pages) {
    for (const alias of pageAliases) {
      const owners = aliases.get(alias);
      if (owners) {
        owners.push(rel);
      } else {
        aliases.set(alias, [rel]);
      }
    }
  }
  return aliases;
}
188
+
189
/**
 * Resolve a `./` or `../` wikilink target against the linking page's folder.
 *
 * @param {{rel: string}|null} page - Linking page, or null when unknown.
 * @param {string} target - Raw link target (may carry `|alias` or `#anchor`).
 * @returns {string|null} Normalised alias key, or null when there is no page
 *   or target, the target is not explicitly relative, or it resolves above
 *   the wiki folder.
 */
function pageRelativeTarget(page, target) {
  if (!page || !target) return null;

  const [beforeAlias] = normalizeForStorage(target).split('|');
  const [beforeAnchor] = beforeAlias.split('#');
  const raw = beforeAnchor.trim().replace(/\\/g, '/');
  const isRelative = raw.startsWith('./') || raw.startsWith('../');
  if (!isRelative) return null;

  // Page paths are handled relative to the wiki folder, without the `wiki/` prefix.
  const pageDir = posix.dirname(page.rel.replace(/^wiki\//, ''));
  const joined = posix.normalize(posix.join(pageDir, raw));
  const escapesWiki = joined === '..' || joined.startsWith('../');
  return escapesWiki ? null : normalizeTarget(joined);
}
202
+
203
/**
 * Resolve a wikilink target to a page path.
 *
 * The page-relative interpretation of the target is tried first; when it does
 * not apply, the target's plain normalised form is used.
 *
 * @param {Map<string, string[]>} aliasMap - Alias index from `buildAliasMap`.
 * @param {string} target - Raw link target.
 * @param {{rel: string}|null} [page=null] - Linking page, used for `./` and
 *   `../` targets.
 * @returns {string|null} The `rel` path of the single page owning the alias,
 *   or null when the alias is unknown or ambiguous.
 */
export function resolveWikiLink(aliasMap, target, page = null) {
  const key = pageRelativeTarget(page, target) || normalizeTarget(target);
  const candidates = aliasMap.get(key);
  if (!candidates || candidates.length !== 1) return null;
  return candidates[0];
}
208
+
209
/**
 * Resolve a relative Markdown link (`[text](path.md)`) to a page path.
 *
 * @param {Map<string, string[]>} aliasMap - Alias index from `buildAliasMap`.
 * @param {{rel: string}} page - Page containing the link.
 * @param {{path: string}|null|undefined} link - Link record from
 *   `extractMarkdownLinks`.
 * @returns {string|null} Result of `resolveWikiLink` for the link path joined
 *   onto the page's folder; null when the link has no path, the path does not
 *   end in `.md`, or it resolves above the wiki folder.
 */
export function resolveMarkdownWikiLink(aliasMap, page, link) {
  const linkPath = link?.path;
  if (!linkPath || !/\.md$/i.test(linkPath)) return null;

  const pageDir = posix.dirname(page.rel.replace(/^wiki\//, ''));
  const joined = posix.normalize(posix.join(pageDir, linkPath));
  const escapesWiki = joined === '..' || joined.startsWith('../');
  return escapesWiki ? null : resolveWikiLink(aliasMap, joined);
}
216
+
217
/**
 * Build the link graph between pages from their wikilinks and Markdown links.
 *
 * Links that do not resolve to exactly one page, and links from a page to
 * itself, are ignored.
 *
 * @param {Object[]} pages - Page records with `rel`, `aliases`, `wikilinks`
 *   and `markdownLinks`.
 * @returns {{aliasMap: Map<string, string[]>, outlinks: Map<string, Set<string>>,
 *   backlinks: Map<string, Set<string>>}} The alias index plus, per page
 *   `rel`, the set of pages it links to and the set of pages linking to it.
 */
export function buildWikiGraph(pages) {
  const aliasMap = buildAliasMap(pages);
  const outlinks = new Map(pages.map((page) => [page.rel, new Set()]));
  const backlinks = new Map(pages.map((page) => [page.rel, new Set()]));

  // Record one directed edge, skipping unresolved targets and self-links.
  const connect = (from, to) => {
    if (!to || to === from) return;
    outlinks.get(from).add(to);
    backlinks.get(to)?.add(from);
  };

  for (const page of pages) {
    for (const link of page.wikilinks) {
      connect(page.rel, resolveWikiLink(aliasMap, link.raw, page));
    }
    for (const link of page.markdownLinks) {
      connect(page.rel, resolveMarkdownWikiLink(aliasMap, page, link));
    }
  }

  return { aliasMap, outlinks, backlinks };
}
243
+
244
/**
 * Read a bounded excerpt of `llm-wiki/wiki/memory.md`.
 *
 * The excerpt is the first `MEMORY_LINE_LIMIT` lines of the file. If that is
 * still larger than `MEMORY_BYTE_LIMIT` UTF-8 bytes it is cut at the last
 * whole character that fits.
 *
 * @param {string} projectRoot - Project root directory.
 * @returns {Promise<string>} The excerpt, or `''` when `readText` yields an
 *   empty value (it is called with `''` as its second argument, presumably a
 *   fallback for a missing file — confirm in `fs-utils`).
 */
export async function readMemoryExcerpt(projectRoot) {
  const content = await readText(join(projectRoot, 'llm-wiki', 'wiki', 'memory.md'), '');
  if (!content) return '';

  const excerpt = content.split('\n').slice(0, MEMORY_LINE_LIMIT).join('\n');
  if (Buffer.byteLength(excerpt, 'utf8') <= MEMORY_BYTE_LIMIT) return excerpt;

  // Walk whole characters so a multi-byte character is never split.
  const kept = [];
  let remaining = MEMORY_BYTE_LIMIT;
  for (const char of excerpt) {
    remaining -= Buffer.byteLength(char, 'utf8');
    if (remaining < 0) break;
    kept.push(char);
  }
  return kept.join('');
}
260
+
261
/**
 * Index page records by their `rel` path.
 *
 * @param {{rel: string}[]} pages - Page records.
 * @returns {Map<string, Object>} Map from `rel` to the page; when two pages
 *   share a `rel`, the later one wins.
 */
export function pageLookup(pages) {
  const lookup = new Map();
  for (const page of pages) {
    lookup.set(page.rel, page);
  }
  return lookup;
}
@@ -0,0 +1,246 @@
1
+ import { join } from 'path';
2
+ import { readText } from './fs-utils.js';
3
+ import { redactText } from './redaction.js';
4
+ import {
5
+ buildWikiGraph,
6
+ collectWikiPages,
7
+ pageLookup,
8
+ readMemoryExcerpt,
9
+ } from './wiki-model.js';
10
+
11
/** Number of hits returned when the caller does not supply a limit. */
const DEFAULT_LIMIT = 5;

/** Maximum length of a hit snippet, in characters. */
const SNIPPET_CHARS = 350;

/** Cached promise for the optional `minisearch` import; null until first requested. */
let miniSearchLoader = null;
14
+
15
/**
 * Load the optional `minisearch` package once and reuse the result.
 *
 * The import is best-effort: any failure resolves to null so callers can fall
 * back to plain substring search.
 *
 * @returns {Promise<Function|null>} The MiniSearch constructor (default
 *   export, else the named `MiniSearch` export), or null when unavailable.
 */
async function loadMiniSearch() {
  if (miniSearchLoader) return miniSearchLoader;
  miniSearchLoader = (async () => {
    try {
      const module = await import('minisearch');
      return module.default || module.MiniSearch || null;
    } catch {
      return null;
    }
  })();
  return miniSearchLoader;
}
23
+
24
/**
 * Prepare text for case-insensitive matching.
 *
 * @param {*} value - Any value; falsy values become the empty string.
 * @returns {string} The value as an NFC-normalised, lower-cased string.
 */
function normalizeText(value) {
  const text = value ? String(value) : '';
  return text.normalize('NFC').toLowerCase();
}
27
+
28
/**
 * Split a query into search terms.
 *
 * Terms are the maximal runs of Unicode letters and digits in the normalised
 * (NFC, lower-case) query. Terms shorter than two UTF-16 code units are
 * dropped and at most 80 terms are returned.
 *
 * @param {*} query - Query text.
 * @returns {string[]} Terms in query order.
 */
export function tokenizeWikiQuery(query) {
  const words = normalizeText(query).match(/[\p{Letter}\p{Number}]+/gu) || [];
  return words.filter((word) => word.length >= 2).slice(0, 80);
}
35
+
36
/**
 * Score a page by plain substring matching.
 *
 * Each term found in the page's normalised title, path or body adds 2 points
 * when the term is longer than three characters and 1 point otherwise.
 *
 * @param {{title: string, rel: string, body: string}} page - Page record.
 * @param {string[]} terms - Normalised query terms.
 * @returns {number} Total score; 0 when nothing matches or there are no terms.
 */
function substringScore(page, terms) {
  if (terms.length === 0) return 0;
  const haystack = normalizeText(`${page.title}\n${page.rel}\n${page.body}`);
  return terms.reduce((total, term) => {
    if (!haystack.includes(term)) return total;
    return total + (term.length > 3 ? 2 : 1);
  }, 0);
}
45
+
46
/**
 * Cut a short excerpt of a page around the first matching query term.
 *
 * Whitespace runs are collapsed to single spaces first. The excerpt starts up
 * to 80 characters before the first occurrence of the first term (in `terms`
 * order) that appears in the text, or at the beginning when none appears, and
 * is at most `SNIPPET_CHARS` long.
 *
 * @param {{body: string, content: string}} page - Page record; `content` is
 *   used only when the collapsed body is empty.
 * @param {string[]} terms - Normalised query terms.
 * @returns {string} The excerpt.
 */
function snippetFor(page, terms) {
  const collapse = (value) => value.replace(/\s+/g, ' ').trim();
  const text = collapse(page.body) || collapse(page.content);
  const lower = normalizeText(text);
  const firstHit = terms
    .map((term) => lower.indexOf(term))
    .find((index) => index !== -1);
  const start = firstHit === undefined ? 0 : Math.max(0, firstHit - 80);
  return text.slice(start, start + SNIPPET_CHARS);
}
59
+
60
/**
 * Build one search-hit record for a page.
 *
 * @param {{rel: string, title: string, type?: string, memoryType?: string}} page
 *   Page record.
 * @param {number} score - Final score of the hit.
 * @param {Object} [fields] - Optional details: `directScore`, `linkScore`,
 *   `source`, `via`, `matchedTerms`, `snippet`. Missing or falsy entries fall
 *   back to 0, 0, `'direct'`, `[]`, `[]` and `''` respectively.
 * @returns {Object} Hit record with `path`, `title`, `type`, `memoryType`,
 *   `score` and the detail fields.
 */
function resultRecord(page, score, fields = {}) {
  const { directScore, linkScore, source, via, matchedTerms, snippet } = fields;
  return {
    path: page.rel,
    title: page.title,
    type: page.type || '',
    memoryType: page.memoryType || '',
    score,
    directScore: directScore || 0,
    linkScore: linkScore || 0,
    source: source || 'direct',
    via: via || [],
    matchedTerms: matchedTerms || [],
    snippet: snippet || '',
  };
}
75
+
76
/**
 * Comparator for search hits: higher score first, then direct hits before
 * other hits, then by path using `localeCompare`.
 *
 * @param {{score: number, source: string, path: string}} a - First hit.
 * @param {{score: number, source: string, path: string}} b - Second hit.
 * @returns {number} Negative when `a` sorts first, positive when `b` does.
 */
function sortHits(a, b) {
  if (b.score !== a.score) return b.score - a.score;
  if (a.source === b.source) return a.path.localeCompare(b.path);
  return a.source === 'direct' ? -1 : 1;
}
81
+
82
/**
 * Search the project wiki and return only the hits.
 *
 * @param {string} projectRoot - Project root directory.
 * @param {string} query - Search text.
 * @param {Object|number} [options] - Options forwarded unchanged to
 *   `performSearch`.
 * @returns {Promise<Object[]>} The `hits` array of the search result.
 */
export async function searchWiki(projectRoot, query, options = {}) {
  const { hits } = await performSearch(projectRoot, query, options);
  return hits;
}
85
+
86
/**
 * Run a wiki search: direct matches first, then optional expansion to pages
 * linked to or from the best direct matches.
 *
 * Direct matches come from MiniSearch (when the package can be loaded), with
 * the substring score added on top, and from plain substring scoring for any
 * page MiniSearch did not return. With expansion enabled, every page linked
 * to or from one of the top direct hits, and not already a hit, is added with
 * 20% of that hit's score.
 *
 * @param {string} projectRoot - Project root directory.
 * @param {string} query - Search text.
 * @param {Object|number} [options] - A number is shorthand for `{ limit }`.
 *   `limit` caps the returned hits (default `DEFAULT_LIMIT`); `expand: false`
 *   disables link expansion. The object is also passed to `collectWikiPages`.
 * @returns {Promise<{hits: Object[], search: string}>} Sorted hits and the
 *   strategy used: `'none'` (empty query), `'missing-wiki'` (pages could not
 *   be collected), `'minisearch+wikilink'` or `'substring+wikilink'`.
 */
async function performSearch(projectRoot, query, options = {}) {
  const opts = typeof options === 'number' ? { limit: options } : options;
  const limit = Number(opts.limit || DEFAULT_LIMIT);
  const expand = opts.expand !== false;
  const terms = tokenizeWikiQuery(query);
  if (!query || terms.length === 0) {
    return { hits: [], search: 'none' };
  }

  let pages;
  try {
    pages = await collectWikiPages(projectRoot, opts);
  } catch {
    // Any failure while collecting pages is reported as a missing wiki.
    return { hits: [], search: 'missing-wiki' };
  }

  const MiniSearch = await loadMiniSearch();
  const byPath = pageLookup(pages);
  const hits = new Map();

  const addDirectHit = (page, score, matchedTerms) => {
    hits.set(page.rel, resultRecord(page, score, {
      directScore: score,
      matchedTerms,
      snippet: snippetFor(page, terms),
    }));
  };

  if (MiniSearch) {
    const index = new MiniSearch({
      fields: ['title', 'path', 'body'],
      storeFields: ['title', 'path', 'type', 'memoryType'],
      searchOptions: {
        boost: { title: 3, path: 2 },
        prefix: true,
      },
    });
    index.addAll(pages.map((page) => ({
      id: page.rel,
      title: page.title,
      path: page.rel,
      body: page.body,
      type: page.type || '',
      memoryType: page.memoryType || '',
    })));

    for (const item of index.search(query, { prefix: true, boost: { title: 3, path: 2 } })) {
      const page = byPath.get(item.id);
      if (!page) continue;
      addDirectHit(page, item.score + substringScore(page, terms), item.terms || terms);
    }
  }

  // Substring fallback for pages the index did not return (or all pages
  // when MiniSearch is unavailable).
  for (const page of pages) {
    if (hits.has(page.rel)) continue;
    const score = substringScore(page, terms);
    if (score <= 0) continue;
    addDirectHit(page, score, terms);
  }

  if (expand && hits.size > 0) {
    const { outlinks, backlinks } = buildWikiGraph(pages);
    const seeds = [...hits.values()].sort(sortHits).slice(0, Math.max(limit, 5));
    for (const seed of seeds) {
      const neighbors = new Set([
        ...(outlinks.get(seed.path) || []),
        ...(backlinks.get(seed.path) || []),
      ]);
      for (const neighborPath of neighbors) {
        // Seeds are visited best-first, so a neighbor keeps the score
        // derived from its strongest seed.
        const page = hits.has(neighborPath) ? undefined : byPath.get(neighborPath);
        if (!page) continue;
        const linkScore = seed.score * 0.2;
        hits.set(neighborPath, resultRecord(page, linkScore, {
          linkScore,
          source: 'linked',
          via: [seed.path],
          matchedTerms: [],
          snippet: snippetFor(page, terms),
        }));
      }
    }
  }

  return {
    hits: [...hits.values()].sort(sortHits).slice(0, limit),
    search: MiniSearch ? 'minisearch+wikilink' : 'substring+wikilink',
  };
}
179
+
180
/**
 * Return a copy of a search hit with its text fields passed through
 * `redactText`.
 *
 * `path`, `title` and each `via` entry are redacted with a limit of 300, each
 * matched term with 120, and the snippet with `SNIPPET_CHARS`. A `via` or
 * `matchedTerms` value that is not an array becomes `[]`. Other fields are
 * copied unchanged.
 *
 * @param {Object} hit - Hit record from `resultRecord`.
 * @returns {Object} Redacted copy of the hit.
 */
function redactHit(hit) {
  const redactList = (items, maxChars) => (
    Array.isArray(items) ? items.map((item) => redactText(item, maxChars)) : []
  );
  return {
    ...hit,
    path: redactText(hit.path, 300),
    title: redactText(hit.title, 300),
    via: redactList(hit.via, 300),
    matchedTerms: redactList(hit.matchedTerms, 120),
    snippet: redactText(hit.snippet, SNIPPET_CHARS),
  };
}
190
+
191
/**
 * Assemble the context pack for a query: memory, index and (optionally) log
 * excerpts plus redacted search hits.
 *
 * @param {string} projectRoot - Project root directory.
 * @param {string} query - Search text; when falsy no search is run.
 * @param {Object} [options] - `limit` (default `DEFAULT_LIMIT`), `expand`
 *   (disabled only by `false`), `includeLog` (adds the tail of `log.md`); the
 *   whole object is also forwarded to `performSearch`.
 * @returns {Promise<Object>} Pack with `workspace`, `query`, `limit`,
 *   `expand`, `search`, `memoryExcerpt`, `indexExcerpt`, `logExcerpt` and
 *   `hits`; every text field is passed through `redactText`.
 */
export async function buildContextPack(projectRoot, query, options = {}) {
  const wikiFile = (name) => join(projectRoot, 'llm-wiki', 'wiki', name);
  const limit = Number(options.limit || DEFAULT_LIMIT);
  const expand = options.expand !== false;

  const memoryExcerpt = await readMemoryExcerpt(projectRoot);

  // Head of the index page.
  const indexText = await readText(wikiFile('index.md'));
  const indexExcerpt = indexText.slice(0, 1200).trim();

  // Tail of the log page, read only on request.
  let logExcerpt = '';
  if (options.includeLog) {
    const logText = await readText(wikiFile('log.md'));
    logExcerpt = logText.slice(-1000).trim();
  }

  let result = { hits: [], search: 'none' };
  if (query) {
    result = await performSearch(projectRoot, query, { ...options, limit, expand });
  }

  return {
    workspace: projectRoot,
    query: redactText(query || '', 1000),
    limit,
    expand,
    search: result.search,
    memoryExcerpt: redactText(memoryExcerpt.trim(), 30000),
    indexExcerpt: redactText(indexExcerpt, 2000),
    logExcerpt: redactText(logExcerpt, 2000),
    hits: result.hits.map(redactHit),
  };
}
212
+
213
/**
 * Render a context pack as plain text.
 *
 * The output starts with a fixed preamble of guidance lines, followed by the
 * query and search strategy (when a query is set) and then one section per
 * non-empty part, in this order: memory excerpt, index excerpt, relevant
 * pages, recent log excerpt.
 *
 * @param {Object} pack - Pack produced by `buildContextPack`.
 * @returns {string} The rendered text, trimmed.
 */
export function formatContextPack(pack) {
  const lines = [
    'LLM Wiki context from llm-wiki-kit:',
    '- Treat chat memory as temporary; update project Markdown when knowledge should persist.',
    '- Preserve raw/wiki separation. Do not store secrets, tokens, .env contents, private keys, or personal/customer identifiers.',
    '- Prefer updating existing wiki pages over creating duplicate pages.',
    '- Claude Code/Codex를 평소처럼 사용한다. 사용자가 별도 명령을 실행하지 않아도 agent가 필요한 wiki 조회와 정리를 수행한다.',
    '- 오래 쓸 지식은 outputs/questions에만 두지 말고 기존 wiki/architecture, wiki/debugging, wiki/decisions, wiki/concepts, procedures 문서에 합친다.',
    '- wiki/memory.md는 짧은 핵심 기억으로 유지하고, 긴 설명 대신 관련 정식 문서 링크를 둔다.',
  ];

  // Append a blank line, a heading and the text, but only for non-empty text.
  const addSection = (heading, text) => {
    if (text) lines.push('', heading, text);
  };

  if (pack.query) {
    lines.push(`- query: "${pack.query}"`, `- search: ${pack.search}`);
  }
  addSection('Memory excerpt:', pack.memoryExcerpt);
  addSection('Index excerpt:', pack.indexExcerpt);

  if (pack.hits.length > 0) {
    lines.push('', 'Relevant wiki pages:');
    for (const { path, score, source, via, snippet } of pack.hits) {
      const viaNote = source === 'linked' && via.length > 0
        ? `, linked via ${via.join(', ')}`
        : '';
      lines.push(`- ${path} (score ${score.toFixed(2)}, ${source}${viaNote}): ${snippet}`);
    }
  }

  addSection('Recent log excerpt:', pack.logExcerpt);
  return lines.join('\n').trim();
}