@open330/kiwimu 0.8.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +105 -27
- package/package.json +1 -1
- package/src/build/renderer.ts +272 -32
- package/src/build/static/dynamic-qa.js +423 -0
- package/src/build/static/edit-page.js +58 -0
- package/src/build/static/peek-panel.css +201 -0
- package/src/build/static/peek-panel.js +470 -0
- package/src/build/static/search.js +30 -15
- package/src/build/static/style.css +821 -6
- package/src/build/templates.ts +700 -48
- package/src/config.ts +41 -3
- package/src/demo/sample-data.ts +69 -2
- package/src/demo/setup.ts +25 -6
- package/src/expand/llm.ts +2 -2
- package/src/index.ts +467 -60
- package/src/ingest/docx.ts +1 -1
- package/src/ingest/markdown.ts +21 -0
- package/src/ingest/pdf.ts +4 -2
- package/src/llm-client.ts +63 -69
- package/src/pipeline/citations.ts +107 -0
- package/src/pipeline/llm-chunker.ts +277 -131
- package/src/pipeline/standardizer.ts +41 -0
- package/src/server.ts +465 -32
- package/src/services/dynamic-qa.ts +190 -0
- package/src/services/embedding.ts +122 -0
- package/src/services/index-generator.ts +185 -0
- package/src/services/ingest.ts +83 -25
- package/src/services/lint.ts +249 -0
- package/src/services/promote.ts +150 -0
- package/src/store.test.ts +11 -0
- package/src/store.ts +561 -28
- package/src/utils.ts +30 -0
package/src/ingest/pdf.ts
CHANGED
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
export async function extractTextFromPdf(pdfPath: string): Promise<{ title: string; text: string }> {
|
|
2
|
-
let
|
|
2
|
+
let pdfParseModule: Record<string, unknown>;
|
|
3
3
|
try {
|
|
4
|
-
|
|
4
|
+
pdfParseModule = await import("pdf-parse");
|
|
5
5
|
} catch {
|
|
6
6
|
throw new Error("PDF support requires pdf-parse. Run: bun add pdf-parse");
|
|
7
7
|
}
|
|
8
8
|
|
|
9
|
+
const pdfParse = (pdfParseModule.default ?? pdfParseModule) as (buffer: Buffer) => Promise<{ info?: { Title?: string }; text: string }>;
|
|
10
|
+
|
|
9
11
|
const buffer = await Bun.file(pdfPath).arrayBuffer();
|
|
10
12
|
const data = await pdfParse(Buffer.from(buffer));
|
|
11
13
|
|
package/src/llm-client.ts
CHANGED
|
@@ -46,6 +46,21 @@ async function geminiComplete(config: LLMConfig, system: string, userMessage: st
|
|
|
46
46
|
|
|
47
47
|
type ProviderResult = { text: string; usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number } };
|
|
48
48
|
|
|
49
|
+
function isRetryableError(error: unknown): boolean {
|
|
50
|
+
if (error instanceof Error) {
|
|
51
|
+
// Gemini: raw fetch, status in error message
|
|
52
|
+
if (/\b(429|503)\b/.test(error.message)) return true;
|
|
53
|
+
}
|
|
54
|
+
// OpenAI/Azure/Anthropic SDKs: error objects with status property
|
|
55
|
+
const status = (error as any)?.status;
|
|
56
|
+
if (status === 429 || status === 503) return true;
|
|
57
|
+
return false;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
function sleep(ms: number): Promise<void> {
|
|
61
|
+
return new Promise(resolve => setTimeout(resolve, ms));
|
|
62
|
+
}
|
|
63
|
+
|
|
49
64
|
export class LLMClient {
|
|
50
65
|
private config: LLMConfig;
|
|
51
66
|
private usage: UsageStats = { totalCalls: 0, promptTokens: 0, completionTokens: 0, totalTokens: 0 };
|
|
@@ -53,6 +68,8 @@ export class LLMClient {
|
|
|
53
68
|
private _anthropicClient: InstanceType<typeof import("@anthropic-ai/sdk").default> | null = null;
|
|
54
69
|
private _azureClient: InstanceType<typeof import("openai").AzureOpenAI> | null = null;
|
|
55
70
|
|
|
71
|
+
onRetry?: (attempt: number, maxRetries: number, delayMs: number) => void;
|
|
72
|
+
|
|
56
73
|
constructor(config: LLMConfig) {
|
|
57
74
|
this.config = config;
|
|
58
75
|
}
|
|
@@ -65,7 +82,8 @@ export class LLMClient {
|
|
|
65
82
|
if (!apiKey) {
|
|
66
83
|
try {
|
|
67
84
|
const keyFile = `${process.env.HOME}/keys/openai.azure.com/${this.config.model}.json`;
|
|
68
|
-
const
|
|
85
|
+
const { readFileSync } = await import("fs");
|
|
86
|
+
const raw = readFileSync(keyFile, "utf-8");
|
|
69
87
|
const keyConfig = JSON.parse(raw)[0] as { key: string; endpoint: string; deployment: string };
|
|
70
88
|
apiKey = keyConfig.key;
|
|
71
89
|
endpoint = keyConfig.endpoint.split("/openai/")[0];
|
|
@@ -105,7 +123,7 @@ export class LLMClient {
|
|
|
105
123
|
this._openaiClient = new OpenAI({ apiKey: this.config.api_key });
|
|
106
124
|
}
|
|
107
125
|
const resp = await this._openaiClient.chat.completions.create({
|
|
108
|
-
model: this.config.model || "gpt-
|
|
126
|
+
model: this.config.model || "gpt-5.4",
|
|
109
127
|
messages: [
|
|
110
128
|
{ role: "system", content: system },
|
|
111
129
|
{ role: "user", content: userMessage },
|
|
@@ -128,7 +146,7 @@ export class LLMClient {
|
|
|
128
146
|
this._anthropicClient = new Anthropic({ apiKey: this.config.api_key });
|
|
129
147
|
}
|
|
130
148
|
const resp = await this._anthropicClient.messages.create({
|
|
131
|
-
model: this.config.model || "claude-sonnet-4-
|
|
149
|
+
model: this.config.model || "claude-sonnet-4-6",
|
|
132
150
|
max_tokens: maxTokens,
|
|
133
151
|
system: system,
|
|
134
152
|
messages: [{ role: "user", content: userMessage }],
|
|
@@ -145,34 +163,51 @@ export class LLMClient {
|
|
|
145
163
|
}
|
|
146
164
|
|
|
147
165
|
async chatComplete(system: string, userMessage: string, maxTokens = 8192): Promise<string> {
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
switch (this.config.provider) {
|
|
151
|
-
case "gemini":
|
|
152
|
-
result = await geminiComplete(this.config, system, userMessage, maxTokens);
|
|
153
|
-
break;
|
|
154
|
-
case "azure-openai":
|
|
155
|
-
result = await this.azureComplete(system, userMessage, maxTokens);
|
|
156
|
-
break;
|
|
157
|
-
case "openai":
|
|
158
|
-
result = await this.openaiComplete(system, userMessage, maxTokens);
|
|
159
|
-
break;
|
|
160
|
-
case "anthropic":
|
|
161
|
-
result = await this.anthropicComplete(system, userMessage, maxTokens);
|
|
162
|
-
break;
|
|
163
|
-
default:
|
|
164
|
-
throw new Error(`Unknown LLM provider: ${this.config.provider}`);
|
|
165
|
-
}
|
|
166
|
+
const MAX_RETRIES = 5;
|
|
167
|
+
const BASE_DELAY_MS = 2000;
|
|
166
168
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
169
|
+
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
|
|
170
|
+
try {
|
|
171
|
+
let result: ProviderResult;
|
|
172
|
+
|
|
173
|
+
switch (this.config.provider) {
|
|
174
|
+
case "gemini":
|
|
175
|
+
result = await geminiComplete(this.config, system, userMessage, maxTokens);
|
|
176
|
+
break;
|
|
177
|
+
case "azure-openai":
|
|
178
|
+
result = await this.azureComplete(system, userMessage, maxTokens);
|
|
179
|
+
break;
|
|
180
|
+
case "openai":
|
|
181
|
+
result = await this.openaiComplete(system, userMessage, maxTokens);
|
|
182
|
+
break;
|
|
183
|
+
case "anthropic":
|
|
184
|
+
result = await this.anthropicComplete(system, userMessage, maxTokens);
|
|
185
|
+
break;
|
|
186
|
+
default:
|
|
187
|
+
throw new Error(`Unknown LLM provider: ${this.config.provider}`);
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
// Track usage
|
|
191
|
+
if (result.usage) {
|
|
192
|
+
this.usage.totalCalls++;
|
|
193
|
+
this.usage.promptTokens += result.usage.prompt_tokens || 0;
|
|
194
|
+
this.usage.completionTokens += result.usage.completion_tokens || 0;
|
|
195
|
+
this.usage.totalTokens += result.usage.total_tokens || 0;
|
|
196
|
+
}
|
|
197
|
+
|
|
198
|
+
return result.text;
|
|
199
|
+
} catch (error) {
|
|
200
|
+
if (isRetryableError(error) && attempt < MAX_RETRIES) {
|
|
201
|
+
const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 1000;
|
|
202
|
+
this.onRetry?.(attempt + 1, MAX_RETRIES, delay);
|
|
203
|
+
await sleep(delay);
|
|
204
|
+
continue;
|
|
205
|
+
}
|
|
206
|
+
throw error;
|
|
207
|
+
}
|
|
173
208
|
}
|
|
174
209
|
|
|
175
|
-
return
|
|
210
|
+
throw new Error("Unreachable: retry loop exited without return or throw");
|
|
176
211
|
}
|
|
177
212
|
|
|
178
213
|
getUsageStats(): UsageStats {
|
|
@@ -211,44 +246,3 @@ export class LLMClient {
|
|
|
211
246
|
}
|
|
212
247
|
}
|
|
213
248
|
|
|
214
|
-
// ── Deprecated global state wrappers (for backward compatibility) ──
|
|
215
|
-
|
|
216
|
-
/** @deprecated Use LLMClient class instead */
|
|
217
|
-
let _globalClient: LLMClient | null = null;
|
|
218
|
-
|
|
219
|
-
/** @deprecated Use `new LLMClient(config)` instead */
|
|
220
|
-
export function setLLMConfig(config: LLMConfig): void {
|
|
221
|
-
_globalClient = new LLMClient(config);
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
/** @deprecated Use LLMClient instance methods instead */
|
|
225
|
-
export function getUsageStats(): UsageStats {
|
|
226
|
-
if (!_globalClient) return { totalCalls: 0, promptTokens: 0, completionTokens: 0, totalTokens: 0 };
|
|
227
|
-
return _globalClient.getUsageStats();
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
/** @deprecated Use LLMClient instance methods instead */
|
|
231
|
-
export function resetUsageStats(): void {
|
|
232
|
-
if (_globalClient) _globalClient.resetUsageStats();
|
|
233
|
-
}
|
|
234
|
-
|
|
235
|
-
/** @deprecated Use LLMClient instance methods instead */
|
|
236
|
-
export function getEstimatedCost(): number {
|
|
237
|
-
if (!_globalClient) return 0;
|
|
238
|
-
return _globalClient.getEstimatedCost();
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
/** @deprecated Use LLMClient instance methods instead */
|
|
242
|
-
export function printUsageSummary(): void {
|
|
243
|
-
if (_globalClient) _globalClient.printUsageSummary();
|
|
244
|
-
}
|
|
245
|
-
|
|
246
|
-
/** @deprecated Use LLMClient instance methods instead */
|
|
247
|
-
export async function chatComplete(
|
|
248
|
-
system: string,
|
|
249
|
-
userMessage: string,
|
|
250
|
-
maxTokens = 8192
|
|
251
|
-
): Promise<string> {
|
|
252
|
-
if (!_globalClient) throw new Error("LLM config not set. Call setLLMConfig() first.");
|
|
253
|
-
return _globalClient.chatComplete(system, userMessage, maxTokens);
|
|
254
|
-
}
|
|
package/src/pipeline/citations.ts
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import type { Store, Citation } from "../store";
|
|
2
|
+
import { escapeHtml } from "../utils";
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Parse [^src:SLUG] citation markers in page body.
|
|
6
|
+
* Creates citation DB records and replaces markers with numbered footnote references.
|
|
7
|
+
*/
|
|
8
|
+
export function parseCitations(body: string, pageId: number, store: Store): string {
|
|
9
|
+
const page = store.getPageById(pageId);
|
|
10
|
+
if (!page) return body;
|
|
11
|
+
|
|
12
|
+
// Find all [^src:SLUG] markers
|
|
13
|
+
const markerRegex = /\[\^src:([a-z0-9가-힣][-a-z0-9가-힣]*)\]/gi;
|
|
14
|
+
const markers: Array<{ fullMatch: string; slug: string; index: number }> = [];
|
|
15
|
+
let match: RegExpExecArray | null;
|
|
16
|
+
|
|
17
|
+
while ((match = markerRegex.exec(body)) !== null) {
|
|
18
|
+
markers.push({ fullMatch: match[0], slug: match[1], index: match.index });
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
if (markers.length === 0) return body;
|
|
22
|
+
|
|
23
|
+
// Delete existing citations for this page to avoid duplicates on re-parse
|
|
24
|
+
store.deleteCitationsForPage(pageId);
|
|
25
|
+
|
|
26
|
+
// Build footnote references
|
|
27
|
+
let result = body;
|
|
28
|
+
let footnoteNum = 0;
|
|
29
|
+
const citationMap = new Map<string, number>(); // slug -> footnote number
|
|
30
|
+
|
|
31
|
+
// Process in reverse order to preserve indices during replacement
|
|
32
|
+
for (let i = markers.length - 1; i >= 0; i--) {
|
|
33
|
+
const marker = markers[i];
|
|
34
|
+
const sourcePage = store.getPage(marker.slug);
|
|
35
|
+
|
|
36
|
+
if (!sourcePage || !sourcePage.source_id) {
|
|
37
|
+
// Remove invalid markers silently
|
|
38
|
+
result = result.slice(0, marker.index) + result.slice(marker.index + marker.fullMatch.length);
|
|
39
|
+
continue;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
// Assign footnote number (reuse if same slug cited multiple times)
|
|
43
|
+
if (!citationMap.has(marker.slug)) {
|
|
44
|
+
footnoteNum = citationMap.size + 1;
|
|
45
|
+
citationMap.set(marker.slug, footnoteNum);
|
|
46
|
+
}
|
|
47
|
+
const num = citationMap.get(marker.slug)!;
|
|
48
|
+
|
|
49
|
+
// Extract context: ~80 chars surrounding the marker
|
|
50
|
+
const contextStart = Math.max(0, marker.index - 80);
|
|
51
|
+
const contextEnd = Math.min(body.length, marker.index + marker.fullMatch.length + 80);
|
|
52
|
+
const context = body.slice(contextStart, contextEnd).replace(/\[\^src:[^\]]+\]/g, '').trim();
|
|
53
|
+
|
|
54
|
+
// Create citation record
|
|
55
|
+
store.addCitation(pageId, sourcePage.source_id, sourcePage.id, null, context);
|
|
56
|
+
|
|
57
|
+
// Replace marker with footnote superscript
|
|
58
|
+
const footnoteRef = `<sup class="citation-ref"><a href="#cite-${num}" title="${escapeCitationTitle(sourcePage.title)}">[${num}]</a></sup>`;
|
|
59
|
+
result = result.slice(0, marker.index) + footnoteRef + result.slice(marker.index + marker.fullMatch.length);
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
return result;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
function escapeCitationTitle(title: string): string {
|
|
66
|
+
return title.replace(/"/g, '&quot;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* Render a "Sources" footer section from citation records.
|
|
71
|
+
*/
|
|
72
|
+
export function renderCitationFootnotes(citations: Citation[]): string {
|
|
73
|
+
if (!citations.length) return "";
|
|
74
|
+
|
|
75
|
+
// Deduplicate by source_page_id (or source_id if no page)
|
|
76
|
+
const seen = new Map<string, { num: number; citation: Citation }>();
|
|
77
|
+
let num = 0;
|
|
78
|
+
|
|
79
|
+
for (const c of citations) {
|
|
80
|
+
const key = c.source_page_id ? `page:${c.source_page_id}` : `source:${c.source_id}`;
|
|
81
|
+
if (!seen.has(key)) {
|
|
82
|
+
num++;
|
|
83
|
+
seen.set(key, { num, citation: c });
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
const items = Array.from(seen.values())
|
|
88
|
+
.map(({ num, citation }) => {
|
|
89
|
+
const title = citation.source_page_title || citation.source_title || `Source #${citation.source_id}`;
|
|
90
|
+
const slug = citation.source_page_slug;
|
|
91
|
+
const link = slug ? `<a href="/wiki/${encodeURIComponent(slug)}.html">${escapeHtml(title)}</a>` : escapeHtml(title);
|
|
92
|
+
const excerpt = citation.excerpt
|
|
93
|
+
? `<span class="citation-excerpt">"${escapeHtml(citation.excerpt.slice(0, 200))}"</span>`
|
|
94
|
+
: "";
|
|
95
|
+
return `<li id="cite-${num}" class="citation-item"><span class="citation-num">[${num}]</span> ${link}${excerpt ? " — " + excerpt : ""}</li>`;
|
|
96
|
+
})
|
|
97
|
+
.join("\n");
|
|
98
|
+
|
|
99
|
+
return `<aside class="citations-section">
|
|
100
|
+
<h3>Sources</h3>
|
|
101
|
+
<ol class="citation-list">
|
|
102
|
+
${items}
|
|
103
|
+
</ol>
|
|
104
|
+
</aside>`;
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
// escapeHtml imported from ../utils
|