@open330/kiwimu 0.8.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/ingest/pdf.ts CHANGED
@@ -1,11 +1,13 @@
1
1
  export async function extractTextFromPdf(pdfPath: string): Promise<{ title: string; text: string }> {
2
- let pdfParse: (buffer: Buffer) => Promise<{ info?: { Title?: string }; text: string }>;
2
+ let pdfParseModule: Record<string, unknown>;
3
3
  try {
4
- pdfParse = require("pdf-parse");
4
+ pdfParseModule = await import("pdf-parse");
5
5
  } catch {
6
6
  throw new Error("PDF support requires pdf-parse. Run: bun add pdf-parse");
7
7
  }
8
8
 
9
+ const pdfParse = (pdfParseModule.default ?? pdfParseModule) as (buffer: Buffer) => Promise<{ info?: { Title?: string }; text: string }>;
10
+
9
11
  const buffer = await Bun.file(pdfPath).arrayBuffer();
10
12
  const data = await pdfParse(Buffer.from(buffer));
11
13
 
package/src/llm-client.ts CHANGED
@@ -46,6 +46,21 @@ async function geminiComplete(config: LLMConfig, system: string, userMessage: st
46
46
 
47
47
  type ProviderResult = { text: string; usage?: { prompt_tokens: number; completion_tokens: number; total_tokens: number } };
48
48
 
49
/**
 * Decide whether a failed provider call is worth retrying.
 *
 * An error counts as retryable when it signals HTTP 429 or 503, detected either
 * as a standalone number inside an `Error` message (the Gemini path uses raw
 * fetch and reports the status in the message) or as a numeric `status`
 * property on the thrown value (SDK error objects).
 *
 * @param error - Whatever was thrown by the provider call.
 * @returns `true` for a 429/503 signal, `false` for everything else.
 */
function isRetryableError(error: unknown): boolean {
  // Message-based detection only applies to real Error instances.
  const messageSignalsRetry =
    error instanceof Error && /\b(429|503)\b/.test(error.message);
  if (messageSignalsRetry) {
    return true;
  }

  // Optional chaining keeps null/undefined (and primitives) from throwing here.
  const status = (error as { status?: unknown } | null | undefined)?.status;
  return status === 429 || status === 503;
}
59
+
60
/**
 * Pause for the given number of milliseconds.
 *
 * @param ms - Delay handed to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
63
+
49
64
  export class LLMClient {
50
65
  private config: LLMConfig;
51
66
  private usage: UsageStats = { totalCalls: 0, promptTokens: 0, completionTokens: 0, totalTokens: 0 };
@@ -53,6 +68,8 @@ export class LLMClient {
53
68
  private _anthropicClient: InstanceType<typeof import("@anthropic-ai/sdk").default> | null = null;
54
69
  private _azureClient: InstanceType<typeof import("openai").AzureOpenAI> | null = null;
55
70
 
71
+ onRetry?: (attempt: number, maxRetries: number, delayMs: number) => void;
72
+
56
73
  constructor(config: LLMConfig) {
57
74
  this.config = config;
58
75
  }
@@ -65,7 +82,8 @@ export class LLMClient {
65
82
  if (!apiKey) {
66
83
  try {
67
84
  const keyFile = `${process.env.HOME}/keys/openai.azure.com/${this.config.model}.json`;
68
- const raw = require("fs").readFileSync(keyFile, "utf-8");
85
+ const { readFileSync } = await import("fs");
86
+ const raw = readFileSync(keyFile, "utf-8");
69
87
  const keyConfig = JSON.parse(raw)[0] as { key: string; endpoint: string; deployment: string };
70
88
  apiKey = keyConfig.key;
71
89
  endpoint = keyConfig.endpoint.split("/openai/")[0];
@@ -105,7 +123,7 @@ export class LLMClient {
105
123
  this._openaiClient = new OpenAI({ apiKey: this.config.api_key });
106
124
  }
107
125
  const resp = await this._openaiClient.chat.completions.create({
108
- model: this.config.model || "gpt-4o",
126
+ model: this.config.model || "gpt-5.4",
109
127
  messages: [
110
128
  { role: "system", content: system },
111
129
  { role: "user", content: userMessage },
@@ -128,7 +146,7 @@ export class LLMClient {
128
146
  this._anthropicClient = new Anthropic({ apiKey: this.config.api_key });
129
147
  }
130
148
  const resp = await this._anthropicClient.messages.create({
131
- model: this.config.model || "claude-sonnet-4-20250514",
149
+ model: this.config.model || "claude-sonnet-4-6",
132
150
  max_tokens: maxTokens,
133
151
  system: system,
134
152
  messages: [{ role: "user", content: userMessage }],
@@ -145,34 +163,51 @@ export class LLMClient {
145
163
  }
146
164
 
147
165
  async chatComplete(system: string, userMessage: string, maxTokens = 8192): Promise<string> {
148
- let result: ProviderResult;
149
-
150
- switch (this.config.provider) {
151
- case "gemini":
152
- result = await geminiComplete(this.config, system, userMessage, maxTokens);
153
- break;
154
- case "azure-openai":
155
- result = await this.azureComplete(system, userMessage, maxTokens);
156
- break;
157
- case "openai":
158
- result = await this.openaiComplete(system, userMessage, maxTokens);
159
- break;
160
- case "anthropic":
161
- result = await this.anthropicComplete(system, userMessage, maxTokens);
162
- break;
163
- default:
164
- throw new Error(`Unknown LLM provider: ${this.config.provider}`);
165
- }
166
+ const MAX_RETRIES = 5;
167
+ const BASE_DELAY_MS = 2000;
166
168
 
167
- // Track usage
168
- if (result.usage) {
169
- this.usage.totalCalls++;
170
- this.usage.promptTokens += result.usage.prompt_tokens || 0;
171
- this.usage.completionTokens += result.usage.completion_tokens || 0;
172
- this.usage.totalTokens += result.usage.total_tokens || 0;
169
+ for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
170
+ try {
171
+ let result: ProviderResult;
172
+
173
+ switch (this.config.provider) {
174
+ case "gemini":
175
+ result = await geminiComplete(this.config, system, userMessage, maxTokens);
176
+ break;
177
+ case "azure-openai":
178
+ result = await this.azureComplete(system, userMessage, maxTokens);
179
+ break;
180
+ case "openai":
181
+ result = await this.openaiComplete(system, userMessage, maxTokens);
182
+ break;
183
+ case "anthropic":
184
+ result = await this.anthropicComplete(system, userMessage, maxTokens);
185
+ break;
186
+ default:
187
+ throw new Error(`Unknown LLM provider: ${this.config.provider}`);
188
+ }
189
+
190
+ // Track usage
191
+ if (result.usage) {
192
+ this.usage.totalCalls++;
193
+ this.usage.promptTokens += result.usage.prompt_tokens || 0;
194
+ this.usage.completionTokens += result.usage.completion_tokens || 0;
195
+ this.usage.totalTokens += result.usage.total_tokens || 0;
196
+ }
197
+
198
+ return result.text;
199
+ } catch (error) {
200
+ if (isRetryableError(error) && attempt < MAX_RETRIES) {
201
+ const delay = BASE_DELAY_MS * Math.pow(2, attempt) + Math.random() * 1000;
202
+ this.onRetry?.(attempt + 1, MAX_RETRIES, delay);
203
+ await sleep(delay);
204
+ continue;
205
+ }
206
+ throw error;
207
+ }
173
208
  }
174
209
 
175
- return result.text;
210
+ throw new Error("Unreachable: retry loop exited without return or throw");
176
211
  }
177
212
 
178
213
  getUsageStats(): UsageStats {
@@ -211,44 +246,3 @@ export class LLMClient {
211
246
  }
212
247
  }
213
248
 
214
- // ── Deprecated global state wrappers (for backward compatibility) ──
215
-
216
- /** @deprecated Use LLMClient class instead */
217
- let _globalClient: LLMClient | null = null;
218
-
219
- /** @deprecated Use `new LLMClient(config)` instead */
220
- export function setLLMConfig(config: LLMConfig): void {
221
- _globalClient = new LLMClient(config);
222
- }
223
-
224
- /** @deprecated Use LLMClient instance methods instead */
225
- export function getUsageStats(): UsageStats {
226
- if (!_globalClient) return { totalCalls: 0, promptTokens: 0, completionTokens: 0, totalTokens: 0 };
227
- return _globalClient.getUsageStats();
228
- }
229
-
230
- /** @deprecated Use LLMClient instance methods instead */
231
- export function resetUsageStats(): void {
232
- if (_globalClient) _globalClient.resetUsageStats();
233
- }
234
-
235
- /** @deprecated Use LLMClient instance methods instead */
236
- export function getEstimatedCost(): number {
237
- if (!_globalClient) return 0;
238
- return _globalClient.getEstimatedCost();
239
- }
240
-
241
- /** @deprecated Use LLMClient instance methods instead */
242
- export function printUsageSummary(): void {
243
- if (_globalClient) _globalClient.printUsageSummary();
244
- }
245
-
246
- /** @deprecated Use LLMClient instance methods instead */
247
- export async function chatComplete(
248
- system: string,
249
- userMessage: string,
250
- maxTokens = 8192
251
- ): Promise<string> {
252
- if (!_globalClient) throw new Error("LLM config not set. Call setLLMConfig() first.");
253
- return _globalClient.chatComplete(system, userMessage, maxTokens);
254
- }
@@ -0,0 +1,107 @@
1
+ import type { Store, Citation } from "../store";
2
+ import { escapeHtml } from "../utils";
3
+
4
/**
 * Parse `[^src:SLUG]` citation markers in a page body.
 *
 * For every marker whose slug resolves to a page with a `source_id`, a citation
 * record is added to the store and the marker is replaced by a numbered
 * `<sup class="citation-ref">` link to `#cite-N`. Markers that do not resolve
 * are removed from the output.
 *
 * Markers are handled in reading order: the first source cited in the text
 * becomes `[1]`, and citation records are added in the order the markers
 * appear. A slug cited several times keeps the number of its first occurrence.
 *
 * @param body - Page body that may contain citation markers.
 * @param pageId - Id of the page the body belongs to.
 * @param store - Store used to look up pages and to persist citation records.
 * @returns The body with markers replaced; the body unchanged when the page is
 *   unknown or no marker is present.
 */
export function parseCitations(body: string, pageId: number, store: Store): string {
  const page = store.getPageById(pageId);
  if (!page) return body;

  // Find all [^src:SLUG] markers
  const markerRegex = /\[\^src:([a-z0-9가-힣][-a-z0-9가-힣]*)\]/gi;
  const markers: Array<{ fullMatch: string; slug: string; index: number }> = [];
  let match: RegExpExecArray | null;

  while ((match = markerRegex.exec(body)) !== null) {
    markers.push({ fullMatch: match[0], slug: match[1], index: match.index });
  }

  // NOTE(review): a body without markers returns before existing citations are
  // cleared — confirm callers rely on this (e.g. re-parsing an already-rendered body).
  if (markers.length === 0) return body;

  // Delete existing citations for this page to avoid duplicates on re-parse
  store.deleteCitationsForPage(pageId);

  const citationMap = new Map<string, number>(); // slug -> footnote number
  const pieces: string[] = [];
  let cursor = 0; // end of the last marker already copied from `body`

  for (const marker of markers) {
    // Copy the text between the previous marker and this one, then step over the marker.
    pieces.push(body.slice(cursor, marker.index));
    cursor = marker.index + marker.fullMatch.length;

    const sourcePage = store.getPage(marker.slug);
    if (!sourcePage || !sourcePage.source_id) {
      // Invalid marker: emit nothing in its place.
      continue;
    }

    // Assign footnote number (reuse if same slug cited multiple times)
    let num = citationMap.get(marker.slug);
    if (num === undefined) {
      num = citationMap.size + 1;
      citationMap.set(marker.slug, num);
    }

    // Extract context: ~80 chars surrounding the marker, with any markers stripped
    const contextStart = Math.max(0, marker.index - 80);
    const contextEnd = Math.min(body.length, marker.index + marker.fullMatch.length + 80);
    const context = body.slice(contextStart, contextEnd).replace(/\[\^src:[^\]]+\]/g, '').trim();

    // Create citation record
    store.addCitation(pageId, sourcePage.source_id, sourcePage.id, null, context);

    // Replace marker with footnote superscript
    pieces.push(
      `<sup class="citation-ref"><a href="#cite-${num}" title="${escapeCitationTitle(sourcePage.title)}">[${num}]</a></sup>`
    );
  }

  pieces.push(body.slice(cursor));
  return pieces.join("");
}
64
+
65
/**
 * Escape a page title for use inside a double-quoted HTML attribute value.
 *
 * `&` is replaced first so that the entities produced for the other characters
 * are not escaped a second time, and so that entity-like text already present
 * in a title (e.g. `&quot;`) is shown literally instead of being decoded.
 *
 * @param title - Raw title text.
 * @returns The title with `&`, `"`, `<` and `>` replaced by HTML entities.
 */
function escapeCitationTitle(title: string): string {
  return title
    .replace(/&/g, '&amp;')
    .replace(/"/g, '&quot;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}
68
+
69
/**
 * Build the "Sources" footer markup for a page from its citation records.
 *
 * Citations are de-duplicated by `source_page_id` (or by `source_id` when no
 * source page is set); the first record seen for each source is kept and
 * numbered in order of appearance. Each entry shows the source title — linked
 * to `/wiki/<slug>.html` when a slug is present — followed by up to 200
 * characters of the excerpt, if any.
 *
 * @param citations - Citation records for one page.
 * @returns An `<aside class="citations-section">` block, or `""` when the list is empty.
 */
export function renderCitationFootnotes(citations: Citation[]): string {
  if (citations.length === 0) return "";

  // Map keeps insertion order, so an entry's position doubles as its footnote number.
  const firstBySource = new Map<string, Citation>();
  for (const entry of citations) {
    const dedupeKey = entry.source_page_id
      ? `page:${entry.source_page_id}`
      : `source:${entry.source_id}`;
    if (!firstBySource.has(dedupeKey)) {
      firstBySource.set(dedupeKey, entry);
    }
  }

  const listItems: string[] = [];
  for (const citation of firstBySource.values()) {
    const num = listItems.length + 1;
    const title = citation.source_page_title || citation.source_title || `Source #${citation.source_id}`;
    const slug = citation.source_page_slug;

    let label = escapeHtml(title);
    if (slug) {
      label = `<a href="/wiki/${encodeURIComponent(slug)}.html">${label}</a>`;
    }

    // The excerpt suffix is only present when the record carries an excerpt.
    let suffix = "";
    if (citation.excerpt) {
      suffix = ` — <span class="citation-excerpt">"${escapeHtml(citation.excerpt.slice(0, 200))}"</span>`;
    }

    listItems.push(
      `<li id="cite-${num}" class="citation-item"><span class="citation-num">[${num}]</span> ${label}${suffix}</li>`
    );
  }
  const items = listItems.join("\n");

  return `<aside class="citations-section">
<h3>Sources</h3>
<ol class="citation-list">
${items}
</ol>
</aside>`;
}
106
+
107
+ // escapeHtml imported from ../utils