@open330/kiwimu 0.8.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,249 @@
1
+ import type { Store } from "../store";
2
+ import { normalizeTitle } from "../utils";
3
+
4
/** A single problem found by the wiki linter. */
export interface LintIssue {
  /** Which lint rule produced this issue. */
  type: 'orphan' | 'dead_link' | 'disconnected' | 'missing_backlink' | 'thin_content' | 'duplicate';
  /** Severity level; in practice only dead links are reported as 'error'. */
  severity: 'error' | 'warning' | 'info';
  /** Id of the affected page, when the issue concerns a specific page. */
  pageId?: number;
  /** Title of the affected page; may be absent when the page record is missing. */
  pageTitle?: string;
  /** Human-readable description of the problem. */
  message: string;
  /** Optional hint on how to resolve the issue. */
  suggestion?: string;
}
12
+
13
/** Aggregate result of a lintWiki() run. */
export interface LintReport {
  /** Every issue found, in check order (orphans, dead links, clusters, ...). */
  issues: LintIssue[];
  /** Issue counts by severity plus overall page/link totals. */
  summary: { errors: number; warnings: number; info: number; total_pages: number; total_links: number };
  /** ISO-8601 timestamp of when the report was generated. */
  timestamp: string;
}
18
+
19
+ export function lintWiki(store: Store): LintReport {
20
+ const pages = store.listPages();
21
+ const links = store.getAllLinks();
22
+
23
+ const pageMap = new Map(pages.map(p => [p.id, p]));
24
+ const issues: LintIssue[] = [];
25
+
26
+ // --- a) Orphan Pages: no incoming links ---
27
+ const incomingCount = new Map<number, number>();
28
+ for (const link of links) {
29
+ incomingCount.set(link.to_page_id, (incomingCount.get(link.to_page_id) || 0) + 1);
30
+ }
31
+ for (const page of pages) {
32
+ if (!incomingCount.has(page.id)) {
33
+ issues.push({
34
+ type: 'orphan',
35
+ severity: 'warning',
36
+ pageId: page.id,
37
+ pageTitle: page.title,
38
+ message: `"${page.title}" has no incoming links (orphan page)`,
39
+ suggestion: 'Add links to this page from related pages',
40
+ });
41
+ }
42
+ }
43
+
44
+ // --- b) Dead Links: links pointing to non-existent pages ---
45
+ for (const link of links) {
46
+ if (!pageMap.has(link.to_page_id)) {
47
+ const fromPage = pageMap.get(link.from_page_id);
48
+ issues.push({
49
+ type: 'dead_link',
50
+ severity: 'error',
51
+ pageId: link.from_page_id,
52
+ pageTitle: fromPage?.title,
53
+ message: `Dead link from "${fromPage?.title || link.from_page_id}" to non-existent page (id: ${link.to_page_id}, anchor: "${link.anchor_text}")`,
54
+ suggestion: 'Remove or fix the broken link',
55
+ });
56
+ }
57
+ if (!pageMap.has(link.from_page_id)) {
58
+ issues.push({
59
+ type: 'dead_link',
60
+ severity: 'error',
61
+ pageId: link.from_page_id,
62
+ message: `Dead link from non-existent page (id: ${link.from_page_id}) to page id ${link.to_page_id}`,
63
+ suggestion: 'Clean up orphaned link records',
64
+ });
65
+ }
66
+ }
67
+
68
+ // --- c) Disconnected Clusters ---
69
+ // Build adjacency list (undirected) for connectivity
70
+ const adj = new Map<number, Set<number>>();
71
+ for (const page of pages) {
72
+ adj.set(page.id, new Set());
73
+ }
74
+ for (const link of links) {
75
+ if (pageMap.has(link.from_page_id) && pageMap.has(link.to_page_id)) {
76
+ adj.get(link.from_page_id)!.add(link.to_page_id);
77
+ adj.get(link.to_page_id)!.add(link.from_page_id);
78
+ }
79
+ }
80
+
81
+ if (pages.length > 0) {
82
+ const visited = new Set<number>();
83
+ const clusters: number[][] = [];
84
+
85
+ for (const page of pages) {
86
+ if (visited.has(page.id)) continue;
87
+ // BFS
88
+ const cluster: number[] = [];
89
+ const queue = [page.id];
90
+ visited.add(page.id);
91
+ while (queue.length > 0) {
92
+ const current = queue.shift()!;
93
+ cluster.push(current);
94
+ for (const neighbor of adj.get(current) || []) {
95
+ if (!visited.has(neighbor)) {
96
+ visited.add(neighbor);
97
+ queue.push(neighbor);
98
+ }
99
+ }
100
+ }
101
+ clusters.push(cluster);
102
+ }
103
+
104
+ if (clusters.length > 1) {
105
+ // Sort by size descending; the largest is the "main" cluster
106
+ clusters.sort((a, b) => b.length - a.length);
107
+ for (let i = 1; i < clusters.length; i++) {
108
+ const clusterPages = clusters[i].map(id => pageMap.get(id)!.title).join(', ');
109
+ for (const id of clusters[i]) {
110
+ const page = pageMap.get(id)!;
111
+ issues.push({
112
+ type: 'disconnected',
113
+ severity: 'warning',
114
+ pageId: page.id,
115
+ pageTitle: page.title,
116
+ message: `"${page.title}" is in a disconnected cluster (${clusters[i].length} pages: ${clusterPages.slice(0, 100)})`,
117
+ suggestion: 'Add links connecting this cluster to the main wiki graph',
118
+ });
119
+ }
120
+ }
121
+ }
122
+ }
123
+
124
+ // --- d) Missing Reciprocal Links ---
125
+ const linkSet = new Set(links.map(l => `${l.from_page_id}->${l.to_page_id}`));
126
+ for (const link of links) {
127
+ if (!pageMap.has(link.from_page_id) || !pageMap.has(link.to_page_id)) continue;
128
+ const reverse = `${link.to_page_id}->${link.from_page_id}`;
129
+ if (!linkSet.has(reverse)) {
130
+ const fromPage = pageMap.get(link.from_page_id)!;
131
+ const toPage = pageMap.get(link.to_page_id)!;
132
+ issues.push({
133
+ type: 'missing_backlink',
134
+ severity: 'info',
135
+ pageId: link.to_page_id,
136
+ pageTitle: toPage.title,
137
+ message: `"${toPage.title}" is linked from "${fromPage.title}" but doesn't link back`,
138
+ suggestion: `Consider adding a link from "${toPage.title}" back to "${fromPage.title}"`,
139
+ });
140
+ }
141
+ }
142
+
143
+ // --- e) Thin Content ---
144
+ for (const page of pages) {
145
+ if (page.content.length < 100) {
146
+ issues.push({
147
+ type: 'thin_content',
148
+ severity: 'warning',
149
+ pageId: page.id,
150
+ pageTitle: page.title,
151
+ message: `"${page.title}" has very short content (${page.content.length} chars)`,
152
+ suggestion: 'Expand this page with more detailed content',
153
+ });
154
+ }
155
+ }
156
+
157
+ // --- f) Duplicate Concepts ---
158
+ // Normalize titles and compare (strip spaces for stricter dedup matching)
159
+ const normalize = (s: string) => normalizeTitle(s).replace(/\s/g, "");
160
+ const seen = new Map<string, { id: number; title: string }>();
161
+ for (const page of pages) {
162
+ const norm = normalize(page.title);
163
+ if (!norm) continue;
164
+ const existing = seen.get(norm);
165
+ if (existing) {
166
+ issues.push({
167
+ type: 'duplicate',
168
+ severity: 'warning',
169
+ pageId: page.id,
170
+ pageTitle: page.title,
171
+ message: `"${page.title}" may be a duplicate of "${existing.title}"`,
172
+ suggestion: 'Consider merging these pages',
173
+ });
174
+ } else {
175
+ seen.set(norm, { id: page.id, title: page.title });
176
+ }
177
+ }
178
+
179
+ // Also check Levenshtein similarity for near-duplicates
180
+ const titles = Array.from(seen.values());
181
+ const reportedPairs = new Set<string>();
182
+ for (let i = 0; i < titles.length; i++) {
183
+ for (let j = i + 1; j < titles.length; j++) {
184
+ const a = normalize(titles[i].title);
185
+ const b = normalize(titles[j].title);
186
+ if (a.length < 3 || b.length < 3) continue;
187
+ const maxLen = Math.max(a.length, b.length);
188
+ // Skip pairs where length difference is too large for 85% similarity
189
+ if (Math.abs(a.length - b.length) > maxLen * 0.2) continue;
190
+ const dist = levenshtein(a, b);
191
+ const similarity = 1 - dist / maxLen;
192
+ if (similarity >= 0.85 && similarity < 1) {
193
+ const pairKey = [titles[i].id, titles[j].id].sort().join('-');
194
+ if (reportedPairs.has(pairKey)) continue;
195
+ reportedPairs.add(pairKey);
196
+ issues.push({
197
+ type: 'duplicate',
198
+ severity: 'info',
199
+ pageId: titles[j].id,
200
+ pageTitle: titles[j].title,
201
+ message: `"${titles[i].title}" and "${titles[j].title}" have similar titles (${Math.round(similarity * 100)}% similar)`,
202
+ suggestion: 'Review if these pages cover the same topic',
203
+ });
204
+ }
205
+ }
206
+ }
207
+
208
+ const errors = issues.filter(i => i.severity === 'error').length;
209
+ const warnings = issues.filter(i => i.severity === 'warning').length;
210
+ const info = issues.filter(i => i.severity === 'info').length;
211
+
212
+ return {
213
+ issues,
214
+ summary: {
215
+ errors,
216
+ warnings,
217
+ info,
218
+ total_pages: pages.length,
219
+ total_links: links.length,
220
+ },
221
+ timestamp: new Date().toISOString(),
222
+ };
223
+ }
224
+
225
+ /** Simple Levenshtein distance */
226
+ function levenshtein(a: string, b: string): number {
227
+ const m = a.length;
228
+ const n = b.length;
229
+ if (m === 0) return n;
230
+ if (n === 0) return m;
231
+
232
+ // Use single-row optimization
233
+ let prev = Array.from({ length: n + 1 }, (_, i) => i);
234
+ let curr = new Array(n + 1);
235
+
236
+ for (let i = 1; i <= m; i++) {
237
+ curr[0] = i;
238
+ for (let j = 1; j <= n; j++) {
239
+ const cost = a[i - 1] === b[j - 1] ? 0 : 1;
240
+ curr[j] = Math.min(
241
+ prev[j] + 1, // deletion
242
+ curr[j - 1] + 1, // insertion
243
+ prev[j - 1] + cost // substitution
244
+ );
245
+ }
246
+ [prev, curr] = [curr, prev];
247
+ }
248
+ return prev[n];
249
+ }
@@ -0,0 +1,150 @@
1
+ import type { Store } from "../store";
2
+ import type { LLMConfig } from "../config";
3
+ import { stripJsonFences } from "../utils";
4
+
5
/** Input for promoting a Q&A exchange into a wiki concept page. */
export interface PromoteParams {
  /** The user's original question; used as a slug fallback and stored as page origin. */
  question: string;
  /** The answer text; becomes the body of the new (or appended-to) page. */
  answer: string;
  /** Desired page title; also drives deduplication and slug generation. */
  title: string;
  /** Id of the page the Q&A originated from; a link is added from it to the new page. */
  sourcePageId: number;
  /** Optional quoted context; prepended as a blockquote, truncated to 500 chars. */
  selectedText?: string;
}
12
+
13
/** Outcome of promoteToWiki(): where the content ended up. */
export interface PromoteResult {
  /** Id of the page that was created or appended to. */
  pageId: number;
  /** Slug of that page (a new unique slug when isNew is true). */
  slug: string;
  /** Title of that page (the existing page's title when deduplicated). */
  title: string;
  /** True when a new page was created; false when appended to an existing one. */
  isNew: boolean;
}
19
+
20
/**
 * Promote a Q&A answer into a permanent wiki concept page.
 *
 * Handles deduplication (appends to an existing page when titles match),
 * slug generation, wiki-linking the new page to existing pages, parent
 * link creation, and quiz generation.
 *
 * @param store - wiki persistence layer (pages, links, quizzes).
 * @param params - the Q&A material to promote; see PromoteParams.
 * @param llmConfig - LLM connection settings, used only for quiz generation.
 * @returns id/slug/title of the page written and whether it was newly created.
 */
export async function promoteToWiki(
  store: Store,
  params: PromoteParams,
  llmConfig: LLMConfig,
): Promise<PromoteResult> {
  const { question, answer, title, sourcePageId, selectedText } = params;

  // Deduplication: when a similar page already exists, append the new answer
  // (separated by a horizontal rule) instead of creating a second page.
  // NOTE(review): "similar" is whatever findSimilarPage implements — confirm.
  const existing = store.findSimilarPage(title);
  if (existing) {
    const updatedContent = existing.content + "\n\n---\n\n" + answer;
    store.updatePageContent(existing.id, updatedContent);
    return {
      pageId: existing.id,
      slug: existing.slug,
      title: existing.title,
      isNew: false,
    };
  }

  // --- Create a new concept page ---
  // NOTE(review): dynamic import — presumably avoids a circular module
  // dependency with the pipeline; confirm before making it static.
  const { slugify } = await import("../pipeline/chunker");
  // Slug fallback chain: title -> question -> timestamp.
  let slug = slugify(title);
  if (!slug) slug = slugify(question);
  if (!slug) slug = `qa-${Date.now()}`;

  // Ensure uniqueness by appending -2, -3, ... while the slug is taken.
  let finalSlug = slug;
  let counter = 2;
  while (store.getPage(finalSlug)) {
    finalSlug = `${slug}-${counter++}`;
  }

  // Build page content with optional quoted context (capped at 500 chars).
  let pageContent = answer;
  if (selectedText) {
    pageContent = `> ${selectedText.slice(0, 500)}\n\n${pageContent}`;
  }

  const page = store.addPage(finalSlug, title, pageContent, undefined, undefined, "concept", 0);

  // Mark as user-generated origin
  store.updatePageOrigin(finalSlug, "user", question, sourcePageId);

  // --- Wiki-link the new page to existing pages ---
  // Longest titles first so a longer title wins over one it contains.
  const targets = store
    .listPageSummaries()
    .filter((p) => p.id !== page.id && p.title.length >= 3)
    .sort((a, b) => b.title.length - a.title.length);

  let linkedContent = pageContent;
  const linkedSlugs = new Set<string>();
  for (const target of targets) {
    if (linkedSlugs.has(target.slug)) continue;
    // Escape regex metacharacters in the title, then match the first
    // case-insensitive standalone occurrence; the lookarounds reject
    // matches adjacent to word characters or markdown-link brackets.
    const escaped = target.title.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    const regex = new RegExp(`(?<!\\[)(?<!\\w)(${escaped})(?!\\w)(?!\\])`, "i");
    const match = regex.exec(linkedContent);
    if (match) {
      // Splice in a markdown link for the first occurrence only.
      const replacement = `[${match[1]}](/wiki/${target.slug})`;
      linkedContent =
        linkedContent.slice(0, match.index) +
        replacement +
        linkedContent.slice(match.index + match[0].length);
      linkedSlugs.add(target.slug);
      store.addLink(page.id, target.id, match[1]);
    }
  }
  // Persist the rewritten body only if at least one link was inserted.
  if (linkedSlugs.size > 0) {
    store.updatePageContent(page.id, linkedContent);
  }

  // Add link from source page to new page
  store.addLink(sourcePageId, page.id, title);

  // --- Generate 1-2 quizzes for the new concept ---
  try {
    // NOTE(review): dynamic import, presumably for the same circular-
    // dependency reason as slugify above — confirm.
    const { LLMClient } = await import("../llm-client");
    const llmClient = new LLMClient(llmConfig);

    const quizSystem = `You are a quiz generator for a study wiki. Generate quiz questions that test UNDERSTANDING, not just memorization.
Focus on higher-order thinking: "\uc65c?", "\uc5b4\ub5bb\uac8c?", "\ube44\uad50\ud558\ub77c", "\uc124\uba85\ud558\ub77c" style questions.
Return valid JSON only. No markdown fences.`;

    const quizPrompt = `Based on this wiki content, generate 1-2 quiz questions that test UNDERSTANDING.
Types: "fill_blank" (\ube48\uce78 \ucc44\uc6b0\uae30), "ox" (OX \ud034\uc988 - true/false), "short_answer" (\ub2e8\ub2f5\ud615)

Content title: ${title}
Content:
${answer.slice(0, 3000)}

Respond with a JSON array only:
[{"question": "...", "answer": "...", "explanation": "...", "type": "fill_blank"}]

Rules:
- For fill_blank: use ___ to mark the blank in the question
- For ox: question should be a statement, answer should be "O" or "X"
- For short_answer: question should be answerable in 1-3 words
- Include "explanation" field: a brief 1-2 sentence explanation of WHY the answer is correct`;

    const raw = await llmClient.chatComplete(quizSystem, quizPrompt, 2048);
    const cleaned = stripJsonFences(raw);
    const quizzes = JSON.parse(cleaned) as Array<{
      question: string;
      answer: string;
      explanation?: string;
      type: string;
    }>;

    // Only persist entries with all mandatory fields present.
    for (const q of quizzes) {
      if (q.question && q.answer && q.type) {
        store.addQuiz(page.id, q.question, q.answer, q.type, q.explanation || "");
      }
    }
  } catch {
    // Quiz generation is non-critical; log a warning and continue.
    console.log(`\x1b[33m\u26a0 \ud504\ub85c\ubaa8\ud2b8 \ud034\uc988 \uc0dd\uc131 \uc2e4\ud328\x1b[0m`);
  }

  return {
    pageId: page.id,
    slug: finalSlug,
    title,
    isNew: true,
  };
}
package/src/store.test.ts CHANGED
@@ -13,6 +13,17 @@ describe("Store", () => {
13
13
  store.close();
14
14
  });
15
15
 
16
+ test("schema: pages table has all migrated columns on a fresh DB", () => {
17
+ // Guards against CREATE TABLE / ALTER TABLE drift: every column added
18
+ // via the migration block must also exist after a fresh init, otherwise
19
+ // an index that references it (or downstream code) will break.
20
+ const db = (store as any).db;
21
+ const cols = db.query("PRAGMA table_info(pages)").all().map((r: any) => r.name);
22
+ for (const required of ["origin", "user_question", "parent_page_id", "category"]) {
23
+ expect(cols).toContain(required);
24
+ }
25
+ });
26
+
16
27
  test("addSource and listSources", () => {
17
28
  const src = store.addSource("file:///test.pdf", "pdf", "Test PDF", "raw content");
18
29
  expect(src.id).toBeGreaterThan(0);