mcp-astgl-knowledge 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/README.md +146 -38
  2. package/data/knowledge.db +0 -0
  3. package/dist/alerts.d.ts +22 -0
  4. package/dist/alerts.js +433 -0
  5. package/dist/alerts.js.map +1 -0
  6. package/dist/citation-test.d.ts +14 -0
  7. package/dist/citation-test.js +298 -0
  8. package/dist/citation-test.js.map +1 -0
  9. package/dist/daily-report.d.ts +15 -0
  10. package/dist/daily-report.js +441 -0
  11. package/dist/daily-report.js.map +1 -0
  12. package/dist/discover.js +3 -1
  13. package/dist/discover.js.map +1 -1
  14. package/dist/freshness.d.ts +20 -0
  15. package/dist/freshness.js +508 -0
  16. package/dist/freshness.js.map +1 -0
  17. package/dist/index.d.ts +6 -1
  18. package/dist/index.js +253 -14
  19. package/dist/index.js.map +1 -1
  20. package/dist/ingest-projects.d.ts +16 -0
  21. package/dist/ingest-projects.js +196 -0
  22. package/dist/ingest-projects.js.map +1 -0
  23. package/dist/knowledge-db.d.ts +13 -0
  24. package/dist/knowledge-db.js +156 -0
  25. package/dist/knowledge-db.js.map +1 -0
  26. package/dist/pipeline.d.ts +12 -0
  27. package/dist/pipeline.js +83 -0
  28. package/dist/pipeline.js.map +1 -0
  29. package/dist/query-log.d.ts +15 -0
  30. package/dist/query-log.js +93 -0
  31. package/dist/query-log.js.map +1 -0
  32. package/dist/rate-limit.d.ts +34 -0
  33. package/dist/rate-limit.js +206 -0
  34. package/dist/rate-limit.js.map +1 -0
  35. package/dist/related-articles.d.ts +15 -0
  36. package/dist/related-articles.js +217 -0
  37. package/dist/related-articles.js.map +1 -0
  38. package/dist/search.d.ts +13 -4
  39. package/dist/search.js +274 -39
  40. package/dist/search.js.map +1 -1
  41. package/dist/structure.d.ts +11 -0
  42. package/dist/structure.js +451 -0
  43. package/dist/structure.js.map +1 -0
  44. package/dist/types.d.ts +65 -0
  45. package/dist/types.js.map +1 -1
  46. package/package.json +10 -2
@@ -0,0 +1,217 @@
1
+ #!/usr/bin/env tsx
2
+ /**
3
+ * Internal linking automation via vector similarity.
4
+ *
5
+ * WHAT: Computes pairwise article similarity and injects related article links
6
+ * WHY: Cross-referencing boosts SEO, AI discoverability, and reader engagement
7
+ *
8
+ * Usage:
9
+ * npm run related Compute + print JSON map
10
+ * npm run related -- --inject Also inject into Astro markdown frontmatter
11
+ * npm run related -- --top 3 Number of related articles per article (default: 3)
12
+ *
13
+ * Requires: Ollama running with nomic-embed-text
14
+ */
15
+ import { join } from "path";
16
+ import { existsSync, readFileSync, writeFileSync, readdirSync } from "fs";
17
+ import Database from "better-sqlite3";
18
+ const DATA_DIR = join(import.meta.dirname, "..", "data");
19
+ const KNOWLEDGE_PATH = join(DATA_DIR, "knowledge.db");
20
+ const OLLAMA_URL = process.env.OLLAMA_URL || "http://localhost:11434";
21
+ const EMBED_MODEL = process.env.EMBED_MODEL || "nomic-embed-text";
22
+ const ASTRO_ANSWERS_DIR = process.env.ASTRO_ANSWERS_DIR ||
23
+ join(import.meta.dirname, "..", "..", "astgl-site", "src", "content", "answers");
24
+ const BASE_URL = "https://astgl.ai/answers";
25
+ // --- Embedding ---
26
/**
 * WHAT: Batch embed via Ollama /api/embed (single call, multiple inputs)
 * WHY: 1 API call for all articles is faster than one call per article
 *
 * @param {string[]} texts - Texts to embed; one vector is expected back per entry.
 * @returns {Promise<Array>} The `embeddings` array from the Ollama response.
 *   Callers index it by input position, so it is assumed to be in input order.
 * @throws {Error} If the HTTP status is not OK, or if the response does not
 *   contain exactly one embedding per input text.
 */
async function embedTexts(texts) {
  const resp = await fetch(`${OLLAMA_URL}/api/embed`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model: EMBED_MODEL, input: texts }),
  });
  if (!resp.ok) {
    throw new Error(`Ollama embed failed: ${resp.status} ${await resp.text()}`);
  }
  const data = await resp.json();
  const embeddings = data?.embeddings;
  // Fail loudly here instead of letting `undefined` vectors reach the
  // pairwise similarity loop, where they would surface as NaN scores.
  if (!Array.isArray(embeddings) || embeddings.length !== texts.length) {
    const got = Array.isArray(embeddings) ? embeddings.length : "no";
    throw new Error(`Ollama embed returned ${got} embeddings for ${texts.length} inputs`);
  }
  return embeddings;
}
40
+ // WHAT: Cosine similarity between two vectors
41
+ // WHY: Standard similarity metric for embedding comparison (same as sqlite-vec uses)
42
/**
 * WHAT: Cosine similarity between two vectors
 * WHY: Standard similarity metric for embedding comparison
 *
 * Iterates over the length of `a`; both vectors are expected to have the same
 * number of components.
 *
 * @param {ArrayLike<number>} a - First vector.
 * @param {ArrayLike<number>} b - Second vector.
 * @returns {number} Similarity in [-1, 1]. Returns 0 when the similarity is
 *   undefined (either vector has zero magnitude, or a component is missing /
 *   non-numeric) so callers never receive NaN.
 */
function cosineSimilarity(a, b) {
  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }
  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  // `!(x > 0)` is true for both 0 and NaN: dividing would yield NaN, which
  // breaks numeric sort comparators downstream.
  if (!(denominator > 0)) {
    return 0;
  }
  return dot / denominator;
}
53
+ // --- Knowledge DB: Store related articles ---
54
/**
 * WHAT: Create the article_related table (if needed) and rewrite all related links
 * WHY: MCP server can serve related articles alongside search results
 *
 * The delete + inserts run inside a single transaction, so the table is
 * replaced as a whole.
 *
 * @param db - Open better-sqlite3 database handle.
 * @param relatedMap - Map of slug -> { url, related: [{ slug, title, score }] }.
 */
function storeRelatedInDb(db, relatedMap) {
  const createTableSql = `
    CREATE TABLE IF NOT EXISTS article_related (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      article_url TEXT NOT NULL,
      related_url TEXT NOT NULL,
      related_title TEXT NOT NULL,
      similarity_score REAL NOT NULL,
      rank INTEGER NOT NULL,
      UNIQUE(article_url, related_url)
    )
  `;
  db.exec(createTableSql);
  db.exec("CREATE INDEX IF NOT EXISTS idx_related_article ON article_related(article_url)");

  const insertLink = db.prepare(`INSERT OR REPLACE INTO article_related (article_url, related_url, related_title, similarity_score, rank)
     VALUES (?, ?, ?, ?, ?)`);

  const replaceAllLinks = db.transaction(() => {
    // Clear existing data
    db.prepare("DELETE FROM article_related").run();
    Object.values(relatedMap).forEach((entry) => {
      entry.related.forEach((rel, position) => {
        // rank is 1-based: position 0 is the most similar article
        insertLink.run(entry.url, `${BASE_URL}/${rel.slug}`, rel.title, rel.score, position + 1);
      });
    });
  });
  replaceAllLinks();
}
84
+ // --- Astro Frontmatter Injection ---
85
+ // WHAT: Parse YAML frontmatter from a markdown file
86
+ // WHY: Need to read existing frontmatter, add/update `related` field, write back
87
/**
 * WHAT: Replace (or add) the `related` list in a markdown file's YAML frontmatter
 * WHY: Astro reads related links from frontmatter; re-running must not
 *      duplicate or corrupt an existing block
 *
 * The frontmatter is edited line by line instead of with a multi-line regex:
 * the `related:` key line and every following list/indented/blank line are
 * dropped, and a fresh block is appended after the remaining keys.
 *
 * @param {string} filePath - Path of the markdown file to rewrite in place.
 * @param {{slug: string, title: string}[]} related - Links to write.
 * @returns {boolean} true if the file was rewritten, false if it has no
 *   `---` delimited frontmatter (the file is left untouched).
 */
function injectRelatedFrontmatter(filePath, related) {
  const content = readFileSync(filePath, "utf-8");
  // Split frontmatter from body; the first group captures the file's line
  // ending so CRLF files are accepted and written back consistently.
  const fmMatch = content.match(/^---(\r?\n)([\s\S]*?)\r?\n---\r?\n([\s\S]*)$/);
  if (!fmMatch) {
    console.error(` Skipping ${filePath}: no frontmatter found`);
    return false;
  }
  const [, eol, rawFrontmatter, body] = fmMatch;

  // Remove any existing top-level `related` entry together with its items.
  const keptLines = [];
  let insideRelated = false;
  for (const line of rawFrontmatter.split(/\r?\n/)) {
    if (/^related:(?:\s|$)/.test(line)) {
      insideRelated = true;
      continue;
    }
    // Items of the block start with "-" or are indented; blank lines inside
    // the block are dropped too. Anything else is the next top-level key.
    if (insideRelated && /^(?:\s|-|$)/.test(line)) {
      continue;
    }
    insideRelated = false;
    keptLines.push(line);
  }
  const frontmatter = keptLines.join(eol).trimEnd();

  // JSON.stringify yields a valid YAML double-quoted scalar and escapes both
  // quotes and backslashes. `title` is indented two spaces so it belongs to
  // the same mapping as `slug`.
  const relatedBlock =
    related.length === 0
      ? "related: []"
      : [
          "related:",
          ...related.flatMap((r) => [
            `- slug: ${r.slug}`,
            `  title: ${JSON.stringify(String(r.title))}`,
          ]),
        ].join(eol);

  const sections = frontmatter ? [frontmatter, relatedBlock] : [relatedBlock];
  const updated = `---${eol}${sections.join(eol)}${eol}---${eol}${body}`;
  writeFileSync(filePath, updated, "utf-8");
  return true;
}
112
+ // --- CLI ---
113
/**
 * WHAT: Parse CLI flags from process.argv
 * WHY: `--top` feeds Array.prototype.slice; NaN or a negative number there
 *      silently yields empty or wrong result lists, so it is validated here
 *
 * Supported flags:
 *   --inject     also inject related links into Astro frontmatter
 *   --top <n>    number of related articles per article (positive integer, default 3)
 *
 * @returns {{inject: boolean, top: number}} Parsed options. An invalid or
 *   missing `--top` value falls back to 3 (with a warning on stderr when a
 *   value was given but could not be used).
 */
function parseArgs() {
  const DEFAULT_TOP = 3;
  const args = process.argv.slice(2);
  const topIdx = args.indexOf("--top");
  const rawTop = topIdx >= 0 ? args[topIdx + 1] : undefined;

  let top = DEFAULT_TOP;
  if (rawTop) {
    const parsed = Number.parseInt(rawTop, 10);
    if (Number.isInteger(parsed) && parsed > 0) {
      top = parsed;
    } else {
      console.error(` Ignoring invalid --top value "${rawTop}"; using ${DEFAULT_TOP}`);
    }
  }

  return {
    inject: args.includes("--inject"),
    top,
  };
}
123
/**
 * WHAT: CLI entry point — embed articles, rank neighbours, persist, optionally inject
 * WHY: Keeps the whole related-articles pipeline runnable as a single command
 *
 * Steps:
 *   1. Load astgl.ai articles from knowledge.db and embed "title. description".
 *   2. Compute pairwise cosine similarity and keep the top N per article.
 *   3. Store the links in knowledge.db.
 *   4. With --inject, write the links into matching Astro markdown files.
 *   5. Print the slug -> related map as JSON on stdout (progress goes to stderr).
 *
 * Exits the process with code 1 if knowledge.db is missing. Returns early,
 * without printing JSON, when no articles are found.
 *
 * @returns {Promise<void>}
 * @throws {Error} If embedding fails or returns a different number of vectors
 *   than there are articles.
 */
async function main() {
  const { inject, top } = parseArgs();
  console.error("=== Related Articles Generator ===\n");
  if (!existsSync(KNOWLEDGE_PATH)) {
    console.error("knowledge.db not found. Run 'npm run ingest' first.");
    process.exit(1);
  }

  const db = new Database(KNOWLEDGE_PATH);
  const relatedMap = {};
  // try/finally so the database handle is released even when embedding or
  // storage throws.
  try {
    // WHAT: Only process astgl.ai canonical articles (not Substack mirrors)
    // WHY: These are the articles we control and can inject links into
    const articles = db
      .prepare(`SELECT title, description, url, slug
         FROM articles
         WHERE url LIKE 'https://astgl.ai/answers/%'
         ORDER BY rowid`)
      .all();
    console.error(`Found ${articles.length} astgl.ai articles to process\n`);
    if (articles.length === 0) {
      console.error("No articles found.");
      return;
    }

    // Step 1: Embed all articles (title + description)
    console.error("Embedding articles...");
    const texts = articles.map((a) => `${a.title}. ${a.description}`);
    const embeddings = await embedTexts(texts);
    // embeddings[i] is paired with articles[i] below, so the counts must agree
    if (!Array.isArray(embeddings) || embeddings.length !== articles.length) {
      const got = Array.isArray(embeddings) ? embeddings.length : "no";
      throw new Error(`Expected ${articles.length} embeddings but received ${got}`);
    }
    console.error(` ${embeddings.length} embeddings generated\n`);

    // Step 2: Compute pairwise similarity
    console.error("Computing pairwise similarity...");
    for (let i = 0; i < articles.length; i++) {
      const article = articles[i];
      const similarities = [];
      for (let j = 0; j < articles.length; j++) {
        if (i === j) {
          continue;
        }
        // Round to 3 decimals to keep the stored/printed scores compact
        const score = Math.round(cosineSimilarity(embeddings[i], embeddings[j]) * 1000) / 1000;
        similarities.push({ index: j, score });
      }
      // Sort by similarity descending, take top N
      similarities.sort((a, b) => b.score - a.score);
      const topRelated = similarities.slice(0, top).map((s) => ({
        slug: articles[s.index].slug,
        title: articles[s.index].title,
        score: s.score,
      }));
      relatedMap[article.slug] = {
        title: article.title,
        url: article.url,
        related: topRelated,
      };
      console.error(` ${article.slug}: ${topRelated.map((r) => `${r.slug} (${r.score})`).join(", ")}`);
    }

    // Step 3: Store in knowledge.db
    console.error("\nStoring in knowledge.db...");
    storeRelatedInDb(db, relatedMap);
    // Count the links actually produced: an article has fewer than `top`
    // neighbours when the knowledge base holds `top` articles or fewer.
    const storedCount = Object.values(relatedMap).reduce(
      (sum, entry) => sum + entry.related.length,
      0,
    );
    console.error(` ${storedCount} relationships stored`);
  } finally {
    db.close();
  }

  // Step 4: Inject into Astro frontmatter (optional)
  if (inject) {
    console.error("\nInjecting into Astro frontmatter...");
    if (!existsSync(ASTRO_ANSWERS_DIR)) {
      console.error(` Astro answers dir not found: ${ASTRO_ANSWERS_DIR}`);
      console.error(" Set ASTRO_ANSWERS_DIR env var to override.");
    } else {
      const mdFiles = readdirSync(ASTRO_ANSWERS_DIR).filter((f) => f.endsWith(".md"));
      let injected = 0;
      for (const file of mdFiles) {
        // The markdown file name (minus extension) is the article slug
        const slug = file.replace(/\.md$/, "");
        const entry = relatedMap[slug];
        if (!entry) {
          console.error(` ${slug}: not in knowledge base, skipping`);
          continue;
        }
        const filePath = join(ASTRO_ANSWERS_DIR, file);
        if (injectRelatedFrontmatter(filePath, entry.related)) {
          console.error(` ${slug}: injected ${entry.related.length} related links`);
          injected++;
        }
      }
      console.error(`\n ${injected} files updated`);
    }
  }

  // Output JSON map to stdout
  console.log(JSON.stringify(relatedMap, null, 2));
  console.error("\n=== Done ===");
}
211
// WHAT: Run the CLI and translate the outcome into a process exit code
// WHY: Explicit exit so the command terminates as soon as main() settles
const exitWithSuccess = () => process.exit(0);
const exitWithFailure = (err) => {
  console.error("Related articles failed:", err);
  process.exit(1);
};
main().then(exitWithSuccess).catch(exitWithFailure);
217
+ //# sourceMappingURL=related-articles.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"related-articles.js","sourceRoot":"","sources":["../src/related-articles.ts"],"names":[],"mappings":";AACA;;;;;;;;;;;;GAYG;AAEH,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,UAAU,EAAE,YAAY,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,IAAI,CAAC;AAC1E,OAAO,QAAQ,MAAM,gBAAgB,CAAC;AAEtC,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;AACzD,MAAM,cAAc,GAAG,IAAI,CAAC,QAAQ,EAAE,cAAc,CAAC,CAAC;AACtD,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,wBAAwB,CAAC;AACtE,MAAM,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,kBAAkB,CAAC;AAClE,MAAM,iBAAiB,GACrB,OAAO,CAAC,GAAG,CAAC,iBAAiB;IAC7B,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,YAAY,EAAE,KAAK,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;AACnF,MAAM,QAAQ,GAAG,0BAA0B,CAAC;AAuB5C,oBAAoB;AACpB,KAAK,UAAU,UAAU,CAAC,KAAe;IACvC,yEAAyE;IACzE,oEAAoE;IACpE,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,GAAG,UAAU,YAAY,EAAE;QAClD,MAAM,EAAE,MAAM;QACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;QAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,WAAW,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;KAC3D,CAAC,CAAC;IAEH,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,wBAAwB,IAAI,CAAC,MAAM,IAAI,MAAM,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IAC9E,CAAC;IAED,MAAM,IAAI,GAAG,CAAC,MAAM,IAAI,CAAC,IAAI,EAAE,CAA+B,CAAC;IAC/D,OAAO,IAAI,CAAC,UAAU,CAAC;AACzB,CAAC;AAED,8CAA8C;AAC9C,qFAAqF;AACrF,SAAS,gBAAgB,CAAC,CAAW,EAAE,CAAW;IAChD,IAAI,GAAG,GAAG,CAAC,CAAC;IACZ,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAClC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACnB,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACrB,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACvB,CAAC;IACD,OAAO,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;AACrD,CAAC;AAED,+CAA+C;AAC/C,SAAS,gBAAgB,CACvB,EAAiC,EACjC,UAAsB;IAEtB,oDAAoD;IACpD,sEAAsE;IACtE,EAAE,CAAC,IAAI,CAAC;;;;;;;;;;GAUP,CAAC,CAAC;IACH,EAAE,CAAC,IAAI,
CACL,gFAAgF,CACjF,CAAC;IAEF,MAAM,MAAM,GAAG,EAAE,CAAC,OAAO,CACvB;4BACwB,CACzB,CAAC;IAEF,MAAM,SAAS,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE;QACpC,sBAAsB;QACtB,EAAE,CAAC,OAAO,CAAC,6BAA6B,CAAC,CAAC,GAAG,EAAE,CAAC;QAEhD,KAAK,MAAM,CAAC,IAAI,EAAE,KAAK,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,UAAU,CAAC,EAAE,CAAC;YACvD,MAAM,UAAU,GAAG,KAAK,CAAC,GAAG,CAAC;YAC7B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC9C,MAAM,GAAG,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;gBAC7B,MAAM,CAAC,GAAG,CACR,UAAU,EACV,GAAG,QAAQ,IAAI,GAAG,CAAC,IAAI,EAAE,EACzB,GAAG,CAAC,KAAK,EACT,GAAG,CAAC,KAAK,EACT,CAAC,GAAG,CAAC,CACN,CAAC;YACJ,CAAC;QACH,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,SAAS,EAAE,CAAC;AACd,CAAC;AAED,sCAAsC;AACtC,oDAAoD;AACpD,iFAAiF;AACjF,SAAS,wBAAwB,CAC/B,QAAgB,EAChB,OAAuB;IAEvB,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAEhD,8BAA8B;IAC9B,MAAM,OAAO,GAAG,OAAO,CAAC,KAAK,CAAC,mCAAmC,CAAC,CAAC;IACnE,IAAI,CAAC,OAAO,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CAAC,cAAc,QAAQ,wBAAwB,CAAC,CAAC;QAC9D,OAAO,KAAK,CAAC;IACf,CAAC;IAED,IAAI,WAAW,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;IAC7B,MAAM,IAAI,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;IAExB,8CAA8C;IAC9C,WAAW,GAAG,WAAW,CAAC,OAAO,CAC/B,iDAAiD,EACjD,EAAE,CACH,CAAC;IACF,mDAAmD;IACnD,WAAW,GAAG,WAAW,CAAC,OAAO,CAC/B,kDAAkD,EAClD,EAAE,CACH,CAAC;IACF,6BAA6B;IAC7B,WAAW,GAAG,WAAW,CAAC,OAAO,EAAE,CAAC;IAEpC,+BAA+B;IAC/B,MAAM,WAAW,GAAG,OAAO;SACxB,GAAG,CACF,CAAC,CAAC,EAAE,EAAE,CACJ,WAAW,CAAC,CAAC,IAAI,eAAe,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,KAAK,CAAC,GAAG,CAClE;SACA,IAAI,CAAC,IAAI,CAAC,CAAC;IAEd,WAAW,IAAI,eAAe,WAAW,EAAE,CAAC;IAE5C,MAAM,OAAO,GAAG,QAAQ,WAAW,UAAU,IAAI,EAAE,CAAC;IACpD,aAAa,CAAC,QAAQ,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC;IAC1C,OAAO,IAAI,CAAC;AACd,CAAC;AAED,cAAc;AACd,SAAS,SAAS;IAChB,MAAM,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IACnC,OAAO;QACL,MAAM,EAAE,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC;QACjC,GAAG,EAAE,CAAC,GAAG,EAAE;YACT,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YAClC,OAAO,GAAG,IAAI,CAAC,IAAI,IAAI,CAAC,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,I
AAI,CAAC,GAAG,GAAG,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrE,CAAC,CAAC,EAAE;KACL,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,IAAI;IACjB,MAAM,EAAE,MAAM,EAAE,GAAG,EAAE,GAAG,SAAS,EAAE,CAAC;IAEpC,OAAO,CAAC,KAAK,CAAC,sCAAsC,CAAC,CAAC;IAEtD,IAAI,CAAC,UAAU,CAAC,cAAc,CAAC,EAAE,CAAC;QAChC,OAAO,CAAC,KAAK,CAAC,qDAAqD,CAAC,CAAC;QACrE,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,MAAM,EAAE,GAAG,IAAI,QAAQ,CAAC,cAAc,CAAC,CAAC;IAExC,wEAAwE;IACxE,mEAAmE;IACnE,MAAM,QAAQ,GAAG,EAAE;SAChB,OAAO,CACN;;;sBAGgB,CACjB;SACA,GAAG,EAAmB,CAAC;IAE1B,OAAO,CAAC,KAAK,CAAC,SAAS,QAAQ,CAAC,MAAM,iCAAiC,CAAC,CAAC;IAEzE,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,OAAO,CAAC,KAAK,CAAC,oBAAoB,CAAC,CAAC;QACpC,EAAE,CAAC,KAAK,EAAE,CAAC;QACX,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAClB,CAAC;IAED,mDAAmD;IACnD,OAAO,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAC;IACvC,MAAM,KAAK,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;IAClE,MAAM,UAAU,GAAG,MAAM,UAAU,CAAC,KAAK,CAAC,CAAC;IAC3C,OAAO,CAAC,KAAK,CAAC,KAAK,UAAU,CAAC,MAAM,yBAAyB,CAAC,CAAC;IAE/D,sCAAsC;IACtC,OAAO,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;IAClD,MAAM,UAAU,GAAe,EAAE,CAAC;IAElC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,YAAY,GAA4C,EAAE,CAAC;QAEjE,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACzC,IAAI,CAAC,KAAK,CAAC;gBAAE,SAAS;YACtB,MAAM,KAAK,GACT,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,IAAI,CAAC;YAC3E,YAAY,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;QACzC,CAAC;QAED,4CAA4C;QAC5C,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAC/C,MAAM,UAAU,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;YACxD,IAAI,EAAE,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI;YAC5B,KAAK,EAAE,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,KAAK;YAC9B,KAAK,EAAE,CAAC,CAAC,K
AAK;SACf,CAAC,CAAC,CAAC;QAEJ,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG;YACzB,KAAK,EAAE,OAAO,CAAC,KAAK;YACpB,GAAG,EAAE,OAAO,CAAC,GAAG;YAChB,OAAO,EAAE,UAAU;SACpB,CAAC;QAEF,OAAO,CAAC,KAAK,CACX,KAAK,OAAO,CAAC,IAAI,KAAK,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CACnF,CAAC;IACJ,CAAC;IAED,gCAAgC;IAChC,OAAO,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAC;IAC9C,gBAAgB,CAAC,EAAE,EAAE,UAAU,CAAC,CAAC;IACjC,OAAO,CAAC,KAAK,CAAC,KAAK,MAAM,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,MAAM,GAAG,GAAG,uBAAuB,CAAC,CAAC;IAEhF,EAAE,CAAC,KAAK,EAAE,CAAC;IAEX,mDAAmD;IACnD,IAAI,MAAM,EAAE,CAAC;QACX,OAAO,CAAC,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAEvD,IAAI,CAAC,UAAU,CAAC,iBAAiB,CAAC,EAAE,CAAC;YACnC,OAAO,CAAC,KAAK,CAAC,kCAAkC,iBAAiB,EAAE,CAAC,CAAC;YACrE,OAAO,CAAC,KAAK,CAAC,8CAA8C,CAAC,CAAC;QAChE,CAAC;aAAM,CAAC;YACN,MAAM,OAAO,GAAG,WAAW,CAAC,iBAAiB,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAC1D,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAClB,CAAC;YAEF,IAAI,QAAQ,GAAG,CAAC,CAAC;YACjB,KAAK,MAAM,IAAI,IAAI,OAAO,EAAE,CAAC;gBAC3B,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;gBACvC,MAAM,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;gBAC/B,IAAI,CAAC,KAAK,EAAE,CAAC;oBACX,OAAO,CAAC,KAAK,CAAC,KAAK,IAAI,mCAAmC,CAAC,CAAC;oBAC5D,SAAS;gBACX,CAAC;gBAED,MAAM,QAAQ,GAAG,IAAI,CAAC,iBAAiB,EAAE,IAAI,CAAC,CAAC;gBAC/C,IAAI,wBAAwB,CAAC,QAAQ,EAAE,KAAK,CAAC,OAAO,CAAC,EAAE,CAAC;oBACtD,OAAO,CAAC,KAAK,CAAC,KAAK,IAAI,cAAc,KAAK,CAAC,OAAO,CAAC,MAAM,gBAAgB,CAAC,CAAC;oBAC3E,QAAQ,EAAE,CAAC;gBACb,CAAC;YACH,CAAC;YACD,OAAO,CAAC,KAAK,CAAC,OAAO,QAAQ,gBAAgB,CAAC,CAAC;QACjD,CAAC;IACH,CAAC;IAED,4BAA4B;IAC5B,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;IAEjD,OAAO,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;AAClC,CAAC;AAED,IAAI,EAAE;KACH,IAAI,CAAC,GAAG,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;KAC3B,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACb,OAAO,CAAC,KAAK,CAAC,0BAA0B,EAAE,GAAG,CAAC,CAAC;IAC/C,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
package/dist/search.d.ts CHANGED
@@ -2,10 +2,19 @@
2
2
  * Vector search logic against the pre-built knowledge.db.
3
3
  * At runtime, queries are embedded via Ollama and matched against stored vectors.
4
4
  *
5
- * WHAT: Provides search_articles, get_answer, and list_topics functions
5
+ * WHAT: Provides search_articles, get_answer, list_topics, and related functions
6
6
  * WHY: Separating search logic from MCP server keeps concerns clean
7
+ *
8
+ * Performance:
9
+ * - LRU cache on embeddings (200 entries) avoids redundant Ollama calls
10
+ * - Ollama calls have 10s timeout + 1 retry with 500ms delay
11
+ * - Prepared statements cached at module level
12
+ * - listTopics uses GROUP_CONCAT instead of in-memory join
7
13
  */
8
- import { SearchResult, AnswerResult, TopicEntry } from "./types.js";
9
- export declare function searchArticles(query: string, limit?: number): Promise<SearchResult[]>;
10
- export declare function getAnswer(question: string): Promise<AnswerResult>;
14
+ import { SearchResult, AnswerResult, TopicEntry, TutorialResult, ComparisonResult, LatestArticle } from "./types.js";
15
+ export declare function searchArticles(query: string, limit?: number, contentType?: string): Promise<SearchResult[]>;
16
+ export declare function getAnswer(question: string, contentType?: string): Promise<AnswerResult>;
17
+ export declare function getTutorial(query: string): Promise<TutorialResult>;
18
+ export declare function compareTopics(topicA: string, topicB: string): Promise<ComparisonResult>;
19
+ export declare function getLatest(limit?: number): LatestArticle[];
11
20
  export declare function listTopics(): TopicEntry[];
package/dist/search.js CHANGED
@@ -2,8 +2,14 @@
2
2
  * Vector search logic against the pre-built knowledge.db.
3
3
  * At runtime, queries are embedded via Ollama and matched against stored vectors.
4
4
  *
5
- * WHAT: Provides search_articles, get_answer, and list_topics functions
5
+ * WHAT: Provides search_articles, get_answer, list_topics, and related functions
6
6
  * WHY: Separating search logic from MCP server keeps concerns clean
7
+ *
8
+ * Performance:
9
+ * - LRU cache on embeddings (200 entries) avoids redundant Ollama calls
10
+ * - Ollama calls have 10s timeout + 1 retry with 500ms delay
11
+ * - Prepared statements cached at module level
12
+ * - listTopics uses GROUP_CONCAT instead of in-memory join
7
13
  */
8
14
  import { join } from "path";
9
15
  import { existsSync } from "fs";
@@ -12,6 +18,7 @@ import * as sqliteVec from "sqlite-vec";
12
18
  const DB_PATH = join(import.meta.dirname, "..", "data", "knowledge.db");
13
19
  const OLLAMA_URL = process.env.OLLAMA_URL || "http://localhost:11434";
14
20
  const EMBED_MODEL = process.env.EMBED_MODEL || "nomic-embed-text";
21
+ // --- Database Connection (lazy singleton) ---
15
22
  let db = null;
16
23
  function getDb() {
17
24
  if (!db) {
@@ -23,21 +30,100 @@ function getDb() {
23
30
  }
24
31
  return db;
25
32
  }
33
+ // --- Embedding Cache (LRU) ---
34
+ // WHAT: In-memory cache for embedding vectors, keyed by normalized query text
35
+ // WHY: Same query → same embedding. At 500 queries/day, even 20% repeats saves 100 Ollama calls
36
// Maximum number of cached query embeddings
const EMBED_CACHE_MAX = 200;
// normalized query text -> { vec, ts }, where ts is the last-use timestamp
const embedCache = new Map();

/**
 * Normalize query text into a cache key (trimmed, lower-cased) so trivially
 * different spellings of the same query share one entry.
 *
 * @param {string} text - Raw query text.
 * @returns {string} Cache key; may be the empty string for blank input.
 */
function cacheKey(text) {
  return text.trim().toLowerCase();
}

/**
 * Evict least-recently-used entries until the cache is back within
 * EMBED_CACHE_MAX.
 *
 * The victim is tracked with an explicit "found" flag rather than by key
 * truthiness: "" is a valid cache key (whitespace-only query), and treating
 * it as "nothing found" would stop eviction for good once that entry became
 * the oldest, letting the cache grow without bound.
 */
function evictOldest() {
  while (embedCache.size > EMBED_CACHE_MAX) {
    let found = false;
    let oldestKey;
    let oldestTs = Infinity;
    for (const [key, entry] of embedCache) {
      if (!found || entry.ts < oldestTs) {
        found = true;
        oldestTs = entry.ts;
        oldestKey = key;
      }
    }
    // size > EMBED_CACHE_MAX >= 0 guarantees at least one entry was visited
    embedCache.delete(oldestKey);
  }
}
55
+ // --- Embedding with Resilience ---
56
+ // WHAT: Embed query text via Ollama with timeout, retry, and caching
57
+ // WHY: Ollama is the bottleneck (~100-500ms). Cache eliminates redundant calls,
58
+ // timeout prevents hanging, retry handles transient failures
26
59
  async function embedQuery(text) {
27
- const resp = await fetch(`${OLLAMA_URL}/api/embed`, {
28
- method: "POST",
29
- headers: { "Content-Type": "application/json" },
30
- body: JSON.stringify({ model: EMBED_MODEL, input: text }),
31
- });
32
- if (!resp.ok) {
33
- throw new Error(`Ollama embed failed: ${resp.status} ${await resp.text()}`);
60
+ const key = cacheKey(text);
61
+ const cached = embedCache.get(key);
62
+ if (cached) {
63
+ cached.ts = Date.now(); // Touch for LRU
64
+ return cached.vec;
65
+ }
66
+ const vec = await embedWithRetry(text);
67
+ embedCache.set(key, { vec, ts: Date.now() });
68
+ evictOldest();
69
+ return vec;
70
+ }
71
/**
 * WHAT: Embed a single query via Ollama with a 10s timeout and retry
 * WHY: Timeout prevents hanging on a stalled Ollama; retry handles transient failures
 *
 * @param {string} text - Query text to embed.
 * @param {number} [retries=1] - Extra attempts after the first one; each
 *   retry is preceded by a 500ms pause.
 * @returns {Promise<Float32Array>} The first embedding of the response.
 * @throws {Error} After the final attempt fails. The message includes the
 *   last failure and the original error is attached as `cause`.
 */
async function embedWithRetry(text, retries = 1) {
  for (let attempt = 0; attempt <= retries; attempt++) {
    try {
      const resp = await fetch(`${OLLAMA_URL}/api/embed`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ model: EMBED_MODEL, input: text }),
        signal: AbortSignal.timeout(10_000),
      });
      if (!resp.ok) {
        throw new Error(`Ollama embed failed: ${resp.status} ${await resp.text()}`);
      }
      const data = await resp.json();
      const vector = data?.embeddings?.[0];
      // `new Float32Array(undefined)` silently yields an EMPTY vector; reject
      // malformed responses instead of returning a useless embedding.
      if (!Array.isArray(vector) || vector.length === 0) {
        throw new Error("Ollama embed response contained no embedding vector");
      }
      return new Float32Array(vector);
    } catch (err) {
      const isLastAttempt = attempt === retries;
      if (isLastAttempt) {
        const message = err instanceof Error ? err.message : String(err);
        // Name the configured model: EMBED_MODEL can be overridden via env
        throw new Error(
          `Embedding unavailable after ${retries + 1} attempt(s): ${message}. ` +
            `Ensure Ollama is running with the ${EMBED_MODEL} model loaded.`,
          { cause: err },
        );
      }
      // Wait before retry
      await new Promise((resolve) => setTimeout(resolve, 500));
    }
  }
  // Only reachable when `retries` is negative (the loop body never runs)
  throw new Error("Embedding failed");
}
100
+ // --- Prepared Statement Cache ---
101
+ // WHAT: Cache the content_type lookup statement used in post-filtering
102
+ // WHY: Avoid re-preparing the same statement on every search call
103
+ let contentTypeStmt = null;
104
+ function getContentTypeStmt(database) {
105
+ if (!contentTypeStmt) {
106
+ contentTypeStmt = database.prepare("SELECT content_type FROM articles WHERE url = ?");
34
107
  }
35
- const data = (await resp.json());
36
- return new Float32Array(data.embeddings[0]);
108
+ return contentTypeStmt;
37
109
  }
38
- export async function searchArticles(query, limit = 5) {
110
+ // --- Helper: Post-filter by content type ---
111
/**
 * WHAT: Keep only rows whose article has the requested content_type
 * WHY: The vector query cannot filter on content_type, so results are
 *      over-fetched and narrowed afterwards
 *
 * @param rows - Search rows; each must expose `article_url`.
 * @param contentType - Content type to keep; falsy means "no filtering".
 * @param database - Database handle used to look up each article's content_type.
 * @returns The input array itself when no filter is requested, otherwise a
 *   new array holding only the matching rows. Rows whose article is not
 *   found are dropped.
 */
function filterByContentType(rows, contentType, database) {
  if (contentType) {
    const lookup = getContentTypeStmt(database);
    const hasRequestedType = (row) =>
      lookup.get(row.article_url)?.content_type === contentType;
    return rows.filter(hasRequestedType);
  }
  return rows;
}
120
+ // --- search_articles ---
121
+ export async function searchArticles(query, limit = 5, contentType) {
39
122
  const database = getDb();
40
123
  const queryVec = await embedQuery(query);
124
+ // WHAT: Over-fetch when filtering by content_type, then post-filter
125
+ // WHY: sqlite-vec applies k before joins, so we can't pre-filter on content_type
126
+ const fetchK = contentType ? limit * 3 : limit;
41
127
  const rows = database
42
128
  .prepare(`
43
129
  SELECT
@@ -52,8 +138,9 @@ export async function searchArticles(query, limit = 5) {
52
138
  AND k = ?
53
139
  ORDER BY distance
54
140
  `)
55
- .all(queryVec, limit);
56
- return rows.map((row) => ({
141
+ .all(queryVec, fetchK);
142
+ const filtered = filterByContentType(rows, contentType, database);
143
+ return filtered.slice(0, limit).map((row) => ({
57
144
  title: row.article_title,
58
145
  section: row.section_heading,
59
146
  content: row.content,
@@ -62,9 +149,13 @@ export async function searchArticles(query, limit = 5) {
62
149
  relevance_score: Math.round((1 - row.distance / 2) * 1000) / 1000,
63
150
  }));
64
151
  }
65
- export async function getAnswer(question) {
152
+ // --- get_answer ---
153
+ export async function getAnswer(question, contentType) {
66
154
  const database = getDb();
67
155
  const queryVec = await embedQuery(question);
156
+ // WHAT: Over-fetch when filtering by content_type
157
+ // WHY: Same sqlite-vec k-before-join constraint as searchArticles
158
+ const fetchK = contentType ? 30 : 10;
68
159
  // Get the best matching chunk, preferring FAQ entries
69
160
  const rows = database
70
161
  .prepare(`
@@ -78,27 +169,29 @@ export async function getAnswer(question) {
78
169
  FROM vec_chunks
79
170
  LEFT JOIN chunks ON chunks.id = vec_chunks.chunk_id
80
171
  WHERE embedding MATCH ?
81
- AND k = 10
172
+ AND k = ?
82
173
  ORDER BY distance
83
174
  `)
84
- .all(queryVec);
85
- if (rows.length === 0) {
175
+ .all(queryVec, fetchK);
176
+ const filteredRows = filterByContentType(rows, contentType, database);
177
+ if (filteredRows.length === 0) {
86
178
  return {
87
179
  answer: "No relevant information found in the ASTGL knowledge base.",
88
180
  source_title: "",
89
181
  source_url: "",
90
182
  related_articles: [],
183
+ confidence_score: 0,
91
184
  };
92
185
  }
93
186
  // Prefer FAQ entries if they're among the top results (within 20% of best distance)
94
- const bestDistance = rows[0].distance;
95
- const faqCandidate = rows.find((r) => r.chunk_type === "faq" && r.distance <= bestDistance * 1.2);
96
- const best = faqCandidate || rows[0];
187
+ const bestDistance = filteredRows[0].distance;
188
+ const faqCandidate = filteredRows.find((r) => r.chunk_type === "faq" && r.distance <= bestDistance * 1.2);
189
+ const best = faqCandidate || filteredRows[0];
97
190
  // Collect related articles (unique, excluding the source)
98
191
  const seen = new Set();
99
192
  seen.add(best.article_url);
100
193
  const related = [];
101
- for (const row of rows) {
194
+ for (const row of filteredRows) {
102
195
  if (!seen.has(row.article_url)) {
103
196
  seen.add(row.article_url);
104
197
  related.push({ title: row.article_title, url: row.article_url });
@@ -113,34 +206,176 @@ export async function getAnswer(question) {
113
206
  if (aMatch)
114
207
  answer = aMatch[1];
115
208
  }
209
+ // WHAT: Cosine distance 0-2 → confidence 0-1
210
+ // WHY: Gives consumers a normalized quality signal for the answer
211
+ const confidence = Math.round((1 - best.distance / 2) * 1000) / 1000;
116
212
  return {
117
213
  answer,
118
214
  source_title: best.article_title,
119
215
  source_url: best.article_url,
120
216
  related_articles: related,
217
+ confidence_score: confidence,
121
218
  };
122
219
  }
220
+ // --- get_tutorial ---
221
+ // WHAT: Find tutorial/guide content and return ordered steps from section headings
222
+ // WHY: AI assistants asking "how do I X" benefit from structured step-by-step answers
223
/**
 * Find the article that best matches a how-to query and return its sections
 * as ordered steps.
 *
 * The query is embedded with `embedQuery` and matched against `vec_chunks`.
 * The article owning the nearest usable chunk is selected, then every
 * `section` chunk of that article is loaded in `article_order` and rendered
 * as one step (`**heading**` followed by the section content).
 *
 * @param {string} query - Free-text question describing what the user wants to do.
 * @returns {Promise<{title: string, url: string, description: string, steps: string[], confidence_score: number}>}
 *   The matched article and its steps. When nothing matches, `title`/`url` are
 *   empty, `steps` is `[]` and `confidence_score` is 0.
 */
export async function getTutorial(query) {
  const database = getDb();
  const queryVec = await embedQuery(query);
  // WHY: only the nearest hit is used to identify the article, so the query
  // selects just the identifying columns and joins nothing beyond `chunks`.
  const rows = database
    .prepare(`
      SELECT
        chunks.article_title,
        chunks.article_url,
        vec_chunks.distance
      FROM vec_chunks
      LEFT JOIN chunks ON chunks.id = vec_chunks.chunk_id
      WHERE embedding MATCH ?
        AND k = 30
      ORDER BY distance
    `)
    .all(queryVec);
  // WHY: the LEFT JOIN yields NULL chunk columns for a vector row whose chunk
  // is missing; such a row cannot identify an article, so skip past it.
  const best = rows.find((row) => row.article_url != null);
  if (best === undefined) {
    return {
      title: "",
      url: "",
      description: "No matching tutorials found.",
      steps: [],
      confidence_score: 0,
    };
  }
  // Tutorial steps are the sequential sections of the single best article.
  const articleChunks = database
    .prepare(`SELECT section_heading, content, chunk_type, article_order
      FROM chunks
      WHERE article_url = ? AND chunk_type = 'section'
      ORDER BY article_order`)
    .all(best.article_url);
  const article = database
    .prepare("SELECT description FROM articles WHERE url = ?")
    .get(best.article_url);
  const steps = articleChunks.map((c) => `**${c.section_heading}**\n${c.content}`);
  return {
    title: best.article_title,
    url: best.article_url,
    description: article?.description || "",
    steps,
    // Same distance-to-confidence mapping (1 - distance / 2, 3 decimals) used by the other search helpers.
    confidence_score: Math.round((1 - best.distance / 2) * 1000) / 1000,
  };
}
275
+ // --- compare_topics ---
276
+ // WHAT: Side-by-side comparison of two topics using vector search
277
+ // WHY: "X vs Y" queries are common in AI-assisted research
278
/**
 * Build a side-by-side comparison of two topics using vector search.
 *
 * Each topic is embedded and searched independently (top 5 chunks each). For
 * each side, up to three key points are collected from distinct
 * article/section pairs, with content truncated to 300 characters.
 *
 * @param {string} topicA - First topic to look up.
 * @param {string} topicB - Second topic to look up.
 * @returns {Promise<{topic_a: {title: string, url: string, key_points: string[]}, topic_b: {title: string, url: string, key_points: string[]}, confidence_score: number}>}
 *   Both sides plus a confidence derived from the weaker of the two best
 *   matches. A side with no results contributes the maximum distance (2), so
 *   the confidence is 0.
 */
export async function compareTopics(topicA, topicB) {
  const database = getDb();
  const [vecA, vecB] = await Promise.all([
    embedQuery(topicA),
    embedQuery(topicB),
  ]);
  // WHY: both sides run the same SQL, so prepare the statement once and
  // execute it once per embedding.
  const nearestChunks = database.prepare(`
      SELECT
        chunks.article_title,
        chunks.article_url,
        chunks.section_heading,
        chunks.content,
        vec_chunks.distance
      FROM vec_chunks
      LEFT JOIN chunks ON chunks.id = vec_chunks.chunk_id
      WHERE embedding MATCH ?
        AND k = 5
      ORDER BY distance
    `);
  // WHY: the LEFT JOIN yields NULL chunk columns for a vector row whose chunk
  // is missing; those rows have no content to quote and are dropped.
  const getTopChunks = (vec) =>
    nearestChunks.all(vec).filter((row) => row.content != null);
  const rowsA = getTopChunks(vecA);
  const rowsB = getTopChunks(vecB);
  const buildSide = (rows) => {
    if (rows.length === 0) {
      return { title: "No matching content", url: "", key_points: [] };
    }
    // Prefer different sections: repeated article/section pairs are skipped so
    // the key points are not three slices of the same section.
    const seen = new Set();
    const points = [];
    for (const row of rows) {
      const key = `${row.article_url}:${row.section_heading}`;
      if (seen.has(key)) {
        continue;
      }
      seen.add(key);
      const point = row.content.length > 300
        ? `${row.content.slice(0, 300)}...`
        : row.content;
      points.push(`**${row.section_heading}:** ${point}`);
      if (points.length >= 3) {
        break;
      }
    }
    return {
      title: rows[0].article_title,
      url: rows[0].article_url,
      key_points: points,
    };
  };
  // The comparison is only as trustworthy as its weaker side.
  const worstDistance = Math.max(rowsA[0]?.distance ?? 2, rowsB[0]?.distance ?? 2);
  return {
    topic_a: buildSide(rowsA),
    topic_b: buildSide(rowsB),
    confidence_score: Math.round((1 - worstDistance / 2) * 1000) / 1000,
  };
}
336
+ // --- get_latest ---
337
+ // WHAT: Return most recently added articles
338
+ // WHY: "What's new" queries help users discover fresh content
339
/**
 * Return the most recently added articles.
 *
 * Rows are ordered by `processed_at` descending (a missing value sorts as the
 * empty string), then by `rowid` descending as the fallback for articles
 * that have no `processed_at`.
 *
 * @param {number} [limit=5] - Maximum number of articles to return. Fractions
 *   are truncated, negative values are treated as 0, and non-numeric values
 *   fall back to 5.
 * @returns {Array<{title: string, description: string, url: string, content_type: string, added_at: (string|null)}>}
 *   One entry per article; `content_type` defaults to "article" and
 *   `added_at` mirrors `processed_at`.
 */
export function getLatest(limit = 5) {
  // WHY: the value is bound directly into LIMIT. SQLite treats a negative
  // LIMIT as "no upper bound" and raises an error for a value that is not
  // losslessly an integer, so normalise it before it reaches the statement.
  const requested = Number(limit);
  const rowLimit = Number.isFinite(requested)
    ? Math.max(0, Math.trunc(requested))
    : 5;
  const database = getDb();
  const rows = database
    .prepare(`SELECT title, description, url, content_type, processed_at
      FROM articles
      ORDER BY COALESCE(processed_at, '') DESC, rowid DESC
      LIMIT ?`)
    .all(rowLimit);
  return rows.map((r) => ({
    title: r.title,
    description: r.description,
    url: r.url,
    content_type: r.content_type || "article",
    added_at: r.processed_at,
  }));
}
357
+ // --- list_topics ---
358
+ // WHAT: List all topics with their content types and section headings
359
+ // WHY: Gives AI assistants an overview of available coverage
123
360
  export function listTopics() {
124
361
  const database = getDb();
125
- const articles = database
126
- .prepare("SELECT title, description, url FROM articles ORDER BY rowid")
127
- .all();
128
- // Extract topic keywords from section headings for each article
129
- const sectionsByUrl = new Map();
130
- const sections = database
131
- .prepare("SELECT DISTINCT article_url, section_heading FROM chunks WHERE chunk_type = 'section'")
362
+ // WHAT: Single query with GROUP_CONCAT instead of two queries + in-memory join
363
+ // WHY: Reduces to one DB round-trip and eliminates the Map construction overhead
364
+ const rows = database
365
+ .prepare(`SELECT a.title, a.description, a.url,
366
+ COALESCE(a.content_type, 'article') as content_type,
367
+ GROUP_CONCAT(DISTINCT c.section_heading) as sections
368
+ FROM articles a
369
+ LEFT JOIN chunks c ON c.article_url = a.url AND c.chunk_type = 'section'
370
+ GROUP BY a.url
371
+ ORDER BY a.rowid`)
132
372
  .all();
133
- for (const s of sections) {
134
- if (!sectionsByUrl.has(s.article_url)) {
135
- sectionsByUrl.set(s.article_url, []);
136
- }
137
- sectionsByUrl.get(s.article_url).push(s.section_heading);
138
- }
139
- return articles.map((a) => ({
140
- title: a.title,
141
- description: a.description,
142
- url: a.url,
143
- topics: sectionsByUrl.get(a.url) || [],
373
+ return rows.map((r) => ({
374
+ title: r.title,
375
+ description: r.description,
376
+ url: r.url,
377
+ content_type: r.content_type,
378
+ topics: r.sections ? r.sections.split(",") : [],
144
379
  }));
145
380
  }
146
381
  //# sourceMappingURL=search.js.map