mcp-astgl-knowledge 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +105 -0
- package/data/knowledge.db +0 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.js +84 -0
- package/dist/index.js.map +1 -0
- package/dist/ingest.d.ts +10 -0
- package/dist/ingest.js +269 -0
- package/dist/ingest.js.map +1 -0
- package/dist/search.d.ts +11 -0
- package/dist/search.js +146 -0
- package/dist/search.js.map +1 -0
- package/dist/types.d.ts +49 -0
- package/dist/types.js +3 -0
- package/dist/types.js.map +1 -0
- package/package.json +49 -0
package/README.md
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# mcp-astgl-knowledge
|
|
2
|
+
|
|
3
|
+
An MCP server that lets AI assistants search and cite articles from [As The Geek Learns](https://astgl.ai) — covering MCP servers, local AI, and AI automation.
|
|
4
|
+
|
|
5
|
+
When an AI assistant connects to this server, it gains access to authoritative answers about MCP, local LLMs, and AI workflows. Every response includes a source URL back to astgl.ai.
|
|
6
|
+
|
|
7
|
+
## Quick Start
|
|
8
|
+
|
|
9
|
+
### Claude Desktop
|
|
10
|
+
|
|
11
|
+
Add to your `claude_desktop_config.json`:
|
|
12
|
+
|
|
13
|
+
```json
|
|
14
|
+
{
|
|
15
|
+
"mcpServers": {
|
|
16
|
+
"astgl-knowledge": {
|
|
17
|
+
"command": "npx",
|
|
18
|
+
"args": ["-y", "mcp-astgl-knowledge"]
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
### Claude Code
|
|
25
|
+
|
|
26
|
+
Add to your project's `.mcp.json`:
|
|
27
|
+
|
|
28
|
+
```json
|
|
29
|
+
{
|
|
30
|
+
"mcpServers": {
|
|
31
|
+
"astgl-knowledge": {
|
|
32
|
+
"command": "npx",
|
|
33
|
+
"args": ["-y", "mcp-astgl-knowledge"]
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Tools
|
|
40
|
+
|
|
41
|
+
### `search_articles`
|
|
42
|
+
|
|
43
|
+
Search ASTGL articles by query. Returns ranked results with relevance scores and source URLs.
|
|
44
|
+
|
|
45
|
+
```
|
|
46
|
+
Input: { query: "how to build an MCP server", limit: 5 }
|
|
47
|
+
Output: Ranked article sections with title, content, URL, and relevance score
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### `get_answer`
|
|
51
|
+
|
|
52
|
+
Get a direct answer to a specific question. Prefers FAQ entries for concise responses.
|
|
53
|
+
|
|
54
|
+
```
|
|
55
|
+
Input: { question: "What is an MCP server?" }
|
|
56
|
+
Output: Direct answer with source article URL and related articles
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### `list_topics`
|
|
60
|
+
|
|
61
|
+
List all topics covered in the knowledge base.
|
|
62
|
+
|
|
63
|
+
```
|
|
64
|
+
Input: {}
|
|
65
|
+
Output: All articles with titles, descriptions, URLs, and section headings
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## How It Works
|
|
69
|
+
|
|
70
|
+
The knowledge base is pre-built from ASTGL articles using semantic embeddings (nomic-embed-text, 768 dimensions). Articles are chunked by section and FAQ entry, embedded, and stored in a SQLite database with sqlite-vec for vector similarity search.
|
|
71
|
+
|
|
72
|
+
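**Article embeddings are pre-computed** and shipped inside the npm package, so end users never need to run the ingestion step. Search queries, however, are embedded at runtime through Ollama, so the server needs Node.js plus a reachable Ollama instance with the `nomic-embed-text` model (default `http://localhost:11434`; override with the `OLLAMA_URL` and `EMBED_MODEL` environment variables).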
|
|
73
|
+
|
|
74
|
+
## For Maintainers
|
|
75
|
+
|
|
76
|
+
To rebuild the knowledge database after adding or updating articles:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
git clone https://github.com/jamescruce/mcp-astgl-knowledge.git
|
|
80
|
+
cd mcp-astgl-knowledge
|
|
81
|
+
npm install
|
|
82
|
+
npm run ingest # Requires Ollama with nomic-embed-text
|
|
83
|
+
npm run build
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
Set `ASTGL_ARTICLES_DIR` to point to your articles directory if it's not at the default location.
|
|
87
|
+
|
|
88
|
+
## Coverage
|
|
89
|
+
|
|
90
|
+
Currently indexes 10 articles covering:
|
|
91
|
+
|
|
92
|
+
- What MCP servers are and how they work
|
|
93
|
+
- Building your first MCP server
|
|
94
|
+
- Connecting MCP servers to Claude and ChatGPT
|
|
95
|
+
- Best MCP servers available now
|
|
96
|
+
- MCP servers vs traditional APIs
|
|
97
|
+
- Running AI models locally
|
|
98
|
+
- Hardware requirements for local LLMs
|
|
99
|
+
- Cost comparison: local vs cloud AI
|
|
100
|
+
- Security of local AI
|
|
101
|
+
- Automating business workflows with AI
|
|
102
|
+
|
|
103
|
+
## License
|
|
104
|
+
|
|
105
|
+
MIT
|
|
Binary file
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* MCP server for ASTGL knowledge base.
|
|
4
|
+
* Exposes 3 tools: search_articles, get_answer, list_topics
|
|
5
|
+
*
|
|
6
|
+
* WHAT: Lets any MCP-compatible AI assistant search and cite ASTGL articles
|
|
7
|
+
* WHY: Drives traffic and citations back to astgl.ai when AI answers questions
|
|
8
|
+
*/
|
|
9
|
+
export {};
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* MCP server for ASTGL knowledge base.
|
|
4
|
+
* Exposes 3 tools: search_articles, get_answer, list_topics
|
|
5
|
+
*
|
|
6
|
+
* WHAT: Lets any MCP-compatible AI assistant search and cite ASTGL articles
|
|
7
|
+
* WHY: Drives traffic and citations back to astgl.ai when AI answers questions
|
|
8
|
+
*/
|
|
9
|
+
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
10
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
11
|
+
import { z } from "zod";
|
|
12
|
+
import { searchArticles, getAnswer, listTopics } from "./search.js";
|
|
13
|
+
// Name and version handed to the McpServer constructor as the server's identity.
const serverInfo = {
    name: "mcp-astgl-knowledge",
    version: "1.0.0",
};
// Usage guidance for connecting assistants: which tool fits which kind of
// request, and a reminder to cite the source URL.
const serverOptions = {
    instructions: "This server provides authoritative knowledge about MCP servers, local AI, and AI automation from As The Geek Learns (astgl.ai). Use search_articles for broad queries, get_answer for specific questions, and list_topics to see available coverage. Always include the source URL when citing results.",
};
const server = new McpServer(serverInfo, serverOptions);
|
|
19
|
+
// Tool: search_articles
// Runs searchArticles(query, limit) and renders each hit as a markdown section
// (numbered title and section heading, source URL, relevance score, content),
// with hits separated by horizontal rules.
server.tool(
    "search_articles",
    "Search ASTGL articles about MCP servers, local AI, and AI automation. Returns relevant article sections with source URLs for citation.",
    {
        query: z.string().describe("Search query (e.g., 'how to build an MCP server')"),
        limit: z
            .number()
            .min(1)
            .max(20)
            .default(5)
            .describe("Maximum number of results to return (default: 5)"),
    },
    async ({ query, limit }) => {
        const hits = await searchArticles(query, limit);
        let text;
        if (hits.length === 0) {
            text = "No matching articles found for that query.";
        }
        else {
            const rendered = [];
            hits.forEach((hit, index) => {
                // Each piece stays a template literal so undefined/null values
                // are stringified the same way a single template would do it.
                rendered.push([
                    `### ${index + 1}. ${hit.title} — ${hit.section}`,
                    `**Source:** ${hit.url}`,
                    `**Relevance:** ${hit.relevance_score}`,
                    "",
                    `${hit.content}`,
                ].join("\n"));
            });
            text = rendered.join("\n\n---\n\n");
        }
        return {
            content: [{ type: "text", text }],
        };
    },
);
|
|
46
|
+
// Tool: get_answer
// Calls getAnswer(question) and renders the answer, followed by the source
// title/URL (when a source title is present) and a bullet list of related
// articles (when there are any).
server.tool(
    "get_answer",
    "Get a direct answer to a question about MCP servers, local AI, or AI automation from ASTGL's knowledge base. Returns the best matching answer with source URL and related articles.",
    {
        question: z
            .string()
            .describe("A specific question (e.g., 'What is an MCP server?')"),
    },
    async ({ question }) => {
        const reply = await getAnswer(question);
        // Collect the fragments in order and concatenate once at the end.
        const fragments = [`**Answer:** ${reply.answer}\n\n`];
        if (reply.source_title) {
            fragments.push(`**Source:** ${reply.source_title}\n**URL:** ${reply.source_url}\n`);
        }
        if (reply.related_articles.length > 0) {
            fragments.push(`\n**Related articles:**\n`);
            for (const article of reply.related_articles) {
                fragments.push(`- [${article.title}](${article.url})\n`);
            }
        }
        return {
            content: [{ type: "text", text: fragments.join("") }],
        };
    },
);
|
|
66
|
+
// Tool: list_topics
// Renders every entry returned by listTopics() as a short markdown block:
// title, description, URL and a comma-separated list of section headings
// ("N/A" when the joined list is empty).
server.tool(
    "list_topics",
    "List all topics covered in the ASTGL knowledge base. Shows article titles, descriptions, URLs, and section headings.",
    {},
    async () => {
        const blocks = [];
        for (const entry of listTopics()) {
            const sectionList = entry.topics.join(", ") || "N/A";
            blocks.push(`### ${entry.title}\n${entry.description}\n**URL:** ${entry.url}\n**Sections:** ${sectionList}`);
        }
        return {
            content: [{ type: "text", text: blocks.join("\n\n") }],
        };
    },
);
|
|
75
|
+
/**
 * Entry point: connects the MCP server to a stdio transport and logs a
 * startup notice.
 */
async function main() {
    await server.connect(new StdioServerTransport());
    // Logged via console.error (stderr) — presumably because stdout is used
    // by the stdio transport; confirm against the SDK before changing.
    console.error("ASTGL Knowledge MCP Server running on stdio");
}

// Any startup failure is reported and turned into a non-zero exit code.
const reportFatal = (err) => {
    console.error("Fatal error:", err);
    process.exit(1);
};
main().catch(reportFatal);
|
|
84
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":";AACA;;;;;;GAMG;AAEH,OAAO,EAAE,SAAS,EAAE,MAAM,yCAAyC,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,MAAM,2CAA2C,CAAC;AACjF,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAAE,cAAc,EAAE,SAAS,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEpE,MAAM,MAAM,GAAG,IAAI,SAAS,CAC1B;IACE,IAAI,EAAE,qBAAqB;IAC3B,OAAO,EAAE,OAAO;CACjB,EACD;IACE,YAAY,EACV,ySAAyS;CAC5S,CACF,CAAC;AAEF,MAAM,CAAC,IAAI,CACT,iBAAiB,EACjB,wIAAwI,EACxI;IACE,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mDAAmD,CAAC;IAC/E,KAAK,EAAE,CAAC;SACL,MAAM,EAAE;SACR,GAAG,CAAC,CAAC,CAAC;SACN,GAAG,CAAC,EAAE,CAAC;SACP,OAAO,CAAC,CAAC,CAAC;SACV,QAAQ,CAAC,kDAAkD,CAAC;CAChE,EACD,KAAK,EAAE,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE;IACzB,MAAM,OAAO,GAAG,MAAM,cAAc,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;IAEnD,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO;YACL,OAAO,EAAE;gBACP;oBACE,IAAI,EAAE,MAAe;oBACrB,IAAI,EAAE,4CAA4C;iBACnD;aACF;SACF,CAAC;IACJ,CAAC;IAED,MAAM,SAAS,GAAG,OAAO;SACtB,GAAG,CACF,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACP,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,KAAK,MAAM,CAAC,CAAC,OAAO,iBAAiB,CAAC,CAAC,GAAG,oBAAoB,CAAC,CAAC,eAAe,OAAO,CAAC,CAAC,OAAO,EAAE,CACvH;SACA,IAAI,CAAC,aAAa,CAAC,CAAC;IAEvB,OAAO;QACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;KACtD,CAAC;AACJ,CAAC,CACF,CAAC;AAEF,MAAM,CAAC,IAAI,CACT,YAAY,EACZ,qLAAqL,EACrL;IACE,QAAQ,EAAE,CAAC;SACR,MAAM,EAAE;SACR,QAAQ,CAAC,sDAAsD,CAAC;CACpE,EACD,KAAK,EAAE,EAAE,QAAQ,EAAE,EAAE,EAAE;IACrB,MAAM,MAAM,GAAG,MAAM,SAAS,CAAC,QAAQ,CAAC,CAAC;IAEzC,IAAI,IAAI,GAAG,eAAe,MAAM,CAAC,MAAM,MAAM,CAAC;IAE9C,IAAI,MAAM,CAAC,YAAY,EAAE,CAAC;QACxB,IAAI,IAAI,eAAe,MAAM,CAAC,YAAY,cAAc,MAAM,CAAC,UAAU,IAAI,CAAC;IAChF,CAAC;IAED,IAAI,MAAM,CAAC,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvC,IAAI,IAAI,2BAA2B,CAAC;QACpC,KAAK,MAAM,OAAO,IAAI,MAAM,CAAC,gBAAgB,EAAE,CAAC;YAC9C,IAAI,IAAI,MAAM,OAAO,CAAC,KAAK,KAAK,OAAO,CAAC,GAAG,KAAK,CAAC;QACnD,CAAC;IACH,CAAC;IAED,OAAO;QACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,CAAC;KAC3C,CAAC;AACJ,CAAC,CAC
F,CAAC;AAEF,MAAM,CAAC,IAAI,CACT,aAAa,EACb,sHAAsH,EACtH,EAAE,EACF,KAAK,IAAI,EAAE;IACT,MAAM,MAAM,GAAG,UAAU,EAAE,CAAC;IAE5B,MAAM,SAAS,GAAG,MAAM;SACrB,GAAG,CACF,CAAC,CAAC,EAAE,EAAE,CACJ,OAAO,CAAC,CAAC,KAAK,KAAK,CAAC,CAAC,WAAW,cAAc,CAAC,CAAC,GAAG,mBAAmB,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,KAAK,EAAE,CACvG;SACA,IAAI,CAAC,MAAM,CAAC,CAAC;IAEhB,OAAO;QACL,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAe,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;KACtD,CAAC;AACJ,CAAC,CACF,CAAC;AAEF,KAAK,UAAU,IAAI;IACjB,MAAM,SAAS,GAAG,IAAI,oBAAoB,EAAE,CAAC;IAC7C,MAAM,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;IAChC,OAAO,CAAC,KAAK,CAAC,6CAA6C,CAAC,CAAC;AAC/D,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACnB,OAAO,CAAC,KAAK,CAAC,cAAc,EAAE,GAAG,CAAC,CAAC;IACnC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
package/dist/ingest.d.ts
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
#!/usr/bin/env tsx
|
|
2
|
+
/**
|
|
3
|
+
* Build-time ingestion script.
|
|
4
|
+
* Reads ASTGL articles, chunks by h2 section + FAQ entries,
|
|
5
|
+
* embeds via Ollama (nomic-embed-text), and writes to SQLite + sqlite-vec.
|
|
6
|
+
*
|
|
7
|
+
* Usage: npm run ingest
|
|
8
|
+
* Requires: Ollama running with nomic-embed-text model
|
|
9
|
+
*/
|
|
10
|
+
export {};
|
package/dist/ingest.js
ADDED
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
#!/usr/bin/env tsx
|
|
2
|
+
/**
|
|
3
|
+
* Build-time ingestion script.
|
|
4
|
+
* Reads ASTGL articles, chunks by h2 section + FAQ entries,
|
|
5
|
+
* embeds via Ollama (nomic-embed-text), and writes to SQLite + sqlite-vec.
|
|
6
|
+
*
|
|
7
|
+
* Usage: npm run ingest
|
|
8
|
+
* Requires: Ollama running with nomic-embed-text model
|
|
9
|
+
*/
|
|
10
|
+
import { readFileSync, readdirSync, existsSync, mkdirSync } from "fs";
|
|
11
|
+
import { join, basename } from "path";
|
|
12
|
+
import Database from "better-sqlite3";
|
|
13
|
+
import * as sqliteVec from "sqlite-vec";
|
|
14
|
+
import { EMBEDDING_DIM, BASE_URL } from "./types.js";
|
|
15
|
+
const ARTICLES_DIR = process.env.ASTGL_ARTICLES_DIR ||
|
|
16
|
+
join(process.env.HOME, "Projects/astgl-site/src/content/answers");
|
|
17
|
+
const DB_PATH = join(import.meta.dirname, "..", "data", "knowledge.db");
|
|
18
|
+
const OLLAMA_URL = process.env.OLLAMA_URL || "http://localhost:11434";
|
|
19
|
+
const EMBED_MODEL = process.env.EMBED_MODEL || "nomic-embed-text";
|
|
20
|
+
// WHAT: Parse YAML-ish frontmatter without adding a dependency
// WHY: Our frontmatter is simple enough that we don't need gray-matter
/**
 * Splits a markdown document into its frontmatter fields and body.
 *
 * Supported frontmatter shapes:
 *   - top-level `key: value` / `key: "value"` pairs (`order` is parsed as an int)
 *   - a `faq:` list of `- question: "..."` / `answer: "..."` pairs
 *   - a `next:` block of indented `key: value` pairs
 *
 * A leading UTF-8 BOM is ignored, CRLF line endings are treated like LF, and
 * the closing `---` may be the very last line of the file.
 *
 * @param {string} raw - Full file contents.
 * @returns {{ frontmatter: object, body: string }} Parsed fields (always with
 *   a `faq` array; `next` is undefined when no next block was found) and the
 *   text after the closing delimiter.
 * @throws {Error} "No frontmatter found" when the file does not start with a
 *   `---` delimited block.
 */
function parseFrontmatter(raw) {
    // Normalize first so the delimiter regex and the per-line regexes below
    // (which anchor on `$`) are not defeated by a BOM or trailing "\r".
    const text = raw.replace(/^\uFEFF/, "").replace(/\r\n/g, "\n");
    const fmMatch = text.match(/^---\n([\s\S]*?)\n---(?:\n([\s\S]*))?$/);
    if (!fmMatch)
        throw new Error("No frontmatter found");
    const fmText = fmMatch[1];
    // The body group is absent when the file ends right after the closing "---".
    const body = fmMatch[2] ?? "";
    // Parse simple key-value pairs
    const fm = {};
    const faqEntries = [];
    let currentFaqEntry = null;
    let inFaq = false;
    let inNext = false;
    const nextObj = {};
    for (const line of fmText.split("\n")) {
        // Block openers switch the parser mode.
        if (line === "faq:") {
            inFaq = true;
            inNext = false;
            continue;
        }
        if (line === "next:") {
            inNext = true;
            inFaq = false;
            continue;
        }
        if (inFaq) {
            const itemMatch = line.match(/^\s+-\s+question:\s+"(.+)"$/);
            if (itemMatch) {
                // A new question closes the previous entry.
                if (currentFaqEntry)
                    faqEntries.push(currentFaqEntry);
                currentFaqEntry = { question: itemMatch[1] };
                continue;
            }
            const answerMatch = line.match(/^\s+answer:\s+"(.+)"$/);
            if (answerMatch && currentFaqEntry) {
                currentFaqEntry.answer = answerMatch[1];
                continue;
            }
            // Non-indented line means we've left the faq block
            if (!line.startsWith(" ") && line.trim() !== "") {
                if (currentFaqEntry)
                    faqEntries.push(currentFaqEntry);
                currentFaqEntry = null;
                inFaq = false;
                // Fall through: this line is a regular top-level key.
            }
            else {
                continue;
            }
        }
        if (inNext) {
            const nextMatch = line.match(/^\s+(\w+):\s+"?(.+?)"?$/);
            if (nextMatch) {
                nextObj[nextMatch[1]] = nextMatch[2];
                continue;
            }
            if (!line.startsWith(" ") && line.trim() !== "") {
                inNext = false;
                // Fall through: this line is a regular top-level key.
            }
            else {
                continue;
            }
        }
        const kvMatch = line.match(/^(\w+):\s+"?(.+?)"?$/);
        if (kvMatch) {
            const [, key, value] = kvMatch;
            fm[key] = key === "order" ? Number.parseInt(value, 10) : value;
        }
    }
    // Flush last FAQ entry
    if (currentFaqEntry)
        faqEntries.push(currentFaqEntry);
    return {
        frontmatter: {
            ...fm,
            faq: faqEntries,
            next: Object.keys(nextObj).length > 0 ? nextObj : undefined,
        },
        body,
    };
}
|
|
101
|
+
// WHAT: Split article body into chunks by h2 headings
// WHY: h2 sections are self-contained (300-800 tokens), ideal for retrieval
/**
 * Turns one parsed article into retrieval chunks: an optional "Introduction"
 * chunk (text before the first `## ` heading), one chunk per non-empty h2
 * section, and one chunk per FAQ entry.
 *
 * @param {object} frontmatter - Parsed frontmatter; `title`, `order` and `faq` are read.
 * @param {string} body - Markdown body of the article.
 * @param {string} slug - Article slug, appended to BASE_URL to form the chunk URL.
 * @returns {object[]} Chunks in the order intro, sections, FAQ entries.
 */
function chunkArticle(frontmatter, body, slug) {
    const articleUrl = `${BASE_URL}/${slug}`;
    // All chunks of an article share title, URL and order.
    const makeChunk = (sectionHeading, chunkType, content) => ({
        articleTitle: frontmatter.title,
        articleUrl,
        articleOrder: frontmatter.order,
        sectionHeading,
        chunkType,
        content,
    });
    const chunks = [];
    // Everything before the first "## " line is the intro; the rest are sections.
    const [preamble, ...sectionTexts] = body.split(/^## /m);
    const introText = preamble.trim();
    if (introText) {
        chunks.push(makeChunk("Introduction", "intro", introText));
    }
    for (const sectionText of sectionTexts) {
        const lineBreak = sectionText.indexOf("\n");
        // A heading with nothing after it has no content to index.
        if (lineBreak === -1) {
            continue;
        }
        const sectionBody = sectionText.slice(lineBreak + 1).trim();
        if (!sectionBody) {
            continue;
        }
        const heading = sectionText.slice(0, lineBreak).trim();
        chunks.push(makeChunk(heading, "section", `## ${heading}\n\n${sectionBody}`));
    }
    // FAQ entries as separate chunks
    for (const entry of frontmatter.faq) {
        chunks.push(makeChunk(entry.question, "faq", `Q: ${entry.question}\nA: ${entry.answer}`));
    }
    return chunks;
}
|
|
150
|
+
// WHAT: Embed text via Ollama API
// WHY: nomic-embed-text gives 768-dim vectors, already running locally
/**
 * Requests embeddings for a batch of texts from Ollama's /api/embed endpoint.
 *
 * @param {string[]} texts - Texts to embed, sent as the request's `input`.
 * @returns {Promise<number[][]>} The `embeddings` array from the response,
 *   one vector per input.
 * @throws {Error} When the HTTP status is not OK, or when the response does
 *   not contain exactly one embedding per input.
 */
async function embed(texts) {
    const resp = await fetch(`${OLLAMA_URL}/api/embed`, {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({ model: EMBED_MODEL, input: texts }),
    });
    if (!resp.ok) {
        throw new Error(`Ollama embed failed: ${resp.status} ${await resp.text()}`);
    }
    const data = await resp.json();
    const embeddings = data?.embeddings;
    // Callers pair embeddings with inputs by index, so a missing or short
    // array must fail here rather than silently misalign vectors later.
    const expected = Array.isArray(texts) ? texts.length : 1;
    const received = Array.isArray(embeddings) ? embeddings.length : "no";
    if (received !== expected) {
        throw new Error(`Ollama embed returned ${received} embeddings for ${expected} input(s)`);
    }
    return embeddings;
}
|
|
164
|
+
/**
 * Build-time ingestion pipeline:
 *   1. read every `.md` file in ARTICLES_DIR and split it into chunks,
 *   2. embed all chunk contents through Ollama in batches,
 *   3. recreate the `articles`, `chunks` and `vec_chunks` tables in DB_PATH
 *      and insert everything.
 *
 * Progress is logged with console.error. The database is only touched after
 * all embeddings have been produced and their count verified, and the
 * connection is always closed, even when a later step fails.
 *
 * @throws {Error} When the articles directory is missing, an article cannot
 *   be parsed, embedding fails, or the embedding count does not match the
 *   chunk count.
 */
async function main() {
    console.error(`Reading articles from: ${ARTICLES_DIR}`);
    if (!existsSync(ARTICLES_DIR)) {
        throw new Error(`Articles directory not found: ${ARTICLES_DIR}`);
    }
    // Read all article files
    const files = readdirSync(ARTICLES_DIR).filter((f) => f.endsWith(".md"));
    console.error(`Found ${files.length} articles`);
    // Parse and chunk all articles
    const allChunks = [];
    const articles = [];
    for (const file of files) {
        const raw = readFileSync(join(ARTICLES_DIR, file), "utf-8");
        const slug = basename(file, ".md");
        let parsed;
        try {
            parsed = parseFrontmatter(raw);
        }
        catch (err) {
            // Name the offending file; the parser's own message has no context.
            throw new Error(`Failed to parse ${file}: ${err.message}`, { cause: err });
        }
        const { frontmatter, body } = parsed;
        const chunks = chunkArticle(frontmatter, body, slug);
        allChunks.push(...chunks);
        articles.push({
            title: frontmatter.title,
            description: frontmatter.description,
            url: `${BASE_URL}/${slug}`,
            slug,
        });
        console.error(` ${file}: ${chunks.length} chunks`);
    }
    console.error(`\nTotal chunks: ${allChunks.length}`);
    console.error(`Embedding via ${EMBED_MODEL}...`);
    // Embed in batches of 20
    const BATCH_SIZE = 20;
    const allEmbeddings = [];
    for (let i = 0; i < allChunks.length; i += BATCH_SIZE) {
        const batch = allChunks.slice(i, i + BATCH_SIZE);
        const embeddings = await embed(batch.map((c) => c.content));
        allEmbeddings.push(...embeddings);
        console.error(` Embedded ${Math.min(i + BATCH_SIZE, allChunks.length)}/${allChunks.length}`);
    }
    // Vectors are matched to chunks by index below, so verify the counts
    // before the existing tables are dropped.
    if (allEmbeddings.length !== allChunks.length) {
        throw new Error(`Embedding count mismatch: got ${allEmbeddings.length} embeddings for ${allChunks.length} chunks`);
    }
    // Initialize SQLite database
    const dataDir = join(import.meta.dirname, "..", "data");
    if (!existsSync(dataDir))
        mkdirSync(dataDir, { recursive: true });
    const db = new Database(DB_PATH);
    try {
        sqliteVec.load(db);
        console.error(`\nWriting to: ${DB_PATH}`);
        // Create tables
        db.exec("DROP TABLE IF EXISTS chunks");
        db.exec("DROP TABLE IF EXISTS vec_chunks");
        db.exec("DROP TABLE IF EXISTS articles");
        db.exec(`
    CREATE TABLE articles (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      title TEXT NOT NULL,
      description TEXT NOT NULL,
      url TEXT NOT NULL,
      slug TEXT NOT NULL UNIQUE
    )
  `);
        db.exec(`
    CREATE TABLE chunks (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      article_title TEXT NOT NULL,
      article_url TEXT NOT NULL,
      article_order INTEGER NOT NULL,
      section_heading TEXT NOT NULL,
      chunk_type TEXT NOT NULL,
      content TEXT NOT NULL
    )
  `);
        db.exec(`
    CREATE VIRTUAL TABLE vec_chunks USING vec0(
      chunk_id INTEGER PRIMARY KEY,
      embedding float[${EMBEDDING_DIM}] distance_metric=cosine
    )
  `);
        const insertArticle = db.prepare("INSERT INTO articles (title, description, url, slug) VALUES (?, ?, ?, ?)");
        const insertChunk = db.prepare(`INSERT INTO chunks (article_title, article_url, article_order, section_heading, chunk_type, content)
     VALUES (?, ?, ?, ?, ?, ?)`);
        const insertVec = db.prepare("INSERT INTO vec_chunks (chunk_id, embedding) VALUES (?, ?)");
        // One transaction for all rows: either every article, chunk and
        // vector is written, or none are.
        const insertAll = db.transaction(() => {
            // Insert articles
            for (const article of articles) {
                insertArticle.run(article.title, article.description, article.url, article.slug);
            }
            // Insert chunks and vectors
            for (let i = 0; i < allChunks.length; i++) {
                const chunk = allChunks[i];
                const result = insertChunk.run(chunk.articleTitle, chunk.articleUrl, chunk.articleOrder, chunk.sectionHeading, chunk.chunkType, chunk.content);
                const chunkId = result.lastInsertRowid;
                // The vector row is keyed by the rowid of the chunk just inserted.
                insertVec.run(BigInt(chunkId), new Float32Array(allEmbeddings[i]));
            }
        });
        insertAll();
        // Verify
        const chunkCount = db.prepare("SELECT COUNT(*) as count FROM chunks").get().count;
        const vecCount = db.prepare("SELECT COUNT(*) as count FROM vec_chunks").get().count;
        const articleCount = db.prepare("SELECT COUNT(*) as count FROM articles").get().count;
        console.error(`\nDone!`);
        console.error(` Articles: ${articleCount}`);
        console.error(` Chunks: ${chunkCount}`);
        console.error(` Vectors: ${vecCount}`);
    }
    finally {
        // Release the file handle on success and on failure alike.
        db.close();
    }
}
main().catch((err) => {
    console.error("Ingestion failed:", err);
    process.exit(1);
});
|
|
269
|
+
//# sourceMappingURL=ingest.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ingest.js","sourceRoot":"","sources":["../src/ingest.ts"],"names":[],"mappings":";AACA;;;;;;;GAOG;AAEH,OAAO,EAAE,YAAY,EAAE,WAAW,EAAE,UAAU,EAAE,SAAS,EAAE,MAAM,IAAI,CAAC;AACtE,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,MAAM,MAAM,CAAC;AACtC,OAAO,QAAQ,MAAM,gBAAgB,CAAC;AACtC,OAAO,KAAK,SAAS,MAAM,YAAY,CAAC;AACxC,OAAO,EAA6B,aAAa,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAEhF,MAAM,YAAY,GAChB,OAAO,CAAC,GAAG,CAAC,kBAAkB;IAC9B,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,IAAK,EAAE,yCAAyC,CAAC,CAAC;AAErE,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,cAAc,CAAC,CAAC;AAExE,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,wBAAwB,CAAC;AACtE,MAAM,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,kBAAkB,CAAC;AAElE,+DAA+D;AAC/D,uEAAuE;AACvE,SAAS,gBAAgB,CAAC,GAAW;IAInC,MAAM,OAAO,GAAG,GAAG,CAAC,KAAK,CAAC,mCAAmC,CAAC,CAAC;IAC/D,IAAI,CAAC,OAAO;QAAE,MAAM,IAAI,KAAK,CAAC,sBAAsB,CAAC,CAAC;IAEtD,MAAM,MAAM,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;IAC1B,MAAM,IAAI,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;IAExB,+BAA+B;IAC/B,MAAM,EAAE,GAA4B,EAAE,CAAC;IACvC,MAAM,UAAU,GAAgD,EAAE,CAAC;IACnE,IAAI,eAAe,GAAkC,IAAI,CAAC;IAC1D,IAAI,KAAK,GAAG,KAAK,CAAC;IAClB,IAAI,MAAM,GAAG,KAAK,CAAC;IACnB,MAAM,OAAO,GAA2B,EAAE,CAAC;IAE3C,KAAK,MAAM,IAAI,IAAI,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QACtC,IAAI,IAAI,KAAK,MAAM,EAAE,CAAC;YACpB,KAAK,GAAG,IAAI,CAAC;YACb,MAAM,GAAG,KAAK,CAAC;YACf,SAAS;QACX,CAAC;QACD,IAAI,IAAI,KAAK,OAAO,EAAE,CAAC;YACrB,MAAM,GAAG,IAAI,CAAC;YACd,KAAK,GAAG,KAAK,CAAC;YACd,SAAS;QACX,CAAC;QAED,IAAI,KAAK,EAAE,CAAC;YACV,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,6BAA6B,CAAC,CAAC;YAC5D,IAAI,SAAS,EAAE,CAAC;gBACd,IAAI,eAAe;oBAAE,UAAU,CAAC,IAAI,CAAC,eAAuD,CAAC,CAAC;gBAC9F,eAAe,GAAG,EAAE,QAAQ,EAAE,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;gBAC7C,SAAS;YACX,CAAC;YACD,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAC;YACxD,IAAI,WAAW,IAAI,eAAe,EAAE,CAAC;gBACnC,eAAe,CAAC,MAAM,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;gBACxC,SAAS;YACX,CAAC;YACD,mDAAmD;YACnD,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;gBACjD,IAAI,eAAe;oBAAE,UAAU,CAAC,IAA
I,CAAC,eAAuD,CAAC,CAAC;gBAC9F,eAAe,GAAG,IAAI,CAAC;gBACvB,KAAK,GAAG,KAAK,CAAC;YAChB,CAAC;iBAAM,CAAC;gBACN,SAAS;YACX,CAAC;QACH,CAAC;QAED,IAAI,MAAM,EAAE,CAAC;YACX,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,yBAAyB,CAAC,CAAC;YACxD,IAAI,SAAS,EAAE,CAAC;gBACd,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;gBACrC,SAAS;YACX,CAAC;YACD,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC;gBACjD,MAAM,GAAG,KAAK,CAAC;YACjB,CAAC;iBAAM,CAAC;gBACN,SAAS;YACX,CAAC;QACH,CAAC;QAED,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,sBAAsB,CAAC,CAAC;QACnD,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,CAAC,EAAE,GAAG,EAAE,KAAK,CAAC,GAAG,OAAO,CAAC;YAC/B,EAAE,CAAC,GAAG,CAAC,GAAG,GAAG,KAAK,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;QAC1D,CAAC;IACH,CAAC;IAED,uBAAuB;IACvB,IAAI,eAAe;QAAE,UAAU,CAAC,IAAI,CAAC,eAAuD,CAAC,CAAC;IAE9F,OAAO;QACL,WAAW,EAAE;YACX,GAAG,EAAE;YACL,GAAG,EAAE,UAAU;YACf,IAAI,EAAE,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAE,OAA2C,CAAC,CAAC,CAAC,SAAS;SAC3E;QACvB,IAAI;KACL,CAAC;AACJ,CAAC;AAED,sDAAsD;AACtD,4EAA4E;AAC5E,SAAS,YAAY,CACnB,WAA+B,EAC/B,IAAY,EACZ,IAAY;IAEZ,MAAM,MAAM,GAAY,EAAE,CAAC;IAC3B,MAAM,UAAU,GAAG,GAAG,QAAQ,IAAI,IAAI,EAAE,CAAC;IAEzC,uBAAuB;IACvB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAErC,qDAAqD;IACrD,MAAM,KAAK,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;IACjC,IAAI,KAAK,EAAE,CAAC;QACV,MAAM,CAAC,IAAI,CAAC;YACV,YAAY,EAAE,WAAW,CAAC,KAAK;YAC/B,UAAU;YACV,YAAY,EAAE,WAAW,CAAC,KAAK;YAC/B,cAAc,EAAE,cAAc;YAC9B,SAAS,EAAE,OAAO;YAClB,OAAO,EAAE,KAAK;SACf,CAAC,CAAC;IACL,CAAC;IAED,qBAAqB;IACrB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACzC,MAAM,OAAO,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,UAAU,GAAG,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;QACzC,MAAM,OAAO,GAAG,UAAU,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;QACzF,MAAM,OAAO,GAAG,UAAU,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,C
AAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAE9E,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,CAAC,IAAI,CAAC;gBACV,YAAY,EAAE,WAAW,CAAC,KAAK;gBAC/B,UAAU;gBACV,YAAY,EAAE,WAAW,CAAC,KAAK;gBAC/B,cAAc,EAAE,OAAO;gBACvB,SAAS,EAAE,SAAS;gBACpB,OAAO,EAAE,MAAM,OAAO,OAAO,OAAO,EAAE;aACvC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,iCAAiC;IACjC,KAAK,MAAM,GAAG,IAAI,WAAW,CAAC,GAAG,EAAE,CAAC;QAClC,MAAM,CAAC,IAAI,CAAC;YACV,YAAY,EAAE,WAAW,CAAC,KAAK;YAC/B,UAAU;YACV,YAAY,EAAE,WAAW,CAAC,KAAK;YAC/B,cAAc,EAAE,GAAG,CAAC,QAAQ;YAC5B,SAAS,EAAE,KAAK;YAChB,OAAO,EAAE,MAAM,GAAG,CAAC,QAAQ,QAAQ,GAAG,CAAC,MAAM,EAAE;SAChD,CAAC,CAAC;IACL,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,kCAAkC;AAClC,uEAAuE;AACvE,KAAK,UAAU,KAAK,CAAC,KAAe;IAClC,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,GAAG,UAAU,YAAY,EAAE;QAClD,MAAM,EAAE,MAAM;QACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;QAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,WAAW,EAAE,KAAK,EAAE,KAAK,EAAE,CAAC;KAC3D,CAAC,CAAC;IAEH,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,wBAAwB,IAAI,CAAC,MAAM,IAAI,MAAM,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IAC9E,CAAC;IAED,MAAM,IAAI,GAAG,CAAC,MAAM,IAAI,CAAC,IAAI,EAAE,CAA+B,CAAC;IAC/D,OAAO,IAAI,CAAC,UAAU,CAAC;AACzB,CAAC;AAED,KAAK,UAAU,IAAI;IACjB,OAAO,CAAC,KAAK,CAAC,0BAA0B,YAAY,EAAE,CAAC,CAAC;IAExD,IAAI,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;QAC9B,MAAM,IAAI,KAAK,CAAC,iCAAiC,YAAY,EAAE,CAAC,CAAC;IACnE,CAAC;IAED,yBAAyB;IACzB,MAAM,KAAK,GAAG,WAAW,CAAC,YAAY,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IACzE,OAAO,CAAC,KAAK,CAAC,SAAS,KAAK,CAAC,MAAM,WAAW,CAAC,CAAC;IAEhD,+BAA+B;IAC/B,MAAM,SAAS,GAAY,EAAE,CAAC;IAC9B,MAAM,QAAQ,GAA6E,EAAE,CAAC;IAE9F,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,GAAG,GAAG,YAAY,CAAC,IAAI,CAAC,YAAY,EAAE,IAAI,CAAC,EAAE,OAAO,CAAC,CAAC;QAC5D,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;QACnC,MAAM,EAAE,WAAW,EAAE,IAAI,EAAE,GAAG,gBAAgB,CAAC,GAAG,CAAC,CAAC;QACpD,MAAM,MAAM,GAAG,YAAY,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,CAAC,CAAC;QACrD,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,CAAC;QAC1B,QAAQ,CAAC,IAAI,CAAC;YACZ,KAAK,EAAE,WAAW
,CAAC,KAAK;YACxB,WAAW,EAAE,WAAW,CAAC,WAAW;YACpC,GAAG,EAAE,GAAG,QAAQ,IAAI,IAAI,EAAE;YAC1B,IAAI;SACL,CAAC,CAAC;QACH,OAAO,CAAC,KAAK,CAAC,KAAK,IAAI,KAAK,MAAM,CAAC,MAAM,SAAS,CAAC,CAAC;IACtD,CAAC;IAED,OAAO,CAAC,KAAK,CAAC,mBAAmB,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC;IACrD,OAAO,CAAC,KAAK,CAAC,iBAAiB,WAAW,KAAK,CAAC,CAAC;IAEjD,yBAAyB;IACzB,MAAM,UAAU,GAAG,EAAE,CAAC;IACtB,MAAM,aAAa,GAAe,EAAE,CAAC;IACrC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,IAAI,UAAU,EAAE,CAAC;QACtD,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,CAAC;QACjD,MAAM,UAAU,GAAG,MAAM,KAAK,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC;QAC5D,aAAa,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;QAClC,OAAO,CAAC,KAAK,CAAC,cAAc,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,UAAU,EAAE,SAAS,CAAC,MAAM,CAAC,IAAI,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC;IAChG,CAAC;IAED,6BAA6B;IAC7B,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,MAAM,CAAC,CAAC;IACxD,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC;QAAE,SAAS,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAElE,MAAM,EAAE,GAAG,IAAI,QAAQ,CAAC,OAAO,CAAC,CAAC;IACjC,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEnB,OAAO,CAAC,KAAK,CAAC,iBAAiB,OAAO,EAAE,CAAC,CAAC;IAE1C,gBAAgB;IAChB,EAAE,CAAC,IAAI,CAAC,6BAA6B,CAAC,CAAC;IACvC,EAAE,CAAC,IAAI,CAAC,iCAAiC,CAAC,CAAC;IAC3C,EAAE,CAAC,IAAI,CAAC,+BAA+B,CAAC,CAAC;IAEzC,EAAE,CAAC,IAAI,CAAC;;;;;;;;GAQP,CAAC,CAAC;IAEH,EAAE,CAAC,IAAI,CAAC;;;;;;;;;;GAUP,CAAC,CAAC;IAEH,EAAE,CAAC,IAAI,CAAC;;;wBAGc,aAAa;;GAElC,CAAC,CAAC;IAEH,kBAAkB;IAClB,MAAM,aAAa,GAAG,EAAE,CAAC,OAAO,CAC9B,0EAA0E,CAC3E,CAAC;IACF,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,aAAa,CAAC,GAAG,CAAC,OAAO,CAAC,KAAK,EAAE,OAAO,CAAC,WAAW,EAAE,OAAO,CAAC,GAAG,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC;IACnF,CAAC;IAED,4BAA4B;IAC5B,MAAM,WAAW,GAAG,EAAE,CAAC,OAAO,CAC5B;+BAC2B,CAC5B,CAAC;IACF,MAAM,SAAS,GAAG,EAAE,CAAC,OAAO,CAC1B,4DAA4D,CAC7D,CAAC;IAEF,MAAM,SAAS,GAAG,EAAE,CAAC,WAAW,CAAC,GAAG,EAAE;QACpC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,MAAM,KAAK,GAAG,SAAS
,CAAC,CAAC,CAAC,CAAC;YAC3B,MAAM,MAAM,GAAG,WAAW,CAAC,GAAG,CAC5B,KAAK,CAAC,YAAY,EAClB,KAAK,CAAC,UAAU,EAChB,KAAK,CAAC,YAAY,EAClB,KAAK,CAAC,cAAc,EACpB,KAAK,CAAC,SAAS,EACf,KAAK,CAAC,OAAO,CACd,CAAC;YACF,MAAM,OAAO,GAAG,MAAM,CAAC,eAAe,CAAC;YACvC,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,OAAiB,CAAC,EAAE,IAAI,YAAY,CAAC,aAAa,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC/E,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,SAAS,EAAE,CAAC;IAEZ,SAAS;IACT,MAAM,UAAU,GAAI,EAAE,CAAC,OAAO,CAAC,sCAAsC,CAAC,CAAC,GAAG,EAAwB,CAAC,KAAK,CAAC;IACzG,MAAM,QAAQ,GAAI,EAAE,CAAC,OAAO,CAAC,0CAA0C,CAAC,CAAC,GAAG,EAAwB,CAAC,KAAK,CAAC;IAC3G,MAAM,YAAY,GAAI,EAAE,CAAC,OAAO,CAAC,wCAAwC,CAAC,CAAC,GAAG,EAAwB,CAAC,KAAK,CAAC;IAE7G,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;IACzB,OAAO,CAAC,KAAK,CAAC,eAAe,YAAY,EAAE,CAAC,CAAC;IAC7C,OAAO,CAAC,KAAK,CAAC,aAAa,UAAU,EAAE,CAAC,CAAC;IACzC,OAAO,CAAC,KAAK,CAAC,cAAc,QAAQ,EAAE,CAAC,CAAC;IAExC,EAAE,CAAC,KAAK,EAAE,CAAC;AACb,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,EAAE,EAAE;IACnB,OAAO,CAAC,KAAK,CAAC,mBAAmB,EAAE,GAAG,CAAC,CAAC;IACxC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;AAClB,CAAC,CAAC,CAAC"}
|
package/dist/search.d.ts
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vector search logic against the pre-built knowledge.db.
|
|
3
|
+
* At runtime, queries are embedded via Ollama and matched against stored vectors.
|
|
4
|
+
*
|
|
5
|
+
* WHAT: Provides search_articles, get_answer, and list_topics functions
|
|
6
|
+
* WHY: Separating search logic from MCP server keeps concerns clean
|
|
7
|
+
*/
|
|
8
|
+
import { SearchResult, AnswerResult, TopicEntry } from "./types.js";
|
|
9
|
+
/**
 * Embed `query` and return the closest knowledge-base chunks, nearest first.
 *
 * @param query Free-text search query.
 * @param limit Number of nearest chunks to request (the implementation defaults to 5).
 * @returns One entry per matched chunk, carrying the article title, section
 *   heading, chunk content, article URL and a relevance score derived from
 *   the vector distance.
 */
export declare function searchArticles(query: string, limit?: number): Promise<SearchResult[]>;
|
|
10
|
+
/**
 * Answer a question from the single best-matching chunk.
 *
 * The implementation embeds `question`, fetches the 10 nearest chunks and
 * picks the closest one, preferring an FAQ chunk when its distance is within
 * 20% of the best distance. Up to three other articles among the hits are
 * returned as related reading.
 *
 * @param question Natural-language question.
 * @returns The answer text with its source title/URL; a fixed
 *   "no relevant information" answer with empty source fields when nothing matches.
 */
export declare function getAnswer(question: string): Promise<AnswerResult>;
|
|
11
|
+
/**
 * List every article in the knowledge base together with the headings of its
 * `section` chunks, which are reported as that article's topics.
 *
 * @returns One entry per row of the `articles` table; `topics` is empty when
 *   an article has no section chunks.
 */
export declare function listTopics(): TopicEntry[];
|
package/dist/search.js
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vector search logic against the pre-built knowledge.db.
|
|
3
|
+
* At runtime, queries are embedded via Ollama and matched against stored vectors.
|
|
4
|
+
*
|
|
5
|
+
* WHAT: Provides search_articles, get_answer, and list_topics functions
|
|
6
|
+
* WHY: Separating search logic from MCP server keeps concerns clean
|
|
7
|
+
*/
|
|
8
|
+
import { join } from "path";
|
|
9
|
+
import { existsSync } from "fs";
|
|
10
|
+
import Database from "better-sqlite3";
|
|
11
|
+
import * as sqliteVec from "sqlite-vec";
|
|
12
|
+
// Location of the pre-built knowledge database: ../data/knowledge.db relative to this module's directory.
const DB_PATH = join(import.meta.dirname, "..", "data", "knowledge.db");
|
|
13
|
+
// Base URL of the Ollama server; the OLLAMA_URL environment variable overrides the localhost default.
const OLLAMA_URL = process.env.OLLAMA_URL || "http://localhost:11434";
|
|
14
|
+
// Name of the embedding model sent to Ollama; the EMBED_MODEL environment variable overrides the default.
const EMBED_MODEL = process.env.EMBED_MODEL || "nomic-embed-text";
|
|
15
|
+
// Database handle created on the first getDb() call and reused afterwards.
let db = null;
|
|
16
|
+
/**
 * Return the shared read-only handle to the knowledge database, opening it
 * on first use.
 *
 * The handle is stored in the module-level `db` only after the sqlite-vec
 * extension has loaded. If loading fails, the half-initialised handle is
 * closed and nothing is cached, so the next call starts from scratch instead
 * of returning a connection that cannot run vector queries.
 *
 * @returns {import("better-sqlite3").Database} The cached database handle.
 * @throws {Error} If no file exists at DB_PATH, or if opening the database
 *   or loading the sqlite-vec extension fails.
 */
function getDb() {
    if (db) {
        return db;
    }
    if (!existsSync(DB_PATH)) {
        throw new Error(`Knowledge database not found at ${DB_PATH}. Run 'npm run ingest' first.`);
    }
    const handle = new Database(DB_PATH, { readonly: true });
    try {
        sqliteVec.load(handle);
    }
    catch (err) {
        // Release the file handle before propagating the original error.
        handle.close();
        throw err;
    }
    db = handle;
    return db;
}
|
|
26
|
+
/**
 * Embed a piece of text with the configured Ollama embedding model.
 *
 * @param {string} text - Text to embed.
 * @returns {Promise<Float32Array>} The first embedding in Ollama's response.
 * @throws {Error} If Ollama cannot be reached, answers with a non-2xx status,
 *   or returns a body without a non-empty first embedding.
 */
async function embedQuery(text) {
    // Tolerate a trailing slash in OLLAMA_URL so the path never becomes "//api/embed".
    const endpoint = `${OLLAMA_URL.replace(/\/+$/, "")}/api/embed`;
    let resp;
    try {
        resp = await fetch(endpoint, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({ model: EMBED_MODEL, input: text }),
        });
    }
    catch (err) {
        // fetch() rejects with a bare "fetch failed" when the server is down; say which server.
        throw new Error(`Could not reach Ollama at ${endpoint}: ${err?.message ?? err}`, { cause: err });
    }
    if (!resp.ok) {
        throw new Error(`Ollama embed failed: ${resp.status} ${await resp.text()}`);
    }
    const data = await resp.json();
    const embedding = data?.embeddings?.[0];
    // Without this check a missing embedding would silently become a zero-length vector.
    if (!Array.isArray(embedding) || embedding.length === 0) {
        throw new Error(`Ollama embed returned no embedding for model ${EMBED_MODEL}`);
    }
    return new Float32Array(embedding);
}
|
|
38
|
+
/**
 * Find the knowledge-base chunks closest to a free-text query.
 *
 * The query is embedded with embedQuery() and matched against the
 * `vec_chunks` index; each hit is joined to its row in `chunks`.
 *
 * @param {string} query - Text to search for.
 * @param {number} [limit=5] - Number of nearest chunks to request. Fractions
 *   are truncated, non-finite values fall back to 5, and anything below 1
 *   yields an empty result without contacting Ollama.
 * @returns {Promise<Array<{title: string, section: string, content: string, url: string, relevance_score: number}>>}
 *   Hits ordered by ascending distance.
 * @throws {Error} Whatever getDb() or embedQuery() throw.
 */
export async function searchArticles(query, limit = 5) {
    // The value is bound straight into the KNN `k` constraint, so make it a whole, positive count.
    const k = Number.isFinite(limit) ? Math.trunc(limit) : 5;
    if (k < 1) {
        return [];
    }
    const database = getDb();
    const queryVec = await embedQuery(query);
    const rows = database
        .prepare(`
      SELECT
        chunks.article_title,
        chunks.section_heading,
        chunks.content,
        chunks.article_url,
        vec_chunks.distance
      FROM vec_chunks
      LEFT JOIN chunks ON chunks.id = vec_chunks.chunk_id
      WHERE embedding MATCH ?
        AND k = ?
      ORDER BY distance
    `)
        .all(queryVec, k);
    return rows.map((row) => {
        // cosine distance ranges 0-2; convert to 0-1 relevance score
        // NOTE(review): assumes vec_chunks uses cosine distance — confirm against the ingest schema.
        const score = Math.round((1 - row.distance / 2) * 1000) / 1000;
        return {
            title: row.article_title,
            section: row.section_heading,
            content: row.content,
            url: row.article_url,
            // Clamp so the score stays within 0-1 even if a distance falls outside 0-2.
            relevance_score: Math.min(1, Math.max(0, score)),
        };
    });
}
|
|
65
|
+
/**
 * Answer a question from the single best-matching chunk in the knowledge base.
 *
 * The question is embedded, the 10 nearest chunks are fetched, and the closest
 * one is used — except that an FAQ chunk wins when its distance is within 20%
 * of the best distance. Up to three other articles among the hits are listed
 * as related reading.
 *
 * @param {string} question - Natural-language question.
 * @returns {Promise<{answer: string, source_title: string, source_url: string, related_articles: Array<{title: string, url: string}>}>}
 *   The answer with its source; a fixed "no relevant information" answer with
 *   empty source fields when the index returns nothing.
 * @throws {Error} Whatever getDb() or embedQuery() throw.
 */
export async function getAnswer(question) {
    const database = getDb();
    const queryVec = await embedQuery(question);
    // Get the best matching chunk, preferring FAQ entries
    const rows = database
        .prepare(`
      SELECT
        chunks.article_title,
        chunks.section_heading,
        chunks.content,
        chunks.article_url,
        chunks.chunk_type,
        vec_chunks.distance
      FROM vec_chunks
      LEFT JOIN chunks ON chunks.id = vec_chunks.chunk_id
      WHERE embedding MATCH ?
        AND k = 10
      ORDER BY distance
    `)
        .all(queryVec);
    if (rows.length === 0) {
        return {
            answer: "No relevant information found in the ASTGL knowledge base.",
            source_title: "",
            source_url: "",
            related_articles: [],
        };
    }
    // Prefer FAQ entries if they're among the top results (within 20% of best distance)
    const bestDistance = rows[0].distance;
    const faqCandidate = rows.find((r) => r.chunk_type === "faq" && r.distance <= bestDistance * 1.2);
    const best = faqCandidate || rows[0];
    // Collect related articles (unique, excluding the source)
    const seen = new Set([best.article_url]);
    const related = [];
    for (const row of rows) {
        if (!seen.has(row.article_url)) {
            seen.add(row.article_url);
            related.push({ title: row.article_title, url: row.article_url });
        }
        if (related.length >= 3) {
            break;
        }
    }
    // Extract answer text — for FAQ chunks, just use the answer portion
    let answer = best.content;
    if (best.chunk_type === "faq") {
        // Capture everything after the "A: " marker, not just its first line,
        // so multi-line answers are returned in full.
        // NOTE(review): assumes nothing but the answer follows "A: " in an FAQ chunk — confirm against ingest.
        const aMatch = answer.match(/^A: ([\s\S]+)/m);
        if (aMatch) {
            answer = aMatch[1].trimEnd();
        }
    }
    return {
        answer,
        source_title: best.article_title,
        source_url: best.article_url,
        related_articles: related,
    };
}
|
|
123
|
+
/**
 * List every article in the knowledge base with the headings of its
 * `section` chunks, reported as that article's topics.
 *
 * @returns {Array<{title: string, description: string, url: string, topics: string[]}>}
 *   One entry per row of `articles`, in rowid order; `topics` is empty when
 *   an article has no section chunks.
 * @throws {Error} Whatever getDb() throws.
 */
export function listTopics() {
    const database = getDb();
    const articles = database
        .prepare("SELECT title, description, url FROM articles ORDER BY rowid")
        .all();
    // Extract topic keywords from section headings for each article.
    // GROUP BY de-duplicates headings shared by several chunks, and
    // ORDER BY MIN(id) gives a defined order (first appearance) instead of
    // the unspecified order of a bare SELECT DISTINCT.
    const sections = database
        .prepare(`
      SELECT article_url, section_heading
      FROM chunks
      WHERE chunk_type = 'section'
      GROUP BY article_url, section_heading
      ORDER BY MIN(id)
    `)
        .all();
    const sectionsByUrl = new Map();
    for (const { article_url: articleUrl, section_heading: heading } of sections) {
        const headings = sectionsByUrl.get(articleUrl);
        if (headings) {
            headings.push(heading);
        }
        else {
            sectionsByUrl.set(articleUrl, [heading]);
        }
    }
    return articles.map(({ title, description, url }) => ({
        title,
        description,
        url,
        topics: sectionsByUrl.get(url) ?? [],
    }));
}
|
|
146
|
+
//# sourceMappingURL=search.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"search.js","sourceRoot":"","sources":["../src/search.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;AAChC,OAAO,QAAQ,MAAM,gBAAgB,CAAC;AACtC,OAAO,KAAK,SAAS,MAAM,YAAY,CAAC;AAGxC,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,cAAc,CAAC,CAAC;AACxE,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,UAAU,IAAI,wBAAwB,CAAC;AACtE,MAAM,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC,WAAW,IAAI,kBAAkB,CAAC;AAElE,IAAI,EAAE,GAAuC,IAAI,CAAC;AAElD,SAAS,KAAK;IACZ,IAAI,CAAC,EAAE,EAAE,CAAC;QACR,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;YACzB,MAAM,IAAI,KAAK,CACb,mCAAmC,OAAO,+BAA+B,CAC1E,CAAC;QACJ,CAAC;QACD,EAAE,GAAG,IAAI,QAAQ,CAAC,OAAO,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QAC/C,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACrB,CAAC;IACD,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,KAAK,UAAU,UAAU,CAAC,IAAY;IACpC,MAAM,IAAI,GAAG,MAAM,KAAK,CAAC,GAAG,UAAU,YAAY,EAAE;QAClD,MAAM,EAAE,MAAM;QACd,OAAO,EAAE,EAAE,cAAc,EAAE,kBAAkB,EAAE;QAC/C,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,WAAW,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC;KAC1D,CAAC,CAAC;IAEH,IAAI,CAAC,IAAI,CAAC,EAAE,EAAE,CAAC;QACb,MAAM,IAAI,KAAK,CAAC,wBAAwB,IAAI,CAAC,MAAM,IAAI,MAAM,IAAI,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;IAC9E,CAAC;IAED,MAAM,IAAI,GAAG,CAAC,MAAM,IAAI,CAAC,IAAI,EAAE,CAA+B,CAAC;IAC/D,OAAO,IAAI,YAAY,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;AAC9C,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,KAAa,EACb,QAAgB,CAAC;IAEjB,MAAM,QAAQ,GAAG,KAAK,EAAE,CAAC;IACzB,MAAM,QAAQ,GAAG,MAAM,UAAU,CAAC,KAAK,CAAC,CAAC;IAEzC,MAAM,IAAI,GAAG,QAAQ;SAClB,OAAO,CACN;;;;;;;;;;;;KAYD,CACA;SACA,GAAG,CAAC,QAAQ,EAAE,KAAK,CAMpB,CAAC;IAEH,OAAO,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACxB,KAAK,EAAE,GAAG,CAAC,aAAa;QACxB,OAAO,EAAE,GAAG,CAAC,eAAe;QAC5B,OAAO,EAAE,GAAG,CAAC,OAAO;QACpB,GAAG,EAAE,GAAG,CAAC,WAAW;QACpB,6DAA6D;QAC7D,eAAe,EAAE,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,QAAQ,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,IAAI;KAClE,CAAC,CAAC,CAAC;AACN,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,SAAS,CAAC,QAAgB;IAC9C,MAAM
,QAAQ,GAAG,KAAK,EAAE,CAAC;IACzB,MAAM,QAAQ,GAAG,MAAM,UAAU,CAAC,QAAQ,CAAC,CAAC;IAE5C,sDAAsD;IACtD,MAAM,IAAI,GAAG,QAAQ;SAClB,OAAO,CACN;;;;;;;;;;;;;KAaD,CACA;SACA,GAAG,CAAC,QAAQ,CAOb,CAAC;IAEH,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO;YACL,MAAM,EAAE,4DAA4D;YACpE,YAAY,EAAE,EAAE;YAChB,UAAU,EAAE,EAAE;YACd,gBAAgB,EAAE,EAAE;SACrB,CAAC;IACJ,CAAC;IAED,oFAAoF;IACpF,MAAM,YAAY,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;IACtC,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAC5B,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,KAAK,KAAK,IAAI,CAAC,CAAC,QAAQ,IAAI,YAAY,GAAG,GAAG,CAClE,CAAC;IACF,MAAM,IAAI,GAAG,YAAY,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC;IAErC,0DAA0D;IAC1D,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IAC3B,MAAM,OAAO,GAA0C,EAAE,CAAC;IAC1D,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC;YAC/B,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;YAC1B,OAAO,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,GAAG,CAAC,aAAa,EAAE,GAAG,EAAE,GAAG,CAAC,WAAW,EAAE,CAAC,CAAC;QACnE,CAAC;QACD,IAAI,OAAO,CAAC,MAAM,IAAI,CAAC;YAAE,MAAM;IACjC,CAAC;IAED,oEAAoE;IACpE,IAAI,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC;IAC1B,IAAI,IAAI,CAAC,UAAU,KAAK,KAAK,EAAE,CAAC;QAC9B,MAAM,MAAM,GAAG,MAAM,CAAC,KAAK,CAAC,YAAY,CAAC,CAAC;QAC1C,IAAI,MAAM;YAAE,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IACjC,CAAC;IAED,OAAO;QACL,MAAM;QACN,YAAY,EAAE,IAAI,CAAC,aAAa;QAChC,UAAU,EAAE,IAAI,CAAC,WAAW;QAC5B,gBAAgB,EAAE,OAAO;KAC1B,CAAC;AACJ,CAAC;AAED,MAAM,UAAU,UAAU;IACxB,MAAM,QAAQ,GAAG,KAAK,EAAE,CAAC;IAEzB,MAAM,QAAQ,GAAG,QAAQ;SACtB,OAAO,CAAC,6DAA6D,CAAC;SACtE,GAAG,EAAgE,CAAC;IAEvE,gEAAgE;IAChE,MAAM,aAAa,GAAG,IAAI,GAAG,EAAoB,CAAC;IAClD,MAAM,QAAQ,GAAG,QAAQ;SACtB,OAAO,CACN,uFAAuF,CACxF;SACA,GAAG,EAA6D,CAAC;IAEpE,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAC,EAAE,CAAC;YACtC,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,EAAE,EAAE,CAAC,CAAC;QACvC,CAAC;QACD,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,WAAW,CAAE,CAAC,IAAI,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC;IAC5D,CAAC;IAED,OAAO,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC
,EAAE,EAAE,CAAC,CAAC;QAC1B,KAAK,EAAE,CAAC,CAAC,KAAK;QACd,WAAW,EAAE,CAAC,CAAC,WAAW;QAC1B,GAAG,EAAE,CAAC,CAAC,GAAG;QACV,MAAM,EAAE,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,IAAI,EAAE;KACvC,CAAC,CAAC,CAAC;AACN,CAAC"}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
 * Metadata fields of an article: title, description, date, author, series,
 * order, a list of FAQ question/answer pairs, and an optional link to the
 * next article.
 * NOTE(review): presumably the parsed front matter of an article file read
 * during ingest — confirm against src/ingest.ts.
 */
export interface ArticleFrontmatter {
|
|
2
|
+
title: string;
|
|
3
|
+
description: string;
|
|
4
|
+
date: string;
|
|
5
|
+
author: string;
|
|
6
|
+
series: string;
|
|
7
|
+
order: number;
|
|
8
|
+
faq: Array<{
|
|
9
|
+
question: string;
|
|
10
|
+
answer: string;
|
|
11
|
+
}>;
|
|
12
|
+
next?: {
|
|
13
|
+
slug: string;
|
|
14
|
+
title: string;
|
|
15
|
+
};
|
|
16
|
+
}
|
|
17
|
+
/**
 * One piece of article content together with the article it belongs to;
 * `chunkType` marks it as an intro, a section, or an FAQ entry.
 * NOTE(review): appears to correspond to a row of the `chunks` table queried
 * by search.js — confirm against src/ingest.ts.
 */
export interface Chunk {
|
|
18
|
+
id?: number;
|
|
19
|
+
articleTitle: string;
|
|
20
|
+
articleUrl: string;
|
|
21
|
+
articleOrder: number;
|
|
22
|
+
sectionHeading: string;
|
|
23
|
+
chunkType: "intro" | "section" | "faq";
|
|
24
|
+
content: string;
|
|
25
|
+
}
|
|
26
|
+
/**
 * One hit returned by `searchArticles`: the matched chunk's article title,
 * section heading, content and article URL, plus a relevance score computed
 * from the vector distance.
 */
export interface SearchResult {
|
|
27
|
+
title: string;
|
|
28
|
+
section: string;
|
|
29
|
+
content: string;
|
|
30
|
+
url: string;
|
|
31
|
+
relevance_score: number;
|
|
32
|
+
}
|
|
33
|
+
/**
 * Result of `getAnswer`: the answer text, the title and URL of the article it
 * came from, and other articles that also matched the question.
 */
export interface AnswerResult {
|
|
34
|
+
answer: string;
|
|
35
|
+
source_title: string;
|
|
36
|
+
source_url: string;
|
|
37
|
+
related_articles: Array<{
|
|
38
|
+
title: string;
|
|
39
|
+
url: string;
|
|
40
|
+
}>;
|
|
41
|
+
}
|
|
42
|
+
/**
 * One article as returned by `listTopics`: its title, description and URL,
 * with the article's section headings listed as `topics`.
 */
export interface TopicEntry {
|
|
43
|
+
title: string;
|
|
44
|
+
description: string;
|
|
45
|
+
url: string;
|
|
46
|
+
topics: string[];
|
|
47
|
+
}
|
|
48
|
+
/** NOTE(review): presumably the length of each stored embedding vector — confirm against src/ingest.ts. */
export declare const EMBEDDING_DIM = 768;
|
|
49
|
+
/** NOTE(review): presumably the prefix from which article URLs are built during ingest — confirm against src/ingest.ts. */
export declare const BASE_URL = "https://astgl.ai/answers";
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AA2CA,MAAM,CAAC,MAAM,aAAa,GAAG,GAAG,CAAC;AACjC,MAAM,CAAC,MAAM,QAAQ,GAAG,0BAA0B,CAAC"}
|
package/package.json
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "mcp-astgl-knowledge",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "MCP server for searching and citing ASTGL articles about MCP servers, local AI, and AI automation",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"mcp-astgl-knowledge": "dist/index.js"
|
|
8
|
+
},
|
|
9
|
+
"files": [
|
|
10
|
+
"dist",
|
|
11
|
+
"data/knowledge.db",
|
|
12
|
+
"README.md"
|
|
13
|
+
],
|
|
14
|
+
"scripts": {
|
|
15
|
+
"build": "tsc",
|
|
16
|
+
"ingest": "tsx src/ingest.ts",
|
|
17
|
+
"start": "node dist/index.js",
|
|
18
|
+
"dev": "tsx src/index.ts",
|
|
19
|
+
"prepublishOnly": "npm run build"
|
|
20
|
+
},
|
|
21
|
+
"repository": {
|
|
22
|
+
"type": "git",
|
|
23
|
+
"url": "git+https://github.com/jamescruce/mcp-astgl-knowledge.git"
|
|
24
|
+
},
|
|
25
|
+
"homepage": "https://astgl.ai",
|
|
26
|
+
"keywords": [
|
|
27
|
+
"mcp",
|
|
28
|
+
"mcp-server",
|
|
29
|
+
"model-context-protocol",
|
|
30
|
+
"ai",
|
|
31
|
+
"local-ai",
|
|
32
|
+
"knowledge-base",
|
|
33
|
+
"astgl"
|
|
34
|
+
],
|
|
35
|
+
"author": "James Cruce",
|
|
36
|
+
"license": "MIT",
|
|
37
|
+
"dependencies": {
|
|
38
|
+
"@modelcontextprotocol/sdk": "^1.12.1",
|
|
39
|
+
"better-sqlite3": "^11.9.1",
|
|
40
|
+
"sqlite-vec": "^0.1.6",
|
|
41
|
+
"zod": "^3.24.4"
|
|
42
|
+
},
|
|
43
|
+
"devDependencies": {
|
|
44
|
+
"@types/better-sqlite3": "^7.6.13",
|
|
45
|
+
"@types/node": "^22.15.3",
|
|
46
|
+
"tsx": "^4.19.4",
|
|
47
|
+
"typescript": "^5.8.3"
|
|
48
|
+
}
|
|
49
|
+
}
|