mcp-astgl-knowledge 1.0.2 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/README.md +146 -38
  2. package/data/knowledge.db +0 -0
  3. package/dist/alerts.d.ts +22 -0
  4. package/dist/alerts.js +433 -0
  5. package/dist/alerts.js.map +1 -0
  6. package/dist/citation-test.d.ts +14 -0
  7. package/dist/citation-test.js +298 -0
  8. package/dist/citation-test.js.map +1 -0
  9. package/dist/daily-report.d.ts +15 -0
  10. package/dist/daily-report.js +441 -0
  11. package/dist/daily-report.js.map +1 -0
  12. package/dist/discover.js +3 -1
  13. package/dist/discover.js.map +1 -1
  14. package/dist/freshness.d.ts +20 -0
  15. package/dist/freshness.js +508 -0
  16. package/dist/freshness.js.map +1 -0
  17. package/dist/index.d.ts +6 -1
  18. package/dist/index.js +253 -14
  19. package/dist/index.js.map +1 -1
  20. package/dist/ingest-projects.d.ts +16 -0
  21. package/dist/ingest-projects.js +196 -0
  22. package/dist/ingest-projects.js.map +1 -0
  23. package/dist/knowledge-db.d.ts +13 -0
  24. package/dist/knowledge-db.js +156 -0
  25. package/dist/knowledge-db.js.map +1 -0
  26. package/dist/pipeline.d.ts +12 -0
  27. package/dist/pipeline.js +83 -0
  28. package/dist/pipeline.js.map +1 -0
  29. package/dist/query-log.d.ts +15 -0
  30. package/dist/query-log.js +93 -0
  31. package/dist/query-log.js.map +1 -0
  32. package/dist/rate-limit.d.ts +34 -0
  33. package/dist/rate-limit.js +206 -0
  34. package/dist/rate-limit.js.map +1 -0
  35. package/dist/related-articles.d.ts +15 -0
  36. package/dist/related-articles.js +217 -0
  37. package/dist/related-articles.js.map +1 -0
  38. package/dist/search.d.ts +13 -4
  39. package/dist/search.js +274 -39
  40. package/dist/search.js.map +1 -1
  41. package/dist/structure.d.ts +11 -0
  42. package/dist/structure.js +451 -0
  43. package/dist/structure.js.map +1 -0
  44. package/dist/types.d.ts +65 -0
  45. package/dist/types.js.map +1 -1
  46. package/package.json +10 -2
package/README.md CHANGED
@@ -1,8 +1,8 @@
1
1
  # mcp-astgl-knowledge
2
2
 
3
- An MCP server that lets AI assistants search and cite articles from [As The Geek Learns](https://astgl.ai) — covering MCP servers, local AI, and AI automation.
3
+ An MCP server that lets AI assistants search and cite content from [As The Geek Learns](https://astgl.ai) — covering MCP servers, local AI, AI automation, and ASTGL project documentation.
4
4
 
5
- When an AI assistant connects to this server, it gains access to authoritative answers about MCP, local LLMs, and AI workflows. Every response includes a source URL back to astgl.ai.
5
+ When an AI assistant connects to this server, it gains access to 49 indexed entries (articles, tutorials, comparisons, guides, and project docs). Every response includes source URLs back to astgl.ai.
6
6
 
7
7
  ## Quick Start
8
8
 
@@ -36,69 +36,177 @@ Add to your project's `.mcp.json`:
36
36
  }
37
37
  ```
38
38
 
39
+ ### Cursor / Generic MCP Client
40
+
41
+ ```json
42
+ {
43
+ "mcpServers": {
44
+ "astgl-knowledge": {
45
+ "command": "npx",
46
+ "args": ["-y", "mcp-astgl-knowledge"]
47
+ }
48
+ }
49
+ }
50
+ ```
51
+
52
+ ### With Registration (500 queries/day)
53
+
54
+ Register via the `register` tool to get an API key, then add it to your config:
55
+
56
+ ```json
57
+ {
58
+ "mcpServers": {
59
+ "astgl-knowledge": {
60
+ "command": "npx",
61
+ "args": ["-y", "mcp-astgl-knowledge"],
62
+ "env": {
63
+ "ASTGL_API_KEY": "astgl_your_api_key_here"
64
+ }
65
+ }
66
+ }
67
+ }
68
+ ```
69
+
39
70
  ## Tools
40
71
 
41
72
  ### `search_articles`
42
73
 
43
- Search ASTGL articles by query. Returns ranked results with relevance scores and source URLs.
74
+ Search the knowledge base by query. Returns ranked results with relevance scores and source URLs.
44
75
 
45
- ```
46
- Input: { query: "how to build an MCP server", limit: 5 }
47
- Output: Ranked article sections with title, content, URL, and relevance score
48
- ```
76
+ | Parameter | Type | Required | Description |
77
+ |-----------|------|----------|-------------|
78
+ | `query` | string | Yes | Search query (e.g., "how to build an MCP server") |
79
+ | `limit` | number | No | Max results, 1-20 (default: 5) |
80
+ | `content_type` | string | No | Filter by type: article, tutorial, faq, comparison, guide, newsletter, project |
49
81
 
50
82
  ### `get_answer`
51
83
 
52
84
  Get a direct answer to a specific question. Prefers FAQ entries for concise responses.
53
85
 
54
- ```
55
- Input: { question: "What is an MCP server?" }
56
- Output: Direct answer with source article URL and related articles
57
- ```
86
+ | Parameter | Type | Required | Description |
87
+ |-----------|------|----------|-------------|
88
+ | `question` | string | Yes | A specific question (e.g., "What is an MCP server?") |
89
+ | `content_type` | string | No | Filter by content type |
90
+
91
+ ### `get_tutorial`
92
+
93
+ Get step-by-step instructions from tutorial and guide content.
94
+
95
+ | Parameter | Type | Required | Description |
96
+ |-----------|------|----------|-------------|
97
+ | `query` | string | Yes | What you want to learn (e.g., "setup Ollama on Mac") |
98
+
99
+ ### `compare_topics`
100
+
101
+ Side-by-side comparison of two topics.
102
+
103
+ | Parameter | Type | Required | Description |
104
+ |-----------|------|----------|-------------|
105
+ | `topic_a` | string | Yes | First topic |
106
+ | `topic_b` | string | Yes | Second topic |
107
+
108
+ ### `get_latest`
109
+
110
+ Get the most recently added content.
111
+
112
+ | Parameter | Type | Required | Description |
113
+ |-----------|------|----------|-------------|
114
+ | `limit` | number | No | Max results, 1-20 (default: 5) |
58
115
 
59
116
  ### `list_topics`
60
117
 
61
- List all topics covered in the knowledge base.
118
+ Browse all topics in the knowledge base with content types and section headings.
62
119
 
63
- ```
64
- Input: {}
65
- Output: All articles with titles, descriptions, URLs, and section headings
66
- ```
120
+ ### `register`
121
+
122
+ Register your email to unlock 500 queries/day (up from 50).
123
+
124
+ | Parameter | Type | Required | Description |
125
+ |-----------|------|----------|-------------|
126
+ | `email` | string | Yes | Your email address |
127
+
128
+ ## Content Types
129
+
130
+ | Type | Count | Description |
131
+ |------|-------|-------------|
132
+ | article | 29 | Informational content about MCP, local AI, automation |
133
+ | project | 9 | ASTGL project documentation (KlockThingy, Revri, Cortex, etc.) |
134
+ | tutorial | 8 | Step-by-step how-to guides |
135
+ | comparison | 2 | Side-by-side topic analysis |
136
+ | guide | 1 | Comprehensive reference material |
137
+ | newsletter | — | Personal updates and announcements |
138
+ | faq | — | Primarily Q&A content |
139
+
140
+ ## Rate Limits
141
+
142
+ | Tier | Limit | How to Get |
143
+ |------|-------|------------|
144
+ | Public | 50 queries/day | Default (anonymous) |
145
+ | Registered | 500 queries/day | Use the `register` tool with your email |
146
+
147
+ Limits reset at midnight UTC. Rate limit info is included in every response.
67
148
 
68
149
  ## How It Works
69
150
 
70
- The knowledge base is pre-built from ASTGL articles using semantic embeddings (nomic-embed-text, 768 dimensions). Articles are chunked by section and FAQ entry, embedded, and stored in a SQLite database with sqlite-vec for vector similarity search.
151
+ The knowledge base is pre-built from ASTGL articles using semantic embeddings (nomic-embed-text, 768 dimensions). Content is chunked by section and FAQ entry, embedded, and stored in a SQLite database with sqlite-vec for vector similarity search.
152
+
153
+ **End users don't need Ollama** — all embeddings are pre-computed and shipped in the npm package. The only runtime requirement is Node.js.
154
+
155
+ ### Performance
71
156
 
72
- **End users don't need Ollama** — all embeddings are pre-computed and shipped inside the npm package. The only runtime requirement is Node.js.
157
+ - Typical response time: 100-500ms (embedding lookup + vector search)
158
+ - Embedding results are cached in memory (LRU, 200 entries) — repeated queries are near-instant
159
+ - Ollama calls include 10s timeout + automatic retry
160
+ - Query logging is async/batched to avoid blocking responses
161
+ - Rate limit checks are cached for 5 seconds
73
162
 
74
163
  ## For Maintainers
75
164
 
76
- To rebuild the knowledge database after adding or updating articles:
165
+ ### Setup
77
166
 
78
167
  ```bash
79
- git clone https://github.com/jamescruce/mcp-astgl-knowledge.git
168
+ git clone https://github.com/Jmeg8r/mcp-astgl-knowledge.git
80
169
  cd mcp-astgl-knowledge
81
170
  npm install
82
- npm run ingest # Requires Ollama with nomic-embed-text
83
- npm run build
84
171
  ```
85
172
 
86
- Set `ASTGL_ARTICLES_DIR` to point to your articles directory if it's not at the default location.
87
-
88
- ## Coverage
89
-
90
- Currently indexes 10 articles covering:
91
-
92
- - What MCP servers are and how they work
93
- - Building your first MCP server
94
- - Connecting MCP servers to Claude and ChatGPT
95
- - Best MCP servers available now
96
- - MCP servers vs traditional APIs
97
- - Running AI models locally
98
- - Hardware requirements for local LLMs
99
- - Cost comparison: local vs cloud AI
100
- - Security of local AI
101
- - Automating business workflows with AI
173
+ ### Scripts
174
+
175
+ | Script | Description |
176
+ |--------|-------------|
177
+ | `npm run build` | Compile TypeScript |
178
+ | `npm run dev` | Run MCP server in dev mode (tsx) |
179
+ | `npm start` | Run compiled MCP server |
180
+ | `npm run ingest` | Rebuild knowledge.db from local markdown (requires Ollama) |
181
+ | `npm run ingest-projects` | Index project docs from astgl-site projects.json |
182
+ | `npm run discover` | Poll RSS/sitemap for new content |
183
+ | `npm run structure` | Process discovered content (classify, embed, index) |
184
+ | `npm run pipeline` | Discover + structure in one step |
185
+ | `npm run daily-report` | Generate AEO analytics report |
186
+ | `npm run alerts` | Run content gap alert checks |
187
+ | `npm run freshness` | Check for stale content and ecosystem version changes |
188
+ | `npm run citation-test` | Manual AI citation testing |
189
+ | `npm run related` | Generate internal article links via vector similarity |
190
+
191
+ ### Environment Variables
192
+
193
+ | Variable | Default | Description |
194
+ |----------|---------|-------------|
195
+ | `OLLAMA_URL` | `http://localhost:11434` | Ollama endpoint (dev/rebuild only) |
196
+ | `EMBED_MODEL` | `nomic-embed-text` | Embedding model |
197
+ | `DISCORD_WEBHOOK_URL` | — | Discord webhook for reports/alerts |
198
+ | `ASTGL_API_KEY` | — | Registered tier API key |
199
+ | `ASTGL_ARTICLES_DIR` | `~/Projects/astgl-site/src/content/answers` | Local markdown source |
200
+ | `ASTGL_PROJECTS_JSON` | `~/Projects/astgl-site/src/data/projects.json` | Projects data source |
201
+
202
+ ### Automated Jobs
203
+
204
+ | Job | Schedule | Purpose |
205
+ |-----|----------|---------|
206
+ | Content pipeline | Every 6h | Discover + structure new content |
207
+ | Daily report | 8 AM | Query analytics + health metrics → Discord |
208
+ | Content alerts | 9 AM | Gap detection, zero-citation, competitor scan → Discord |
209
+ | Freshness check | 10 AM | Stale content + ecosystem version tracking → Discord |
102
210
 
103
211
  ## License
104
212
 
package/data/knowledge.db CHANGED
Binary file
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env tsx
2
+ /**
3
+ * Content gap alert engine.
4
+ *
5
+ * WHAT: Auto-detects AEO problems and sends Discord alerts when thresholds are crossed
6
+ * WHY: Proactive alerting catches citation drops, content gaps, and competitor moves
7
+ * before they become invisible losses
8
+ *
9
+ * Alert types:
10
+ * 1. Unknown topic spike — many queries with low confidence in a short window
11
+ * 2. Zero-citation articles — high-value articles never appearing in cited content
12
+ * 3. Repeated low-confidence — same query failing multiple times (content gap signal)
13
+ * 4. Competitor MCP servers — new servers on registries covering similar topics
14
+ *
15
+ * Usage:
16
+ * npm run alerts Check all alerts, print to stdout (JSON)
17
+ * npm run alerts -- --discord Also send triggered alerts to Discord
18
+ * npm run alerts -- --days 7 Look back 7 days (default: 1)
19
+ *
20
+ * Env: DISCORD_WEBHOOK_URL — Discord webhook for alert delivery
21
+ */
22
+ export {};
package/dist/alerts.js ADDED
@@ -0,0 +1,433 @@
1
+ #!/usr/bin/env tsx
2
+ /**
3
+ * Content gap alert engine.
4
+ *
5
+ * WHAT: Auto-detects AEO problems and sends Discord alerts when thresholds are crossed
6
+ * WHY: Proactive alerting catches citation drops, content gaps, and competitor moves
7
+ * before they become invisible losses
8
+ *
9
+ * Alert types:
10
+ * 1. Unknown topic spike — many queries with low confidence in a short window
11
+ * 2. Zero-citation articles — high-value articles never appearing in cited content
12
+ * 3. Repeated low-confidence — same query failing multiple times (content gap signal)
13
+ * 4. Competitor MCP servers — new servers on registries covering similar topics
14
+ *
15
+ * Usage:
16
+ * npm run alerts Check all alerts, print to stdout (JSON)
17
+ * npm run alerts -- --discord Also send triggered alerts to Discord
18
+ * npm run alerts -- --days 7 Look back 7 days (default: 1)
19
+ *
20
+ * Env: DISCORD_WEBHOOK_URL — Discord webhook for alert delivery
21
+ */
22
+ import { join } from "path";
23
+ import { existsSync, mkdirSync } from "fs";
24
+ import Database from "better-sqlite3";
25
+ const DATA_DIR = join(import.meta.dirname, "..", "data");
26
+ const QUERY_LOG_PATH = join(DATA_DIR, "query-log.db");
27
+ const KNOWLEDGE_PATH = join(DATA_DIR, "knowledge.db");
28
+ const ALERT_DB_PATH = join(DATA_DIR, "alerts.db");
29
+ const DISCORD_WEBHOOK_URL = process.env.DISCORD_WEBHOOK_URL || "";
30
+ // --- Thresholds ---
31
+ const LOW_CONFIDENCE_THRESHOLD = 0.5;
32
+ const UNKNOWN_TOPIC_SPIKE_MIN = 5;
33
+ const REPEATED_QUERY_MIN = 3;
34
+ const ZERO_CITATION_LOOKBACK_DAYS = 14;
35
+ const ALERT_COOLDOWN_HOURS = 24;
36
+ // --- Alert History DB ---
37
+ // WHAT: Track which alerts have been sent to avoid spamming Discord
38
+ // WHY: Same alert condition can persist for days — only notify once per cooldown window
39
/**
 * Open the alert-history database, creating the data directory, the
 * `alert_history` table and its lookup index when they do not exist yet.
 *
 * @returns {Database} An open better-sqlite3 handle; the caller closes it.
 */
function initAlertDb() {
    const dataDirMissing = !existsSync(DATA_DIR);
    if (dataDirMissing) {
        mkdirSync(DATA_DIR, { recursive: true });
    }
    const historyDb = new Database(ALERT_DB_PATH);
    // Both statements are idempotent, so running them on every start is safe.
    const schemaStatements = [
        `
    CREATE TABLE IF NOT EXISTS alert_history (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      alert_type TEXT NOT NULL,
      alert_key TEXT NOT NULL,
      fired_at TEXT NOT NULL,
      details TEXT,
      UNIQUE(alert_type, alert_key, fired_at)
    )
  `,
        "CREATE INDEX IF NOT EXISTS idx_alert_history_key ON alert_history(alert_type, alert_key)",
    ];
    for (const statement of schemaStatements) {
        historyDb.exec(statement);
    }
    return historyDb;
}
56
/**
 * Report whether an alert with this type and key was already recorded inside
 * the cooldown window (ALERT_COOLDOWN_HOURS before now).
 *
 * @param {object} alertDb - Open alert-history database handle.
 * @param {string} type - Alert type, e.g. "zero_citation".
 * @param {string} key - De-duplication key for the alert condition.
 * @returns {boolean} True when a matching row newer than the cutoff exists.
 */
function wasRecentlyFired(alertDb, type, key) {
    const windowStart = new Date();
    windowStart.setHours(windowStart.getHours() - ALERT_COOLDOWN_HOURS);
    const lookup = alertDb.prepare(`SELECT id FROM alert_history
       WHERE alert_type = ? AND alert_key = ? AND fired_at > ?
       LIMIT 1`);
    // fired_at is stored as an ISO-8601 string, so it is compared as text.
    const match = lookup.get(type, key, windowStart.toISOString());
    return Boolean(match);
}
66
/**
 * Persist a fired alert so later runs can honour the cooldown window.
 *
 * @param {object} alertDb - Open alert-history database handle.
 * @param {{type: string, title: string}} alert - The alert that fired; its
 *   title is stored in the `details` column.
 * @param {string} key - De-duplication key for the alert condition.
 * @returns {void}
 */
function recordAlert(alertDb, alert, key) {
    const insert = alertDb.prepare("INSERT OR IGNORE INTO alert_history (alert_type, alert_key, fired_at, details) VALUES (?, ?, ?, ?)");
    const firedAt = new Date().toISOString();
    const { type, title } = alert;
    insert.run(type, key, firedAt, title);
}
71
+ // --- Alert Check #1: Unknown Topic Spike ---
72
+ // WHAT: Detect when many queries have very low confidence scores
73
+ // WHY: Indicates users are asking about topics we don't cover — content gap opportunity
74
/**
 * Alert check #1: unknown topic spike.
 *
 * Counts the distinct `query_params` values logged since `fromDate` whose
 * confidence score is below LOW_CONFIDENCE_THRESHOLD, and fires one alert
 * when that count reaches UNKNOWN_TOPIC_SPIKE_MIN.
 *
 * @param {string} fromDate - ISO-8601 timestamp marking the start of the window.
 * @param {object} alertDb - Open alert-history database handle.
 * @returns {object[]} Zero or one alert objects.
 */
function checkUnknownTopicSpike(fromDate, alertDb) {
    if (!existsSync(QUERY_LOG_PATH))
        return [];
    const db = new Database(QUERY_LOG_PATH, { readonly: true });
    let lowConfQueries;
    try {
        const tableCheck = db
            .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='query_log'")
            .get();
        if (!tableCheck)
            return [];
        lowConfQueries = db
            .prepare(`SELECT query_params, confidence_score, tool_name, COUNT(*) as occurrences
       FROM query_log
       WHERE timestamp >= ? AND confidence_score IS NOT NULL AND confidence_score < ?
       GROUP BY query_params
       ORDER BY occurrences DESC`)
            .all(fromDate, LOW_CONFIDENCE_THRESHOLD);
    }
    finally {
        // Release the handle even when a statement throws.
        db.close();
    }
    if (lowConfQueries.length < UNKNOWN_TOPIC_SPIKE_MIN)
        return [];
    // The key embeds the count and the window's start date, so a changed
    // count or a new day is treated as a new alert condition.
    const key = `spike-${lowConfQueries.length}-${fromDate.split("T")[0]}`;
    if (wasRecentlyFired(alertDb, "unknown_topic_spike", key))
        return [];
    // query_params is normally a JSON object carrying `query` or `question`;
    // anything unparseable is shown raw.
    const topQueries = lowConfQueries.slice(0, 5).map((q) => {
        try {
            const parsed = JSON.parse(q.query_params);
            return parsed.query || parsed.question || q.query_params;
        }
        catch {
            return q.query_params;
        }
    });
    const alert = {
        type: "unknown_topic_spike",
        severity: lowConfQueries.length >= 10 ? "critical" : "warning",
        title: `Unknown topic spike: ${lowConfQueries.length} low-confidence queries`,
        details: [
            `${lowConfQueries.length} unique queries scored below ${LOW_CONFIDENCE_THRESHOLD} confidence.`,
            "Top queries:",
            ...topQueries.map((q, i) => `  ${i + 1}. "${q}"`),
            "",
            "Action: Review these topics for new article opportunities.",
        ].join("\n"),
        data: {
            count: lowConfQueries.length,
            threshold: LOW_CONFIDENCE_THRESHOLD,
            top_queries: topQueries,
        },
    };
    recordAlert(alertDb, alert, key);
    return [alert];
}
127
+ // --- Alert Check #2: Zero-Citation Articles ---
128
+ // WHAT: Find articles that exist in the knowledge base but are never cited in query results
129
+ // WHY: Content that's never surfaced is invisible — may need better embeddings or rewriting
130
/**
 * Alert check #2: zero-citation articles.
 *
 * Compares every article in knowledge.db against the URLs found in the
 * query log's `content_cited` column over the last
 * ZERO_CITATION_LOOKBACK_DAYS days and reports astgl.ai articles that never
 * appeared.
 *
 * @param {string} fromDate - Accepted for signature parity with the other
 *   checks but not used: this check always scans its own fixed lookback window.
 * @param {object} alertDb - Open alert-history database handle.
 * @returns {object[]} Zero or one alert objects.
 */
function checkZeroCitationArticles(fromDate, alertDb) {
    if (!existsSync(QUERY_LOG_PATH) || !existsSync(KNOWLEDGE_PATH))
        return [];
    const knowledgeDb = new Database(KNOWLEDGE_PATH, { readonly: true });
    let allArticles;
    try {
        allArticles = knowledgeDb.prepare("SELECT title, url FROM articles").all();
    }
    finally {
        knowledgeDb.close();
    }
    const lookback = new Date();
    lookback.setDate(lookback.getDate() - ZERO_CITATION_LOOKBACK_DAYS);
    const logDb = new Database(QUERY_LOG_PATH, { readonly: true });
    let citedRows;
    try {
        const tableCheck = logDb
            .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='query_log'")
            .get();
        if (!tableCheck)
            return [];
        citedRows = logDb
            .prepare(`SELECT content_cited FROM query_log
       WHERE timestamp >= ? AND content_cited IS NOT NULL`)
            .all(lookback.toISOString());
    }
    finally {
        // Release the handle even when a statement throws.
        logDb.close();
    }
    // Need at least some query activity to make this check meaningful.
    if (citedRows.length < 10)
        return [];
    const citedUrls = new Set();
    for (const row of citedRows) {
        try {
            const urls = JSON.parse(row.content_cited);
            // Only a JSON array is a list of URLs; a bare JSON string would
            // otherwise be iterated character by character.
            if (!Array.isArray(urls))
                continue;
            for (const url of urls)
                citedUrls.add(url);
        }
        catch {
            // Skip malformed rows.
        }
    }
    // Only canonical astgl.ai URLs are considered; rows without a string URL
    // are ignored rather than crashing the check.
    const uncitedHighValue = allArticles.filter((a) => typeof a.url === "string" &&
        a.url.startsWith("https://astgl.ai/") &&
        !citedUrls.has(a.url));
    if (uncitedHighValue.length === 0)
        return [];
    const key = `uncited-${uncitedHighValue.length}`;
    if (wasRecentlyFired(alertDb, "zero_citation", key))
        return [];
    const shown = uncitedHighValue.slice(0, 10);
    const hiddenCount = uncitedHighValue.length - shown.length;
    const alert = {
        type: "zero_citation",
        severity: uncitedHighValue.length >= 10 ? "warning" : "info",
        title: `${uncitedHighValue.length} high-value articles with zero citations (${ZERO_CITATION_LOOKBACK_DAYS}d)`,
        details: [
            `These astgl.ai articles were never cited in the last ${ZERO_CITATION_LOOKBACK_DAYS} days:`,
            ...shown.map((a) => `  - ${a.title}\n    ${a.url}`),
            // Emit the "more" line only when something was actually cut off.
            ...(hiddenCount > 0 ? [`  ... and ${hiddenCount} more`] : []),
            "",
            "Action: Check embeddings quality, consider rewriting descriptions, or verify chunking.",
        ].join("\n"),
        data: {
            count: uncitedHighValue.length,
            lookback_days: ZERO_CITATION_LOOKBACK_DAYS,
            articles: shown.map((a) => ({ title: a.title, url: a.url })),
        },
    };
    recordAlert(alertDb, alert, key);
    return [alert];
}
197
+ // --- Alert Check #3: Repeated Low-Confidence Queries ---
198
+ // WHAT: Same query text appearing multiple times with consistently low confidence
199
+ // WHY: Repeated failures = a real user need we're not serving — highest-signal content gap
200
/**
 * Alert check #3: repeated low-confidence queries.
 *
 * Finds `query_params` values logged at least REPEATED_QUERY_MIN times since
 * `fromDate` with a confidence score below LOW_CONFIDENCE_THRESHOLD, and
 * fires one alert per query for the five most frequent ones.
 *
 * @param {string} fromDate - ISO-8601 timestamp marking the start of the window.
 * @param {object} alertDb - Open alert-history database handle.
 * @returns {object[]} Alerts that fired (those still in cooldown are omitted).
 */
function checkRepeatedLowConfidence(fromDate, alertDb) {
    if (!existsSync(QUERY_LOG_PATH))
        return [];
    const db = new Database(QUERY_LOG_PATH, { readonly: true });
    let repeats;
    try {
        const tableCheck = db
            .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='query_log'")
            .get();
        if (!tableCheck)
            return [];
        repeats = db
            .prepare(`SELECT query_params, tool_name,
              COUNT(*) as occurrences,
              ROUND(AVG(confidence_score), 3) as avg_confidence
       FROM query_log
       WHERE timestamp >= ?
         AND confidence_score IS NOT NULL
         AND confidence_score < ?
       GROUP BY query_params
       HAVING COUNT(*) >= ?
       ORDER BY occurrences DESC`)
            .all(fromDate, LOW_CONFIDENCE_THRESHOLD, REPEATED_QUERY_MIN);
    }
    finally {
        // Release the handle even when a statement throws.
        db.close();
    }
    if (repeats.length === 0)
        return [];
    const alerts = [];
    for (const r of repeats.slice(0, 5)) {
        let queryText = r.query_params;
        try {
            const parsed = JSON.parse(r.query_params);
            queryText = parsed.query || parsed.question || r.query_params;
        }
        catch {
            // Not JSON: fall back to the raw column value.
        }
        // The parsed field may be a non-string JSON value; normalise it so the
        // key construction below cannot throw.
        queryText = String(queryText);
        const key = `repeat-${queryText.slice(0, 50)}`;
        if (wasRecentlyFired(alertDb, "repeated_low_confidence", key))
            continue;
        const alert = {
            type: "repeated_low_confidence",
            severity: r.occurrences >= 5 ? "critical" : "warning",
            title: `Repeated content gap: "${queryText}" (${r.occurrences}x, avg ${r.avg_confidence})`,
            details: [
                `Query "${queryText}" has been asked ${r.occurrences} times with avg confidence ${r.avg_confidence}.`,
                `Tool: ${r.tool_name}`,
                "",
                "Action: Write or improve content targeting this specific question.",
            ].join("\n"),
            data: {
                query: queryText,
                occurrences: r.occurrences,
                avg_confidence: r.avg_confidence,
                tool: r.tool_name,
            },
        };
        recordAlert(alertDb, alert, key);
        alerts.push(alert);
    }
    return alerts;
}
261
+ // --- Alert Check #4: Competitor MCP Servers ---
262
+ // WHAT: Scan Smithery registry for new MCP servers covering similar topics
263
+ // WHY: Competitors publishing knowledge MCP servers dilutes ASTGL's AEO position
264
/**
 * Alert check #4: competitor MCP servers.
 *
 * Queries the Smithery registry search endpoint for a fixed list of terms
 * and fires an "info" alert for each server that is not one of ours, was
 * created within the last 30 days, and has not alerted within the cooldown.
 * The check is best-effort: a failed request skips that term only.
 *
 * @param {object} alertDb - Open alert-history database handle.
 * @returns {Promise<object[]>} Alerts that fired.
 */
async function checkCompetitorServers(alertDb) {
    const SEARCH_TERMS = ["knowledge base", "ai articles", "local ai", "mcp guide"];
    const OUR_SLUGS = ["astgl-knowledge", "mcp-astgl-knowledge"];
    // Computed once: the cutoff does not change between servers.
    const thirtyDaysAgo = new Date();
    thirtyDaysAgo.setDate(thirtyDaysAgo.getDate() - 30);
    const alerts = [];
    for (const term of SEARCH_TERMS) {
        try {
            const resp = await fetch(`https://registry.smithery.ai/servers?q=${encodeURIComponent(term)}&pageSize=10`, {
                headers: { Accept: "application/json" },
                signal: AbortSignal.timeout(10_000),
            });
            if (!resp.ok)
                continue;
            const data = (await resp.json());
            if (!Array.isArray(data?.servers))
                continue;
            for (const server of data.servers) {
                // Skip malformed entries individually so one bad record does
                // not discard the remaining results for this term.
                if (typeof server?.qualifiedName !== "string")
                    continue;
                // Skip our own server.
                if (OUR_SLUGS.some((slug) => server.qualifiedName.includes(slug)))
                    continue;
                // Old servers aren't news. An unparseable createdAt yields an
                // Invalid Date, for which this comparison is false, so such
                // servers are still reported.
                const createdAt = new Date(server.createdAt);
                if (createdAt < thirtyDaysAgo)
                    continue;
                // A server matched by several terms alerts only once: the
                // first hit is recorded, later hits are seen as recently fired.
                const key = `competitor-${server.qualifiedName}`;
                if (wasRecentlyFired(alertDb, "competitor_detected", key))
                    continue;
                const alert = {
                    type: "competitor_detected",
                    severity: "info",
                    title: `New competitor MCP server: ${server.displayName}`,
                    details: [
                        `**Server:** ${server.qualifiedName}`,
                        `**Name:** ${server.displayName}`,
                        `**Description:** ${server.description?.slice(0, 200) || "N/A"}`,
                        `**Created:** ${server.createdAt}`,
                        `**Found via:** search for "${term}"`,
                        "",
                        "Action: Review server to assess overlap with ASTGL content coverage.",
                    ].join("\n"),
                    data: {
                        qualified_name: server.qualifiedName,
                        display_name: server.displayName,
                        description: server.description,
                        search_term: term,
                    },
                };
                recordAlert(alertDb, alert, key);
                alerts.push(alert);
            }
        }
        catch (err) {
            // Still best-effort, but leave a trace on stderr so a persistent
            // outage or API change is visible in the job log.
            console.error(`  Competitor search for "${term}" skipped: ${err?.message ?? err}`);
        }
    }
    return alerts;
}
325
+ // --- Discord Delivery ---
326
// Discord embed colours (24-bit RGB integers) keyed by alert severity.
const SEVERITY_COLORS = new Map([
    ["critical", 0xff0000],
    ["warning", 0xffa500],
    ["info", 0x2196f3],
]);
/**
 * Map an alert severity to its Discord embed colour.
 *
 * @param {string} severity - "critical", "warning" or "info".
 * @returns {number|undefined} RGB integer, or undefined for any other value.
 */
function severityColor(severity) {
    return SEVERITY_COLORS.get(severity);
}
333
// Emoji prefix shown in the embed title, keyed by alert severity.
const SEVERITY_EMOJI = new Map([
    ["critical", "🔴"],
    ["warning", "🟡"],
    ["info", "🔵"],
]);
/**
 * Map an alert severity to the emoji used in front of the alert title.
 *
 * @param {string} severity - "critical", "warning" or "info".
 * @returns {string|undefined} Emoji, or undefined for any other value.
 */
function severityEmoji(severity) {
    return SEVERITY_EMOJI.get(severity);
}
340
/**
 * Deliver alerts to the configured Discord webhook.
 *
 * Alerts are rendered as embeds and posted in batches, because a single
 * webhook message accepts at most 10 embeds. Every alert is attempted; a
 * failed batch is logged and the remaining batches are still sent.
 *
 * @param {object[]} alerts - Alerts with `type`, `severity`, `title`, `details`.
 * @returns {Promise<void>}
 */
async function sendAlertsToDiscord(alerts) {
    if (!DISCORD_WEBHOOK_URL) {
        console.error("DISCORD_WEBHOOK_URL not set.");
        return;
    }
    if (alerts.length === 0)
        return;
    // Discord limits: 10 embeds per message, 256 chars per embed title,
    // 4096 chars per embed description.
    const MAX_EMBEDS_PER_MESSAGE = 10;
    const MAX_TITLE_LENGTH = 256;
    const MAX_DESCRIPTION_LENGTH = 4096;
    const clip = (text, max) => {
        const value = String(text ?? "");
        return value.length > max ? `${value.slice(0, max - 1)}…` : value;
    };
    let delivered = 0;
    for (let start = 0; start < alerts.length; start += MAX_EMBEDS_PER_MESSAGE) {
        const batch = alerts.slice(start, start + MAX_EMBEDS_PER_MESSAGE);
        const embeds = batch.map((alert) => ({
            title: clip(`${severityEmoji(alert.severity)} ${alert.title}`, MAX_TITLE_LENGTH),
            description: clip(alert.details, MAX_DESCRIPTION_LENGTH),
            color: severityColor(alert.severity),
            footer: { text: `Alert type: ${alert.type}` },
        }));
        const resp = await fetch(DISCORD_WEBHOOK_URL, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({ embeds }),
        });
        if (!resp.ok) {
            const body = await resp.text();
            console.error(`Discord webhook failed: ${resp.status} ${body}`);
            continue;
        }
        delivered += batch.length;
    }
    // Report what was actually accepted, not what was requested.
    if (delivered > 0) {
        console.error(`${delivered} alert(s) sent to Discord.`);
    }
}
368
+ // --- CLI ---
369
/**
 * Parse the command-line flags of the alert engine from `process.argv`.
 *
 * Recognised flags:
 *   --discord     also deliver fired alerts to Discord
 *   --days <n>    look back n days (positive integer, default 1)
 *
 * A missing, non-numeric or non-positive `--days` value falls back to the
 * default, so the caller never receives NaN (which would produce an invalid
 * start date).
 *
 * @returns {{sendToDiscord: boolean, days: number}}
 */
function parseArgs() {
    const DEFAULT_DAYS = 1;
    const args = process.argv.slice(2);
    let days = DEFAULT_DAYS;
    const flagIndex = args.indexOf("--days");
    if (flagIndex >= 0) {
        const requested = Number.parseInt(args[flagIndex + 1], 10);
        if (Number.isInteger(requested) && requested >= 1) {
            days = requested;
        }
    }
    return {
        sendToDiscord: args.includes("--discord"),
        days,
    };
}
379
/**
 * CLI entry point: run every alert check, print a JSON report on stdout and,
 * when `--discord` was passed, deliver the fired alerts to Discord.
 *
 * Progress messages go to stderr so stdout stays valid JSON.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const { sendToDiscord, days } = parseArgs();
    const from = new Date();
    from.setDate(from.getDate() - days);
    const fromDate = from.toISOString();
    const alertDb = initAlertDb();
    const checksRun = [];
    // NOTE(review): nothing increments this counter, because the check
    // functions return only the alerts that fired and not how many they
    // skipped for cooldown. The report therefore always shows 0 suppressed.
    const suppressed = 0;
    const allAlerts = [];
    // Checks run in this fixed order; each entry is [alert type, label, runner].
    const checks = [
        ["unknown_topic_spike", "unknown topic spike", () => checkUnknownTopicSpike(fromDate, alertDb)],
        ["zero_citation", "zero-citation articles", () => checkZeroCitationArticles(fromDate, alertDb)],
        ["repeated_low_confidence", "repeated low-confidence queries", () => checkRepeatedLowConfidence(fromDate, alertDb)],
        ["competitor_detected", "competitor MCP servers", () => checkCompetitorServers(alertDb)],
    ];
    console.error("=== ASTGL Content Gap Alert Engine ===\n");
    try {
        for (const [type, label, run] of checks) {
            console.error(`Checking: ${label}...`);
            checksRun.push(type);
            const fired = await run();
            allAlerts.push(...fired);
            console.error(`  ${fired.length} alert(s)\n`);
        }
    }
    finally {
        // Close the history database even when a check throws.
        alertDb.close();
    }
    const report = {
        generated_at: new Date().toISOString(),
        period_days: days,
        alerts_fired: allAlerts,
        alerts_suppressed: suppressed,
        checks_run: checksRun,
    };
    console.log(JSON.stringify(report, null, 2));
    if (sendToDiscord) {
        if (allAlerts.length > 0) {
            await sendAlertsToDiscord(allAlerts);
        }
        else {
            console.error("No alerts to send.");
        }
    }
    console.error(`\n=== Done: ${allAlerts.length} alert(s) fired, ${suppressed} suppressed ===`);
}
427
// Script entry: run the engine and turn its outcome into the process exit
// code (0 on success, 1 after logging the failure).
try {
    await main();
    process.exit(0);
}
catch (err) {
    console.error("Alert engine failed:", err);
    process.exit(1);
}
433
+ //# sourceMappingURL=alerts.js.map