mcp-astgl-knowledge 1.0.2 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +146 -38
- package/data/knowledge.db +0 -0
- package/dist/alerts.d.ts +22 -0
- package/dist/alerts.js +433 -0
- package/dist/alerts.js.map +1 -0
- package/dist/citation-test.d.ts +14 -0
- package/dist/citation-test.js +298 -0
- package/dist/citation-test.js.map +1 -0
- package/dist/daily-report.d.ts +15 -0
- package/dist/daily-report.js +441 -0
- package/dist/daily-report.js.map +1 -0
- package/dist/discover.js +3 -1
- package/dist/discover.js.map +1 -1
- package/dist/freshness.d.ts +20 -0
- package/dist/freshness.js +508 -0
- package/dist/freshness.js.map +1 -0
- package/dist/index.d.ts +6 -1
- package/dist/index.js +253 -14
- package/dist/index.js.map +1 -1
- package/dist/ingest-projects.d.ts +16 -0
- package/dist/ingest-projects.js +196 -0
- package/dist/ingest-projects.js.map +1 -0
- package/dist/knowledge-db.d.ts +13 -0
- package/dist/knowledge-db.js +156 -0
- package/dist/knowledge-db.js.map +1 -0
- package/dist/pipeline.d.ts +12 -0
- package/dist/pipeline.js +83 -0
- package/dist/pipeline.js.map +1 -0
- package/dist/query-log.d.ts +15 -0
- package/dist/query-log.js +93 -0
- package/dist/query-log.js.map +1 -0
- package/dist/rate-limit.d.ts +34 -0
- package/dist/rate-limit.js +206 -0
- package/dist/rate-limit.js.map +1 -0
- package/dist/related-articles.d.ts +15 -0
- package/dist/related-articles.js +217 -0
- package/dist/related-articles.js.map +1 -0
- package/dist/search.d.ts +13 -4
- package/dist/search.js +274 -39
- package/dist/search.js.map +1 -1
- package/dist/structure.d.ts +11 -0
- package/dist/structure.js +451 -0
- package/dist/structure.js.map +1 -0
- package/dist/types.d.ts +65 -0
- package/dist/types.js.map +1 -1
- package/package.json +10 -2
package/README.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# mcp-astgl-knowledge
|
|
2
2
|
|
|
3
|
-
An MCP server that lets AI assistants search and cite
|
|
3
|
+
An MCP server that lets AI assistants search and cite content from [As The Geek Learns](https://astgl.ai) — covering MCP servers, local AI, AI automation, and ASTGL project documentation.
|
|
4
4
|
|
|
5
|
-
When an AI assistant connects to this server, it gains access to
|
|
5
|
+
When an AI assistant connects to this server, it gains access to 49 indexed entries (articles, tutorials, comparisons, guides, and project docs). Every response includes source URLs back to astgl.ai.
|
|
6
6
|
|
|
7
7
|
## Quick Start
|
|
8
8
|
|
|
@@ -36,69 +36,177 @@ Add to your project's `.mcp.json`:
|
|
|
36
36
|
}
|
|
37
37
|
```
|
|
38
38
|
|
|
39
|
+
### Cursor / Generic MCP Client
|
|
40
|
+
|
|
41
|
+
```json
|
|
42
|
+
{
|
|
43
|
+
"mcpServers": {
|
|
44
|
+
"astgl-knowledge": {
|
|
45
|
+
"command": "npx",
|
|
46
|
+
"args": ["-y", "mcp-astgl-knowledge"]
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### With Registration (500 queries/day)
|
|
53
|
+
|
|
54
|
+
Register via the `register` tool to get an API key, then add it to your config:
|
|
55
|
+
|
|
56
|
+
```json
|
|
57
|
+
{
|
|
58
|
+
"mcpServers": {
|
|
59
|
+
"astgl-knowledge": {
|
|
60
|
+
"command": "npx",
|
|
61
|
+
"args": ["-y", "mcp-astgl-knowledge"],
|
|
62
|
+
"env": {
|
|
63
|
+
"ASTGL_API_KEY": "astgl_your_api_key_here"
|
|
64
|
+
}
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
```
|
|
69
|
+
|
|
39
70
|
## Tools
|
|
40
71
|
|
|
41
72
|
### `search_articles`
|
|
42
73
|
|
|
43
|
-
Search
|
|
74
|
+
Search the knowledge base by query. Returns ranked results with relevance scores and source URLs.
|
|
44
75
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
76
|
+
| Parameter | Type | Required | Description |
|
|
77
|
+
|-----------|------|----------|-------------|
|
|
78
|
+
| `query` | string | Yes | Search query (e.g., "how to build an MCP server") |
|
|
79
|
+
| `limit` | number | No | Max results, 1-20 (default: 5) |
|
|
80
|
+
| `content_type` | string | No | Filter by type: article, tutorial, faq, comparison, guide, newsletter, project |
|
|
49
81
|
|
|
50
82
|
### `get_answer`
|
|
51
83
|
|
|
52
84
|
Get a direct answer to a specific question. Prefers FAQ entries for concise responses.
|
|
53
85
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
86
|
+
| Parameter | Type | Required | Description |
|
|
87
|
+
|-----------|------|----------|-------------|
|
|
88
|
+
| `question` | string | Yes | A specific question (e.g., "What is an MCP server?") |
|
|
89
|
+
| `content_type` | string | No | Filter by content type |
|
|
90
|
+
|
|
91
|
+
### `get_tutorial`
|
|
92
|
+
|
|
93
|
+
Get step-by-step instructions from tutorial and guide content.
|
|
94
|
+
|
|
95
|
+
| Parameter | Type | Required | Description |
|
|
96
|
+
|-----------|------|----------|-------------|
|
|
97
|
+
| `query` | string | Yes | What you want to learn (e.g., "setup Ollama on Mac") |
|
|
98
|
+
|
|
99
|
+
### `compare_topics`
|
|
100
|
+
|
|
101
|
+
Side-by-side comparison of two topics.
|
|
102
|
+
|
|
103
|
+
| Parameter | Type | Required | Description |
|
|
104
|
+
|-----------|------|----------|-------------|
|
|
105
|
+
| `topic_a` | string | Yes | First topic |
|
|
106
|
+
| `topic_b` | string | Yes | Second topic |
|
|
107
|
+
|
|
108
|
+
### `get_latest`
|
|
109
|
+
|
|
110
|
+
Get the most recently added content.
|
|
111
|
+
|
|
112
|
+
| Parameter | Type | Required | Description |
|
|
113
|
+
|-----------|------|----------|-------------|
|
|
114
|
+
| `limit` | number | No | Max results, 1-20 (default: 5) |
|
|
58
115
|
|
|
59
116
|
### `list_topics`
|
|
60
117
|
|
|
61
|
-
|
|
118
|
+
Browse all topics in the knowledge base with content types and section headings.
|
|
62
119
|
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
120
|
+
### `register`
|
|
121
|
+
|
|
122
|
+
Register your email to unlock 500 queries/day (up from 50).
|
|
123
|
+
|
|
124
|
+
| Parameter | Type | Required | Description |
|
|
125
|
+
|-----------|------|----------|-------------|
|
|
126
|
+
| `email` | string | Yes | Your email address |
|
|
127
|
+
|
|
128
|
+
## Content Types
|
|
129
|
+
|
|
130
|
+
| Type | Count | Description |
|
|
131
|
+
|------|-------|-------------|
|
|
132
|
+
| article | 29 | Informational content about MCP, local AI, automation |
|
|
133
|
+
| project | 9 | ASTGL project documentation (KlockThingy, Revri, Cortex, etc.) |
|
|
134
|
+
| tutorial | 8 | Step-by-step how-to guides |
|
|
135
|
+
| comparison | 2 | Side-by-side topic analysis |
|
|
136
|
+
| guide | 1 | Comprehensive reference material |
|
|
137
|
+
| newsletter | — | Personal updates and announcements |
|
|
138
|
+
| faq | — | Primarily Q&A content |
|
|
139
|
+
|
|
140
|
+
## Rate Limits
|
|
141
|
+
|
|
142
|
+
| Tier | Limit | How to Get |
|
|
143
|
+
|------|-------|------------|
|
|
144
|
+
| Public | 50 queries/day | Default (anonymous) |
|
|
145
|
+
| Registered | 500 queries/day | Use the `register` tool with your email |
|
|
146
|
+
|
|
147
|
+
Limits reset at midnight UTC. Rate limit info is included in every response.
|
|
67
148
|
|
|
68
149
|
## How It Works
|
|
69
150
|
|
|
70
|
-
The knowledge base is pre-built from ASTGL articles using semantic embeddings (nomic-embed-text, 768 dimensions).
|
|
151
|
+
The knowledge base is pre-built from ASTGL articles using semantic embeddings (nomic-embed-text, 768 dimensions). Content is chunked by section and FAQ entry, embedded, and stored in a SQLite database with sqlite-vec for vector similarity search.
|
|
152
|
+
|
|
153
|
+
**End users don't need Ollama** — all embeddings are pre-computed and shipped in the npm package. The only runtime requirement is Node.js.
|
|
154
|
+
|
|
155
|
+
### Performance
|
|
71
156
|
|
|
72
|
-
|
|
157
|
+
- Typical response time: 100-500ms (embedding lookup + vector search)
|
|
158
|
+
- Embedding results are cached in memory (LRU, 200 entries) — repeated queries are near-instant
|
|
159
|
+
- Ollama calls include 10s timeout + automatic retry
|
|
160
|
+
- Query logging is async/batched to avoid blocking responses
|
|
161
|
+
- Rate limit checks are cached for 5 seconds
|
|
73
162
|
|
|
74
163
|
## For Maintainers
|
|
75
164
|
|
|
76
|
-
|
|
165
|
+
### Setup
|
|
77
166
|
|
|
78
167
|
```bash
|
|
79
|
-
git clone https://github.com/
|
|
168
|
+
git clone https://github.com/Jmeg8r/mcp-astgl-knowledge.git
|
|
80
169
|
cd mcp-astgl-knowledge
|
|
81
170
|
npm install
|
|
82
|
-
npm run ingest # Requires Ollama with nomic-embed-text
|
|
83
|
-
npm run build
|
|
84
171
|
```
|
|
85
172
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
-
|
|
173
|
+
### Scripts
|
|
174
|
+
|
|
175
|
+
| Script | Description |
|
|
176
|
+
|--------|-------------|
|
|
177
|
+
| `npm run build` | Compile TypeScript |
|
|
178
|
+
| `npm run dev` | Run MCP server in dev mode (tsx) |
|
|
179
|
+
| `npm start` | Run compiled MCP server |
|
|
180
|
+
| `npm run ingest` | Rebuild knowledge.db from local markdown (requires Ollama) |
|
|
181
|
+
| `npm run ingest-projects` | Index project docs from astgl-site projects.json |
|
|
182
|
+
| `npm run discover` | Poll RSS/sitemap for new content |
|
|
183
|
+
| `npm run structure` | Process discovered content (classify, embed, index) |
|
|
184
|
+
| `npm run pipeline` | Discover + structure in one step |
|
|
185
|
+
| `npm run daily-report` | Generate AEO analytics report |
|
|
186
|
+
| `npm run alerts` | Run content gap alert checks |
|
|
187
|
+
| `npm run freshness` | Check for stale content and ecosystem version changes |
|
|
188
|
+
| `npm run citation-test` | Manual AI citation testing |
|
|
189
|
+
| `npm run related` | Generate internal article links via vector similarity |
|
|
190
|
+
|
|
191
|
+
### Environment Variables
|
|
192
|
+
|
|
193
|
+
| Variable | Default | Description |
|
|
194
|
+
|----------|---------|-------------|
|
|
195
|
+
| `OLLAMA_URL` | `http://localhost:11434` | Ollama endpoint (dev/rebuild only) |
|
|
196
|
+
| `EMBED_MODEL` | `nomic-embed-text` | Embedding model |
|
|
197
|
+
| `DISCORD_WEBHOOK_URL` | — | Discord webhook for reports/alerts |
|
|
198
|
+
| `ASTGL_API_KEY` | — | Registered tier API key |
|
|
199
|
+
| `ASTGL_ARTICLES_DIR` | `~/Projects/astgl-site/src/content/answers` | Local markdown source |
|
|
200
|
+
| `ASTGL_PROJECTS_JSON` | `~/Projects/astgl-site/src/data/projects.json` | Projects data source |
|
|
201
|
+
|
|
202
|
+
### Automated Jobs
|
|
203
|
+
|
|
204
|
+
| Job | Schedule | Purpose |
|
|
205
|
+
|-----|----------|---------|
|
|
206
|
+
| Content pipeline | Every 6h | Discover + structure new content |
|
|
207
|
+
| Daily report | 8 AM | Query analytics + health metrics → Discord |
|
|
208
|
+
| Content alerts | 9 AM | Gap detection, zero-citation, competitor scan → Discord |
|
|
209
|
+
| Freshness check | 10 AM | Stale content + ecosystem version tracking → Discord |
|
|
102
210
|
|
|
103
211
|
## License
|
|
104
212
|
|
package/data/knowledge.db
CHANGED
|
Binary file
|
package/dist/alerts.d.ts
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
#!/usr/bin/env tsx
|
|
2
|
+
/**
|
|
3
|
+
* Content gap alert engine.
|
|
4
|
+
*
|
|
5
|
+
* WHAT: Auto-detects AEO problems and sends Discord alerts when thresholds are crossed
|
|
6
|
+
* WHY: Proactive alerting catches citation drops, content gaps, and competitor moves
|
|
7
|
+
* before they become invisible losses
|
|
8
|
+
*
|
|
9
|
+
* Alert types:
|
|
10
|
+
* 1. Unknown topic spike — many queries with low confidence in a short window
|
|
11
|
+
* 2. Zero-citation articles — high-value articles never appearing in cited content
|
|
12
|
+
* 3. Repeated low-confidence — same query failing multiple times (content gap signal)
|
|
13
|
+
* 4. Competitor MCP servers — new servers on registries covering similar topics
|
|
14
|
+
*
|
|
15
|
+
* Usage:
|
|
16
|
+
* npm run alerts Check all alerts, print to stdout (JSON)
|
|
17
|
+
* npm run alerts -- --discord Also send triggered alerts to Discord
|
|
18
|
+
* npm run alerts -- --days 7 Look back 7 days (default: 1)
|
|
19
|
+
*
|
|
20
|
+
* Env: DISCORD_WEBHOOK_URL — Discord webhook for alert delivery
|
|
21
|
+
*/
|
|
22
|
+
export {};
|
package/dist/alerts.js
ADDED
|
@@ -0,0 +1,433 @@
|
|
|
1
|
+
#!/usr/bin/env tsx
|
|
2
|
+
/**
|
|
3
|
+
* Content gap alert engine.
|
|
4
|
+
*
|
|
5
|
+
* WHAT: Auto-detects AEO problems and sends Discord alerts when thresholds are crossed
|
|
6
|
+
* WHY: Proactive alerting catches citation drops, content gaps, and competitor moves
|
|
7
|
+
* before they become invisible losses
|
|
8
|
+
*
|
|
9
|
+
* Alert types:
|
|
10
|
+
* 1. Unknown topic spike — many queries with low confidence in a short window
|
|
11
|
+
* 2. Zero-citation articles — high-value articles never appearing in cited content
|
|
12
|
+
* 3. Repeated low-confidence — same query failing multiple times (content gap signal)
|
|
13
|
+
* 4. Competitor MCP servers — new servers on registries covering similar topics
|
|
14
|
+
*
|
|
15
|
+
* Usage:
|
|
16
|
+
* npm run alerts Check all alerts, print to stdout (JSON)
|
|
17
|
+
* npm run alerts -- --discord Also send triggered alerts to Discord
|
|
18
|
+
* npm run alerts -- --days 7 Look back 7 days (default: 1)
|
|
19
|
+
*
|
|
20
|
+
* Env: DISCORD_WEBHOOK_URL — Discord webhook for alert delivery
|
|
21
|
+
*/
|
|
22
|
+
import { join } from "path";
|
|
23
|
+
import { existsSync, mkdirSync } from "fs";
|
|
24
|
+
import Database from "better-sqlite3";
|
|
25
|
+
const DATA_DIR = join(import.meta.dirname, "..", "data");
|
|
26
|
+
const QUERY_LOG_PATH = join(DATA_DIR, "query-log.db");
|
|
27
|
+
const KNOWLEDGE_PATH = join(DATA_DIR, "knowledge.db");
|
|
28
|
+
const ALERT_DB_PATH = join(DATA_DIR, "alerts.db");
|
|
29
|
+
const DISCORD_WEBHOOK_URL = process.env.DISCORD_WEBHOOK_URL || "";
|
|
30
|
+
// --- Thresholds ---
|
|
31
|
+
const LOW_CONFIDENCE_THRESHOLD = 0.5;
|
|
32
|
+
const UNKNOWN_TOPIC_SPIKE_MIN = 5;
|
|
33
|
+
const REPEATED_QUERY_MIN = 3;
|
|
34
|
+
const ZERO_CITATION_LOOKBACK_DAYS = 14;
|
|
35
|
+
const ALERT_COOLDOWN_HOURS = 24;
|
|
36
|
+
// --- Alert History DB ---
|
|
37
|
+
// WHAT: Track which alerts have been sent to avoid spamming Discord
|
|
38
|
+
// WHY: Same alert condition can persist for days — only notify once per cooldown window
|
|
39
|
+
/**
 * Open the alert-history database, creating the data directory, the
 * `alert_history` table and its lookup index when they do not exist yet.
 *
 * @returns {Database} An open better-sqlite3 handle; the caller closes it.
 */
function initAlertDb() {
  const dataDirMissing = !existsSync(DATA_DIR);
  if (dataDirMissing) {
    mkdirSync(DATA_DIR, { recursive: true });
  }

  // One row per fired alert; the UNIQUE constraint pairs with INSERT OR IGNORE in recordAlert.
  const createTableSql = `
CREATE TABLE IF NOT EXISTS alert_history (
id INTEGER PRIMARY KEY AUTOINCREMENT,
alert_type TEXT NOT NULL,
alert_key TEXT NOT NULL,
fired_at TEXT NOT NULL,
details TEXT,
UNIQUE(alert_type, alert_key, fired_at)
)
`;
  // Cooldown lookups filter on (alert_type, alert_key), so index exactly that pair.
  const createIndexSql = "CREATE INDEX IF NOT EXISTS idx_alert_history_key ON alert_history(alert_type, alert_key)";

  const historyDb = new Database(ALERT_DB_PATH);
  historyDb.exec(createTableSql);
  historyDb.exec(createIndexSql);
  return historyDb;
}
|
|
56
|
+
/**
 * Report whether an alert with this type and key was recorded inside the
 * cooldown window (the last ALERT_COOLDOWN_HOURS hours).
 *
 * @param {object} alertDb - Open alert-history database handle.
 * @param {string} type - Alert type, e.g. "zero_citation".
 * @param {string} key - De-duplication key within that type.
 * @returns {boolean} True when a matching row newer than the cutoff exists.
 */
function wasRecentlyFired(alertDb, type, key) {
  // fired_at is stored as an ISO-8601 string, so the cutoff is compared as text.
  const windowStart = new Date();
  windowStart.setHours(windowStart.getHours() - ALERT_COOLDOWN_HOURS);

  const lookup = alertDb.prepare(`SELECT id FROM alert_history
WHERE alert_type = ? AND alert_key = ? AND fired_at > ?
LIMIT 1`);
  const recentRow = lookup.get(type, key, windowStart.toISOString());
  return Boolean(recentRow);
}
|
|
66
|
+
/**
 * Persist a fired alert so later runs can honour the cooldown window.
 *
 * @param {object} alertDb - Open alert-history database handle.
 * @param {{type: string, title: string}} alert - The alert that fired.
 * @param {string} key - De-duplication key stored next to the alert type.
 * @returns {void}
 */
function recordAlert(alertDb, alert, key) {
  const insertRow = alertDb.prepare("INSERT OR IGNORE INTO alert_history (alert_type, alert_key, fired_at, details) VALUES (?, ?, ?, ?)");
  const firedAt = new Date().toISOString();
  // NOTE(review): the `details` column receives alert.title, not alert.details — confirm this is intended.
  insertRow.run(alert.type, key, firedAt, alert.title);
}
|
|
71
|
+
// --- Alert Check #1: Unknown Topic Spike ---
|
|
72
|
+
// WHAT: Detect when many queries have very low confidence scores
|
|
73
|
+
// WHY: Indicates users are asking about topics we don't cover — content gap opportunity
|
|
74
|
+
/**
 * Alert check #1: unknown topic spike.
 *
 * Counts the distinct query payloads logged since `fromDate` whose confidence
 * score is below LOW_CONFIDENCE_THRESHOLD. When at least
 * UNKNOWN_TOPIC_SPIKE_MIN of them exist and the alert is not in cooldown, one
 * alert is recorded and returned.
 *
 * @param {string} fromDate - ISO-8601 timestamp marking the start of the window.
 * @param {object} alertDb - Open alert-history database handle.
 * @returns {object[]} Either an empty array or a single-element array with the alert.
 */
function checkUnknownTopicSpike(fromDate, alertDb) {
  if (!existsSync(QUERY_LOG_PATH)) {
    return [];
  }

  let lowConfQueries;
  const db = new Database(QUERY_LOG_PATH, { readonly: true });
  try {
    // The log file can exist before its table has been created.
    const tableCheck = db
      .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='query_log'")
      .get();
    if (!tableCheck) {
      return [];
    }
    lowConfQueries = db
      .prepare(`SELECT query_params, confidence_score, tool_name, COUNT(*) as occurrences
FROM query_log
WHERE timestamp >= ? AND confidence_score IS NOT NULL AND confidence_score < ?
GROUP BY query_params
ORDER BY occurrences DESC`)
      .all(fromDate, LOW_CONFIDENCE_THRESHOLD);
  } finally {
    // Release the handle even when a statement throws.
    db.close();
  }

  if (lowConfQueries.length < UNKNOWN_TOPIC_SPIKE_MIN) {
    return [];
  }

  const key = `spike-${lowConfQueries.length}-${fromDate.split("T")[0]}`;
  if (wasRecentlyFired(alertDb, "unknown_topic_spike", key)) {
    return [];
  }

  // Show the human-readable query text when the payload is JSON; otherwise the raw payload.
  const topQueries = lowConfQueries.slice(0, 5).map((q) => {
    try {
      const parsed = JSON.parse(q.query_params);
      return parsed.query || parsed.question || q.query_params;
    } catch {
      return q.query_params;
    }
  });

  const alert = {
    type: "unknown_topic_spike",
    severity: lowConfQueries.length >= 10 ? "critical" : "warning",
    title: `Unknown topic spike: ${lowConfQueries.length} low-confidence queries`,
    details: [
      `${lowConfQueries.length} unique queries scored below ${LOW_CONFIDENCE_THRESHOLD} confidence.`,
      "Top queries:",
      ...topQueries.map((q, i) => ` ${i + 1}. "${q}"`),
      "",
      "Action: Review these topics for new article opportunities.",
    ].join("\n"),
    data: {
      count: lowConfQueries.length,
      threshold: LOW_CONFIDENCE_THRESHOLD,
      top_queries: topQueries,
    },
  };
  recordAlert(alertDb, alert, key);
  return [alert];
}
|
|
127
|
+
// --- Alert Check #2: Zero-Citation Articles ---
|
|
128
|
+
// WHAT: Find articles that exist in the knowledge base but are never cited in query results
|
|
129
|
+
// WHY: Content that's never surfaced is invisible — may need better embeddings or rewriting
|
|
130
|
+
/**
 * Alert check #2: zero-citation articles.
 *
 * Lists astgl.ai articles from the knowledge base whose URL never appears in
 * `content_cited` over the last ZERO_CITATION_LOOKBACK_DAYS days of the query
 * log. The check is skipped when fewer than 10 log rows carry citations,
 * because a near-empty log would flag every article.
 *
 * @param {string} fromDate - Accepted for parity with the other checks; this
 *   check does not read it and always uses its own fixed lookback window.
 * @param {object} alertDb - Open alert-history database handle.
 * @returns {object[]} Either an empty array or a single-element array with the alert.
 */
function checkZeroCitationArticles(fromDate, alertDb) {
  if (!existsSync(QUERY_LOG_PATH) || !existsSync(KNOWLEDGE_PATH)) {
    return [];
  }

  // Minimum number of log rows with citations needed for the result to be meaningful.
  const MIN_CITED_ROWS = 10;
  // At most this many articles are listed in the alert text and payload.
  const MAX_LISTED = 10;

  let allArticles;
  const knowledgeDb = new Database(KNOWLEDGE_PATH, { readonly: true });
  try {
    allArticles = knowledgeDb.prepare("SELECT title, url FROM articles").all();
  } finally {
    knowledgeDb.close();
  }

  let citedRows;
  const logDb = new Database(QUERY_LOG_PATH, { readonly: true });
  try {
    const tableCheck = logDb
      .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='query_log'")
      .get();
    if (!tableCheck) {
      return [];
    }
    const lookback = new Date();
    lookback.setDate(lookback.getDate() - ZERO_CITATION_LOOKBACK_DAYS);
    citedRows = logDb
      .prepare(`SELECT content_cited FROM query_log
WHERE timestamp >= ? AND content_cited IS NOT NULL`)
      .all(lookback.toISOString());
  } finally {
    // Release the handle even when a statement throws.
    logDb.close();
  }

  if (citedRows.length < MIN_CITED_ROWS) {
    return [];
  }

  const citedUrls = new Set();
  for (const row of citedRows) {
    try {
      const urls = JSON.parse(row.content_cited);
      // Only arrays are treated as URL lists; a bare JSON string would otherwise be iterated per character.
      if (Array.isArray(urls)) {
        for (const url of urls) {
          citedUrls.add(url);
        }
      }
    } catch {
      // Malformed content_cited payload: skip the row, keep the rest.
    }
  }

  // Only canonical astgl.ai URLs are considered; rows without a string URL are ignored.
  const uncitedHighValue = allArticles.filter((a) => typeof a.url === "string" &&
    a.url.startsWith("https://astgl.ai/") &&
    !citedUrls.has(a.url));
  if (uncitedHighValue.length === 0) {
    return [];
  }

  const key = `uncited-${uncitedHighValue.length}`;
  if (wasRecentlyFired(alertDb, "zero_citation", key)) {
    return [];
  }

  const listed = uncitedHighValue.slice(0, MAX_LISTED);
  const overflow = uncitedHighValue.length - listed.length;
  const alert = {
    type: "zero_citation",
    severity: uncitedHighValue.length >= 10 ? "warning" : "info",
    title: `${uncitedHighValue.length} high-value articles with zero citations (${ZERO_CITATION_LOOKBACK_DAYS}d)`,
    details: [
      `These astgl.ai articles were never cited in the last ${ZERO_CITATION_LOOKBACK_DAYS} days:`,
      ...listed.map((a) => ` - ${a.title}\n ${a.url}`),
      // The "... and N more" line is emitted only when the list was truncated.
      ...(overflow > 0 ? [` ... and ${overflow} more`] : []),
      "",
      "Action: Check embeddings quality, consider rewriting descriptions, or verify chunking.",
    ].join("\n"),
    data: {
      count: uncitedHighValue.length,
      lookback_days: ZERO_CITATION_LOOKBACK_DAYS,
      articles: listed.map((a) => ({ title: a.title, url: a.url })),
    },
  };
  recordAlert(alertDb, alert, key);
  return [alert];
}
|
|
197
|
+
// --- Alert Check #3: Repeated Low-Confidence Queries ---
|
|
198
|
+
// WHAT: Same query text appearing multiple times with consistently low confidence
|
|
199
|
+
// WHY: Repeated failures = a real user need we're not serving — highest-signal content gap
|
|
200
|
+
/**
 * Alert check #3: repeated low-confidence queries.
 *
 * Finds query payloads that were logged at least REPEATED_QUERY_MIN times
 * since `fromDate` with a confidence score below LOW_CONFIDENCE_THRESHOLD,
 * and emits one alert for each of the five most frequent that is not in
 * cooldown.
 *
 * @param {string} fromDate - ISO-8601 timestamp marking the start of the window.
 * @param {object} alertDb - Open alert-history database handle.
 * @returns {object[]} Zero to five alerts, most frequent query first.
 */
function checkRepeatedLowConfidence(fromDate, alertDb) {
  if (!existsSync(QUERY_LOG_PATH)) {
    return [];
  }

  let repeats;
  const db = new Database(QUERY_LOG_PATH, { readonly: true });
  try {
    // The log file can exist before its table has been created.
    const tableCheck = db
      .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='query_log'")
      .get();
    if (!tableCheck) {
      return [];
    }
    repeats = db
      .prepare(`SELECT query_params, tool_name,
COUNT(*) as occurrences,
ROUND(AVG(confidence_score), 3) as avg_confidence
FROM query_log
WHERE timestamp >= ?
AND confidence_score IS NOT NULL
AND confidence_score < ?
GROUP BY query_params
HAVING COUNT(*) >= ?
ORDER BY occurrences DESC`)
      .all(fromDate, LOW_CONFIDENCE_THRESHOLD, REPEATED_QUERY_MIN);
  } finally {
    // Release the handle even when a statement throws.
    db.close();
  }

  if (repeats.length === 0) {
    return [];
  }

  const alerts = [];
  for (const r of repeats.slice(0, 5)) {
    // Always end up with a string: the key below slices it, and a numeric or
    // NULL payload would otherwise throw and abort every remaining check.
    const rawParams = typeof r.query_params === "string" ? r.query_params : String(r.query_params ?? "");
    let queryText = rawParams;
    try {
      const parsed = JSON.parse(rawParams);
      const candidate = parsed.query || parsed.question;
      if (typeof candidate === "string") {
        queryText = candidate;
      }
    } catch {
      // Payload is not JSON: keep the raw text.
    }

    const key = `repeat-${queryText.slice(0, 50)}`;
    if (wasRecentlyFired(alertDb, "repeated_low_confidence", key)) {
      continue;
    }

    const alert = {
      type: "repeated_low_confidence",
      severity: r.occurrences >= 5 ? "critical" : "warning",
      title: `Repeated content gap: "${queryText}" (${r.occurrences}x, avg ${r.avg_confidence})`,
      details: [
        `Query "${queryText}" has been asked ${r.occurrences} times with avg confidence ${r.avg_confidence}.`,
        `Tool: ${r.tool_name}`,
        "",
        "Action: Write or improve content targeting this specific question.",
      ].join("\n"),
      data: {
        query: queryText,
        occurrences: r.occurrences,
        avg_confidence: r.avg_confidence,
        tool: r.tool_name,
      },
    };
    recordAlert(alertDb, alert, key);
    alerts.push(alert);
  }
  return alerts;
}
|
|
261
|
+
// --- Alert Check #4: Competitor MCP Servers ---
|
|
262
|
+
// WHAT: Scan Smithery registry for new MCP servers covering similar topics
|
|
263
|
+
// WHY: Competitors publishing knowledge MCP servers dilutes ASTGL's AEO position
|
|
264
|
+
/**
 * Alert check #4: competitor MCP servers.
 *
 * Queries the Smithery registry once per search term and emits an "info"
 * alert for every server that is not one of ours, was created within the
 * last 30 days, and is not in cooldown. The scan is best-effort: a failing
 * search term is logged to stderr and skipped, it never rejects.
 *
 * @param {object} alertDb - Open alert-history database handle.
 * @returns {Promise<object[]>} The alerts that fired, in discovery order.
 */
async function checkCompetitorServers(alertDb) {
  const SEARCH_TERMS = ["knowledge base", "ai articles", "local ai", "mcp guide"];
  const OUR_SLUGS = ["astgl-knowledge", "mcp-astgl-knowledge"];
  const NEW_SERVER_WINDOW_DAYS = 30;

  // Old servers are not news; compute the cutoff once for the whole scan.
  const newSince = new Date();
  newSince.setDate(newSince.getDate() - NEW_SERVER_WINDOW_DAYS);

  const alerts = [];
  for (const term of SEARCH_TERMS) {
    try {
      const resp = await fetch(`https://registry.smithery.ai/servers?q=${encodeURIComponent(term)}&pageSize=10`, {
        headers: { Accept: "application/json" },
        signal: AbortSignal.timeout(10_000),
      });
      if (!resp.ok) {
        continue;
      }
      const data = await resp.json();
      if (!Array.isArray(data?.servers)) {
        continue;
      }

      for (const server of data.servers) {
        // An entry without a usable identifier cannot be keyed or reported;
        // skip it instead of letting it abort the rest of this result page.
        const qualifiedName = server?.qualifiedName;
        if (typeof qualifiedName !== "string") {
          continue;
        }
        if (OUR_SLUGS.some((slug) => qualifiedName.includes(slug))) {
          continue;
        }

        // A missing or unparseable creation date yields an Invalid Date, and
        // every comparison against it is false — so it must be rejected
        // explicitly or such servers would always be reported as new.
        const createdAt = new Date(server.createdAt);
        if (Number.isNaN(createdAt.getTime()) || createdAt < newSince) {
          continue;
        }

        // The same server can match several search terms; the history row
        // written below makes later matches in this run hit the cooldown.
        const key = `competitor-${qualifiedName}`;
        if (wasRecentlyFired(alertDb, "competitor_detected", key)) {
          continue;
        }

        const displayName = server.displayName ?? qualifiedName;
        const alert = {
          type: "competitor_detected",
          severity: "info",
          title: `New competitor MCP server: ${displayName}`,
          details: [
            `**Server:** ${qualifiedName}`,
            `**Name:** ${displayName}`,
            `**Description:** ${server.description?.slice(0, 200) || "N/A"}`,
            `**Created:** ${server.createdAt}`,
            `**Found via:** search for "${term}"`,
            "",
            "Action: Review server to assess overlap with ASTGL content coverage.",
          ].join("\n"),
          data: {
            qualified_name: qualifiedName,
            display_name: displayName,
            description: server.description,
            search_term: term,
          },
        };
        recordAlert(alertDb, alert, key);
        alerts.push(alert);
      }
    } catch (err) {
      // Best-effort scan: report the failure and move on to the next term.
      console.error(`Competitor scan failed for "${term}":`, err);
    }
  }
  return alerts;
}
|
|
325
|
+
// --- Discord Delivery ---
|
|
326
|
+
/**
 * Map an alert severity to the colour of its Discord embed.
 *
 * @param {string} severity - "critical", "warning" or "info".
 * @returns {number|undefined} 24-bit RGB colour, or undefined for any other value.
 */
function severityColor(severity) {
  if (severity === "critical") {
    return 0xff0000;
  }
  if (severity === "warning") {
    return 0xffa500;
  }
  if (severity === "info") {
    return 0x2196f3;
  }
  return undefined;
}
|
|
333
|
+
/**
 * Map an alert severity to the emoji prefixed to its Discord embed title.
 *
 * @param {string} severity - "critical", "warning" or "info".
 * @returns {string|undefined} The emoji, or undefined for any other value.
 */
function severityEmoji(severity) {
  if (severity === "critical") {
    return "🔴";
  }
  if (severity === "warning") {
    return "🟡";
  }
  if (severity === "info") {
    return "🔵";
  }
  return undefined;
}
|
|
340
|
+
/**
 * Deliver alerts to the configured Discord webhook as embeds.
 *
 * Up to ten alerts go out in a single message; larger sets are split into
 * consecutive messages so that no alert is dropped. Each embed is clipped to
 * Discord's per-field limits so one oversized alert cannot get the whole
 * message rejected. A non-2xx response is logged to stderr and does not stop
 * the remaining messages.
 *
 * @param {object[]} alerts - Alerts with `type`, `severity`, `title` and `details`.
 * @returns {Promise<void>}
 */
async function sendAlertsToDiscord(alerts) {
  if (!DISCORD_WEBHOOK_URL) {
    console.error("DISCORD_WEBHOOK_URL not set.");
    return;
  }
  if (alerts.length === 0) {
    return;
  }

  // Discord embed limits (see the Discord developer documentation).
  const MAX_EMBEDS_PER_MESSAGE = 10;
  const MAX_TITLE_CHARS = 256;
  const MAX_DESCRIPTION_CHARS = 4096;
  const MAX_FOOTER_CHARS = 2048;
  const MAX_MESSAGE_CHARS = 6000;

  const clip = (text, max) => {
    const value = String(text ?? "");
    return value.length > max ? `${value.slice(0, max - 1)}…` : value;
  };

  const embeds = alerts.map((alert) => ({
    title: clip(`${severityEmoji(alert.severity)} ${alert.title}`, MAX_TITLE_CHARS),
    description: clip(alert.details, MAX_DESCRIPTION_CHARS),
    color: severityColor(alert.severity),
    footer: { text: clip(`Alert type: ${alert.type}`, MAX_FOOTER_CHARS) },
  }));

  // Pack embeds into as few messages as the count and total-size limits allow.
  const batches = [];
  let batch = [];
  let batchChars = 0;
  for (const embed of embeds) {
    const embedChars = embed.title.length + embed.description.length + embed.footer.text.length;
    const batchIsFull = batch.length === MAX_EMBEDS_PER_MESSAGE || batchChars + embedChars > MAX_MESSAGE_CHARS;
    if (batch.length > 0 && batchIsFull) {
      batches.push(batch);
      batch = [];
      batchChars = 0;
    }
    batch.push(embed);
    batchChars += embedChars;
  }
  batches.push(batch);

  let delivered = 0;
  for (const messageEmbeds of batches) {
    const resp = await fetch(DISCORD_WEBHOOK_URL, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ embeds: messageEmbeds }),
      // Same bound as the registry scan: never hang the job on a stalled webhook.
      signal: AbortSignal.timeout(10_000),
    });
    if (!resp.ok) {
      const body = await resp.text();
      console.error(`Discord webhook failed: ${resp.status} ${body}`);
      continue;
    }
    delivered += messageEmbeds.length;
  }

  // Report what was actually accepted, not what was attempted.
  if (delivered > 0) {
    console.error(`${delivered} alert(s) sent to Discord.`);
  }
}
|
|
368
|
+
// --- CLI ---
|
|
369
|
+
/**
 * Parse the command-line flags of the alert engine.
 *
 * Recognised flags:
 *   --discord   also deliver fired alerts to Discord
 *   --days N    look back N days (positive integer, default 1)
 *
 * A missing, non-numeric or non-positive `--days` value falls back to the
 * default with a warning on stderr; otherwise the NaN would turn the
 * look-back date into an Invalid Date and crash the run later on.
 *
 * @returns {{sendToDiscord: boolean, days: number}} The parsed options.
 */
function parseArgs() {
  const DEFAULT_DAYS = 1;
  const args = process.argv.slice(2);

  let days = DEFAULT_DAYS;
  const flagIndex = args.indexOf("--days");
  if (flagIndex >= 0) {
    const rawValue = args[flagIndex + 1];
    const parsed = Number.parseInt(rawValue, 10);
    if (Number.isInteger(parsed) && parsed > 0) {
      days = parsed;
    } else {
      console.error(`Ignoring invalid --days value ${JSON.stringify(rawValue)}; using default of ${DEFAULT_DAYS}.`);
    }
  }

  return {
    sendToDiscord: args.includes("--discord"),
    days,
  };
}
|
|
379
|
+
/**
 * CLI entry point: run every alert check in order, print the JSON report on
 * stdout, and optionally forward fired alerts to Discord. Progress messages
 * go to stderr so that stdout carries only the report.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const { sendToDiscord, days } = parseArgs();

  const windowStart = new Date();
  windowStart.setDate(windowStart.getDate() - days);
  const fromDate = windowStart.toISOString();

  const alertDb = initAlertDb();

  // NOTE(review): nothing in this function increments the counter, so the
  // report and the summary line always show 0 suppressed alerts.
  const suppressed = 0;

  // The checks, in execution order; `type` is what the report lists in checks_run.
  const checks = [
    {
      type: "unknown_topic_spike",
      banner: "Checking: unknown topic spike...",
      run: () => checkUnknownTopicSpike(fromDate, alertDb),
    },
    {
      type: "zero_citation",
      banner: "Checking: zero-citation articles...",
      run: () => checkZeroCitationArticles(fromDate, alertDb),
    },
    {
      type: "repeated_low_confidence",
      banner: "Checking: repeated low-confidence queries...",
      run: () => checkRepeatedLowConfidence(fromDate, alertDb),
    },
    {
      type: "competitor_detected",
      banner: "Checking: competitor MCP servers...",
      run: () => checkCompetitorServers(alertDb),
    },
  ];

  const checksRun = [];
  const allAlerts = [];
  console.error("=== ASTGL Content Gap Alert Engine ===\n");
  for (const check of checks) {
    console.error(check.banner);
    checksRun.push(check.type);
    const fired = await check.run();
    allAlerts.push(...fired);
    console.error(` ${fired.length} alert(s)\n`);
  }
  alertDb.close();

  const report = {
    generated_at: new Date().toISOString(),
    period_days: days,
    alerts_fired: allAlerts,
    alerts_suppressed: suppressed,
    checks_run: checksRun,
  };
  console.log(JSON.stringify(report, null, 2));

  if (sendToDiscord) {
    if (allAlerts.length > 0) {
      await sendAlertsToDiscord(allAlerts);
    } else {
      console.error("No alerts to send.");
    }
  }
  console.error(`\n=== Done: ${allAlerts.length} alert(s) fired, ${suppressed} suppressed ===`);
}
|
|
427
|
+
// Run the engine and turn its outcome into the process exit code:
// 0 when main() completes, 1 when it rejects (the error is printed first).
try {
  await main();
  process.exit(0);
} catch (err) {
  console.error("Alert engine failed:", err);
  process.exit(1);
}
|
|
433
|
+
//# sourceMappingURL=alerts.js.map
|