@syndash/research-vault-mcp 1.1.2 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +35 -0
- package/README.md +34 -7
- package/dist/server.js +1114 -323
- package/package.json +6 -5
- package/src/amplify.ts +32 -41
- package/src/evidence_metadata.ts +191 -0
- package/src/guidance.ts +57 -0
- package/src/ingest/html.ts +129 -19
- package/src/profile.ts +15 -0
- package/src/public_safety.ts +110 -0
- package/src/response.ts +73 -0
- package/src/server.ts +304 -108
- package/src/tool_policy.ts +58 -0
- package/src/types.ts +4 -3
- package/src/vault.ts +300 -75
- package/src/vault_get.ts +109 -0
- package/src/vault_write.ts +78 -112
package/package.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@syndash/research-vault-mcp",
|
|
3
|
-
"version": "1.1.
|
|
3
|
+
"version": "1.1.3",
|
|
4
4
|
"description": "Evensong Research Vault MCP module — local-first semantic search, note persistence, and knowledge-base tools for MCP-compatible agents.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
7
|
-
"research-vault-mcp": "
|
|
7
|
+
"research-vault-mcp": "bin/research-vault-mcp.mjs"
|
|
8
8
|
},
|
|
9
9
|
"scripts": {
|
|
10
10
|
"dev": "bun run src/server.ts",
|
|
@@ -14,12 +14,12 @@
|
|
|
14
14
|
},
|
|
15
15
|
"repository": {
|
|
16
16
|
"type": "git",
|
|
17
|
-
"url": "https://github.com/Fearvox/
|
|
17
|
+
"url": "git+https://github.com/Fearvox/dash-research-vault.git",
|
|
18
18
|
"directory": "packages/research-vault-mcp"
|
|
19
19
|
},
|
|
20
|
-
"homepage": "https://github.com/Fearvox/
|
|
20
|
+
"homepage": "https://github.com/Fearvox/dash-research-vault/tree/main/packages/research-vault-mcp",
|
|
21
21
|
"bugs": {
|
|
22
|
-
"url": "https://github.com/Fearvox/
|
|
22
|
+
"url": "https://github.com/Fearvox/dash-research-vault/issues"
|
|
23
23
|
},
|
|
24
24
|
"license": "Apache-2.0",
|
|
25
25
|
"publishConfig": {
|
|
@@ -30,6 +30,7 @@
|
|
|
30
30
|
"dist/**/*.js",
|
|
31
31
|
"bin/**/*.mjs",
|
|
32
32
|
"README.md",
|
|
33
|
+
"CHANGELOG.md",
|
|
33
34
|
"package.json"
|
|
34
35
|
],
|
|
35
36
|
"keywords": [
|
package/src/amplify.ts
CHANGED
|
@@ -112,57 +112,48 @@ export const amplifyTools = [
|
|
|
112
112
|
throw new Error(`HTTP ${res.status}: ${err}`)
|
|
113
113
|
}
|
|
114
114
|
|
|
115
|
-
// Read SSE stream
|
|
116
115
|
const reader = res.body?.getReader()
|
|
117
116
|
if (!reader) throw new Error('No response body')
|
|
118
117
|
|
|
119
|
-
let fullText = ''
|
|
120
118
|
const decoder = new TextDecoder()
|
|
119
|
+
let buffer = ''
|
|
120
|
+
let fullText = ''
|
|
121
121
|
|
|
122
|
-
|
|
123
|
-
const
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
if (
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
122
|
+
const processEventBlock = (block: string) => {
|
|
123
|
+
for (const line of block.split('\n')) {
|
|
124
|
+
if (!line.startsWith('data: ')) continue
|
|
125
|
+
let parsed: any
|
|
126
|
+
try { parsed = JSON.parse(line.slice(6)) } catch { continue }
|
|
127
|
+
let textChunk = ''
|
|
128
|
+
if (parsed?.data?.content) {
|
|
129
|
+
textChunk = parsed.data.content
|
|
130
|
+
} else if (parsed?.data) {
|
|
131
|
+
textChunk = typeof parsed.data === 'string' ? parsed.data : JSON.stringify(parsed.data)
|
|
132
|
+
}
|
|
133
|
+
if (textChunk) {
|
|
134
|
+
fullText += textChunk
|
|
135
|
+
if (stream && onProgress) {
|
|
136
|
+
onProgress({ type: 'chunk', text: textChunk })
|
|
137
|
+
}
|
|
134
138
|
}
|
|
135
139
|
}
|
|
136
140
|
}
|
|
137
141
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
if (!reader2) throw new Error('No response body')
|
|
148
|
-
const decoder2 = new TextDecoder()
|
|
149
|
-
let buffer2 = ''
|
|
150
|
-
while (true) {
|
|
151
|
-
const { done, value } = await reader2.read()
|
|
152
|
-
if (done) break
|
|
153
|
-
buffer2 += decoder2.decode(value, { stream: true })
|
|
154
|
-
for (const line of buffer2.split('\n')) {
|
|
155
|
-
if (line.startsWith('data: ')) {
|
|
156
|
-
try {
|
|
157
|
-
const parsed = JSON.parse(line.slice(6))
|
|
158
|
-
if (parsed.data?.content) {
|
|
159
|
-
onProgress({ type: 'chunk', text: parsed.data.content })
|
|
160
|
-
}
|
|
161
|
-
} catch {}
|
|
162
|
-
}
|
|
163
|
-
}
|
|
142
|
+
while (true) {
|
|
143
|
+
const { done, value } = await reader.read()
|
|
144
|
+
if (done) break
|
|
145
|
+
buffer += decoder.decode(value, { stream: true })
|
|
146
|
+
let sep: number
|
|
147
|
+
while ((sep = buffer.indexOf('\n\n')) !== -1) {
|
|
148
|
+
const eventBlock = buffer.slice(0, sep)
|
|
149
|
+
buffer = buffer.slice(sep + 2)
|
|
150
|
+
processEventBlock(eventBlock)
|
|
164
151
|
}
|
|
165
|
-
|
|
152
|
+
}
|
|
153
|
+
buffer += decoder.decode()
|
|
154
|
+
if (buffer.length > 0) {
|
|
155
|
+
processEventBlock(buffer)
|
|
156
|
+
buffer = ''
|
|
166
157
|
}
|
|
167
158
|
|
|
168
159
|
return {
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
import type { DecayScore, VaultEntry } from './types.ts'
|
|
2
|
+
|
|
3
|
+
export type FreshnessVerdict = 'PASS' | 'FLAG'
|
|
4
|
+
|
|
5
|
+
export interface FreshnessShape {
|
|
6
|
+
verdict: FreshnessVerdict
|
|
7
|
+
reason: string
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
const STALE_AFTER_DAYS = 7
|
|
11
|
+
const DAY_MS = 24 * 60 * 60 * 1000
|
|
12
|
+
|
|
13
|
+
/** Restricts `value` to the inclusive range [`min`, `max`]. */
function clamp(value: number, min: number, max: number): number {
  const raisedToFloor = Math.max(min, value)
  return Math.min(max, raisedToFloor)
}
|
|
16
|
+
|
|
17
|
+
/** Lower-cases `value`, treating `undefined`/`null` as the empty string. */
function lower(value: string | undefined | null): string {
  const text = value ?? ''
  return text.toLowerCase()
}

/** Splits a query into lower-cased, whitespace-separated terms; a blank query yields []. */
function queryTerms(query?: string): string[] {
  const terms: string[] = []
  for (const piece of lower(query).split(/\s+/)) {
    const term = piece.trim()
    if (term) terms.push(term)
  }
  return terms
}

/** Returns true when at least one query term is a case-insensitive substring of `value`. */
function includesQuery(value: string | undefined, query?: string): boolean {
  const terms = queryTerms(query)
  if (terms.length === 0) return false

  const haystack = lower(value)
  for (const term of terms) {
    if (haystack.includes(term)) return true
  }
  return false
}
|
|
34
|
+
|
|
35
|
+
/**
 * Lists which of the entry's searchable fields (title, content, id, category)
 * contain at least one term of `query`, in that fixed order.
 * Returns [] when the query is missing or blank.
 */
export function matchedFields(entry: VaultEntry & { content?: string }, query?: string): string[] {
  if (!query?.trim()) return []

  const hits: string[] = []
  const check = (field: string, value: string | undefined): void => {
    if (includesQuery(value, query)) hits.push(field)
  }

  check('title', entry.title)
  check('content', entry.content)
  check('id', entry.id)
  check('category', entry.category)
  return hits
}
|
|
49
|
+
|
|
50
|
+
/**
 * Builds a one-sentence, human-readable explanation of why a result was returned.
 *
 * @param entry  The result entry (not inspected; kept for signature parity with callers).
 * @param query  The search query, if any.
 * @param fields Field names in which the query matched.
 * @returns A fixed message for a blank query or no matched fields, otherwise a
 *          sentence naming the query and the matched fields.
 */
export function whyMatched(entry: VaultEntry & { content?: string }, query: string | undefined, fields: string[]): string {
  if (!query?.trim()) {
    return 'No query provided; result is included by category or default listing.'
  }
  if (fields.length === 0) {
    return 'Result is included after filters; no direct field match was detected.'
  }

  // Only "content" has a display label that differs from its field name.
  const labels: string[] = []
  for (const field of fields) {
    labels.push(field === 'content' ? 'note content' : field)
  }

  return `Matched query "${query}" in ${labels.join(', ')}.`
}
|
|
63
|
+
|
|
64
|
+
/**
 * Produces a short, single-line excerpt of `content`, centred on the earliest
 * occurrence of any query term when one is found.
 *
 * Whitespace runs are collapsed to single spaces first. Content that already
 * fits within the limit is returned whole. Otherwise a window of at most
 * `maxChars` characters (ellipses included) is returned, with `...` marking
 * each side that was cut.
 *
 * @param content  Raw text to excerpt.
 * @param query    Optional search query; terms are matched case-insensitively.
 * @param maxChars Maximum length of the returned snippet (floored, minimum 0).
 * @returns The excerpt, or '' when the limit is 0.
 */
export function snippetFromContent(content: string, query?: string, maxChars = 240): string {
  const limit = Math.max(0, Math.floor(maxChars))
  if (limit === 0) return ''

  const normalized = content.replace(/\s+/g, ' ').trim()
  if (normalized.length <= limit) return normalized

  // Same tokenisation as the module's query helpers: lower-case, split on whitespace, drop blanks.
  const terms = (query ?? '').toLowerCase().split(/\s+/).filter(Boolean)
  const lowerContent = normalized.toLowerCase()

  // Earliest position at which any term occurs, or -1 when nothing matches.
  let hitIndex = -1
  for (const term of terms) {
    const index = lowerContent.indexOf(term)
    if (index >= 0 && (hitIndex < 0 || index < hitIndex)) hitIndex = index
  }
  if (hitIndex < 0) return normalized.slice(0, limit).trimEnd()

  const halfWindow = Math.floor(limit / 2)
  const start = Math.max(0, Math.min(hitIndex - halfWindow, normalized.length - limit))
  const end = Math.min(normalized.length, start + limit)
  const prefix = start > 0 ? '...' : ''
  const suffix = end < normalized.length ? '...' : ''
  const available = limit - prefix.length - suffix.length

  // The limit is too small to fit the ellipses: return the bare window so the
  // result never exceeds `limit` and still contains the start of the hit.
  if (available <= 0) return normalized.slice(start, end).trim()

  // When the window reaches the end of the content there is no suffix, so the
  // characters given up for the prefix must come off the left edge. Taking them
  // off the right would silently drop the tail, and possibly the hit itself.
  const sliceStart = suffix ? start : end - available
  return `${prefix}${normalized.slice(sliceStart, sliceStart + available).trim()}${suffix}`
}
|
|
88
|
+
|
|
89
|
+
/**
 * Classifies an analysis timestamp as fresh ('PASS') or not ('FLAG').
 *
 * A missing or unparseable timestamp is flagged. Otherwise the age is measured
 * in whole days (floored) and flagged once it exceeds STALE_AFTER_DAYS.
 *
 * @param lastAnalyzedAt Timestamp string accepted by `Date.parse`, or null/undefined.
 * @returns The verdict together with a human-readable reason.
 */
export function staleVerdict(lastAnalyzedAt?: string | null): FreshnessShape {
  const flag = (reason: string): FreshnessShape => ({ verdict: 'FLAG', reason })

  if (!lastAnalyzedAt) return flag('No analysis timestamp was provided.')

  const analyzedMs = Date.parse(lastAnalyzedAt)
  if (Number.isNaN(analyzedMs)) return flag('Analysis timestamp could not be parsed.')

  const elapsedMs = Date.now() - analyzedMs
  const ageDays = Math.floor(elapsedMs / DAY_MS)
  return ageDays > STALE_AFTER_DAYS
    ? flag(`Analysis is ${ageDays} days old.`)
    : { verdict: 'PASS', reason: 'Analysis is fresh enough for the read surface.' }
}
|
|
106
|
+
|
|
107
|
+
/**
 * Builds the freshness fields reported for a single vault entry: the analysis
 * timestamp taken from the entry's score (null when absent), the entry's
 * modification time (null when falsy), and the staleness verdict with its reason.
 */
export function itemFreshness(entry: VaultEntry & { score?: DecayScore & { lastAnalyzedAt?: string } }) {
  const analyzedAt = entry.score?.lastAnalyzedAt ?? null
  const { verdict, reason } = staleVerdict(analyzedAt)
  const sourceMtime = entry.modified || null

  return {
    last_analyzed_at: analyzedAt,
    source_mtime: sourceMtime,
    freshness_verdict: verdict,
    freshness_reason: reason,
  }
}
|
|
118
|
+
|
|
119
|
+
/**
 * Finds the oldest parseable `source_mtime` among the queued items.
 *
 * Items without a `source_mtime`, or with one `Date.parse` cannot read, are
 * ignored. The scan is a single pass rather than `Math.min(...array)`, because
 * spreading a very large array into a call can throw a RangeError.
 *
 * @param queueItems Pending items, each optionally carrying a `source_mtime` string.
 * @returns `oldest_pending_age` in whole days (never negative) and
 *          `oldest_pending_at` as an ISO string, or both null when no item has
 *          a usable timestamp.
 */
export function queueFreshness(queueItems: Array<{ source_mtime?: string | null }>) {
  let oldest = Number.POSITIVE_INFINITY
  for (const item of queueItems) {
    if (!item.source_mtime) continue
    const timestamp = Date.parse(item.source_mtime)
    if (!Number.isNaN(timestamp) && timestamp < oldest) oldest = timestamp
  }

  if (oldest === Number.POSITIVE_INFINITY) {
    return {
      oldest_pending_age: null as number | null,
      oldest_pending_at: null as string | null,
    }
  }

  return {
    oldest_pending_age: Math.max(0, Math.floor((Date.now() - oldest) / DAY_MS)),
    oldest_pending_at: new Date(oldest).toISOString(),
  }
}
|
|
137
|
+
|
|
138
|
+
/**
 * Summarises analysis coverage for a vault status report.
 *
 * @param statusData Totals plus optional per-entry scores and pending queue items.
 * @returns
 *  - `as_of`: the current time as an ISO string.
 *  - `last_analyzed_at`: the greatest `lastAnalyzedAt` string among the scores, or null.
 *  - `analyzed_coverage`: analyzed / total, clamped to [0, 1] and rounded to 4 decimals (0 when total is 0).
 *  - `oldest_pending_age`: age in days of the oldest queue item, or null.
 *  - `recent_throughput`: number of scores analysed within the last STALE_AFTER_DAYS days.
 */
export function coverageMetadata(statusData: {
  total: number
  analyzed: number
  scores?: Array<DecayScore & { lastAnalyzedAt?: string }>
  queueItems?: Array<{ source_mtime?: string | null }>
}) {
  const scores = statusData.scores ?? []

  let analyzedCoverage = 0
  if (statusData.total !== 0) {
    const ratio = clamp(statusData.analyzed / statusData.total, 0, 1)
    analyzedCoverage = Number(ratio.toFixed(4))
  }

  // NOTE(review): "latest" is chosen by plain string comparison, which matches
  // chronological order only if every timestamp shares one sortable format
  // (e.g. ISO-8601 in UTC) — confirm against the writer of lastAnalyzedAt.
  let analyzedAt: string | null = null
  for (const score of scores) {
    const stamp = score.lastAnalyzedAt
    if (stamp && (analyzedAt === null || stamp > analyzedAt)) analyzedAt = stamp
  }

  const windowMs = STALE_AFTER_DAYS * DAY_MS
  let recentThroughput = 0
  for (const score of scores) {
    if (!score.lastAnalyzedAt) continue
    const timestamp = Date.parse(score.lastAnalyzedAt)
    if (!Number.isNaN(timestamp) && Date.now() - timestamp <= windowMs) recentThroughput += 1
  }

  return {
    as_of: new Date().toISOString(),
    last_analyzed_at: analyzedAt,
    analyzed_coverage: analyzedCoverage,
    oldest_pending_age: queueFreshness(statusData.queueItems ?? []).oldest_pending_age,
    recent_throughput: recentThroughput,
  }
}
|
|
166
|
+
|
|
167
|
+
/**
 * Describes the running package release and how fresh its npm publication is.
 *
 * The npm version, npm modification time and public repo URL are read from the
 * supplied environment. Freshness is only evaluated when all three are present;
 * otherwise the result is flagged with a fixed "not provided" reason.
 *
 * @param env         Environment variables carrying the release facts.
 * @param packageJson Name and version of the locally installed package.
 */
export function releaseMetadata(
  env: Pick<NodeJS.ProcessEnv, 'RESEARCH_VAULT_NPM_LATEST_VERSION' | 'RESEARCH_VAULT_NPM_MODIFIED_AT' | 'RESEARCH_VAULT_PUBLIC_REPO_URL'>,
  packageJson: { name?: string; version?: string },
) {
  const npmLatestVersion = env.RESEARCH_VAULT_NPM_LATEST_VERSION ?? null
  const npmModifiedAt = env.RESEARCH_VAULT_NPM_MODIFIED_AT ?? null
  const publicRepo = env.RESEARCH_VAULT_PUBLIC_REPO_URL ?? null

  const modifiedVerdict = staleVerdict(npmModifiedAt)
  const provided = Boolean(npmLatestVersion && npmModifiedAt && publicRepo)

  // Whole days since the npm timestamp, never negative; null when absent or unparseable.
  const modifiedMs = npmModifiedAt ? Date.parse(npmModifiedAt) : NaN
  const daysSinceUpdate = Number.isNaN(modifiedMs)
    ? null
    : Math.max(0, Math.floor((Date.now() - modifiedMs) / DAY_MS))

  const freshnessVerdict: FreshnessVerdict = provided ? modifiedVerdict.verdict : 'FLAG'
  const freshnessReason = provided
    ? modifiedVerdict.reason
    : 'Release freshness was not provided by the runtime environment.'

  return {
    package_name: packageJson.name ?? null,
    local_version: packageJson.version ?? null,
    npm_latest_version: npmLatestVersion,
    npm_modified_at: npmModifiedAt,
    days_since_npm_update: daysSinceUpdate,
    public_repo: publicRepo,
    freshness_verdict: freshnessVerdict,
    freshness_reason: freshnessReason,
  }
}
|
package/src/guidance.ts
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import type { McpProfile } from './profile.ts'
|
|
2
|
+
|
|
3
|
+
export type GuidanceVerdict = 'PASS' | 'FLAG' | 'BLOCK'
|
|
4
|
+
|
|
5
|
+
export interface AgentGuidance {
|
|
6
|
+
verdict: GuidanceVerdict
|
|
7
|
+
reason: string
|
|
8
|
+
next_step: string
|
|
9
|
+
recommended_tool?: string
|
|
10
|
+
retryable?: boolean
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
/** Builds a non-retryable 'PASS' guidance record from the given reason, next step and optional tool. */
export function passGuidance(reason: string, next_step: string, recommended_tool?: string): AgentGuidance {
  const guidance: AgentGuidance = { verdict: 'PASS', reason, next_step, recommended_tool, retryable: false }
  return guidance
}
|
|
22
|
+
|
|
23
|
+
/** Builds a retryable 'FLAG' guidance record from the given reason, next step and optional tool. */
export function flagGuidance(reason: string, next_step: string, recommended_tool?: string): AgentGuidance {
  const guidance: AgentGuidance = { verdict: 'FLAG', reason, next_step, recommended_tool, retryable: true }
  return guidance
}
|
|
32
|
+
|
|
33
|
+
/** Builds a non-retryable 'BLOCK' guidance record from the given reason, next step and optional tool. */
export function blockGuidance(reason: string, next_step: string, recommended_tool?: string): AgentGuidance {
  const guidance: AgentGuidance = { verdict: 'BLOCK', reason, next_step, recommended_tool, retryable: false }
  return guidance
}
|
|
42
|
+
|
|
43
|
+
/**
 * BLOCK guidance for a tool that the active profile does not expose.
 * Points the agent at `vault_search` and mentions MCP_PROFILE=full as the way
 * to enable non-destructive mutation.
 */
export function readonlyBlockedGuidance(toolName: string, profile: McpProfile): AgentGuidance {
  const reason = `${toolName} is unavailable while Research Vault MCP is running in ${profile} profile.`
  const nextStep =
    'Use vault_search for readonly evidence, or switch to MCP_PROFILE=full for operator-approved non-destructive mutation in a private operator session.'
  return blockGuidance(reason, nextStep, 'vault_search')
}
|
|
50
|
+
|
|
51
|
+
/**
 * BLOCK guidance for an admin-only tool requested under a non-admin profile.
 * Points the agent at `vault_search` and names MCP_PROFILE=admin as the
 * required profile.
 */
export function adminBlockedGuidance(toolName: string, profile: McpProfile): AgentGuidance {
  const reason = `${toolName} is admin-only and unavailable while Research Vault MCP is running in ${profile} profile.`
  const nextStep =
    'Use vault_search for readonly evidence, or start a private admin operator session with MCP_PROFILE=admin; readonly/full profiles are insufficient for destructive/admin tools.'
  return blockGuidance(reason, nextStep, 'vault_search')
}
|
package/src/ingest/html.ts
CHANGED
|
@@ -1,10 +1,27 @@
|
|
|
1
1
|
// packages/research-vault-mcp/src/ingest/html.ts
|
|
2
2
|
|
|
3
|
+
import type { LookupAddress } from 'dns'
|
|
4
|
+
|
|
5
|
+
type DnsLookupFn = (hostname: string) => Promise<LookupAddress[]>
|
|
6
|
+
|
|
7
|
+
/** Resolves `hostname` to every address the system resolver returns, loading `dns/promises` on first use. */
async function defaultLookup(hostname: string): Promise<LookupAddress[]> {
  const dns = await import('dns/promises')
  return dns.lookup(hostname, { all: true })
}
|
|
11
|
+
|
|
12
|
+
let dnsLookup: DnsLookupFn = defaultLookup
|
|
13
|
+
|
|
14
|
+
/** Replaces the DNS resolver used for host validation; passing null restores the default resolver. */
export function _setDnsLookup(fn: DnsLookupFn | null): void {
  if (fn == null) {
    dnsLookup = defaultLookup
    return
  }
  dnsLookup = fn
}
|
|
17
|
+
|
|
3
18
|
/**
|
|
4
19
|
* Validate URL to prevent SSRF attacks.
|
|
5
|
-
* Blocks: private
|
|
20
|
+
* Blocks: private IPv4/IPv6 ranges, loopback, link-local, cloud metadata
|
|
21
|
+
* endpoints, invalid schemes, and forbidden hostname literals. DNS-backed
|
|
22
|
+
* hostname checks happen in validateHostDns(), which safeFetch calls per hop.
|
|
6
23
|
*/
|
|
7
|
-
function validateUrl(url: string): void {
|
|
24
|
+
export function validateUrl(url: string): void {
|
|
8
25
|
let parsed: URL
|
|
9
26
|
try {
|
|
10
27
|
parsed = new URL(url)
|
|
@@ -17,38 +34,131 @@ function validateUrl(url: string): void {
|
|
|
17
34
|
throw new Error(`URL scheme not allowed: ${scheme}. Only http/https permitted.`)
|
|
18
35
|
}
|
|
19
36
|
|
|
20
|
-
const hostname = parsed.hostname.toLowerCase()
|
|
37
|
+
const hostname = parsed.hostname.replace(/^\[(.*)\]$/, '$1').toLowerCase()
|
|
38
|
+
validateHostnameLiteralPolicy(hostname)
|
|
39
|
+
|
|
40
|
+
if (hostname.includes(':')) {
|
|
41
|
+
validateIpv6(hostname, hostname)
|
|
42
|
+
return
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
const ipMatch = hostname.match(/^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/)
|
|
46
|
+
if (ipMatch) {
|
|
47
|
+
validateIpv4(hostname, hostname)
|
|
48
|
+
return
|
|
49
|
+
}
|
|
50
|
+
}
|
|
21
51
|
|
|
22
|
-
|
|
23
|
-
if (hostname === '
|
|
24
|
-
throw new Error(`
|
|
52
|
+
/**
 * Rejects hostnames that are forbidden by name regardless of what they resolve to.
 * The comparison is exact, so callers are expected to pass a lower-cased hostname.
 *
 * @throws Error when `hostname` is one of the forbidden literals.
 */
function validateHostnameLiteralPolicy(hostname: string): void {
  const forbidden = ['localhost', 'metadata.google.internal']
  if (!forbidden.includes(hostname)) return
  throw new Error(`Hostname not permitted: ${hostname}`)
}
|
|
26
57
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
58
|
+
/**
 * Rejects IPv4 addresses that must never be fetched on behalf of a caller.
 *
 * Each octet must be 1-3 decimal digits in the range 0-255; anything else
 * (including trailing junk such as "8x") is treated as invalid rather than
 * being partially parsed.
 *
 * Blocked ranges: 0.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10 (shared address space),
 * 127.0.0.0/8, 169.254.0.0/16 (with a dedicated message for 169.254.169.254),
 * 172.16.0.0/12, 192.0.0.0/24, 192.168.0.0/16, 198.18.0.0/15, and everything
 * from 224.0.0.0 upwards (multicast, reserved, broadcast).
 *
 * @param ip               Dotted-decimal IPv4 address to check.
 * @param originalHostname Hostname or literal the address came from; used in error messages.
 * @throws Error when the address is malformed or falls in a blocked range.
 */
function validateIpv4(ip: string, originalHostname: string): void {
  const octets = ip.split('.')
  if (octets.length !== 4 || octets.some(octet => !/^\d{1,3}$/.test(octet))) {
    throw new Error(`Invalid IPv4 address: ${originalHostname}`)
  }
  const parts = octets.map(Number)
  if (parts.some(n => n > 255)) {
    throw new Error(`Invalid IPv4 address: ${originalHostname}`)
  }
  const [a, b, c, d] = parts

  if (a === 0) throw new Error(`Reserved IP blocked: ${originalHostname}`)
  if (a === 10) throw new Error(`Private IP blocked: ${originalHostname}`)
  if (a === 127) throw new Error(`Loopback IP blocked: ${originalHostname}`)
  // Checked before the general link-local rule so the message names the metadata endpoint.
  if (a === 169 && b === 254 && c === 169 && d === 254) {
    throw new Error(`Cloud metadata endpoint blocked: ${originalHostname}`)
  }
  if (a === 169 && b === 254) throw new Error(`Link-local IP blocked: ${originalHostname}`)
  if (a === 172 && b >= 16 && b <= 31) throw new Error(`Private IP blocked: ${originalHostname}`)
  if (a === 192 && b === 168) throw new Error(`Private IP blocked: ${originalHostname}`)
  // 100.64.0.0/10: carrier-grade NAT / shared address space, never publicly routable.
  if (a === 100 && b >= 64 && b <= 127) throw new Error(`Private IP blocked: ${originalHostname}`)
  // 192.0.0.0/24 (IETF protocol assignments) and 198.18.0.0/15 (benchmarking).
  if (a === 192 && b === 0 && c === 0) throw new Error(`Reserved IP blocked: ${originalHostname}`)
  if (a === 198 && (b === 18 || b === 19)) throw new Error(`Reserved IP blocked: ${originalHostname}`)
  // 224.0.0.0/4 multicast, 240.0.0.0/4 reserved, 255.255.255.255 broadcast.
  if (a >= 224) throw new Error(`Reserved IP blocked: ${originalHostname}`)
}
|
|
37
75
|
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
76
|
+
/**
 * Rejects IPv6 addresses that must never be fetched on behalf of a caller:
 * loopback (::1), unspecified (::), unique-local (fc00::/7), link-local
 * (fe80::/10), and IPv4-mapped addresses whose embedded IPv4 address is blocked.
 *
 * IPv4-mapped addresses are recognised in both spellings. The dotted form
 * (`::ffff:127.0.0.1`) is what a resolver may return; the hexadecimal form
 * (`::ffff:7f00:1`) is what the WHATWG URL parser produces for a bracketed
 * literal, so checking only the dotted form lets such literals through.
 *
 * @param ip               IPv6 address without brackets; a `%zone` suffix is ignored.
 * @param originalHostname Hostname or literal the address came from; used in error messages.
 * @throws Error when the address falls in a blocked range.
 */
function validateIpv6(ip: string, originalHostname: string): void {
  const stripped = ip.toLowerCase().split('%')[0]
  if (stripped === '::1' || stripped === '::') {
    throw new Error(`IPv6 loopback/unspecified blocked: ${originalHostname}`)
  }
  if (/^(fc|fd)[0-9a-f]{0,2}:/i.test(stripped)) {
    throw new Error(`IPv6 unique-local blocked: ${originalHostname}`)
  }
  if (/^fe[89ab][0-9a-f]?:/i.test(stripped)) {
    throw new Error(`IPv6 link-local blocked: ${originalHostname}`)
  }

  const dottedMapped = stripped.match(/^::ffff:(\d+\.\d+\.\d+\.\d+)$/)
  if (dottedMapped) {
    validateIpv4(dottedMapped[1], originalHostname)
    return
  }

  // Hex spelling of the same range: the last two 16-bit groups hold the IPv4 address.
  const hexMapped = stripped.match(/^::ffff:([0-9a-f]{1,4}):([0-9a-f]{1,4})$/)
  if (hexMapped) {
    const high = parseInt(hexMapped[1], 16)
    const low = parseInt(hexMapped[2], 16)
    const dotted = `${high >> 8}.${high & 0xff}.${low >> 8}.${low & 0xff}`
    validateIpv4(dotted, originalHostname)
  }
}
|
|
43
92
|
|
|
93
|
+
/**
 * Resolves `hostname` through DNS and checks every returned address against the
 * private, loopback, link-local and cloud-metadata rules, so that a public-looking
 * name pointing at an internal address is rejected.
 *
 * IP literals are not resolved or re-checked here; this function returns early
 * for them and leaves literal validation to validateUrl().
 *
 * Residual risk: this narrows but does not fully close the DNS rebinding TOCTOU
 * window between this lookup and fetch()'s own internal lookup. Full mitigation
 * requires IP pinning, which is HTTPS-incompatible here without TLS SNI control.
 *
 * @throws Error when the name is forbidden, the lookup fails or returns no
 *         records, or any resolved address is blocked.
 */
export async function validateHostDns(hostname: string): Promise<void> {
  const bare = hostname.replace(/^\[(.*)\]$/, '$1').toLowerCase()
  validateHostnameLiteralPolicy(bare)

  const isIpv4Literal = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/.test(bare)
  if (isIpv4Literal) return
  const isIpv6Literal = bare.includes(':')
  if (isIpv6Literal) return

  let records: LookupAddress[]
  try {
    records = await dnsLookup(bare)
  } catch (cause) {
    const detail = cause instanceof Error ? cause.message : String(cause)
    throw new Error(`DNS lookup failed for ${hostname}: ${detail}`)
  }

  if (!records || records.length === 0) {
    throw new Error(`DNS lookup returned no records for ${hostname}`)
  }

  for (const record of records) {
    switch (record.family) {
      case 4:
        validateIpv4(record.address, hostname)
        break
      case 6:
        validateIpv6(record.address, hostname)
        break
    }
  }
}
|
|
126
|
+
|
|
127
|
+
const MAX_REDIRECTS = 5
|
|
128
|
+
|
|
129
|
+
/**
 * fetch wrapper that follows redirects manually, re-validating each hop with
 * validateUrl() and validateHostDns(). Redirects to private IP literals or
 * hostnames resolving to private IPs are blocked before fetch follows them.
 *
 * A 3xx response without a Location header is returned to the caller as-is.
 * The body of every redirect response that is followed is cancelled first, so
 * abandoned hops do not keep their stream (and its connection) open.
 *
 * @param url  Initial URL to fetch.
 * @param init Request options reused for every hop; `redirect` is always forced to 'manual'.
 * @returns The first response that is not a followable redirect.
 * @throws Error when a hop fails validation or more than MAX_REDIRECTS redirects are followed.
 */
async function safeFetch(url: string, init: RequestInit = {}): Promise<Response> {
  let currentUrl = url
  for (let hop = 0; hop <= MAX_REDIRECTS; hop++) {
    validateUrl(currentUrl)
    const parsed = new URL(currentUrl)
    const hostname = parsed.hostname.replace(/^\[(.*)\]$/, '$1')
    await validateHostDns(hostname)

    const res = await fetch(currentUrl, { ...init, redirect: 'manual' })
    if (res.status < 300 || res.status >= 400) {
      return res
    }
    const location = res.headers.get('location')
    if (!location) {
      return res
    }

    // This response is being discarded; release its body before the next hop.
    // A failure to cancel must not mask the redirect handling itself.
    await res.body?.cancel().catch(() => undefined)

    // Location may be relative; resolve it against the URL that produced it.
    currentUrl = new URL(location, currentUrl).toString()
  }
  throw new Error(`Too many redirects (>${MAX_REDIRECTS}) starting from ${url}`)
}
|
|
154
|
+
|
|
44
155
|
/**
|
|
45
156
|
* Fetch a URL and convert HTML to plain markdown-like text.
|
|
46
157
|
* Strips scripts, styles, nav, footer, header, aside elements.
|
|
47
158
|
* Uses Bun's native fetch — no external dependencies.
|
|
48
159
|
*/
|
|
49
160
|
export async function fetchHtml(url: string): Promise<string> {
|
|
50
|
-
|
|
51
|
-
const res = await fetch(url, {
|
|
161
|
+
const res = await safeFetch(url, {
|
|
52
162
|
headers: {
|
|
53
163
|
'User-Agent': 'Mozilla/5.0 research-vault-mcp/1.1.0',
|
|
54
164
|
'Accept': 'text/html'
|
|
@@ -85,4 +195,4 @@ export async function fetchHtml(url: string): Promise<string> {
|
|
|
85
195
|
text = text.replace(/\n{3,}/g, '\n\n').trim()
|
|
86
196
|
|
|
87
197
|
return text
|
|
88
|
-
}
|
|
198
|
+
}
|
package/src/profile.ts
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
export type McpProfile = 'readonly' | 'full' | 'admin'
|
|
2
|
+
|
|
3
|
+
/**
 * Reads the active MCP profile from the environment.
 *
 * MCP_PROFILE takes precedence over RESEARCH_VAULT_MCP_PROFILE when it is set to
 * a non-empty value. The value is matched case-insensitively; anything missing
 * or unrecognised falls back to 'readonly'.
 *
 * @param env Environment to read from; defaults to `process.env`.
 */
export function getActiveProfile(env: Record<string, string | undefined> = process.env): McpProfile {
  const configured = env.MCP_PROFILE || env.RESEARCH_VAULT_MCP_PROFILE || 'readonly'
  switch (String(configured).toLowerCase()) {
    case 'full':
      return 'full'
    case 'admin':
      return 'admin'
    default:
      return 'readonly'
  }
}
|
|
8
|
+
|
|
9
|
+
/** True for the profiles that may run mutating tools: 'full' and 'admin'. */
export function profileAllowsMutation(profile: McpProfile): boolean {
  switch (profile) {
    case 'full':
    case 'admin':
      return true
    default:
      return false
  }
}
|
|
12
|
+
|
|
13
|
+
/** True only for the 'admin' profile, the sole profile allowed to run admin tools. */
export function profileAllowsAdmin(profile: McpProfile): boolean {
  const isAdmin = profile === 'admin'
  return isAdmin
}
|