@vespermcp/mcp-server 1.2.30 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,164 @@
1
+ import { getSupabaseAdminClient, resolveUserIdFromApiKey } from "./plan-resolve.js";
2
+ import { inferSourceFromDatasetId, normalizeProviderSource, PLAN_GATE_EXEMPT_TOOLS, } from "./plan-gate.js";
3
/**
 * Cleans up a raw dataset label before it is stored.
 *
 * Surrounding whitespace is removed; a blank label becomes "unknown"; anything
 * longer than 256 characters is cut to 253 characters followed by "...".
 *
 * @param {string} raw - Label as supplied by the caller.
 * @returns {string} The trimmed (and possibly shortened) label.
 */
function truncateDatasetName(raw) {
    const MAX_LENGTH = 256;
    const ELLIPSIS = "...";
    const trimmed = raw.trim();
    if (trimmed.length === 0) {
        return "unknown";
    }
    if (trimmed.length <= MAX_LENGTH) {
        return trimmed;
    }
    return `${trimmed.slice(0, MAX_LENGTH - ELLIPSIS.length)}${ELLIPSIS}`;
}
9
/**
 * Chooses the label recorded as `dataset_name` for a tool call.
 *
 * The first non-blank string among `dataset_id`, `query`, `datasetId`, `url`
 * and `file_path` (in that order) wins and is passed through
 * `truncateDatasetName`; if none qualifies the result is "unknown".
 *
 * @param {object} [args] - Tool-call arguments; may be null/undefined.
 * @returns {string} Dataset label.
 */
function pickDatasetName(args) {
    const { dataset_id, query, datasetId, url, file_path } = args || {};
    const label = [dataset_id, query, datasetId, url, file_path].find(
        (value) => typeof value === "string" && value.trim() !== "",
    );
    return label === undefined ? "unknown" : truncateDatasetName(label);
}
25
/**
 * Determines the provider recorded in the `source` column of an analytics row.
 *
 * Resolution order:
 *   1. an explicit `args.source`, normalised via `normalizeProviderSource`;
 *   2. a provider prefix found in `args.dataset_id` (or `args.query`);
 *   3. for `vesper_web_find` only, the first entry of `args.sources`.
 *
 * The `vesper_web_find` fallback is normalised the same way as an explicit
 * source, so an alias such as "hf" is stored as "huggingface" rather than
 * producing a second spelling of the same provider. Values the normaliser
 * rejects are kept as the lowercased raw entry.
 *
 * @param {string} toolName - MCP tool that was invoked.
 * @param {object} [args] - Tool-call arguments; may be null/undefined.
 * @returns {string|null} Provider identifier, or null when none can be determined.
 */
function pickSource(toolName, args) {
    const a = args || {};
    const explicit = normalizeProviderSource(String(a.source ?? ""));
    if (explicit) {
        return explicit;
    }
    const fromId = inferSourceFromDatasetId(String(a.dataset_id ?? a.query ?? ""));
    if (fromId) {
        return fromId;
    }
    if (toolName === "vesper_web_find" && Array.isArray(a.sources) && a.sources.length > 0) {
        const first = String(a.sources[0]).toLowerCase();
        // Keep aliases consistent with the explicit-source path ("hf" -> "huggingface").
        return normalizeProviderSource(first) || first;
    }
    return null;
}
38
/**
 * Extracts the output format recorded with an analytics row.
 *
 * Looks at `format`, `target_format` and `output_format` in that order and
 * returns the first one that is a non-blank string, trimmed, lowercased and
 * capped at 32 characters. A blank or non-string earlier candidate no longer
 * hides a valid later one.
 *
 * @param {object} [args] - Tool-call arguments; may be null/undefined.
 * @returns {string|null} Normalised format, or null when none is supplied.
 */
function pickFormat(args) {
    const a = args || {};
    for (const candidate of [a.format, a.target_format, a.output_format]) {
        if (typeof candidate !== "string") {
            continue;
        }
        const normalized = candidate.trim().toLowerCase();
        if (normalized) {
            return normalized.slice(0, 32);
        }
    }
    return null;
}
45
/** Analytics `event_type` for each MCP tool that has a dedicated category. */
const MCP_TOOL_EVENT_TYPES = new Map([
    ["vesper_search", "dataset_search"],
    ["discover_datasets", "dataset_search"],
    ["vesper_web_find", "dataset_search"],
    ["vesper.extract_web", "dataset_search"],
    ["get_dataset_info", "dataset_search"],
    ["download_dataset", "dataset_download"],
    ["vesper_download_assets", "dataset_download"],
    ["quality_analyze", "quality_analysis"],
    ["analyze_quality", "quality_analysis"],
    ["analyze_image_quality", "quality_analysis"],
    ["analyze_media_quality", "quality_analysis"],
    ["generate_quality_report", "quality_analysis"],
    ["preview_cleaning", "quality_analysis"],
    ["prepare_dataset", "dataset_prepare"],
    ["export_dataset", "export"],
    ["vesper_convert_format", "export"],
]);
/**
 * Maps an MCP tool name to its analytics event category.
 *
 * @param {string} toolName - MCP tool that was invoked.
 * @returns {{event_type: string}} The mapped category; tools without a
 *   dedicated entry are reported as "data_processed".
 */
function mapToolToEvent(toolName) {
    return { event_type: MCP_TOOL_EVENT_TYPES.get(toolName) ?? "data_processed" };
}
72
/**
 * Maps a tool call to its analytics event category, taking arguments into account.
 *
 * `unified_dataset_api` multiplexes several operations, so its category
 * depends on `args.operation`: "discover", "providers" and "info" count as
 * searches and "download" as a download. Every other tool (and any other
 * operation) is resolved by `mapToolToEvent`.
 *
 * @param {string} toolName - MCP tool that was invoked.
 * @param {object} [args] - Tool-call arguments; null/undefined is treated as
 *   "no operation given" instead of raising a TypeError.
 * @returns {{event_type: string}} The mapped category.
 */
function mapToolToEventWithArgs(toolName, args) {
    if (toolName === "unified_dataset_api") {
        const op = String(args?.operation ?? "").trim().toLowerCase();
        if (op === "discover" || op === "providers" || op === "info") {
            return { event_type: "dataset_search" };
        }
        if (op === "download") {
            return { event_type: "dataset_download" };
        }
    }
    return mapToolToEvent(toolName);
}
84
/**
 * Reports whether a user has opted in to analytics.
 *
 * Reads the `consented` column of the user's `analytics_consent` row. The
 * answer is `true` only when that column is exactly `true`; a missing client,
 * a query error or a missing row all count as "no consent".
 *
 * @param {string} userId - User whose consent row is looked up.
 * @returns {Promise<boolean>} Whether analytics may be recorded for the user.
 */
async function hasAnalyticsConsent(userId) {
    const client = getSupabaseAdminClient();
    if (!client) {
        return false;
    }
    const response = await client
        .from("analytics_consent")
        .select("consented")
        .eq("user_id", userId)
        .maybeSingle();
    return !response.error && response.data?.consented === true;
}
98
/**
 * After each MCP tool call: insert one row into `analytics_events` (same table as
 * `/api/analytics/ingest` and the landing Operations tab) when the user has opted in
 * and `VESPER_API_KEY` / `api_key` resolves to a user.
 *
 * Nothing is recorded when `VESPER_DISABLE_MCP_ANALYTICS` is "1" or "true",
 * when the tool is in `PLAN_GATE_EXEMPT_TOOLS`, when no Supabase client or API
 * key is available, when the key does not resolve to a user, or when the user
 * has not consented.
 *
 * Recording is best-effort: insert errors and unexpected exceptions are logged
 * with `console.error` and never propagate to the caller, so a failing
 * analytics backend cannot turn a finished tool call into an error.
 *
 * @param {object} opts
 * @param {string} opts.toolName - MCP tool that was invoked.
 * @param {object} [opts.args] - Arguments the tool was called with.
 * @param {object|null} [opts.result] - Tool result; a missing (null/undefined)
 *   result or one with `isError === true` is recorded as `ok: false`.
 * @returns {Promise<void>}
 */
export async function recordMcpToolAnalyticsAfterCall(opts) {
    const disableFlag = process.env.VESPER_DISABLE_MCP_ANALYTICS;
    if (disableFlag === "1" || disableFlag === "true") {
        return;
    }
    const toolName = String(opts?.toolName || "").trim();
    if (!toolName || PLAN_GATE_EXEMPT_TOOLS.has(toolName)) {
        return;
    }
    try {
        const supabase = getSupabaseAdminClient();
        if (!supabase) {
            return;
        }
        const args = opts.args || {};
        const apiKey = String(args.api_key ?? process.env.VESPER_API_KEY ?? "").trim();
        if (!apiKey) {
            return;
        }
        const userId = await resolveUserIdFromApiKey(apiKey);
        if (!userId) {
            return;
        }
        if (!(await hasAnalyticsConsent(userId))) {
            return;
        }
        const { event_type } = mapToolToEventWithArgs(toolName, args);
        const dataset_name = pickDatasetName(args);
        const source = pickSource(toolName, args);
        const format = pickFormat(args);
        const result = opts.result;
        const metadata = {
            mcp_tool: toolName,
            // `!= null` covers both null and undefined; a null result used to throw here.
            ok: result != null && result.isError !== true,
        };
        if (result?.isError) {
            const first = result.content?.[0];
            if (first && typeof first.text === "string") {
                metadata.error_preview = first.text.slice(0, 500);
            }
        }
        const row = {
            user_id: userId,
            event_type,
            dataset_name,
            source: source || null,
            format: format || null,
            size_bytes: null,
            quality_score: null,
            metadata,
            created_at: new Date().toISOString(),
        };
        let { error } = await supabase.from("analytics_events").insert(row);
        // Older DBs may lack `dataset_prepare` in CHECK constraint — fall back.
        if (error && event_type === "dataset_prepare" && /check|constraint/i.test(error.message || "")) {
            ({ error } = await supabase.from("analytics_events").insert({
                ...row,
                event_type: "data_processed",
                metadata: { ...metadata, event_type_fallback: "dataset_prepare" },
            }));
        }
        if (error) {
            console.error("[mcp-analytics] insert failed:", error.message);
        }
    }
    catch (err) {
        // Analytics must never break the tool call it is observing.
        console.error("[mcp-analytics] unexpected failure:", err instanceof Error ? err.message : String(err));
    }
}
@@ -0,0 +1,103 @@
1
/**
 * Mirrors `landing/lib/plan-entitlements.ts` — keep rules in sync when changing tiers.
 */
/** Upgrade link appended to every "feature locked" message. */
const PRICING_URL = "https://getvesper.dev/pricing";
/** Data sources the free plan may use. */
const FREE_ALLOWED_SOURCES = new Set([
    "huggingface",
    "openml",
    "s3",
    "local",
    "url",
]);
/** Tools rejected on the free plan with code `feature_locked_custom_clean`. */
const PRO_ONLY_TOOLS = new Set([
    "custom_clean",
]);
/** Dataset operations rejected on the free plan with code `feature_locked_fusion_compare`. */
const PRO_ONLY_DATASET_OPS = new Set([
    "compare_datasets",
    "fuse_datasets",
]);
/** Export formats that count as an "advanced export" on their own. */
const ADVANCED_EXPORT_FORMATS = new Set([
    "feather",
    "jsonl",
    "arrow",
]);
9
/**
 * Returns `value` when it is a non-null object (arrays included); any other
 * input yields a fresh empty object so callers can read properties safely.
 *
 * @param {*} value - Candidate object.
 * @returns {object} `value` itself or `{}`.
 */
function asRecord(value) {
    if (value && typeof value === "object") {
        return value;
    }
    return {};
}
12
/**
 * Trims and lowercases a string; every non-string input becomes "".
 *
 * @param {*} value - Value to normalise.
 * @returns {string} Normalised text.
 */
function normalizeText(value) {
    if (typeof value !== "string") {
        return "";
    }
    return value.trim().toLowerCase();
}
15
/**
 * Finds the tool/operation name attached to a request.
 *
 * The fields `tool`, `operation`, `feature` and `action` are checked first on
 * `body` and then on `metadata`; the first one that normalises to a non-empty
 * string is returned.
 *
 * @param {object} body - Request body record.
 * @param {object} metadata - Request metadata record.
 * @returns {string} Normalised name, or "" when none is present.
 */
function readToolName(body, metadata) {
    const fields = ["tool", "operation", "feature", "action"];
    const rawValues = [body, metadata].flatMap((holder) => fields.map((field) => holder[field]));
    for (const raw of rawValues) {
        const name = normalizeText(raw);
        if (name !== "") {
            return name;
        }
    }
    return "";
}
34
/**
 * Decides whether an export request uses options beyond a basic export.
 *
 * The request is "advanced" when `format` is in `ADVANCED_EXPORT_FORMATS`, or
 * when `body`, `metadata` or `metadata.options` carries any of:
 *   - `compression` set to something other than "uncompressed",
 *   - a positive numeric `sample_rows`,
 *   - a non-empty `columns` array,
 *   - `preview === true`.
 *
 * @param {object} body - Request body record.
 * @param {object} metadata - Request metadata record.
 * @param {string} format - Normalised export format.
 * @returns {boolean} Whether any advanced option is present.
 */
function hasAdvancedExportOptions(body, metadata, format) {
    if (ADVANCED_EXPORT_FORMATS.has(format)) {
        return true;
    }
    const usesAdvancedOption = (options) => {
        if (!options || typeof options !== "object") {
            return false;
        }
        const compression = normalizeText(options.compression);
        const compressed = compression !== "" && compression !== "uncompressed";
        const sampled = typeof options.sample_rows === "number" && options.sample_rows > 0;
        const projected = Array.isArray(options.columns) && options.columns.length > 0;
        return compressed || sampled || projected || options.preview === true;
    };
    return [body, metadata, asRecord(metadata.options)].some(usesAdvancedOption);
}
59
/**
 * Applies the free-tier rules to a single request.
 *
 * Any plan other than "free" is allowed unconditionally. For the free plan the
 * checks run in this order and the first failing one is reported:
 *   1. the source must be in `FREE_ALLOWED_SOURCES` (when a source is given);
 *   2. the tool must not be in `PRO_ONLY_TOOLS`;
 *   3. the tool must not be in `PRO_ONLY_DATASET_OPS`;
 *   4. exports (`export_dataset`, `vesper_export`, or event type "export")
 *      must not use advanced export options.
 *
 * Source and format are taken from `input` first, then `body`, then `metadata`.
 *
 * @param {object} input - `{ plan, eventType, source, format, body, metadata }`.
 * @returns {{allowed: true} | {allowed: false, status: number, code: string, message: string}}
 */
export function evaluatePlanEntitlements(input) {
    if (input.plan !== "free") {
        return { allowed: true };
    }
    const deny = (code, message) => ({ allowed: false, status: 403, code, message });
    const body = asRecord(input.body);
    const metadata = asRecord(input.metadata);
    const source = normalizeText(input.source ?? body.source ?? metadata.source);
    const format = normalizeText(input.format ?? body.format ?? metadata.format);
    const toolName = readToolName(body, metadata);

    if (source && !FREE_ALLOWED_SOURCES.has(source)) {
        return deny("feature_locked_source", `Source '${source}' is a Pro feature. Free supports public sources only (huggingface, openml, s3). Upgrade at ${PRICING_URL}.`);
    }
    if (PRO_ONLY_TOOLS.has(toolName)) {
        return deny("feature_locked_custom_clean", `custom_clean is available on Pro only. Upgrade at ${PRICING_URL}.`);
    }
    if (PRO_ONLY_DATASET_OPS.has(toolName)) {
        return deny("feature_locked_fusion_compare", `${toolName} is available on Pro only. Upgrade at ${PRICING_URL}.`);
    }
    const isExport = toolName === "export_dataset" || toolName === "vesper_export" || input.eventType === "export";
    if (isExport && hasAdvancedExportOptions(body, metadata, format)) {
        return deny("feature_locked_advanced_export", `Advanced export options are Pro-only. Free supports basic csv/parquet export. Upgrade at ${PRICING_URL}.`);
    }
    return { allowed: true };
}
@@ -0,0 +1,243 @@
1
/**
 * Tools that never hit paid data-plane rules (config, registry, lineage meta).
 * Membership is checked by the plan gate and by MCP analytics before doing any work.
 */
export const PLAN_GATE_EXEMPT_TOOLS = new Set([
    "configure_keys",
    "lineage",
    "get_lineage",
    "diff_lineage_versions",
    "vesper_list_datasets",
    "check_job_status",
]);

/**
 * Matches a `<provider>:` prefix at the start of a dataset id
 * (case-insensitive); capture group 1 is the provider token.
 */
const DATASET_ID_SOURCE = /^(kaggle|huggingface|hf|openml|dataworld|arxiv|github|s3|local|url|bigquery|gcs|azure):/i;
11
/**
 * Canonicalises a provider name supplied by the caller.
 *
 * The value is trimmed and lowercased. Empty input and the placeholders
 * "auto" / "unknown" mean "no specific provider" and give `undefined`; the
 * alias "hf" expands to "huggingface"; anything else is returned as cleaned.
 *
 * @param {string} raw - Provider name as supplied.
 * @returns {string|undefined} Canonical provider, or undefined when unspecified.
 */
export function normalizeProviderSource(raw) {
    const cleaned = raw ? raw.trim().toLowerCase() : "";
    switch (cleaned) {
        case "":
        case "auto":
        case "unknown":
            return undefined;
        case "hf":
            return "huggingface";
        default:
            return cleaned;
    }
}
21
/**
 * Derives the provider from a prefixed dataset id such as "kaggle:owner/name".
 *
 * Only prefixes matched by `DATASET_ID_SOURCE` are recognised. The result is
 * lowercased and the alias "hf" expands to "huggingface".
 *
 * @param {string} datasetId - Dataset id, possibly carrying a `<provider>:` prefix.
 * @returns {string|undefined} Provider name, or undefined when the id is empty
 *   or has no recognised prefix.
 */
export function inferSourceFromDatasetId(datasetId) {
    if (!datasetId) {
        return undefined;
    }
    const prefix = DATASET_ID_SOURCE.exec(String(datasetId))?.[1].toLowerCase();
    if (prefix === undefined) {
        return undefined;
    }
    return prefix === "hf" ? "huggingface" : prefix;
}
30
/**
 * Translates an MCP tool call into the input expected by the plan-entitlement
 * evaluator: `{ eventType, source?, format?, body, metadata? }`.
 *
 * Two details matter for correct gating:
 *   - For `unified_dataset_api` discover/download, `metadata.source` carries
 *     the normalised explicit source rather than the raw argument. The
 *     evaluator falls back to `metadata.source` when `source` is undefined, so
 *     forwarding a raw placeholder such as "auto" made it look like a
 *     non-free provider and blocked free users.
 *   - `download_dataset` and `vesper_download_assets` infer the provider from
 *     a `<provider>:` prefix in `dataset_id` when no explicit source is given,
 *     matching the `unified_dataset_api` download path.
 *     NOTE(review): assumes the download handlers honour such prefixes — confirm.
 *
 * @param {string} toolName - MCP tool being invoked.
 * @param {object} [args] - Tool-call arguments; may be null/undefined.
 * @returns {object} Plan-check input; unknown tools are treated as a
 *   "dataset_search" event carrying only the tool name.
 */
export function buildToolPlanCheckInput(toolName, args) {
    const a = args || {};
    const op = String(a.operation ?? "").trim().toLowerCase();
    switch (toolName) {
        case "unified_dataset_api": {
            if (op === "providers") {
                return {
                    eventType: "dataset_search",
                    body: { tool: "unified_dataset_api", operation: "providers" },
                };
            }
            if (op === "discover") {
                const explicit = normalizeProviderSource(String(a.source ?? ""));
                const src = explicit ?? inferSourceFromDatasetId(String(a.query ?? ""));
                return {
                    eventType: "dataset_search",
                    source: src,
                    body: { tool: "unified_dataset_api", operation: "discover" },
                    // Normalised on purpose: a raw "auto"/"unknown" must not reach the evaluator.
                    metadata: { source: explicit },
                };
            }
            if (op === "download") {
                const ds = String(a.dataset_id ?? "");
                const explicit = normalizeProviderSource(String(a.source ?? ""));
                const src = explicit || inferSourceFromDatasetId(ds);
                return {
                    eventType: "dataset_download",
                    source: src,
                    body: { tool: "unified_dataset_api", operation: "download", dataset_id: ds },
                    // Normalised on purpose: a raw "auto"/"unknown" must not reach the evaluator.
                    metadata: { source: explicit },
                };
            }
            if (op === "info") {
                const ds = String(a.dataset_id ?? "");
                return {
                    eventType: "dataset_search",
                    source: inferSourceFromDatasetId(ds),
                    body: { tool: "unified_dataset_api", operation: "info", dataset_id: ds },
                };
            }
            return {
                eventType: "dataset_search",
                body: { tool: "unified_dataset_api", operation: op || "unknown" },
            };
        }
        case "vesper_search": {
            const q = String(a.query ?? "");
            return {
                eventType: "dataset_search",
                source: inferSourceFromDatasetId(q),
                body: { tool: "vesper_search" },
            };
        }
        case "discover_datasets": {
            return {
                eventType: "dataset_search",
                source: normalizeProviderSource(String(a.source ?? "")),
                body: { tool: "discover_datasets" },
            };
        }
        case "download_dataset": {
            const explicit = normalizeProviderSource(String(a.source ?? ""));
            const inferred = inferSourceFromDatasetId(String(a.dataset_id ?? ""));
            // With no `source` argument at all the tool defaults to huggingface.
            const fallback = a.source == null ? "huggingface" : undefined;
            return {
                eventType: "dataset_download",
                source: explicit || inferred || fallback,
                body: { tool: "download_dataset" },
            };
        }
        case "vesper_download_assets": {
            const explicit = normalizeProviderSource(String(a.source ?? ""));
            return {
                eventType: "dataset_download",
                source: explicit || inferSourceFromDatasetId(String(a.dataset_id ?? "")),
                body: { tool: "vesper_download_assets" },
            };
        }
        case "get_dataset_info": {
            const ds = String(a.dataset_id ?? "");
            return {
                eventType: "dataset_search",
                source: inferSourceFromDatasetId(ds),
                body: { tool: "get_dataset_info" },
            };
        }
        case "vesper_web_find": {
            const sources = Array.isArray(a.sources) ? a.sources : [];
            const first = sources.length > 0 ? String(sources[0]).toLowerCase() : "github";
            return {
                eventType: "dataset_search",
                source: normalizeProviderSource(first) || first,
                body: { tool: "vesper_web_find" },
            };
        }
        case "vesper.extract_web": {
            return {
                eventType: "dataset_search",
                source: "url",
                body: { tool: "vesper.extract_web" },
            };
        }
        case "vesper.fuse": {
            // Reported under the `fuse_datasets` name so the Pro-only rule applies.
            return {
                eventType: "data_processed",
                body: { tool: "fuse_datasets", operation: "web" },
                metadata: { tool: "fuse_datasets" },
            };
        }
        case "quality_analyze":
        case "analyze_quality":
        case "analyze_image_quality":
        case "analyze_media_quality":
        case "generate_quality_report": {
            const ds = String(a.dataset_id ?? a.path ?? "");
            return {
                eventType: "quality_analysis",
                source: inferSourceFromDatasetId(ds),
                body: { tool: toolName },
            };
        }
        case "preview_cleaning": {
            const ds = String(a.dataset_id ?? "");
            return {
                eventType: "quality_analysis",
                source: inferSourceFromDatasetId(ds),
                body: { tool: "preview_cleaning" },
            };
        }
        case "custom_clean": {
            const ds = String(a.dataset_id ?? "");
            return {
                eventType: "dataset_prepare",
                source: inferSourceFromDatasetId(ds),
                body: { tool: "custom_clean" },
                metadata: { tool: "custom_clean" },
            };
        }
        case "prepare_dataset": {
            const q = String(a.query ?? a.dataset_id ?? "");
            return {
                eventType: "dataset_prepare",
                source: inferSourceFromDatasetId(q),
                body: { tool: "prepare_dataset" },
            };
        }
        case "compare_datasets": {
            return {
                eventType: "data_processed",
                body: { tool: "compare_datasets" },
                metadata: { tool: "compare_datasets" },
            };
        }
        case "export_dataset": {
            const fmt = String(a.format ?? "feather").trim().toLowerCase();
            return {
                eventType: "export",
                format: fmt,
                source: inferSourceFromDatasetId(String(a.dataset_id ?? "")),
                body: {
                    tool: "export_dataset",
                    format: fmt,
                    compression: a.compression,
                    preview: a.preview,
                    sample_rows: a.sample_rows,
                    columns: a.columns,
                },
                metadata: {
                    options: {
                        compression: a.compression,
                        sample_rows: a.sample_rows,
                        columns: a.columns,
                        preview: a.preview,
                    },
                },
            };
        }
        case "vesper_convert_format": {
            const tf = String(a.target_format ?? "").trim().toLowerCase();
            return {
                eventType: "export",
                format: tf,
                body: { tool: "vesper_convert_format", format: tf },
            };
        }
        case "vesper_normalize_schema": {
            const of = String(a.output_format ?? "jsonl").trim().toLowerCase();
            return {
                eventType: "data_processed",
                format: of,
                body: { tool: "vesper_normalize_schema", format: of },
            };
        }
        case "fuse":
        case "fuse_datasets": {
            const outputFormat = String(a.output_format ?? "parquet").trim().toLowerCase();
            const compression = a.compression ? String(a.compression) : undefined;
            const preview = a.preview === true;
            return {
                eventType: "data_processed",
                format: outputFormat,
                body: {
                    tool: "fuse_datasets",
                    format: outputFormat,
                    compression,
                    preview,
                },
                metadata: {
                    tool: "fuse_datasets",
                    options: { compression, preview },
                },
            };
        }
        default:
            return {
                eventType: "dataset_search",
                body: { tool: toolName },
            };
    }
}
@@ -0,0 +1,109 @@
1
+ import { createClient } from "@supabase/supabase-js";
2
+ import { evaluatePlanEntitlements, } from "./plan-entitlements.js";
3
+ import { buildToolPlanCheckInput, PLAN_GATE_EXEMPT_TOOLS, } from "./plan-gate.js";
4
/** Lazily created client, reused by every later call once it exists. */
let supabaseClient = null;
/**
 * Returns the shared Supabase client, creating it on first use.
 *
 * Reads `SUPABASE_URL` and `SUPABASE_SERVICE_ROLE_KEY` (trimmed) from the
 * environment on every call.
 *
 * @returns {object|null} The client, or null when either variable is missing or blank.
 */
function getSupabase() {
    const { SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY } = process.env;
    const url = SUPABASE_URL?.trim();
    const serviceKey = SUPABASE_SERVICE_ROLE_KEY?.trim();
    if (url && serviceKey) {
        if (!supabaseClient) {
            supabaseClient = createClient(url, serviceKey);
        }
        return supabaseClient;
    }
    return null;
}
16
/**
 * Tells whether a Supabase client is available, i.e. whether `getSupabase()`
 * returns something other than null.
 *
 * @returns {boolean}
 */
export function isPlanGateConfigured() {
    const client = getSupabase();
    return client !== null;
}
19
/**
 * Tells whether plan checks should run.
 *
 * Setting `VESPER_ENFORCE_PLAN` to "0" or "false" switches enforcement off.
 * With any other value (including unset or "true") enforcement is on exactly
 * when Supabase is configured — the variable cannot force enforcement on by
 * itself, so local development without Supabase skips plan checks.
 *
 * @returns {boolean}
 */
export function isPlanEnforcementEnabled() {
    const flag = process.env.VESPER_ENFORCE_PLAN;
    const optedOut = flag === "0" || flag === "false";
    return optedOut ? false : isPlanGateConfigured();
}
29
/**
 * Looks up the subscription tier stored for a user in `user_plans`.
 *
 * Without a Supabase client the answer is "pro". Otherwise a query error, a
 * missing row or any stored value other than "pro" / "startup"
 * (case-insensitive) yields "free".
 *
 * @param {string} userId - User whose plan is requested.
 * @returns {Promise<"free"|"pro"|"startup">}
 */
async function getUserPlanForUserId(userId) {
    const client = getSupabase();
    if (!client) {
        return "pro";
    }
    const { data, error } = await client
        .from("user_plans")
        .select("plan")
        .eq("user_id", userId)
        .maybeSingle();
    if (error || !data?.plan) {
        return "free";
    }
    const storedPlan = String(data.plan).toLowerCase();
    return ["pro", "startup"].includes(storedPlan) ? storedPlan : "free";
}
48
/**
 * Resolves an API key to its owning user via the `api_keys` table.
 *
 * The row is matched on its `key` column. No match, a query error, a row
 * without `user_id`, or a row with `revoked_at` set all give null, as does a
 * missing Supabase client.
 *
 * @param {string} apiKey - API key presented by the caller.
 * @returns {Promise<{userId: string}|null>}
 */
async function lookupUserByApiKey(apiKey) {
    const client = getSupabase();
    if (!client) {
        return null;
    }
    const response = await client
        .from("api_keys")
        .select("user_id, revoked_at")
        .eq("key", apiKey)
        .maybeSingle();
    const row = response.data;
    const usable = !response.error && Boolean(row?.user_id) && !row.revoked_at;
    return usable ? { userId: row.user_id } : null;
}
62
/**
 * For analytics / profile sync — same lookup as plan gate.
 *
 * @param {string} apiKey - API key presented by the caller.
 * @returns {Promise<string|null>} Owning user id, or null when the key does
 *   not resolve to a user.
 */
export async function resolveUserIdFromApiKey(apiKey) {
    const match = await lookupUserByApiKey(apiKey);
    if (match == null) {
        return null;
    }
    return match.userId ?? null;
}
67
/**
 * Public accessor for the shared Supabase client returned by `getSupabase()`.
 *
 * @returns {object|null} The client, or null when `getSupabase()` has none.
 */
export function getSupabaseAdminClient() {
    const client = getSupabase();
    return client;
}
70
/**
 * Central gate: same rules as `landing/lib/plan-entitlements` + landing analytics ingest.
 *
 * Exempt tools and deployments without plan enforcement pass immediately.
 * Otherwise the API key (from `args.api_key`, else `VESPER_API_KEY`) must
 * resolve to a user, whose plan is then checked against the entitlement rules
 * for this tool call.
 *
 * @param {string} toolName - MCP tool being invoked.
 * @param {object} [args] - Tool-call arguments; may be null/undefined.
 * @returns {Promise<{ok: true} | {ok: false, message: string}>}
 */
export async function enforcePlanGateForTool(toolName, args) {
    if (PLAN_GATE_EXEMPT_TOOLS.has(toolName) || !isPlanEnforcementEnabled()) {
        return { ok: true };
    }
    const reject = (message) => ({ ok: false, message });
    const rawArgs = args || {};
    const apiKey = String(rawArgs.api_key ?? process.env.VESPER_API_KEY ?? "").trim();
    if (apiKey === "") {
        return reject("Plan enforcement is enabled (Supabase configured). Set `VESPER_API_KEY` in the MCP env or pass `api_key` on tool calls to your Vesper API key so your tier can be verified.");
    }
    const user = await lookupUserByApiKey(apiKey);
    if (!user) {
        return reject("Invalid or revoked API key. Create a key in the Vesper profile and set VESPER_API_KEY or pass api_key on tool calls.");
    }
    const plan = await getUserPlanForUserId(user.userId);
    const { eventType, source, format, metadata, body } = buildToolPlanCheckInput(toolName, rawArgs);
    const verdict = evaluatePlanEntitlements({ plan, eventType, source, format, metadata, body });
    return verdict.allowed ? { ok: true } : reject(verdict.message);
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vespermcp/mcp-server",
3
- "version": "1.2.30",
3
+ "version": "1.3.1",
4
4
  "description": "AI-powered dataset discovery, quality analysis, and preparation MCP server with multimodal support (text, image, audio, video)",
5
5
  "type": "module",
6
6
  "main": "build/index.js",