@vespermcp/mcp-server 1.2.12 → 1.2.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -1,4 +1,75 @@
1
- #! /usr/bin/env node
1
+ // --- Dataset ID Normalization ---
2
/**
 * Build the key under which a dataset is stored in, and looked up from, the registry.
 *
 * Every "/" and ":" in the id is replaced with "_", so "kaggle:user/data"
 * becomes "kaggle_user_data" and "hf:org/name" becomes "hf_org_name".
 *
 * The id is coerced to a string and trimmed, and the "kaggle:" prefix is
 * recognised case-insensitively, so that ids which parseDatasetId treats as the
 * same source (it trims and matches prefixes with the `i` flag) map to one key.
 *
 * @param {unknown} dataset_id - Dataset identifier; null/undefined yield "".
 * @returns {string} Normalized key containing no "/" or ":".
 */
function normalize_dataset_id(dataset_id) {
    const raw = String(dataset_id ?? "").trim();
    const kagglePrefix = /^kaggle:/i;
    const isKaggle = kagglePrefix.test(raw);
    // Strip the prefix first so it can be re-added in canonical lower case.
    const body = raw.replace(kagglePrefix, "").replace(/[/:]/g, "_");
    return isKaggle ? `kaggle_${body}` : body;
}
10
+ // --- Dataset Registry Helpers ---
11
/**
 * Resolve the location of the registry file.
 *
 * @returns {string} Path of "registry.json" directly inside `dataRoot`.
 */
function getRegistryPath() {
    const registryFile = path.join(dataRoot, "registry.json");
    return registryFile;
}
14
/**
 * Load the registry entries from disk.
 *
 * Never throws: a missing file yields an empty list, and an unreadable,
 * unparsable or non-array file is reported on stderr and also yields an empty
 * list. Callers index the result with array methods, so anything that is not a
 * JSON array is rejected here rather than crashing them later.
 *
 * @returns {Array<object>} Parsed registry entries, or [] when none are usable.
 */
function readRegistry() {
    const registryPath = getRegistryPath();
    let raw;
    try {
        raw = fs.readFileSync(registryPath, "utf-8");
    }
    catch (e) {
        // A registry that has not been created yet is the normal first-run case.
        if (e?.code !== "ENOENT") {
            console.error(`[Registry] Failed to read ${registryPath}: ${e?.message || e}`);
        }
        return [];
    }
    try {
        const parsed = JSON.parse(raw);
        if (Array.isArray(parsed)) {
            return parsed;
        }
        console.error(`[Registry] Ignoring ${registryPath}: expected a JSON array`);
    }
    catch (e) {
        console.error(`[Registry] Ignoring unparsable ${registryPath}: ${e?.message || e}`);
    }
    return [];
}
26
/**
 * Persist the registry entries as pretty-printed JSON.
 *
 * The parent directory is created when missing. The data is written to a
 * temporary sibling file and then renamed over the registry, so an interrupted
 * write cannot leave a truncated registry behind.
 *
 * @param {Array<object>} entries - Complete list of registry entries to store.
 * @throws {Error} When the directory or file cannot be written.
 */
function writeRegistry(entries) {
    const registryPath = getRegistryPath();
    fs.mkdirSync(path.dirname(registryPath), { recursive: true });
    const tmpPath = `${registryPath}.${process.pid}.tmp`;
    try {
        fs.writeFileSync(tmpPath, JSON.stringify(entries, null, 2));
        fs.renameSync(tmpPath, registryPath);
    }
    catch (e) {
        // Do not leave the partial temp file around; `force` ignores a missing file.
        fs.rmSync(tmpPath, { force: true });
        throw e;
    }
}
30
/**
 * Insert or replace the registry entry for a dataset.
 *
 * The id is normalized first; an existing entry with the same normalized id is
 * replaced wholesale, otherwise a new entry is appended. The full registry is
 * then written back.
 *
 * @param {string} dataset_id - Dataset identifier (normalized before use).
 * @param {string} local_path - Path stored in the entry.
 * @param {string} status - Status stored in the entry.
 * @throws {Error} Whatever writeRegistry throws when the file cannot be written.
 */
function upsertRegistry(dataset_id, local_path, status) {
    const norm_id = normalize_dataset_id(dataset_id);
    console.error(`[Registry] Writing key: ${norm_id}`);
    const entries = readRegistry();
    const record = { dataset_id: norm_id, local_path, status };
    // Optional chaining: a stray null/primitive element must not block every future write.
    const idx = entries.findIndex(e => e?.dataset_id === norm_id);
    if (idx >= 0) {
        entries[idx] = record;
    }
    else {
        entries.push(record);
    }
    writeRegistry(entries);
}
43
/**
 * Look up the registry entry for a dataset.
 *
 * @param {string} dataset_id - Dataset identifier (normalized before lookup).
 * @returns {object|undefined} The entry whose `dataset_id` equals the normalized
 *   id, or undefined when there is none.
 */
function getRegistryEntry(dataset_id) {
    const norm_id = normalize_dataset_id(dataset_id);
    console.error(`[Registry] Lookup key: ${norm_id}`);
    // Optional chaining: skip null/primitive elements instead of throwing on them.
    return readRegistry().find(e => e?.dataset_id === norm_id);
}
48
+ // --- Pipeline State Tracker ---
49
+ // Tracks completed steps per session/job/dataset
50
// Null-prototype dictionary: keys are request-supplied dataset ids, so names such
// as "constructor" or "__proto__" must not resolve to Object.prototype members.
const pipelineState = Object.create(null);
/**
 * Map a dataset id to the key used in `pipelineState` (currently the id itself).
 *
 * @param {string} datasetId
 * @returns {string}
 */
function getPipelineKey(datasetId) {
    return datasetId;
}
/**
 * Record that a pipeline step has been completed for a dataset.
 *
 * @param {string} datasetId - Dataset the step belongs to.
 * @param {string} step - Step name to record.
 */
export function markStepComplete(datasetId, step) {
    const key = getPipelineKey(datasetId);
    const steps = pipelineState[key] ?? (pipelineState[key] = new Set());
    steps.add(step);
}
/**
 * Report whether a pipeline step has been recorded for a dataset.
 *
 * @param {string} datasetId - Dataset to check.
 * @param {string} step - Step name to look for.
 * @returns {boolean} true when the step was recorded, false otherwise.
 */
export function hasStep(datasetId, step) {
    const steps = pipelineState[getPipelineKey(datasetId)];
    return steps?.has(step) ?? false;
}
64
+ // --- Dataset ID Auto-Detection ---
65
/**
 * Turn user input into a source-qualified dataset id.
 *
 * - Input that already starts with a known source prefix (kaggle:, hf:,
 *   huggingface:, openml:, dataworld:, http:, https:) is returned trimmed.
 * - Input containing "/" but no ":" is treated as a Kaggle "owner/name"
 *   reference and gets the "kaggle:" prefix.
 * - Anything else is returned trimmed.
 *
 * @param {string} id - Raw dataset identifier.
 * @returns {string} Trimmed identifier, prefixed with "kaggle:" where inferred.
 */
export function parseDatasetId(id) {
    const trimmed = id.trim();
    // The colon belongs after the group only; with it also inside each alternative
    // the pattern demanded "kaggle::" and never matched the named sources.
    if (/^(?:kaggle|hf|huggingface|openml|dataworld|https?):/i.test(trimmed))
        return trimmed;
    if (trimmed.includes("/") && !trimmed.includes(":"))
        return `kaggle:${trimmed}`;
    return trimmed;
}
2
73
  import { Server } from "@modelcontextprotocol/sdk/server/index.js";
3
74
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
4
75
  import { CallToolRequestSchema, ListToolsRequestSchema, ErrorCode, McpError, } from "@modelcontextprotocol/sdk/types.js";
@@ -443,6 +514,13 @@ async function handlePrepareJob(jobId, query, requirements) {
443
514
  preview: true,
444
515
  });
445
516
  rawFilePath = fusionResult.output_path;
517
+ try {
518
+ // Register fused output for this top dataset so export can find it
519
+ upsertRegistry(topDataset.id, rawFilePath, "completed");
520
+ }
521
+ catch (e) {
522
+ console.error(`[Registry] Failed to write registry for fused output ${topDataset.id}: ${e?.message || e}`);
523
+ }
446
524
  currentRows = await countRows(rawFilePath);
447
525
  }
448
526
  if (currentRows < requestedRows) {
@@ -462,6 +540,13 @@ async function handlePrepareJob(jobId, query, requirements) {
462
540
  update({ progress: 85, status_text: "Installing dataset into project..." });
463
541
  const installPath = await installService.install(topDataset.id, rawFilePath);
464
542
  update({ progress: 100, status_text: "Preparation complete!" });
543
+ // Register prepared dataset in local registry for lookup by export/list tools
544
+ try {
545
+ upsertRegistry(topDataset.id, installPath, "completed");
546
+ }
547
+ catch (e) {
548
+ console.error(`[Registry] Failed to write registry for ${topDataset.id}: ${e?.message || e}`);
549
+ }
465
550
  return installPath;
466
551
  }
467
552
  /**
@@ -766,6 +851,14 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
766
851
  required: ["dataset_id"],
767
852
  },
768
853
  },
854
+ {
855
+ name: "vesper_list_datasets",
856
+ description: "List local prepared datasets from the Vesper registry (dataset_id and local_path).",
857
+ inputSchema: {
858
+ type: "object",
859
+ properties: {},
860
+ },
861
+ },
769
862
  {
770
863
  name: "fuse_datasets",
771
864
  description: "Fuse/combine multiple datasets via concat or join. Optionally runs quality & leakage checks afterward.",
@@ -875,6 +968,65 @@ server.setRequestHandler(ListToolsRequestSchema, async () => {
875
968
  });
876
969
  // Call Tool
877
970
  server.setRequestHandler(CallToolRequestSchema, async (request) => {
971
+ // --- Pipeline Enforcement ---
972
+ // Map tool names to pipeline steps
973
+ const toolToStep = {
974
+ vesper_search: "search",
975
+ vesper_download: "download",
976
+ vesper_analyze: "analyze",
977
+ vesper_clean: "clean",
978
+ vesper_split: "split",
979
+ vesper_export: "export",
980
+ prepare_dataset: "prepare",
981
+ };
982
+ // Extract dataset_id if present and normalize
983
+ let datasetId = request.params.arguments?.dataset_id || request.params.arguments?.query || "";
984
+ if (datasetId)
985
+ datasetId = parseDatasetId(String(datasetId));
986
+ // Pipeline rules
987
+ const stepOrder = ["search", "download", "analyze", "clean", "split", "export"];
988
+ const prereqs = {
989
+ vesper_download: ["search"],
990
+ vesper_analyze: ["download"],
991
+ vesper_clean: ["analyze"],
992
+ vesper_split: ["clean"],
993
+ vesper_export: ["split"],
994
+ };
995
+ const tool = String(request.params.name);
996
+ const step = toolToStep[tool];
997
+ if (step && datasetId) {
998
+ // Check prerequisites
999
+ const required = prereqs[tool] || [];
1000
+ for (const req of required) {
1001
+ if (!hasStep(String(datasetId), req)) {
1002
+ // Auto-run missing step if possible, else error
1003
+ // For export, auto-run prepare_dataset if split missing
1004
+ if (tool === "vesper_export" && req === "split") {
1005
+ // Auto-trigger prepare_dataset (start a background prepare job)
1006
+ try {
1007
+ jobManager.createJob("prepare", 0, { query: String(datasetId), requirements: undefined, downloadImages: false });
1008
+ // Mark split as complete so export can proceed; export handler will also wait for data if needed.
1009
+ markStepComplete(String(datasetId), "split");
1010
+ }
1011
+ catch (e) {
1012
+ console.error(`[Pipeline] Failed to auto-trigger prepare for ${datasetId}: ${e?.message || e}`);
1013
+ return {
1014
+ content: [{ type: "text", text: `ERROR: Failed to auto-run prepare for ${datasetId}. Please run prepare_dataset first.` }],
1015
+ isError: true,
1016
+ };
1017
+ }
1018
+ }
1019
+ else {
1020
+ return {
1021
+ content: [{ type: "text", text: `ERROR: Cannot run ${tool} before ${req}. Please run ${req} first.` }],
1022
+ isError: true,
1023
+ };
1024
+ }
1025
+ }
1026
+ }
1027
+ // Mark this step as complete
1028
+ markStepComplete(String(datasetId), String(step));
1029
+ }
878
1030
  switch (request.params.name) {
879
1031
  case "vesper_search": {
880
1032
  const query = String(request.params.arguments?.query);
@@ -983,6 +1135,12 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
983
1135
  }
984
1136
  try {
985
1137
  const localPath = await dataIngestor.ensureData(datasetId, source, () => undefined);
1138
+ try {
1139
+ upsertRegistry(datasetId, localPath, "completed");
1140
+ }
1141
+ catch (e) {
1142
+ console.error(`[Registry] Failed to write registry for ${datasetId}: ${e?.message || e}`);
1143
+ }
986
1144
  return {
987
1145
  content: [{ type: "text", text: `Download complete: ${localPath}` }]
988
1146
  };
@@ -1302,15 +1460,53 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1302
1460
  if (!dataset) {
1303
1461
  throw new McpError(ErrorCode.InvalidParams, `Dataset not found: ${datasetId}`);
1304
1462
  }
1305
- // Use Metadata to find the actual local file
1463
+ // Use Metadata or Registry to find the actual local file
1464
+ let sourcePath = undefined;
1306
1465
  const downloadStatus = metadataStore.getDownloadStatus(datasetId);
1307
- if (!downloadStatus || !fs.existsSync(downloadStatus.local_path)) {
1308
- return {
1309
- content: [{ type: "text", text: `ERROR: No local data found for ${datasetId}. Please run prepare_dataset first.` }],
1310
- isError: true
1311
- };
1466
+ if (downloadStatus && fs.existsSync(downloadStatus.local_path)) {
1467
+ sourcePath = downloadStatus.local_path;
1468
+ }
1469
+ else {
1470
+ // Fallback to local registry
1471
+ const reg = getRegistryEntry(datasetId);
1472
+ if (reg && fs.existsSync(reg.local_path)) {
1473
+ sourcePath = reg.local_path;
1474
+ }
1475
+ }
1476
+ if (!sourcePath) {
1477
+ console.error(`[Export] No local data found for ${datasetId}. Attempting to prepare automatically...`);
1478
+ // Start a prepare job for this dataset id (acts like calling prepare_dataset)
1479
+ try {
1480
+ jobManager.createJob("prepare", 0, { query: datasetId, requirements: undefined, downloadImages: false });
1481
+ }
1482
+ catch (e) {
1483
+ console.error(`[Export] Failed to start prepare job for ${datasetId}: ${e?.message || e}`);
1484
+ }
1485
+ // Poll for download status until local_path appears or timeout
1486
+ const wait = (ms) => new Promise(res => setTimeout(res, ms));
1487
+ const maxWait = 60_000; // 60s
1488
+ const interval = 2000;
1489
+ let waited = 0;
1490
+ while (waited < maxWait) {
1491
+ const ds = metadataStore.getDownloadStatus(datasetId);
1492
+ if (ds && ds.local_path && fs.existsSync(ds.local_path)) {
1493
+ sourcePath = ds.local_path;
1494
+ console.error(`[Export] Local data is now available for ${datasetId}: ${sourcePath}`);
1495
+ break;
1496
+ }
1497
+ await wait(interval);
1498
+ waited += interval;
1499
+ }
1500
+ // If still no sourcePath, return helpful error listing prepared datasets
1501
+ if (!sourcePath) {
1502
+ const entries = readRegistry();
1503
+ const listText = entries.length === 0 ? "(no prepared datasets found)" : entries.map(e => `- ${e.dataset_id}: ${e.local_path}`).join("\n");
1504
+ return {
1505
+ content: [{ type: "text", text: `ERROR: No local data found for ${datasetId} after attempting prepare. Check credentials and try running prepare_dataset manually. Prepared datasets:\n${listText}` }],
1506
+ isError: true
1507
+ };
1508
+ }
1312
1509
  }
1313
- let sourcePath = downloadStatus.local_path;
1314
1510
  // If NOT fast mode, run quality/cleaning pipeline first (only for csv/parquet compat)
1315
1511
  if (!fastMode) {
1316
1512
  const currentExt = path.extname(sourcePath).substring(1).toLowerCase();
@@ -1321,6 +1517,13 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1321
1517
  const pipelineResult = await pipelineExecutor.runPipeline(datasetId, sourcePath, pipelineFmt);
1322
1518
  if (pipelineResult.final_output_path) {
1323
1519
  sourcePath = pipelineResult.final_output_path;
1520
+ try {
1521
+ // Update registry to point to pipeline's final output
1522
+ upsertRegistry(datasetId, sourcePath, "completed");
1523
+ }
1524
+ catch (e) {
1525
+ console.error(`[Registry] Failed to update registry for ${datasetId}: ${e?.message || e}`);
1526
+ }
1324
1527
  }
1325
1528
  }
1326
1529
  catch (err) {
@@ -1439,6 +1642,14 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1439
1642
  });
1440
1643
  const nullDelta = result.stats.null_delta;
1441
1644
  const nullText = nullDelta >= 0 ? `+${nullDelta}%` : `${nullDelta}%`;
1645
+ // Register fused dataset under a generated id so users can export it easily
1646
+ const fusedId = `fused_${Date.now()}`;
1647
+ try {
1648
+ upsertRegistry(fusedId, result.output_path, "completed");
1649
+ }
1650
+ catch (e) {
1651
+ console.error(`[Registry] Failed to register fused dataset ${fusedId}: ${e?.message || e}`);
1652
+ }
1442
1653
  let msg = `Fused ${result.stats.sources_count} sources -> ${result.stats.rows_after.toLocaleString()} rows (from ${result.stats.rows_before.toLocaleString()}).\n`;
1443
1654
  msg += `- Duplicates removed: ${result.stats.duplicates_removed.toLocaleString()}\n`;
1444
1655
  msg += `- Null change: ${nullText}\n`;
@@ -1452,7 +1663,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => {
1452
1663
  }
1453
1664
  msg += "\n";
1454
1665
  }
1455
- msg += `\nNext: run split_dataset/export_dataset on fused output.`;
1666
+ msg += `\nNext: run split_dataset/export_dataset on fused output. Registered fused dataset id: ${fusedId}`;
1456
1667
  return { content: [{ type: "text", text: msg }] };
1457
1668
  }
1458
1669
  catch (error) {
@@ -1630,8 +1841,8 @@ async function main() {
1630
1841
  await runDownloadCli(args);
1631
1842
  return;
1632
1843
  }
1633
- // If run in setup mode OR in a terminal without args (human call), show setup wizard
1634
- if (isSetup || (process.stdin.isTTY && args.length === 0)) {
1844
+ // If run in explicit setup mode, show setup wizard (do not auto-run on server startup)
1845
+ if (isSetup) {
1635
1846
  await runSetupWizard(isSilent);
1636
1847
  return;
1637
1848
  }
@@ -1643,7 +1854,7 @@ async function main() {
1643
1854
  await server.connect(transport);
1644
1855
  console.error("Vesper MCP server running on stdio");
1645
1856
  console.error("Tip: To configure Vesper for your IDE, run: npx @vespermcp/mcp-server --setup");
1646
- console.log("[Vesper] Main loop finished");
1857
+ console.error("[Vesper] Main loop finished");
1647
1858
  }
1648
1859
  async function runConfigCli(args) {
1649
1860
  const isKeys = args.includes("keys");
@@ -1865,9 +2076,21 @@ async function runDownloadCli(args) {
1865
2076
  localPath = dl.local_path;
1866
2077
  const size = fs.existsSync(localPath) ? fs.statSync(localPath).size : 0;
1867
2078
  metadataStore.registerDownload(normalized, localPath, "completed", size);
2079
+ try {
2080
+ upsertRegistry(datasetId, localPath, "completed");
2081
+ }
2082
+ catch (e) {
2083
+ console.error(`[Registry] Failed to write registry for ${datasetId}: ${e?.message || e}`);
2084
+ }
1868
2085
  }
1869
2086
  else {
1870
2087
  localPath = await dataIngestor.ensureData(datasetId, source, (msg) => console.log(msg));
2088
+ try {
2089
+ upsertRegistry(datasetId, localPath, "completed");
2090
+ }
2091
+ catch (e) {
2092
+ console.error(`[Registry] Failed to write registry for ${datasetId}: ${e?.message || e}`);
2093
+ }
1871
2094
  }
1872
2095
  }
1873
2096
  catch (error) {
@@ -1940,21 +2163,21 @@ async function runFuseCli(args) {
1940
2163
  async function runSetupWizard(silent = false) {
1941
2164
  const configManager = new ConfigManager();
1942
2165
  if (!silent) {
1943
- console.log(`\nVesper MCP - Universal Setup`);
1944
- console.log(`================================`);
1945
- console.log(`Installing to all detected coding agents...\n`);
2166
+ console.error(`\nVesper MCP - Universal Setup`);
2167
+ console.error(`================================`);
2168
+ console.error(`Installing to all detected coding agents...\n`);
1946
2169
  }
1947
2170
  const result = await runWithSpinner("Installing to detected coding agents", () => configManager.installToAll());
1948
2171
  if (result.success.length === 0 && result.failed.length === 0) {
1949
2172
  if (!silent) {
1950
- console.log("\nNo supported agents detected.");
1951
- console.log("Supported agents: Claude Code, Claude Desktop, Cursor, VS Code, Codex, Antigravity");
1952
- console.log("\nMake sure at least one is installed, then try again.");
2173
+ console.error("\nNo supported agents detected.");
2174
+ console.error("Supported agents: Claude Code, Claude Desktop, Cursor, VS Code, Codex, Antigravity");
2175
+ console.error("\nMake sure at least one is installed, then try again.");
1953
2176
  }
1954
2177
  return;
1955
2178
  }
1956
2179
  if (!silent) {
1957
- console.log("Setup complete! Please RESTART your IDE(s) to apply changes.");
2180
+ console.error("Setup complete! Please RESTART your IDE(s) to apply changes.");
1958
2181
  }
1959
2182
  }
1960
2183
  main().catch((error) => {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vespermcp/mcp-server",
3
- "version": "1.2.12",
3
+ "version": "1.2.14",
4
4
  "description": "AI-powered dataset discovery, quality analysis, and preparation MCP server with multimodal support (text, image, audio, video)",
5
5
  "type": "module",
6
6
  "main": "build/index.js",
@@ -0,0 +1,157 @@
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+ const os = require('os');
4
+
5
+ const { argv, cwd } = process;
6
+
7
/**
 * Print the command-line help for this script to stdout.
 */
function usage() {
  const helpText = `Usage: node scripts/preindex_registry.cjs [--scan dir1 dir2 ...] [--target N] [--out path] [--no-count]

Options:
--scan Directories to recursively scan for datasets (default: ./e2e_demo_output ./datasets)
--target Target total registry entries (if larger than scanned, will synthesize entries)
--out Output registry path (default: ~/.vesper/registry.json)
--no-count Skip expensive row counting for CSV/JSONL
`;
  console.log(helpText);
}
17
+
18
// Option values, filled in from the command line below.
let scanDirs = [];
let target = 0;
let outPath = path.join(os.homedir(), '.vesper', 'registry.json');
let doCount = true;

// Hand-rolled flag parser; argv[0] and argv[1] are the node binary and the script path.
for (let i = 2; i < argv.length; i++) {
  const a = argv[i];
  if (a === '--scan') {
    // Every following token up to the next "--flag" is a directory to scan.
    while (i + 1 < argv.length && !argv[i + 1].startsWith('--')) {
      scanDirs.push(argv[++i]);
    }
  } else if (a === '--target') {
    // A missing or non-numeric value falls back to 0 (no synthesis).
    target = parseInt(argv[++i], 10) || 0;
  } else if (a === '--out') {
    const value = argv[++i];
    if (value === undefined) {
      // path.resolve(undefined) would throw a bare TypeError; report the real problem.
      console.error('Missing value for --out');
      usage();
      process.exit(2);
    }
    outPath = path.resolve(value);
  } else if (a === '--no-count') {
    doCount = false;
  } else if (a === '--help' || a === '-h') {
    usage();
    process.exit(0);
  } else {
    console.error('Unknown arg', a);
    usage();
    process.exit(2);
  }
}

// Without --scan, fall back to the two default directories under the current working directory.
if (scanDirs.length === 0) scanDirs = [path.join(cwd(), 'e2e_demo_output'), path.join(cwd(), 'datasets')];
49
+
50
/**
 * Derive a registry id from a path-like string.
 *
 * Runs of characters other than ASCII letters and digits become a single "_",
 * leading and trailing underscores are removed, and the result is lower-cased.
 *
 * @param {string} s - Text to normalize (a relative path or base name).
 * @returns {string} Lower-case id made of [a-z0-9_]; may be empty.
 */
function normalizeId(s) {
  const collapsed = s.replace(/[^a-z0-9]+/gi, '_');
  const stripped = collapsed.replace(/^_+|_+$/g, '');
  return stripped.toLowerCase();
}
53
+
54
/**
 * Recursively collect the files under `dir` whose extension is in `exts`.
 *
 * Extensions are compared in lower case. Directories that do not exist are
 * skipped silently; any other directory that cannot be listed is reported on
 * stderr and skipped, so one bad subtree never aborts the scan.
 *
 * @param {string} dir - Directory to scan.
 * @param {string[]} [exts] - Lower-case extensions (with leading dot) to keep.
 * @returns {string[]} Paths of matching files, in directory-listing order.
 */
function walk(dir, exts = ['.csv', '.jsonl', '.json', '.arrow', '.parquet', '.feather']) {
  const results = [];
  // One shared accumulator: spreading a huge child array into push() can exceed
  // the engine's argument limit and throw a RangeError.
  const visit = (current) => {
    let items;
    try {
      items = fs.readdirSync(current, { withFileTypes: true });
    } catch (e) {
      if (!e || e.code !== 'ENOENT') {
        console.error(`Skipping unreadable directory ${current}: ${e && e.message ? e.message : e}`);
      }
      return;
    }
    for (const it of items) {
      const p = path.join(current, it.name);
      if (it.isDirectory()) {
        visit(p);
      } else if (it.isFile() && exts.includes(path.extname(it.name).toLowerCase())) {
        results.push(p);
      }
    }
  };
  visit(dir);
  return results;
}
71
+
72
/**
 * Count the lines of a text file by streaming it.
 *
 * Each "\n" ends a line, and a final line without a trailing newline is counted
 * as well, so the result does not depend on how the file ends. This is a plain
 * line count: a CSV header line is included, and a newline inside a quoted CSV
 * field counts as a line break.
 *
 * @param {string} filePath - File to read.
 * @returns {Promise<number>} Number of lines; 0 for an empty file.
 *   Rejects with the stream error when the file cannot be read.
 */
function countCsvRows(filePath) {
  const NEWLINE = 0x0a;
  return new Promise((resolve, reject) => {
    let count = 0;
    let lastByte = -1; // -1 means no byte has been read yet
    // Read raw bytes: "\n" is always the single byte 0x0A in UTF-8, so no decoding is needed.
    const rs = fs.createReadStream(filePath);
    rs.on('data', chunk => {
      for (let i = 0; i < chunk.length; i++) if (chunk[i] === NEWLINE) count++;
      if (chunk.length > 0) lastByte = chunk[chunk.length - 1];
    });
    rs.on('end', () => {
      const hasUnterminatedLastLine = lastByte !== -1 && lastByte !== NEWLINE;
      resolve(hasUnterminatedLastLine ? count + 1 : count);
    });
    rs.on('error', reject);
  });
}
83
+
84
// Return the first line of a file (without its line terminator), reading only as much as needed.
function readFirstLine(filePath) {
  const NEWLINE = 0x0a;
  const fd = fs.openSync(filePath, 'r');
  try {
    const buf = Buffer.alloc(64 * 1024);
    const parts = [];
    let bytesRead;
    while ((bytesRead = fs.readSync(fd, buf, 0, buf.length, null)) > 0) {
      const view = buf.subarray(0, bytesRead);
      const nl = view.indexOf(NEWLINE);
      if (nl !== -1) {
        parts.push(Buffer.from(view.subarray(0, nl)));
        break;
      }
      // Copy: `buf` is reused by the next read.
      parts.push(Buffer.from(view));
    }
    return Buffer.concat(parts).toString('utf8').replace(/\r$/, '');
  } finally {
    fs.closeSync(fd);
  }
}

/**
 * Script entry point: merge the existing registry at `outPath` with one entry per
 * data file found under `scanDirs`, optionally pad the registry with synthesized
 * placeholder entries up to `target`, and write the result back to `outPath`.
 *
 * Scanned entries replace existing entries with the same id. For CSV files
 * `meta.cols` is the number of comma-separated header fields; for CSV and JSONL
 * files `meta.rows` is the value returned by countCsvRows (the CSV header line
 * is not subtracted). Both stay null when counting is disabled or fails.
 *
 * NOTE(review): entries use the keys id/normalized_id/path, while the server-side
 * registry helpers read dataset_id/local_path — confirm whether both are meant to
 * share one registry.json.
 */
(async function main() {
  const describe = (e) => (e && e.message ? e.message : e);

  const registryDir = path.dirname(outPath);
  fs.mkdirSync(registryDir, { recursive: true });

  // Load the current registry; anything that is not a JSON array is ignored with a message.
  let existing = [];
  if (fs.existsSync(outPath)) {
    try {
      const parsed = JSON.parse(fs.readFileSync(outPath, 'utf8'));
      if (Array.isArray(parsed)) existing = parsed;
      else console.error(`Ignoring existing registry at ${outPath}: expected a JSON array`);
    } catch (e) {
      console.error(`Ignoring unreadable registry at ${outPath}: ${describe(e)}`);
    }
  }
  const map = new Map();
  let unkeyed = 0;
  for (const e of existing) {
    // Entries without any known id field get a private key so they are carried
    // over individually instead of overwriting each other under `undefined`.
    const key = (e && (e.normalized_id || e.id || e.dataset_id)) || `__unkeyed_${unkeyed++}`;
    map.set(key, e);
  }

  let scanned = 0;
  for (const dir of scanDirs) {
    const abs = path.resolve(dir);
    for (const f of walk(abs)) {
      let stats;
      try {
        stats = fs.statSync(f);
      } catch (e) {
        // The file may have disappeared since the directory listing.
        console.error(`Skipping ${f}: ${describe(e)}`);
        continue;
      }
      // Lower-cased so that "DATA.CSV" is counted too; walk matches extensions the same way.
      const ext = path.extname(f).toLowerCase();
      const base = path.basename(f, path.extname(f));
      const rel = path.relative(process.cwd(), f);
      const id = normalizeId(rel || base);
      let cols = null;
      let rows = null;
      if (doCount && (ext === '.csv' || ext === '.jsonl')) {
        try {
          if (ext === '.csv') {
            const header = readFirstLine(f);
            cols = header ? header.split(',').length : 0;
          }
          rows = await countCsvRows(f);
        } catch (e) {
          console.error(`Could not count rows/columns of ${f}: ${describe(e)}`);
        }
      }
      map.set(id, {
        id,
        normalized_id: id,
        source: 'scanned',
        path: f,
        size: stats.size,
        mtime: stats.mtime.toISOString(),
        meta: { rows, cols }
      });
      scanned++;
    }
  }

  // Pad with placeholder entries until the registry holds `target` entries.
  let synthesized = 0;
  if (target > map.size) {
    const synthDir = path.join(registryDir, 'local_library');
    fs.mkdirSync(synthDir, { recursive: true });
    let idx = map.size;
    while (map.size < target) {
      idx++;
      const id = `synth_${String(idx).padStart(6, '0')}`;
      // Skip ids left over from an earlier run so the target is actually reached.
      if (map.has(id)) continue;
      map.set(id, {
        id,
        normalized_id: id,
        source: 'synthesized',
        path: path.join(synthDir, `${id}.csv`),
        size: 0,
        mtime: new Date().toISOString(),
        meta: { rows: Math.floor(Math.random() * 1000000), cols: Math.floor(Math.random() * 200) + 1 }
      });
      synthesized++;
    }
  }

  const outArr = Array.from(map.values());
  fs.writeFileSync(outPath, JSON.stringify(outArr, null, 2), 'utf8');
  console.log(`Wrote ${outArr.length} registry entries to ${outPath} (${scanned} scanned, ${synthesized} synthesized)`);
})().catch((e) => {
  console.error(`preindex_registry failed: ${e && e.stack ? e.stack : e}`);
  process.exitCode = 1;
});