vesper-wizard 2.3.1 → 2.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214) hide show
  1. package/README.md +37 -322
  2. package/package.json +34 -100
  3. package/vesper-mcp-config.json +6 -0
  4. package/{scripts/wizard.js → wizard.js} +1 -1
  5. package/LICENSE +0 -21
  6. package/build/cache/cdn.js +0 -34
  7. package/build/cache/service.js +0 -63
  8. package/build/cleaning/cleaner.js +0 -81
  9. package/build/cleaning/evaluator.js +0 -89
  10. package/build/cleaning/executor.js +0 -62
  11. package/build/cleaning/exporter.js +0 -87
  12. package/build/cleaning/planner.js +0 -127
  13. package/build/cleaning/rules.js +0 -57
  14. package/build/cleaning/types.js +0 -1
  15. package/build/cloud/adapters/local.js +0 -37
  16. package/build/cloud/adapters/s3.js +0 -24
  17. package/build/cloud/adapters/supabase.js +0 -49
  18. package/build/cloud/storage-manager.js +0 -26
  19. package/build/cloud/types.js +0 -1
  20. package/build/compliance/service.js +0 -73
  21. package/build/compliance/store.js +0 -80
  22. package/build/compliance/types.js +0 -1
  23. package/build/config/config-manager.js +0 -221
  24. package/build/config/secure-keys.js +0 -51
  25. package/build/config/user-config.js +0 -48
  26. package/build/data/processing-worker.js +0 -23
  27. package/build/data/streaming.js +0 -38
  28. package/build/data/worker-pool.js +0 -39
  29. package/build/export/exporter.js +0 -82
  30. package/build/export/packager.js +0 -100
  31. package/build/export/types.js +0 -1
  32. package/build/fusion/aligner.js +0 -56
  33. package/build/fusion/deduplicator.js +0 -69
  34. package/build/fusion/engine.js +0 -69
  35. package/build/fusion/harmonizer.js +0 -39
  36. package/build/fusion/orchestrator.js +0 -86
  37. package/build/fusion/types.js +0 -1
  38. package/build/gateway/unified-dataset-gateway.js +0 -410
  39. package/build/index.js +0 -3068
  40. package/build/ingestion/hf-downloader.js +0 -171
  41. package/build/ingestion/ingestor.js +0 -271
  42. package/build/ingestion/kaggle-downloader.js +0 -102
  43. package/build/install/install-service.js +0 -46
  44. package/build/jobs/manager.js +0 -136
  45. package/build/jobs/queue.js +0 -59
  46. package/build/jobs/types.js +0 -1
  47. package/build/lib/supabase.js +0 -3
  48. package/build/metadata/dataworld-source.js +0 -89
  49. package/build/metadata/domain.js +0 -147
  50. package/build/metadata/github-scraper.js +0 -47
  51. package/build/metadata/institutional-scrapers.js +0 -49
  52. package/build/metadata/kaggle-scraper.js +0 -182
  53. package/build/metadata/kaggle-source.js +0 -70
  54. package/build/metadata/license.js +0 -68
  55. package/build/metadata/monitoring-service.js +0 -107
  56. package/build/metadata/monitoring-store.js +0 -78
  57. package/build/metadata/monitoring-types.js +0 -1
  58. package/build/metadata/openml-source.js +0 -87
  59. package/build/metadata/quality.js +0 -48
  60. package/build/metadata/rate-limiter.js +0 -128
  61. package/build/metadata/scraper.js +0 -448
  62. package/build/metadata/store.js +0 -340
  63. package/build/metadata/types.js +0 -1
  64. package/build/metadata/uci-scraper.js +0 -49
  65. package/build/monitoring/observability.js +0 -76
  66. package/build/preparation/target-detector.js +0 -75
  67. package/build/python/__pycache__/config.cpython-312.pyc +0 -0
  68. package/build/python/asset_downloader_engine.py +0 -94
  69. package/build/python/cleaner.py +0 -226
  70. package/build/python/config.py +0 -263
  71. package/build/python/convert_engine.py +0 -92
  72. package/build/python/dataworld_engine.py +0 -208
  73. package/build/python/export_engine.py +0 -288
  74. package/build/python/framework_adapters.py +0 -100
  75. package/build/python/fusion_engine.py +0 -368
  76. package/build/python/github_adapter.py +0 -106
  77. package/build/python/hf_fallback.py +0 -298
  78. package/build/python/image_engine.py +0 -86
  79. package/build/python/kaggle_engine.py +0 -295
  80. package/build/python/media_engine.py +0 -133
  81. package/build/python/nasa_adapter.py +0 -82
  82. package/build/python/normalize_engine.py +0 -83
  83. package/build/python/openml_engine.py +0 -146
  84. package/build/python/quality_engine.py +0 -267
  85. package/build/python/row_count.py +0 -54
  86. package/build/python/splitter_engine.py +0 -283
  87. package/build/python/target_engine.py +0 -154
  88. package/build/python/test_framework_adapters.py +0 -61
  89. package/build/python/test_fusion_engine.py +0 -89
  90. package/build/python/uci_adapter.py +0 -94
  91. package/build/python/vesper/__init__.py +0 -1
  92. package/build/python/vesper/__pycache__/__init__.cpython-312.pyc +0 -0
  93. package/build/python/vesper/core/__init__.py +0 -1
  94. package/build/python/vesper/core/__pycache__/__init__.cpython-312.pyc +0 -0
  95. package/build/python/vesper/core/__pycache__/asset_downloader.cpython-312.pyc +0 -0
  96. package/build/python/vesper/core/__pycache__/download_recipe.cpython-312.pyc +0 -0
  97. package/build/python/vesper/core/asset_downloader.py +0 -679
  98. package/build/python/vesper/core/download_recipe.py +0 -104
  99. package/build/python/worldbank_adapter.py +0 -99
  100. package/build/quality/analyzer.js +0 -93
  101. package/build/quality/image-analyzer.js +0 -114
  102. package/build/quality/media-analyzer.js +0 -115
  103. package/build/quality/quality-orchestrator.js +0 -162
  104. package/build/quality/types.js +0 -1
  105. package/build/scripts/build-index.js +0 -54
  106. package/build/scripts/check-db.js +0 -73
  107. package/build/scripts/check-jobs.js +0 -24
  108. package/build/scripts/check-naruto.js +0 -17
  109. package/build/scripts/cleanup-kaggle.js +0 -41
  110. package/build/scripts/demo-full-pipeline.js +0 -62
  111. package/build/scripts/demo-ui.js +0 -58
  112. package/build/scripts/e2e-demo.js +0 -72
  113. package/build/scripts/massive-scrape.js +0 -103
  114. package/build/scripts/ops-dashboard.js +0 -33
  115. package/build/scripts/repro-bug.js +0 -37
  116. package/build/scripts/repro-export-bug.js +0 -56
  117. package/build/scripts/scrape-metadata.js +0 -100
  118. package/build/scripts/search-cli.js +0 -26
  119. package/build/scripts/test-bias.js +0 -45
  120. package/build/scripts/test-caching.js +0 -51
  121. package/build/scripts/test-cleaning.js +0 -76
  122. package/build/scripts/test-cloud-storage.js +0 -48
  123. package/build/scripts/test-compliance.js +0 -58
  124. package/build/scripts/test-conversion.js +0 -64
  125. package/build/scripts/test-custom-rules.js +0 -58
  126. package/build/scripts/test-db-opt.js +0 -63
  127. package/build/scripts/test-export-custom.js +0 -33
  128. package/build/scripts/test-exporter.js +0 -53
  129. package/build/scripts/test-fusion.js +0 -61
  130. package/build/scripts/test-github.js +0 -27
  131. package/build/scripts/test-group-split.js +0 -52
  132. package/build/scripts/test-hf-download.js +0 -29
  133. package/build/scripts/test-holdout-manager.js +0 -61
  134. package/build/scripts/test-hybrid-search.js +0 -41
  135. package/build/scripts/test-image-analysis.js +0 -50
  136. package/build/scripts/test-ingestion-infra.js +0 -39
  137. package/build/scripts/test-install.js +0 -40
  138. package/build/scripts/test-institutional.js +0 -26
  139. package/build/scripts/test-integrity.js +0 -41
  140. package/build/scripts/test-jit.js +0 -42
  141. package/build/scripts/test-job-queue.js +0 -62
  142. package/build/scripts/test-kaggle-download.js +0 -34
  143. package/build/scripts/test-large-data.js +0 -50
  144. package/build/scripts/test-mcp-v5.js +0 -74
  145. package/build/scripts/test-media-analysis.js +0 -61
  146. package/build/scripts/test-monitoring.js +0 -91
  147. package/build/scripts/test-observability.js +0 -106
  148. package/build/scripts/test-packager.js +0 -55
  149. package/build/scripts/test-pipeline.js +0 -50
  150. package/build/scripts/test-planning.js +0 -64
  151. package/build/scripts/test-privacy.js +0 -38
  152. package/build/scripts/test-production-sync.js +0 -36
  153. package/build/scripts/test-quality.js +0 -43
  154. package/build/scripts/test-robust-ingestion.js +0 -41
  155. package/build/scripts/test-schema.js +0 -45
  156. package/build/scripts/test-split-validation.js +0 -40
  157. package/build/scripts/test-splitter.js +0 -93
  158. package/build/scripts/test-target-detector.js +0 -29
  159. package/build/scripts/test-uci.js +0 -27
  160. package/build/scripts/test-unified-quality.js +0 -86
  161. package/build/scripts/test-write.js +0 -14
  162. package/build/scripts/verify-integration.js +0 -57
  163. package/build/scripts/verify-priority.js +0 -33
  164. package/build/search/embedder.js +0 -34
  165. package/build/search/engine.js +0 -190
  166. package/build/search/jit-orchestrator.js +0 -262
  167. package/build/search/query-intent.js +0 -509
  168. package/build/search/vector-store.js +0 -123
  169. package/build/splitting/splitter.js +0 -82
  170. package/build/splitting/types.js +0 -1
  171. package/build/tools/formatter.js +0 -251
  172. package/build/utils/downloader.js +0 -52
  173. package/build/utils/python-runtime.js +0 -130
  174. package/build/utils/selector.js +0 -69
  175. package/mcp-config-template.json +0 -18
  176. package/scripts/postinstall.cjs +0 -170
  177. package/scripts/preindex_registry.cjs +0 -157
  178. package/scripts/refresh-index.cjs +0 -87
  179. package/scripts/wizard.cjs +0 -601
  180. package/src/python/__pycache__/config.cpython-312.pyc +0 -0
  181. package/src/python/__pycache__/export_engine.cpython-312.pyc +0 -0
  182. package/src/python/__pycache__/framework_adapters.cpython-312.pyc +0 -0
  183. package/src/python/__pycache__/fusion_engine.cpython-312.pyc +0 -0
  184. package/src/python/__pycache__/kaggle_engine.cpython-312.pyc +0 -0
  185. package/src/python/asset_downloader_engine.py +0 -94
  186. package/src/python/cleaner.py +0 -226
  187. package/src/python/config.py +0 -263
  188. package/src/python/convert_engine.py +0 -92
  189. package/src/python/dataworld_engine.py +0 -208
  190. package/src/python/export_engine.py +0 -288
  191. package/src/python/framework_adapters.py +0 -100
  192. package/src/python/fusion_engine.py +0 -368
  193. package/src/python/github_adapter.py +0 -106
  194. package/src/python/hf_fallback.py +0 -298
  195. package/src/python/image_engine.py +0 -86
  196. package/src/python/kaggle_engine.py +0 -295
  197. package/src/python/media_engine.py +0 -133
  198. package/src/python/nasa_adapter.py +0 -82
  199. package/src/python/normalize_engine.py +0 -83
  200. package/src/python/openml_engine.py +0 -146
  201. package/src/python/quality_engine.py +0 -267
  202. package/src/python/requirements.txt +0 -12
  203. package/src/python/row_count.py +0 -54
  204. package/src/python/splitter_engine.py +0 -283
  205. package/src/python/target_engine.py +0 -154
  206. package/src/python/test_framework_adapters.py +0 -61
  207. package/src/python/test_fusion_engine.py +0 -89
  208. package/src/python/uci_adapter.py +0 -94
  209. package/src/python/vesper/__init__.py +0 -1
  210. package/src/python/vesper/core/__init__.py +0 -1
  211. package/src/python/vesper/core/asset_downloader.py +0 -679
  212. package/src/python/vesper/core/download_recipe.py +0 -104
  213. package/src/python/worldbank_adapter.py +0 -99
  214. package/wizard.cjs +0 -3
@@ -1,170 +0,0 @@
1
- #!/usr/bin/env node
2
-
3
- const { execSync } = require('child_process');
4
- const fs = require('fs');
5
- const os = require('os');
6
- const path = require('path');
7
-
8
- console.log('\n🚀 Setting up Vesper MCP Server...\n');
9
-
10
/**
 * Find a shell command that launches a usable Python interpreter.
 *
 * Candidates are probed in order by running `<command> --version`:
 * `py -3` then `python` on Windows, `python3` then `python` elsewhere.
 * A candidate that reports a major version below 3 is skipped, because the
 * rest of this script relies on `-m venv`. If the version cannot be parsed
 * the candidate is still accepted, as before.
 *
 * @returns {string|null} The first working command, or null when none works.
 */
function getPythonBootstrapCommand() {
    const attempts = process.platform === 'win32'
        ? ['py -3', 'python']
        : ['python3', 'python'];

    for (const command of attempts) {
        try {
            // Python 2 prints its version on stderr, so fold stderr into stdout.
            const output = execSync(`${command} --version 2>&1`, { stdio: 'pipe' }).toString();
            const match = /Python\s+(\d+)\./i.exec(output);
            if (match && Number(match[1]) < 3) {
                continue;
            }
            return command;
        } catch {
            // command missing or not runnable: try the next candidate
        }
    }

    return null;
}
26
-
27
// 1. Check for Python
// A missing interpreter only disables the Python-backed features, so warn and
// keep going: the data directories, the native-module rebuild and the Claude
// Desktop configuration further down do not depend on Python.
const pythonBootstrap = getPythonBootstrapCommand();
if (pythonBootstrap) {
    console.log('✅ Python found');
} else {
    console.warn('⚠️ Python not found. Please install Python 3.8+ for full functionality.');
    console.warn(' Image/audio/video analysis features will not work without Python.\n');
}

// Fall back to the environment when os.homedir() yields nothing.
const homeDir = os.homedir() || process.env.HOME || process.env.USERPROFILE;
const vesperDataDir = path.join(homeDir, '.vesper');
const managedVenvDir = path.join(vesperDataDir, '.venv');
// Interpreter location inside the venv differs between Windows and POSIX layouts.
const managedPython = process.platform === 'win32'
    ? path.join(managedVenvDir, 'Scripts', 'python.exe')
    : path.join(managedVenvDir, 'bin', 'python');
const requirementsPath = path.resolve(__dirname, '..', 'src', 'python', 'requirements.txt');

// 2. Create data directories
const dirs = [
    vesperDataDir,
    path.join(vesperDataDir, 'data'),
    path.join(vesperDataDir, 'data', 'raw'),
    path.join(vesperDataDir, 'data', 'processed'),
    path.join(vesperDataDir, 'datasets')
];

for (const dir of dirs) {
    if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir, { recursive: true });
    }
}

console.log(`✅ Data directories created at ${vesperDataDir}`);

// Extra packages installed on top of requirements.txt.
const pythonPackages = [
    'opencv-python',
    'pillow',
    'librosa',
    'soundfile',
    'pyarrow'
];

if (pythonBootstrap) {
    // 3. Create a managed Vesper Python environment
    console.log('\n🐍 Preparing managed Python environment...');
    try {
        // Only create the venv when its interpreter is not already present.
        if (!fs.existsSync(managedPython)) {
            execSync(`${pythonBootstrap} -m venv "${managedVenvDir}"`, {
                stdio: 'inherit',
                timeout: 180000,
            });
        }
        console.log(`✅ Managed Python ready at ${managedVenvDir}`);
    } catch (e) {
        // Best effort: a failed venv must not fail the npm install.
        console.warn('⚠️ Failed to create the managed Vesper Python environment.');
        console.warn(` Vesper will fall back to PATH Python and may need to self-heal at runtime. ${(e && e.message) || ''}`.trim());
    }

    // 4. Install Python dependencies into the managed environment
    console.log('\n📦 Installing Python dependencies...');
    try {
        // Prefer the venv interpreter; fall back to the bootstrap command if the venv is absent.
        const targetPython = fs.existsSync(managedPython) ? `"${managedPython}"` : pythonBootstrap;
        execSync(`${targetPython} -m pip install --disable-pip-version-check --upgrade pip`, {
            stdio: 'inherit',
            timeout: 180000,
        });
        execSync(`${targetPython} -m pip install --disable-pip-version-check -r "${requirementsPath}" ${pythonPackages.join(' ')}`, {
            stdio: 'inherit',
            timeout: 600000,
        });
        console.log('✅ Python dependencies installed');
    } catch (e) {
        // Best effort: print the manual command instead of failing the install.
        console.warn('⚠️ Failed to install some Python dependencies.');
        console.warn(' You may need to install them manually into the Vesper runtime:');
        console.warn(` ${fs.existsSync(managedPython) ? managedPython : pythonBootstrap} -m pip install -r "${requirementsPath}" ${pythonPackages.join(' ')}\n`);
    }
}
106
-
107
// 5. Rebuild better-sqlite3 for current Node.js version
console.log('\n🔧 Rebuilding native modules for current Node.js...');
{
    // Run from the package root (one level above this script).
    const rebuildOptions = {
        stdio: 'pipe',
        timeout: 60000,
        cwd: path.resolve(__dirname, '..')
    };
    let rebuildError = null;
    try {
        execSync('npm rebuild better-sqlite3', rebuildOptions);
    } catch (e) {
        rebuildError = e;
    }

    if (rebuildError === null) {
        console.log('✅ Native modules rebuilt successfully');
    } else {
        // Best effort: report the problem and tell the user how to retry by hand.
        console.warn(`⚠️ Could not rebuild better-sqlite3: ${rebuildError.message || rebuildError}`);
        console.warn(' If you see ERR_DLOPEN_FAILED, run: npm rebuild better-sqlite3');
    }
}

// 6. Auto-configure Claude Desktop (Best Effort)
console.log('\n⚙️ Attempting to auto-configure Claude Desktop...');
123
-
124
/**
 * Work out where Claude Desktop keeps its configuration file.
 *
 * - Windows: `%APPDATA%/Claude/claude_desktop_config.json`
 * - macOS: `<home>/Library/Application Support/Claude/claude_desktop_config.json`
 * - any other platform: not handled
 *
 * Never throws on a missing environment variable: when the base directory
 * cannot be determined the function returns null, so the caller simply skips
 * auto-configuration.
 *
 * @returns {string|null} Path of the config file, or null when it is unknown.
 */
function getClaudeConfigPath() {
    const platform = process.platform;

    if (platform === 'win32') {
        const appData = process.env.APPDATA;
        return appData
            ? path.join(appData, 'Claude', 'claude_desktop_config.json')
            : null;
    }

    if (platform === 'darwin') {
        // Environment first (as before), then the OS-reported home directory.
        const home = process.env.HOME || process.env.USERPROFILE || os.homedir();
        return home
            ? path.join(home, 'Library', 'Application Support', 'Claude', 'claude_desktop_config.json')
            : null;
    }

    return null;
}
135
-
136
const configPath = getClaudeConfigPath();
const claudeConfigPresent = Boolean(configPath) && fs.existsSync(configPath);

if (!claudeConfigPresent) {
    console.log('ℹ️ Claude Desktop config not found (skipping auto-config)');
} else {
    try {
        const config = JSON.parse(fs.readFileSync(configPath, 'utf8'));

        if (!config.mcpServers) {
            config.mcpServers = {};
        }

        if (config.mcpServers.vesper) {
            // An existing entry is left exactly as the user configured it.
            console.log(`ℹ️ 'vesper' is already configured in ${configPath}`);
        } else {
            config.mcpServers.vesper = {
                command: "vesper",
                args: [],
                env: { "HF_TOKEN": "" }
            };

            fs.writeFileSync(configPath, JSON.stringify(config, null, 2));
            console.log(`✅ Automatically added 'vesper' to ${configPath}`);
        }
    } catch (e) {
        // Best effort: unreadable or malformed config only produces a warning.
        console.warn(`⚠️ Could not auto-configure Claude Desktop: ${e.message}`);
    }
}

// Closing banner and next steps.
[
    '\n✨ Vesper MCP Server installed successfully!\n',
    '📖 Next steps:',
    ' 1. Restart your AI assistant (Cursor/Claude)',
    ' 2. Try: search_datasets(query="sentiment analysis")',
    '\n💡 For full documentation, visit: https://github.com/vesper/mcp-server\n',
].forEach((line) => {
    console.log(line);
});
@@ -1,157 +0,0 @@
1
- const fs = require('fs');
2
- const path = require('path');
3
- const os = require('os');
4
-
5
- const { argv, cwd } = process;
6
-
7
/**
 * Print the command-line help for this script to stdout.
 */
function usage() {
    const helpLines = [
        'Usage: node scripts/preindex_registry.cjs [--scan dir1 dir2 ...] [--target N] [--out path] [--no-count]',
        '',
        'Options:',
        '--scan Directories to recursively scan for datasets (default: ./e2e_demo_output ./datasets)',
        '--target Target total registry entries (if larger than scanned, will synthesize entries)',
        '--out Output registry path (default: ~/.vesper/registry.json)',
        '--no-count Skip expensive row counting for CSV/JSONL',
        '', // the help text ends with a newline
    ];
    console.log(helpLines.join('\n'));
}
17
-
18
// Command-line state, filled in by the argument loop below.
let scanDirs = [];
let target = 0;
let outPath = path.join(os.homedir(), '.vesper', 'registry.json');
let doCount = true;

// Return the value that follows a flag. When it is missing (end of argv, or
// the next token is another flag) print usage and exit with status 2 instead
// of crashing later or silently swallowing the next flag.
const readFlagValue = (flag, value) => {
    if (value === undefined || value.startsWith('--')) {
        console.error('Missing value for', flag);
        usage();
        process.exit(2);
    }
    return value;
};

// argv[0] is the node binary and argv[1] the script path.
for (let i = 2; i < argv.length; i++) {
    const a = argv[i];
    if (a === '--scan') {
        // Consume every following token up to the next flag.
        while (i + 1 < argv.length && !argv[i + 1].startsWith('--')) {
            scanDirs.push(argv[++i]);
        }
    } else if (a === '--target') {
        // A non-numeric value falls back to 0, which disables synthesis.
        target = Number.parseInt(readFlagValue(a, argv[++i]), 10) || 0;
    } else if (a === '--out') {
        outPath = path.resolve(readFlagValue(a, argv[++i]));
    } else if (a === '--no-count') {
        doCount = false;
    } else if (a === '--help' || a === '-h') {
        usage();
        process.exit(0);
    } else {
        console.error('Unknown arg', a);
        usage();
        process.exit(2);
    }
}

// Default scan roots, relative to the current working directory.
if (scanDirs.length === 0) scanDirs = [path.join(cwd(), 'e2e_demo_output'), path.join(cwd(), 'datasets')];
49
-
50
/**
 * Turn an arbitrary string (typically a relative file path) into a registry id.
 *
 * Each run of characters other than ASCII letters and digits becomes a single
 * underscore, underscores at either end are removed, and the result is
 * lower-cased.
 *
 * @param {string} s - Text to normalise.
 * @returns {string} The normalised id (may be empty).
 */
function normalizeId(s) {
    const underscored = s.replace(/[^a-z0-9]+/gi, '_');
    const trimmed = underscored.replace(/^_+|_+$/g, '');
    return trimmed.toLowerCase();
}
53
-
54
/**
 * Recursively collect the files under `dir` whose extension is in `exts`.
 *
 * Extensions are compared case-insensitively. Directories that cannot be
 * read (missing, permission denied, ...) are skipped silently. Results are
 * appended to one shared array rather than spread into `push(...)`, so a
 * very large subtree cannot overflow the argument limit and be dropped.
 *
 * @param {string} dir - Directory to scan.
 * @param {string[]} [exts] - Lower-case extensions (with leading dot) to keep.
 * @returns {string[]} Matching file paths, depth-first in directory-listing order.
 */
function walk(dir, exts = ['.csv', '.jsonl', '.json', '.arrow', '.parquet', '.feather']) {
    const results = [];

    const visit = (current) => {
        let items;
        try {
            items = fs.readdirSync(current, { withFileTypes: true });
        } catch (e) {
            // unreadable or missing directory: best-effort scan, skip it
            return;
        }
        for (const it of items) {
            const p = path.join(current, it.name);
            if (it.isDirectory()) {
                visit(p);
            } else if (it.isFile()) {
                const ext = path.extname(it.name).toLowerCase();
                if (exts.includes(ext)) results.push(p);
            }
        }
    };

    visit(dir);
    return results;
}
71
-
72
/**
 * Count the lines of a text file by streaming it.
 *
 * Every newline-terminated line counts as one, and so does a final line
 * that is not newline-terminated (previously that line was missed). For a
 * CSV file the header line is included in the count. An empty file yields 0.
 *
 * @param {string} filePath - File to read (decoded as UTF-8).
 * @returns {Promise<number>} Resolves with the line count; rejects on read errors.
 */
function countCsvRows(filePath) {
    return new Promise((resolve, reject) => {
        let count = 0;
        // Starts as a newline so that an empty file is not counted as one line.
        let lastChar = '\n';
        const rs = fs.createReadStream(filePath, { encoding: 'utf8' });
        rs.on('data', chunk => {
            let pos = -1;
            while ((pos = chunk.indexOf('\n', pos + 1)) !== -1) count++;
            if (chunk.length > 0) lastChar = chunk[chunk.length - 1];
        });
        rs.on('end', () => resolve(lastChar === '\n' ? count : count + 1));
        rs.on('error', reject);
    });
}
83
-
84
/**
 * Build or refresh the registry file at `outPath`.
 *
 * 1. Load the existing registry (if it parses to an array) keyed by id.
 * 2. Scan every directory in `scanDirs`, adding or replacing one entry per
 *    dataset file; optionally record column and line counts.
 * 3. If `target` exceeds the number of entries, pad the registry with
 *    synthetic placeholder entries until it holds `target` entries.
 * 4. Write the registry as pretty-printed JSON and print a summary.
 */
(async function main() {
    const registryDir = path.dirname(outPath);
    if (!fs.existsSync(registryDir)) fs.mkdirSync(registryDir, { recursive: true });

    // Read just the first line of a file, so a large CSV is not loaded into
    // memory only to look at its header.
    function readFirstLine(filePath) {
        const fd = fs.openSync(filePath, 'r');
        try {
            const pieces = [];
            const buf = Buffer.alloc(64 * 1024);
            let bytesRead;
            while ((bytesRead = fs.readSync(fd, buf, 0, buf.length, null)) > 0) {
                const slice = buf.subarray(0, bytesRead);
                const nl = slice.indexOf(0x0a);
                if (nl !== -1) {
                    pieces.push(Buffer.from(slice.subarray(0, nl)));
                    break;
                }
                // `buf` is reused on the next read, so keep a copy.
                pieces.push(Buffer.from(slice));
            }
            return Buffer.concat(pieces).toString('utf8').replace(/\r$/, '');
        } finally {
            fs.closeSync(fd);
        }
    }

    let existing = [];
    if (fs.existsSync(outPath)) {
        try { existing = JSON.parse(fs.readFileSync(outPath, 'utf8')); } catch (e) { existing = []; }
    }
    // A registry file that holds anything but an array is treated as empty.
    if (!Array.isArray(existing)) existing = [];
    const map = new Map();
    for (const e of existing) {
        if (e && typeof e === 'object') map.set(e.normalized_id || e.id, e);
    }

    let scanned = 0;
    for (const dir of scanDirs) {
        const abs = path.resolve(dir);
        const files = walk(abs);
        for (const f of files) {
            let stats;
            try {
                stats = fs.statSync(f);
            } catch (e) {
                // file disappeared between the directory walk and now
                continue;
            }
            const base = path.basename(f, path.extname(f));
            const rel = path.relative(process.cwd(), f);
            const id = normalizeId(rel || base);
            // walk() matches extensions case-insensitively, so do the same here.
            const ext = path.extname(f).toLowerCase();
            let cols = null;
            let rows = null;
            if (doCount && (ext === '.csv' || ext === '.jsonl')) {
                try {
                    if (ext === '.csv') {
                        const header = readFirstLine(f);
                        cols = header ? header.split(',').length : 0;
                    }
                    rows = await countCsvRows(f);
                } catch (e) {
                    // counts are optional metadata; leave them null on failure
                }
            }
            const entry = {
                id: id,
                normalized_id: id,
                source: 'scanned',
                path: f,
                size: stats.size,
                mtime: stats.mtime.toISOString(),
                meta: { rows, cols }
            };
            map.set(id, entry);
            scanned++;
        }
    }

    // Synthesize if target requested
    let synthesized = 0;
    if (target > map.size) {
        const synthDir = path.join(path.dirname(outPath), 'local_library');
        if (!fs.existsSync(synthDir)) fs.mkdirSync(synthDir, { recursive: true });
        // Number ids consecutively and skip any id already present, so the
        // registry ends up with exactly `target` entries.
        let idx = map.size;
        while (map.size < target) {
            idx++;
            const id = `synth_${String(idx).padStart(6, '0')}`;
            if (map.has(id)) continue;
            const entry = {
                id,
                normalized_id: id,
                source: 'synthesized',
                path: path.join(synthDir, `${id}.csv`),
                size: 0,
                mtime: new Date().toISOString(),
                // placeholder statistics, not measured from any file
                meta: { rows: Math.floor(Math.random() * 1000000), cols: Math.floor(Math.random() * 200) + 1 }
            };
            map.set(id, entry);
            synthesized++;
        }
    }

    const outArr = Array.from(map.values());
    fs.writeFileSync(outPath, JSON.stringify(outArr, null, 2), 'utf8');
    console.log(`Wrote ${outArr.length} registry entries to ${outPath} (${scanned} scanned, ${synthesized} synthesized)`);
})().catch((err) => {
    // Report failures explicitly instead of leaving an unhandled rejection.
    console.error('preindex_registry failed:', (err && err.message) || err);
    process.exit(1);
});
@@ -1,87 +0,0 @@
1
- #!/usr/bin/env node
2
-
3
- const { spawnSync } = require("child_process");
4
- const fs = require("fs");
5
- const path = require("path");
6
- const os = require("os");
7
- const Database = require("better-sqlite3");
8
-
9
/**
 * Run a command synchronously and throw unless it exits with status 0.
 *
 * Output is inherited from this process by default. On Windows the command
 * goes through a shell. Entries in `options` override these defaults.
 *
 * @param {string} command - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {object} [options] - Extra `spawnSync` options.
 * @throws {Error} When the command cannot be started, is killed by a signal,
 *   or exits with a non-zero status.
 */
function runCommand(command, args, options = {}) {
    const result = spawnSync(command, args, {
        stdio: "inherit",
        shell: process.platform === "win32",
        ...options,
    });
    const label = `${command} ${args.join(" ")}`;

    // `status` is null when the process never started or died from a signal,
    // so report those cases explicitly instead of "exit null".
    if (result.error) {
        throw new Error(`Command failed: ${label} (${result.error.message})`);
    }
    if (result.signal) {
        throw new Error(`Command failed: ${label} (killed by ${result.signal})`);
    }
    if (result.status !== 0) {
        throw new Error(`Command failed: ${label} (exit ${result.status})`);
    }
}
20
-
21
/**
 * Count the rows of the `datasets` table in a SQLite database file.
 *
 * @param {string} dbPath - Path of the database file.
 * @returns {number|string} The row count, or "N/A" when the file does not exist.
 */
function countDatasets(dbPath) {
    if (!fs.existsSync(dbPath)) return "N/A";
    const db = new Database(dbPath);
    try {
        return db.prepare("SELECT COUNT(*) AS c FROM datasets").get().c;
    } finally {
        // Close the handle even when the query throws (e.g. missing table).
        db.close();
    }
}
28
-
29
/**
 * Report how many vectors a vector-index JSON file describes.
 *
 * Uses the numeric `count` field when present, otherwise the length of the
 * `ids` array.
 *
 * @param {string} jsonPath - Path of the JSON file.
 * @returns {number|string} The vector count, or "N/A" when the file is
 *   missing or holds neither field (including a JSON `null` document).
 * @throws {SyntaxError} When the file is not valid JSON.
 */
function countVectors(jsonPath) {
    if (!fs.existsSync(jsonPath)) return "N/A";
    const data = JSON.parse(fs.readFileSync(jsonPath, "utf8"));
    // `null` is valid JSON but has no properties to read.
    if (data === null || typeof data !== "object") return "N/A";
    if (typeof data.count === "number") return data.count;
    if (Array.isArray(data.ids)) return data.ids.length;
    return "N/A";
}
36
-
37
/**
 * Copy the workspace index files into the per-user runtime directory
 * (`<home>/.vesper/data`).
 *
 * All source files are checked before anything is copied, so a missing file
 * can no longer leave the runtime directory with a mix of new and old files.
 *
 * @param {string} workspaceRoot - Directory whose `data/` folder holds the index.
 * @returns {string} The runtime directory the files were copied into.
 * @throws {Error} When one of the source files does not exist.
 */
function syncRuntime(workspaceRoot) {
    const runtimeDir = path.join(os.homedir(), ".vesper", "data");

    const files = ["metadata.db", "vectors.json", "vectors.bin"];
    const transfers = files.map((file) => ({
        src: path.join(workspaceRoot, "data", file),
        dest: path.join(runtimeDir, file),
    }));

    const missing = transfers.find(({ src }) => !fs.existsSync(src));
    if (missing) {
        throw new Error(`Missing source file: ${missing.src}`);
    }

    fs.mkdirSync(runtimeDir, { recursive: true });
    for (const { src, dest } of transfers) {
        fs.copyFileSync(src, dest);
    }

    return runtimeDir;
}
53
-
54
/**
 * Refresh the search index end to end:
 * run the `massive-scrape` npm script, run the `index` npm script with a
 * larger V8 heap limit, copy the workspace index into the runtime directory,
 * then print dataset and vector counts for both locations.
 */
function main() {
    const workspaceRoot = process.cwd();
    const runtimeDataDir = path.join(os.homedir(), ".vesper", "data");
    const workspaceDataDir = path.join(workspaceRoot, "data");

    const runtimeDbPath = path.join(runtimeDataDir, "metadata.db");
    const runtimeVecPath = path.join(runtimeDataDir, "vectors.json");
    const workspaceDbPath = path.join(workspaceDataDir, "metadata.db");
    const workspaceVecPath = path.join(workspaceDataDir, "vectors.json");

    console.log("\n[refresh-index] Step 1/3: Massive scrape...");
    runCommand("npm", ["run", "massive-scrape"]);

    console.log("\n[refresh-index] Step 2/3: High-memory indexing...");
    // Pass the larger heap limit to the indexing child process only.
    const indexingEnv = Object.assign({}, process.env, {
        NODE_OPTIONS: "--max-old-space-size=8192",
    });
    runCommand("npm", ["run", "index"], { env: indexingEnv });

    console.log("\n[refresh-index] Step 3/3: Sync workspace index to runtime...");
    const runtimeDir = syncRuntime(workspaceRoot);

    const workspaceCounts = {
        db: countDatasets(workspaceDbPath),
        vectors: countVectors(workspaceVecPath),
    };
    const runtimeCounts = {
        db: countDatasets(runtimeDbPath),
        vectors: countVectors(runtimeVecPath),
    };

    console.log("\n[refresh-index] Completed successfully.");
    console.log(`[refresh-index] Workspace: DB=${workspaceCounts.db}, VECTORS=${workspaceCounts.vectors}`);
    console.log(`[refresh-index] Runtime: DB=${runtimeCounts.db}, VECTORS=${runtimeCounts.vectors}`);
    console.log(`[refresh-index] Runtime path: ${runtimeDir}\n`);
}

// Entry point: any failure is reported and turned into exit status 1.
try {
    main();
} catch (error) {
    console.error("\n[refresh-index] Failed:", error.message);
    process.exit(1);
}