vesper-wizard 2.3.0 → 2.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (213) hide show
  1. package/README.md +37 -322
  2. package/package.json +34 -100
  3. package/vesper-mcp-config.json +6 -0
  4. package/{scripts/wizard.js → wizard.js} +34 -10
  5. package/LICENSE +0 -21
  6. package/build/cache/cdn.js +0 -34
  7. package/build/cache/service.js +0 -63
  8. package/build/cleaning/cleaner.js +0 -81
  9. package/build/cleaning/evaluator.js +0 -89
  10. package/build/cleaning/executor.js +0 -62
  11. package/build/cleaning/exporter.js +0 -87
  12. package/build/cleaning/planner.js +0 -127
  13. package/build/cleaning/rules.js +0 -57
  14. package/build/cleaning/types.js +0 -1
  15. package/build/cloud/adapters/local.js +0 -37
  16. package/build/cloud/adapters/s3.js +0 -24
  17. package/build/cloud/adapters/supabase.js +0 -49
  18. package/build/cloud/storage-manager.js +0 -26
  19. package/build/cloud/types.js +0 -1
  20. package/build/compliance/service.js +0 -73
  21. package/build/compliance/store.js +0 -80
  22. package/build/compliance/types.js +0 -1
  23. package/build/config/config-manager.js +0 -221
  24. package/build/config/secure-keys.js +0 -51
  25. package/build/config/user-config.js +0 -48
  26. package/build/data/processing-worker.js +0 -23
  27. package/build/data/streaming.js +0 -38
  28. package/build/data/worker-pool.js +0 -39
  29. package/build/export/exporter.js +0 -82
  30. package/build/export/packager.js +0 -100
  31. package/build/export/types.js +0 -1
  32. package/build/fusion/aligner.js +0 -56
  33. package/build/fusion/deduplicator.js +0 -69
  34. package/build/fusion/engine.js +0 -69
  35. package/build/fusion/harmonizer.js +0 -39
  36. package/build/fusion/orchestrator.js +0 -86
  37. package/build/fusion/types.js +0 -1
  38. package/build/gateway/unified-dataset-gateway.js +0 -410
  39. package/build/index.js +0 -3068
  40. package/build/ingestion/hf-downloader.js +0 -171
  41. package/build/ingestion/ingestor.js +0 -271
  42. package/build/ingestion/kaggle-downloader.js +0 -102
  43. package/build/install/install-service.js +0 -46
  44. package/build/jobs/manager.js +0 -136
  45. package/build/jobs/queue.js +0 -59
  46. package/build/jobs/types.js +0 -1
  47. package/build/lib/supabase.js +0 -3
  48. package/build/metadata/dataworld-source.js +0 -89
  49. package/build/metadata/domain.js +0 -147
  50. package/build/metadata/github-scraper.js +0 -47
  51. package/build/metadata/institutional-scrapers.js +0 -49
  52. package/build/metadata/kaggle-scraper.js +0 -182
  53. package/build/metadata/kaggle-source.js +0 -70
  54. package/build/metadata/license.js +0 -68
  55. package/build/metadata/monitoring-service.js +0 -107
  56. package/build/metadata/monitoring-store.js +0 -78
  57. package/build/metadata/monitoring-types.js +0 -1
  58. package/build/metadata/openml-source.js +0 -87
  59. package/build/metadata/quality.js +0 -48
  60. package/build/metadata/rate-limiter.js +0 -128
  61. package/build/metadata/scraper.js +0 -448
  62. package/build/metadata/store.js +0 -340
  63. package/build/metadata/types.js +0 -1
  64. package/build/metadata/uci-scraper.js +0 -49
  65. package/build/monitoring/observability.js +0 -76
  66. package/build/preparation/target-detector.js +0 -75
  67. package/build/python/__pycache__/config.cpython-312.pyc +0 -0
  68. package/build/python/asset_downloader_engine.py +0 -94
  69. package/build/python/cleaner.py +0 -226
  70. package/build/python/config.py +0 -263
  71. package/build/python/convert_engine.py +0 -92
  72. package/build/python/dataworld_engine.py +0 -208
  73. package/build/python/export_engine.py +0 -288
  74. package/build/python/framework_adapters.py +0 -100
  75. package/build/python/fusion_engine.py +0 -368
  76. package/build/python/github_adapter.py +0 -106
  77. package/build/python/hf_fallback.py +0 -298
  78. package/build/python/image_engine.py +0 -86
  79. package/build/python/kaggle_engine.py +0 -295
  80. package/build/python/media_engine.py +0 -133
  81. package/build/python/nasa_adapter.py +0 -82
  82. package/build/python/normalize_engine.py +0 -83
  83. package/build/python/openml_engine.py +0 -146
  84. package/build/python/quality_engine.py +0 -267
  85. package/build/python/row_count.py +0 -54
  86. package/build/python/splitter_engine.py +0 -283
  87. package/build/python/target_engine.py +0 -154
  88. package/build/python/test_framework_adapters.py +0 -61
  89. package/build/python/test_fusion_engine.py +0 -89
  90. package/build/python/uci_adapter.py +0 -94
  91. package/build/python/vesper/__init__.py +0 -1
  92. package/build/python/vesper/__pycache__/__init__.cpython-312.pyc +0 -0
  93. package/build/python/vesper/core/__init__.py +0 -1
  94. package/build/python/vesper/core/__pycache__/__init__.cpython-312.pyc +0 -0
  95. package/build/python/vesper/core/__pycache__/asset_downloader.cpython-312.pyc +0 -0
  96. package/build/python/vesper/core/__pycache__/download_recipe.cpython-312.pyc +0 -0
  97. package/build/python/vesper/core/asset_downloader.py +0 -679
  98. package/build/python/vesper/core/download_recipe.py +0 -104
  99. package/build/python/worldbank_adapter.py +0 -99
  100. package/build/quality/analyzer.js +0 -93
  101. package/build/quality/image-analyzer.js +0 -114
  102. package/build/quality/media-analyzer.js +0 -115
  103. package/build/quality/quality-orchestrator.js +0 -162
  104. package/build/quality/types.js +0 -1
  105. package/build/scripts/build-index.js +0 -54
  106. package/build/scripts/check-db.js +0 -73
  107. package/build/scripts/check-jobs.js +0 -24
  108. package/build/scripts/check-naruto.js +0 -17
  109. package/build/scripts/cleanup-kaggle.js +0 -41
  110. package/build/scripts/demo-full-pipeline.js +0 -62
  111. package/build/scripts/demo-ui.js +0 -58
  112. package/build/scripts/e2e-demo.js +0 -72
  113. package/build/scripts/massive-scrape.js +0 -103
  114. package/build/scripts/ops-dashboard.js +0 -33
  115. package/build/scripts/repro-bug.js +0 -37
  116. package/build/scripts/repro-export-bug.js +0 -56
  117. package/build/scripts/scrape-metadata.js +0 -100
  118. package/build/scripts/search-cli.js +0 -26
  119. package/build/scripts/test-bias.js +0 -45
  120. package/build/scripts/test-caching.js +0 -51
  121. package/build/scripts/test-cleaning.js +0 -76
  122. package/build/scripts/test-cloud-storage.js +0 -48
  123. package/build/scripts/test-compliance.js +0 -58
  124. package/build/scripts/test-conversion.js +0 -64
  125. package/build/scripts/test-custom-rules.js +0 -58
  126. package/build/scripts/test-db-opt.js +0 -63
  127. package/build/scripts/test-export-custom.js +0 -33
  128. package/build/scripts/test-exporter.js +0 -53
  129. package/build/scripts/test-fusion.js +0 -61
  130. package/build/scripts/test-github.js +0 -27
  131. package/build/scripts/test-group-split.js +0 -52
  132. package/build/scripts/test-hf-download.js +0 -29
  133. package/build/scripts/test-holdout-manager.js +0 -61
  134. package/build/scripts/test-hybrid-search.js +0 -41
  135. package/build/scripts/test-image-analysis.js +0 -50
  136. package/build/scripts/test-ingestion-infra.js +0 -39
  137. package/build/scripts/test-install.js +0 -40
  138. package/build/scripts/test-institutional.js +0 -26
  139. package/build/scripts/test-integrity.js +0 -41
  140. package/build/scripts/test-jit.js +0 -42
  141. package/build/scripts/test-job-queue.js +0 -62
  142. package/build/scripts/test-kaggle-download.js +0 -34
  143. package/build/scripts/test-large-data.js +0 -50
  144. package/build/scripts/test-mcp-v5.js +0 -74
  145. package/build/scripts/test-media-analysis.js +0 -61
  146. package/build/scripts/test-monitoring.js +0 -91
  147. package/build/scripts/test-observability.js +0 -106
  148. package/build/scripts/test-packager.js +0 -55
  149. package/build/scripts/test-pipeline.js +0 -50
  150. package/build/scripts/test-planning.js +0 -64
  151. package/build/scripts/test-privacy.js +0 -38
  152. package/build/scripts/test-production-sync.js +0 -36
  153. package/build/scripts/test-quality.js +0 -43
  154. package/build/scripts/test-robust-ingestion.js +0 -41
  155. package/build/scripts/test-schema.js +0 -45
  156. package/build/scripts/test-split-validation.js +0 -40
  157. package/build/scripts/test-splitter.js +0 -93
  158. package/build/scripts/test-target-detector.js +0 -29
  159. package/build/scripts/test-uci.js +0 -27
  160. package/build/scripts/test-unified-quality.js +0 -86
  161. package/build/scripts/test-write.js +0 -14
  162. package/build/scripts/verify-integration.js +0 -57
  163. package/build/scripts/verify-priority.js +0 -33
  164. package/build/search/embedder.js +0 -34
  165. package/build/search/engine.js +0 -190
  166. package/build/search/jit-orchestrator.js +0 -262
  167. package/build/search/query-intent.js +0 -509
  168. package/build/search/vector-store.js +0 -123
  169. package/build/splitting/splitter.js +0 -82
  170. package/build/splitting/types.js +0 -1
  171. package/build/tools/formatter.js +0 -251
  172. package/build/utils/downloader.js +0 -52
  173. package/build/utils/python-runtime.js +0 -130
  174. package/build/utils/selector.js +0 -69
  175. package/mcp-config-template.json +0 -18
  176. package/scripts/postinstall.cjs +0 -170
  177. package/scripts/preindex_registry.cjs +0 -157
  178. package/scripts/refresh-index.cjs +0 -87
  179. package/scripts/wizard.cjs +0 -601
  180. package/src/python/__pycache__/config.cpython-312.pyc +0 -0
  181. package/src/python/__pycache__/export_engine.cpython-312.pyc +0 -0
  182. package/src/python/__pycache__/framework_adapters.cpython-312.pyc +0 -0
  183. package/src/python/__pycache__/fusion_engine.cpython-312.pyc +0 -0
  184. package/src/python/__pycache__/kaggle_engine.cpython-312.pyc +0 -0
  185. package/src/python/asset_downloader_engine.py +0 -94
  186. package/src/python/cleaner.py +0 -226
  187. package/src/python/config.py +0 -263
  188. package/src/python/convert_engine.py +0 -92
  189. package/src/python/dataworld_engine.py +0 -208
  190. package/src/python/export_engine.py +0 -288
  191. package/src/python/framework_adapters.py +0 -100
  192. package/src/python/fusion_engine.py +0 -368
  193. package/src/python/github_adapter.py +0 -106
  194. package/src/python/hf_fallback.py +0 -298
  195. package/src/python/image_engine.py +0 -86
  196. package/src/python/kaggle_engine.py +0 -295
  197. package/src/python/media_engine.py +0 -133
  198. package/src/python/nasa_adapter.py +0 -82
  199. package/src/python/normalize_engine.py +0 -83
  200. package/src/python/openml_engine.py +0 -146
  201. package/src/python/quality_engine.py +0 -267
  202. package/src/python/row_count.py +0 -54
  203. package/src/python/splitter_engine.py +0 -283
  204. package/src/python/target_engine.py +0 -154
  205. package/src/python/test_framework_adapters.py +0 -61
  206. package/src/python/test_fusion_engine.py +0 -89
  207. package/src/python/uci_adapter.py +0 -94
  208. package/src/python/vesper/__init__.py +0 -1
  209. package/src/python/vesper/core/__init__.py +0 -1
  210. package/src/python/vesper/core/asset_downloader.py +0 -679
  211. package/src/python/vesper/core/download_recipe.py +0 -104
  212. package/src/python/worldbank_adapter.py +0 -99
  213. package/wizard.cjs +0 -3
@@ -1,82 +0,0 @@
1
- import { spawn } from "child_process";
2
- import path from "path";
3
- import fs from "fs";
4
- export class DataSplitter {
5
- pythonPath = "python";
6
- scriptPath;
7
- constructor(buildDir = process.cwd()) {
8
- const homeDir = process.env.HOME || process.env.USERPROFILE || buildDir;
9
- const dataRoot = path.join(homeDir, ".vesper");
10
- const scriptPath0 = path.resolve(dataRoot, "python", "splitter_engine.py");
11
- const scriptPath1 = path.resolve(buildDir, "python", "splitter_engine.py");
12
- const scriptPath2 = path.resolve(buildDir, "..", "src", "python", "splitter_engine.py");
13
- const scriptPath3 = path.resolve(buildDir, "..", "python", "splitter_engine.py");
14
- if (fs.existsSync(scriptPath0)) {
15
- this.scriptPath = scriptPath0;
16
- }
17
- else if (fs.existsSync(scriptPath1)) {
18
- this.scriptPath = scriptPath1;
19
- }
20
- else if (fs.existsSync(scriptPath2)) {
21
- this.scriptPath = scriptPath2;
22
- }
23
- else if (fs.existsSync(scriptPath3)) {
24
- this.scriptPath = scriptPath3;
25
- }
26
- else {
27
- this.scriptPath = scriptPath0;
28
- }
29
- // Detect Python command
30
- if (process.platform === "win32") {
31
- this.pythonPath = "py";
32
- }
33
- }
34
- /**
35
- * Splits a dataset into Train/Val/Test sets based on config
36
- */
37
- async split(filePath, config) {
38
- return this.runPython("split", [filePath, JSON.stringify(config)]);
39
- }
40
- /**
41
- * Validates a split for leakage and distribution
42
- */
43
- async validate(paths, options) {
44
- const config = {
45
- paths,
46
- id_column: options?.id_column,
47
- target_column: options?.target_column
48
- };
49
- return this.runPython("validate", [JSON.stringify(config)]);
50
- }
51
- async runPython(action, args) {
52
- return new Promise((resolve, reject) => {
53
- const process = spawn(this.pythonPath, [this.scriptPath, action, ...args]);
54
- let stdout = "";
55
- let stderr = "";
56
- process.stdout.on("data", (data) => {
57
- stdout += data.toString();
58
- });
59
- process.stderr.on("data", (data) => {
60
- stderr += data.toString();
61
- });
62
- process.on("close", (code) => {
63
- if (code !== 0) {
64
- reject(new Error(`Data Splitter (${action}) failed: ${stderr}`));
65
- return;
66
- }
67
- try {
68
- const result = JSON.parse(stdout);
69
- if (result.error) {
70
- reject(new Error(result.error));
71
- }
72
- else {
73
- resolve(result);
74
- }
75
- }
76
- catch (e) {
77
- reject(new Error(`Failed to parse output: ${stdout}`));
78
- }
79
- });
80
- });
81
- }
82
- }
@@ -1 +0,0 @@
1
- export {};
@@ -1,251 +0,0 @@
1
- /**
2
- * Format job status for visual representation
3
- */
4
- export function formatJobStatus(job) {
5
- const statusMap = {
6
- "pending": "PENDING",
7
- "queued": "QUEUED",
8
- "running": "RUNNING",
9
- "completed": "COMPLETED",
10
- "failed": "FAILED",
11
- "retrying": "RETRYING"
12
- };
13
- const statusText = statusMap[job.status] || "UNKNOWN";
14
- const barWidth = 20;
15
- const filledWidth = Math.round((job.progress / 100) * barWidth);
16
- const emptyWidth = barWidth - filledWidth;
17
- const bar = "█".repeat(filledWidth) + "░".repeat(emptyWidth);
18
- let output = `═ Job Status: ${job.type.toUpperCase()} ═\n`;
19
- output += `ID: ${job.id}\n`;
20
- output += `Status: ${statusText}\n`;
21
- output += `Progress: ${bar} ${job.progress}%\n`;
22
- output += `Activity: ${job.status_text}\n`;
23
- if (job.status === "running" || job.status === "retrying" || job.status === "queued" || job.status === "pending") {
24
- output += `Polling hint: check again in 5-10 seconds.\n`;
25
- }
26
- else {
27
- output += `Polling hint: no further polling required.\n`;
28
- }
29
- if (job.result_url) {
30
- output += `\nResult: ${job.result_url}\n`;
31
- }
32
- if (job.error) {
33
- output += `\nERROR:\n`;
34
- // Format multi-line errors nicely
35
- const errorLines = job.error.split('\n');
36
- errorLines.forEach(line => {
37
- output += ` ${line}\n`;
38
- });
39
- output += `\n`;
40
- }
41
- output += `Updated: ${new Date(job.updated_at).toLocaleTimeString()}\n`;
42
- output += "═".repeat(25) + "\n";
43
- return output;
44
- }
45
- /**
46
- * Format dataset search results for human-readable display
47
- */
48
- export function formatSearchResults(results) {
49
- if (results.length === 0) {
50
- return "No datasets found matching your query.";
51
- }
52
- let output = `Found ${results.length} dataset(s):\n\n`;
53
- output += "═".repeat(80) + "\n\n";
54
- results.forEach((ds, index) => {
55
- const relevanceScore = ds.relevance_score || 0;
56
- // Source badge and access level
57
- const openSources = ["huggingface", "openml", "s3", "uci", "github", "worldbank", "nasa"];
58
- const isOpen = openSources.includes(ds.source);
59
- const sourceLabel = ds.source.charAt(0).toUpperCase() + ds.source.slice(1);
60
- const accessBadge = isOpen ? "Open Access" : "Requires API Key";
61
- // Safety indicator
62
- let safetyIndicator = "";
63
- if (ds.license.category === "safe") {
64
- safetyIndicator = "Safe";
65
- }
66
- else if (ds.license.category === "restricted") {
67
- safetyIndicator = "Restricted";
68
- }
69
- else {
70
- safetyIndicator = "Unknown License";
71
- }
72
- // Header
73
- output += `${index + 1}. ${ds.name}\n`;
74
- output += ` Source: ${sourceLabel} | ${accessBadge} | ${safetyIndicator}\n`;
75
- output += ` Relevance: ${(relevanceScore * 100).toFixed(0)}% | ID: ${ds.id}\n\n`;
76
- // Description
77
- if (ds.description && ds.description.length > 0) {
78
- const shortDesc = ds.description.length > 200
79
- ? ds.description.substring(0, 200) + "..."
80
- : ds.description;
81
- output += ` ${shortDesc}\n\n`;
82
- }
83
- // Quality warnings
84
- if (ds.quality_warnings && ds.quality_warnings.length > 0) {
85
- output += ` Quality Warnings:\n`;
86
- ds.quality_warnings.forEach(warning => {
87
- output += ` • ${warning}\n`;
88
- });
89
- output += "\n";
90
- }
91
- // Key stats
92
- output += ` Stats:\n`;
93
- if (ds.downloads)
94
- output += ` Downloads: ${ds.downloads.toLocaleString()}\n`;
95
- if (ds.likes)
96
- output += ` Likes: ${ds.likes}\n`;
97
- if (ds.total_examples)
98
- output += ` Examples: ${ds.total_examples.toLocaleString()}\n`;
99
- if (ds.total_size_mb)
100
- output += ` Size: ${ds.total_size_mb} MB\n`;
101
- output += ` Domain: ${ds.domain || "unknown"}\n`;
102
- output += ` Task: ${ds.task || "unknown"}\n`;
103
- // Data splits
104
- if (ds.splits && ds.splits.length > 0) {
105
- const splitNames = ds.splits.map(s => s.name).join(", ");
106
- output += ` Splits: ${splitNames}\n`;
107
- }
108
- // License details
109
- output += `\n License: ${ds.license.id || "Unknown"}\n`;
110
- if (ds.license.warnings && ds.license.warnings.length > 0) {
111
- ds.license.warnings.forEach(warning => {
112
- output += ` WARNING: ${warning}\n`;
113
- });
114
- }
115
- if (ds.license.commercial_use !== undefined) {
116
- output += ` Commercial use: ${ds.license.commercial_use ? "Yes" : "No"}\n`;
117
- }
118
- // Download link
119
- output += `\n ${ds.download_url}\n`;
120
- output += "\n" + "─".repeat(80) + "\n\n";
121
- });
122
- return output;
123
- }
124
- /**
125
- * Format detailed dataset info
126
- */
127
- export function formatDatasetInfo(ds) {
128
- let output = "";
129
- // Header
130
- output += "═".repeat(80) + "\n";
131
- output += `${ds.name}\n`;
132
- output += "═".repeat(80) + "\n\n";
133
- // Source and safety
134
- const openSources = ["huggingface", "openml", "s3", "uci", "github", "worldbank", "nasa"];
135
- const isOpen = openSources.includes(ds.source);
136
- const sourceLabel = ds.source.charAt(0).toUpperCase() + ds.source.slice(1);
137
- const accessBadge = isOpen ? "Open Access" : "Requires API Key";
138
- let safetyIndicator = "";
139
- if (ds.license.category === "safe") {
140
- safetyIndicator = "Safe for use";
141
- }
142
- else if (ds.license.category === "restricted") {
143
- safetyIndicator = "Restricted - Review license carefully";
144
- }
145
- else {
146
- safetyIndicator = "Unknown license - Use with caution";
147
- }
148
- output += `Source: ${sourceLabel} (${accessBadge})\n`;
149
- output += `Safety: ${safetyIndicator}\n`;
150
- output += `ID: ${ds.id}\n\n`;
151
- if (!isOpen && ds.source === "kaggle") {
152
- output += `NOTE: This dataset uses the Kaggle connector. Vesper can access it through server-managed credentials when configured, otherwise a Kaggle key is still required.\n\n`;
153
- }
154
- if (!isOpen && ds.source === "dataworld") {
155
- output += `NOTE: This dataset uses the data.world connector. Vesper can access it through a server-managed token when configured.\n\n`;
156
- }
157
- // Description
158
- if (ds.description) {
159
- output += "Description:\n";
160
- output += `${ds.description}\n\n`;
161
- }
162
- // Quality warnings
163
- if (ds.quality_warnings && ds.quality_warnings.length > 0) {
164
- output += "Quality Warnings:\n";
165
- ds.quality_warnings.forEach(warning => {
166
- output += ` • ${warning}\n`;
167
- });
168
- output += "\n";
169
- }
170
- // Metadata
171
- output += "Metadata:\n";
172
- output += ` Downloads: ${ds.downloads?.toLocaleString() || "N/A"}\n`;
173
- output += ` Likes: ${ds.likes || 0}\n`;
174
- output += ` Quality Score: ${ds.quality_score}/100\n`;
175
- output += ` Domain: ${ds.domain || "unknown"}\n`;
176
- output += ` Task: ${ds.task || "unknown"}\n`;
177
- output += ` Languages: ${ds.languages?.join(", ") || "N/A"}\n`;
178
- output += ` Last Updated: ${new Date(ds.last_updated).toLocaleDateString()}\n\n`;
179
- // Data characteristics
180
- output += "Data Characteristics:\n";
181
- output += ` Total Examples: ${ds.total_examples?.toLocaleString() || "N/A"}\n`;
182
- output += ` Total Size: ${ds.total_size_mb ? ds.total_size_mb + " MB" : "N/A"}\n`;
183
- output += ` Structured: ${ds.is_structured ? "Yes" : "No"}\n`;
184
- output += ` Has Target Column: ${ds.has_target_column ? "Yes" : "No"}\n`;
185
- output += ` Format: ${ds.format || "N/A"}\n\n`;
186
- // Splits
187
- if (ds.splits && ds.splits.length > 0) {
188
- output += "Data Splits:\n";
189
- ds.splits.forEach(split => {
190
- output += ` • ${split.name}: ${split.num_examples?.toLocaleString() || "?"} examples`;
191
- if (split.size_bytes) {
192
- output += ` (${(split.size_bytes / (1024 * 1024)).toFixed(2)} MB)`;
193
- }
194
- output += "\n";
195
- });
196
- output += "\n";
197
- }
198
- // Columns
199
- if (ds.columns && ds.columns.length > 0) {
200
- output += "Columns:\n";
201
- ds.columns.slice(0, 10).forEach(col => {
202
- const targetMarker = col.is_target ? " [TARGET]" : "";
203
- output += ` • ${col.name}${targetMarker}`;
204
- if (col.type)
205
- output += ` (${col.type})`;
206
- output += "\n";
207
- });
208
- if (ds.columns.length > 10) {
209
- output += ` ... and ${ds.columns.length - 10} more columns\n`;
210
- }
211
- output += "\n";
212
- }
213
- // License
214
- output += "License Information:\n";
215
- output += ` License: ${ds.license.id || "Unknown"}\n`;
216
- output += ` Category: ${ds.license.category}\n`;
217
- output += ` Commercial Use: ${ds.license.commercial_use ? "Allowed" : "Not allowed"}\n`;
218
- if (ds.license.warnings && ds.license.warnings.length > 0) {
219
- output += ` Warnings:\n`;
220
- ds.license.warnings.forEach(warning => {
221
- output += ` WARNING: ${warning}\n`;
222
- });
223
- }
224
- if (ds.license.usage_restrictions && ds.license.usage_restrictions.length > 0) {
225
- output += ` Restrictions:\n`;
226
- ds.license.usage_restrictions.forEach(restriction => {
227
- output += ` • ${restriction}\n`;
228
- });
229
- }
230
- output += "\n";
231
- // Safety flags
232
- output += "Safety Flags:\n";
233
- output += ` Safe Source: ${ds.is_safe_source ? "Yes" : "No"}\n`;
234
- output += ` Has Personal Data: ${ds.has_personal_data ? "Yes" : "No"}\n`;
235
- output += ` Paywalled: ${ds.is_paywalled ? "Yes" : "No"}\n`;
236
- output += ` Scraped Web Data: ${ds.is_scraped_web_data ? "Yes" : "No"}\n\n`;
237
- // Tags
238
- if (ds.tags && ds.tags.length > 0) {
239
- output += "Tags:\n";
240
- output += ` ${ds.tags.slice(0, 15).join(", ")}`;
241
- if (ds.tags.length > 15) {
242
- output += ` ... and ${ds.tags.length - 15} more`;
243
- }
244
- output += "\n\n";
245
- }
246
- // Download link
247
- output += "Download:\n";
248
- output += ` ${ds.download_url}\n\n`;
249
- output += "═".repeat(80) + "\n";
250
- return output;
251
- }
@@ -1,52 +0,0 @@
1
- import fs from "fs";
2
- import { Readable } from "stream";
3
- import { finished } from "stream/promises";
4
- import { retryWithBackoff } from "../metadata/rate-limiter.js";
5
- export class RobustDownloader {
6
- /**
7
- * Downloads a file with automatic retries and resume support
8
- */
9
- async download(url, targetPath, options = {}) {
10
- await retryWithBackoff(async () => {
11
- let startByte = 0;
12
- const headers = { ...(options.headers || {}) };
13
- // Handle resume logic
14
- if (options.resume && fs.existsSync(targetPath)) {
15
- startByte = fs.statSync(targetPath).size;
16
- if (startByte > 0) {
17
- headers["Range"] = `bytes=${startByte}-`;
18
- console.error(`[Downloader] Resuming from byte ${startByte}`);
19
- }
20
- }
21
- const response = await fetch(url, { headers });
22
- if (response.status === 416) {
23
- // Requested range not satisfiable - likely already finished
24
- console.error("[Downloader] Range not satisfiable, file might be complete.");
25
- return;
26
- }
27
- if (!response.ok && response.status !== 206) {
28
- throw new Error(`Download failed: ${response.statusText} (${response.status})`);
29
- }
30
- const contentLength = response.headers.get("content-length");
31
- const totalSize = (contentLength ? parseInt(contentLength, 10) : 0) + startByte;
32
- const reader = response.body;
33
- if (!reader)
34
- throw new Error("Response body is empty");
35
- // Open stream in append mode if resuming
36
- const fileStream = fs.createWriteStream(targetPath, { flags: startByte > 0 ? "a" : "w" });
37
- const nodeReadable = Readable.fromWeb(reader);
38
- let downloadedBytes = startByte;
39
- let lastProgressTime = 0;
40
- nodeReadable.on("data", (chunk) => {
41
- downloadedBytes += chunk.length;
42
- // Throttle progress updates
43
- const now = Date.now();
44
- if (options.onProgress && (now - lastProgressTime > 500 || downloadedBytes === totalSize)) {
45
- options.onProgress(downloadedBytes, totalSize);
46
- lastProgressTime = now;
47
- }
48
- });
49
- await finished(nodeReadable.pipe(fileStream));
50
- }, { maxRetries: 5, initialDelay: 2000 });
51
- }
52
- }
@@ -1,130 +0,0 @@
1
- import { spawn } from "child_process";
2
- import fs from "fs";
3
- import os from "os";
4
- import path from "path";
5
- function getHomeDir(buildDir) {
6
- return os.homedir() || process.env.HOME || process.env.USERPROFILE || buildDir;
7
- }
8
- export function getVesperDataRoot(buildDir = process.cwd()) {
9
- return path.join(getHomeDir(buildDir), ".vesper");
10
- }
11
- export function getManagedPythonPath(buildDir = process.cwd()) {
12
- const dataRoot = getVesperDataRoot(buildDir);
13
- return process.platform === "win32"
14
- ? path.join(dataRoot, ".venv", "Scripts", "python.exe")
15
- : path.join(dataRoot, ".venv", "bin", "python");
16
- }
17
- function getFallbackPythonCommand() {
18
- return process.platform === "win32" ? "py" : "python3";
19
- }
20
- export function resolvePythonCommand(buildDir = process.cwd()) {
21
- const managedPython = getManagedPythonPath(buildDir);
22
- if (fs.existsSync(managedPython)) {
23
- return managedPython;
24
- }
25
- const envPython = process.env.VESPER_PYTHON;
26
- if (envPython) {
27
- return envPython;
28
- }
29
- const localCandidates = process.platform === "win32"
30
- ? [
31
- path.resolve(buildDir, ".venv", "Scripts", "python.exe"),
32
- path.resolve(buildDir, "..", ".venv", "Scripts", "python.exe")
33
- ]
34
- : [
35
- path.resolve(buildDir, ".venv", "bin", "python"),
36
- path.resolve(buildDir, "..", ".venv", "bin", "python")
37
- ];
38
- for (const candidate of localCandidates) {
39
- if (fs.existsSync(candidate)) {
40
- return candidate;
41
- }
42
- }
43
- return getFallbackPythonCommand();
44
- }
45
- function runPythonCommand(pythonPath, args, timeoutMs = 300000) {
46
- return new Promise((resolve, reject) => {
47
- const proc = spawn(pythonPath, args, {
48
- env: {
49
- ...process.env,
50
- PYTHONIOENCODING: "utf-8",
51
- },
52
- });
53
- let stdout = "";
54
- let stderr = "";
55
- const timer = setTimeout(() => {
56
- proc.kill();
57
- resolve({ code: 124, stdout, stderr: stderr || `Python command timed out after ${timeoutMs}ms` });
58
- }, timeoutMs);
59
- proc.stdout.on("data", (data) => {
60
- stdout += data.toString();
61
- });
62
- proc.stderr.on("data", (data) => {
63
- stderr += data.toString();
64
- });
65
- proc.on("close", (code) => {
66
- clearTimeout(timer);
67
- resolve({ code: code ?? 1, stdout, stderr });
68
- });
69
- proc.on("error", (error) => {
70
- clearTimeout(timer);
71
- reject(error);
72
- });
73
- });
74
- }
75
- async function createManagedPythonEnv(buildDir) {
76
- const dataRoot = getVesperDataRoot(buildDir);
77
- const venvDir = path.join(dataRoot, ".venv");
78
- const managedPython = getManagedPythonPath(buildDir);
79
- if (fs.existsSync(managedPython)) {
80
- return managedPython;
81
- }
82
- fs.mkdirSync(dataRoot, { recursive: true });
83
- const bootstrapAttempts = process.platform === "win32"
84
- ? [
85
- { command: "py", args: ["-3", "-m", "venv", venvDir] },
86
- { command: "python", args: ["-m", "venv", venvDir] },
87
- ]
88
- : [
89
- { command: "python3", args: ["-m", "venv", venvDir] },
90
- { command: "python", args: ["-m", "venv", venvDir] },
91
- ];
92
- let lastError = "";
93
- for (const attempt of bootstrapAttempts) {
94
- try {
95
- const result = await runPythonCommand(attempt.command, attempt.args, 180000);
96
- if (result.code === 0 && fs.existsSync(managedPython)) {
97
- await runPythonCommand(managedPython, ["-m", "pip", "install", "--disable-pip-version-check", "--upgrade", "pip"], 300000);
98
- return managedPython;
99
- }
100
- lastError = (result.stderr || result.stdout || "Unknown venv creation error").trim();
101
- }
102
- catch (error) {
103
- lastError = error?.message || String(error);
104
- }
105
- }
106
- throw new Error(`Failed to create Vesper Python environment. ${lastError}`.trim());
107
- }
108
- export async function ensurePythonPackages(buildDir, requirements) {
109
- const pythonPath = await createManagedPythonEnv(buildDir).catch(() => resolvePythonCommand(buildDir));
110
- const missing = [];
111
- for (const requirement of requirements) {
112
- const check = await runPythonCommand(pythonPath, [
113
- "-c",
114
- `import importlib.util,sys; sys.exit(0 if importlib.util.find_spec(${JSON.stringify(requirement.module)}) else 1)`
115
- ], 20000);
116
- if (check.code !== 0) {
117
- missing.push(requirement);
118
- }
119
- }
120
- if (missing.length === 0) {
121
- return pythonPath;
122
- }
123
- const packages = [...new Set(missing.map(requirement => requirement.packageName))];
124
- const install = await runPythonCommand(pythonPath, ["-m", "pip", "install", "--disable-pip-version-check", ...packages], 600000);
125
- if (install.code !== 0) {
126
- const details = (install.stderr || install.stdout || "Unknown pip install error").trim();
127
- throw new Error(`Failed to install Python packages (${packages.join(", ")}). ${details}`);
128
- }
129
- return pythonPath;
130
- }
@@ -1,69 +0,0 @@
1
- import readline from "readline";
2
- export class Selector {
3
- currentIndex = 0;
4
- options;
5
- title;
6
- constructor(title, options) {
7
- this.title = title;
8
- this.options = options;
9
- }
10
- render() {
11
- // Clear previous lines
12
- process.stdout.write("\x1b[?25l"); // Hide cursor
13
- readline.cursorTo(process.stdout, 0);
14
- // Clear the lines we used before (options + title + blank line)
15
- for (let i = 0; i <= this.options.length + 1; i++) {
16
- readline.clearLine(process.stdout, 0);
17
- process.stdout.write("\x1b[1A"); // Move up one line
18
- }
19
- readline.clearLine(process.stdout, 0);
20
- console.log(`\n${this.title}`);
21
- this.options.forEach((opt, idx) => {
22
- const isCurrent = idx === this.currentIndex;
23
- const checkbox = opt.selected ? "[\x1b[32mX\x1b[0m]" : "[ ]";
24
- const cursor = isCurrent ? "\x1b[36m>\x1b[0m " : " ";
25
- const label = isCurrent ? `\x1b[36m${opt.name}\x1b[0m` : opt.name;
26
- console.log(`${cursor}${checkbox} ${label}`);
27
- });
28
- console.log("\x1b[2m(Use arrows to move, Space to toggle, Enter to confirm)\x1b[0m");
29
- }
30
- async run() {
31
- if (this.options.length === 0)
32
- return [];
33
- readline.emitKeypressEvents(process.stdin);
34
- if (process.stdin.isTTY) {
35
- process.stdin.setRawMode(true);
36
- }
37
- // Initial render room (print blank lines to be cleared)
38
- console.log("\n".repeat(this.options.length + 1));
39
- this.render();
40
- return new Promise((resolve) => {
41
- const handleKey = (str, key) => {
42
- if (key.name === "up") {
43
- this.currentIndex = (this.currentIndex - 1 + this.options.length) % this.options.length;
44
- this.render();
45
- }
46
- else if (key.name === "down") {
47
- this.currentIndex = (this.currentIndex + 1) % this.options.length;
48
- this.render();
49
- }
50
- else if (key.name === "space") {
51
- this.options[this.currentIndex].selected = !this.options[this.currentIndex].selected;
52
- this.render();
53
- }
54
- else if (key.name === "return") {
55
- process.stdin.setRawMode(false);
56
- process.stdin.removeListener("keypress", handleKey);
57
- process.stdout.write("\x1b[?25h"); // Show cursor
58
- console.log("");
59
- resolve(this.options.filter(o => o.selected).map(o => o.value));
60
- }
61
- else if (key.ctrl && key.name === "c") {
62
- process.stdin.setRawMode(false);
63
- process.exit();
64
- }
65
- };
66
- process.stdin.on("keypress", handleKey);
67
- });
68
- }
69
- }
@@ -1,18 +0,0 @@
1
- {
2
- "mcpServers": {
3
- "vesper": {
4
- "command": "npx",
5
- "args": [
6
- "-y",
7
- "-p",
8
- "@vespermcp/mcp-server@latest",
9
- "vespermcp"
10
- ],
11
- "env": {
12
- "KAGGLE_USERNAME": "your-kaggle-username",
13
- "KAGGLE_KEY": "your-kaggle-api-key",
14
- "HF_TOKEN": "your-huggingface-token"
15
- }
16
- }
17
- }
18
- }