vesper-wizard 2.3.1 → 2.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214)
  1. package/README.md +37 -322
  2. package/package.json +34 -100
  3. package/vesper-mcp-config.json +6 -0
  4. package/{scripts/wizard.js → wizard.js} +34 -10
  5. package/LICENSE +0 -21
  6. package/build/cache/cdn.js +0 -34
  7. package/build/cache/service.js +0 -63
  8. package/build/cleaning/cleaner.js +0 -81
  9. package/build/cleaning/evaluator.js +0 -89
  10. package/build/cleaning/executor.js +0 -62
  11. package/build/cleaning/exporter.js +0 -87
  12. package/build/cleaning/planner.js +0 -127
  13. package/build/cleaning/rules.js +0 -57
  14. package/build/cleaning/types.js +0 -1
  15. package/build/cloud/adapters/local.js +0 -37
  16. package/build/cloud/adapters/s3.js +0 -24
  17. package/build/cloud/adapters/supabase.js +0 -49
  18. package/build/cloud/storage-manager.js +0 -26
  19. package/build/cloud/types.js +0 -1
  20. package/build/compliance/service.js +0 -73
  21. package/build/compliance/store.js +0 -80
  22. package/build/compliance/types.js +0 -1
  23. package/build/config/config-manager.js +0 -221
  24. package/build/config/secure-keys.js +0 -51
  25. package/build/config/user-config.js +0 -48
  26. package/build/data/processing-worker.js +0 -23
  27. package/build/data/streaming.js +0 -38
  28. package/build/data/worker-pool.js +0 -39
  29. package/build/export/exporter.js +0 -82
  30. package/build/export/packager.js +0 -100
  31. package/build/export/types.js +0 -1
  32. package/build/fusion/aligner.js +0 -56
  33. package/build/fusion/deduplicator.js +0 -69
  34. package/build/fusion/engine.js +0 -69
  35. package/build/fusion/harmonizer.js +0 -39
  36. package/build/fusion/orchestrator.js +0 -86
  37. package/build/fusion/types.js +0 -1
  38. package/build/gateway/unified-dataset-gateway.js +0 -410
  39. package/build/index.js +0 -3068
  40. package/build/ingestion/hf-downloader.js +0 -171
  41. package/build/ingestion/ingestor.js +0 -271
  42. package/build/ingestion/kaggle-downloader.js +0 -102
  43. package/build/install/install-service.js +0 -46
  44. package/build/jobs/manager.js +0 -136
  45. package/build/jobs/queue.js +0 -59
  46. package/build/jobs/types.js +0 -1
  47. package/build/lib/supabase.js +0 -3
  48. package/build/metadata/dataworld-source.js +0 -89
  49. package/build/metadata/domain.js +0 -147
  50. package/build/metadata/github-scraper.js +0 -47
  51. package/build/metadata/institutional-scrapers.js +0 -49
  52. package/build/metadata/kaggle-scraper.js +0 -182
  53. package/build/metadata/kaggle-source.js +0 -70
  54. package/build/metadata/license.js +0 -68
  55. package/build/metadata/monitoring-service.js +0 -107
  56. package/build/metadata/monitoring-store.js +0 -78
  57. package/build/metadata/monitoring-types.js +0 -1
  58. package/build/metadata/openml-source.js +0 -87
  59. package/build/metadata/quality.js +0 -48
  60. package/build/metadata/rate-limiter.js +0 -128
  61. package/build/metadata/scraper.js +0 -448
  62. package/build/metadata/store.js +0 -340
  63. package/build/metadata/types.js +0 -1
  64. package/build/metadata/uci-scraper.js +0 -49
  65. package/build/monitoring/observability.js +0 -76
  66. package/build/preparation/target-detector.js +0 -75
  67. package/build/python/__pycache__/config.cpython-312.pyc +0 -0
  68. package/build/python/asset_downloader_engine.py +0 -94
  69. package/build/python/cleaner.py +0 -226
  70. package/build/python/config.py +0 -263
  71. package/build/python/convert_engine.py +0 -92
  72. package/build/python/dataworld_engine.py +0 -208
  73. package/build/python/export_engine.py +0 -288
  74. package/build/python/framework_adapters.py +0 -100
  75. package/build/python/fusion_engine.py +0 -368
  76. package/build/python/github_adapter.py +0 -106
  77. package/build/python/hf_fallback.py +0 -298
  78. package/build/python/image_engine.py +0 -86
  79. package/build/python/kaggle_engine.py +0 -295
  80. package/build/python/media_engine.py +0 -133
  81. package/build/python/nasa_adapter.py +0 -82
  82. package/build/python/normalize_engine.py +0 -83
  83. package/build/python/openml_engine.py +0 -146
  84. package/build/python/quality_engine.py +0 -267
  85. package/build/python/row_count.py +0 -54
  86. package/build/python/splitter_engine.py +0 -283
  87. package/build/python/target_engine.py +0 -154
  88. package/build/python/test_framework_adapters.py +0 -61
  89. package/build/python/test_fusion_engine.py +0 -89
  90. package/build/python/uci_adapter.py +0 -94
  91. package/build/python/vesper/__init__.py +0 -1
  92. package/build/python/vesper/__pycache__/__init__.cpython-312.pyc +0 -0
  93. package/build/python/vesper/core/__init__.py +0 -1
  94. package/build/python/vesper/core/__pycache__/__init__.cpython-312.pyc +0 -0
  95. package/build/python/vesper/core/__pycache__/asset_downloader.cpython-312.pyc +0 -0
  96. package/build/python/vesper/core/__pycache__/download_recipe.cpython-312.pyc +0 -0
  97. package/build/python/vesper/core/asset_downloader.py +0 -679
  98. package/build/python/vesper/core/download_recipe.py +0 -104
  99. package/build/python/worldbank_adapter.py +0 -99
  100. package/build/quality/analyzer.js +0 -93
  101. package/build/quality/image-analyzer.js +0 -114
  102. package/build/quality/media-analyzer.js +0 -115
  103. package/build/quality/quality-orchestrator.js +0 -162
  104. package/build/quality/types.js +0 -1
  105. package/build/scripts/build-index.js +0 -54
  106. package/build/scripts/check-db.js +0 -73
  107. package/build/scripts/check-jobs.js +0 -24
  108. package/build/scripts/check-naruto.js +0 -17
  109. package/build/scripts/cleanup-kaggle.js +0 -41
  110. package/build/scripts/demo-full-pipeline.js +0 -62
  111. package/build/scripts/demo-ui.js +0 -58
  112. package/build/scripts/e2e-demo.js +0 -72
  113. package/build/scripts/massive-scrape.js +0 -103
  114. package/build/scripts/ops-dashboard.js +0 -33
  115. package/build/scripts/repro-bug.js +0 -37
  116. package/build/scripts/repro-export-bug.js +0 -56
  117. package/build/scripts/scrape-metadata.js +0 -100
  118. package/build/scripts/search-cli.js +0 -26
  119. package/build/scripts/test-bias.js +0 -45
  120. package/build/scripts/test-caching.js +0 -51
  121. package/build/scripts/test-cleaning.js +0 -76
  122. package/build/scripts/test-cloud-storage.js +0 -48
  123. package/build/scripts/test-compliance.js +0 -58
  124. package/build/scripts/test-conversion.js +0 -64
  125. package/build/scripts/test-custom-rules.js +0 -58
  126. package/build/scripts/test-db-opt.js +0 -63
  127. package/build/scripts/test-export-custom.js +0 -33
  128. package/build/scripts/test-exporter.js +0 -53
  129. package/build/scripts/test-fusion.js +0 -61
  130. package/build/scripts/test-github.js +0 -27
  131. package/build/scripts/test-group-split.js +0 -52
  132. package/build/scripts/test-hf-download.js +0 -29
  133. package/build/scripts/test-holdout-manager.js +0 -61
  134. package/build/scripts/test-hybrid-search.js +0 -41
  135. package/build/scripts/test-image-analysis.js +0 -50
  136. package/build/scripts/test-ingestion-infra.js +0 -39
  137. package/build/scripts/test-install.js +0 -40
  138. package/build/scripts/test-institutional.js +0 -26
  139. package/build/scripts/test-integrity.js +0 -41
  140. package/build/scripts/test-jit.js +0 -42
  141. package/build/scripts/test-job-queue.js +0 -62
  142. package/build/scripts/test-kaggle-download.js +0 -34
  143. package/build/scripts/test-large-data.js +0 -50
  144. package/build/scripts/test-mcp-v5.js +0 -74
  145. package/build/scripts/test-media-analysis.js +0 -61
  146. package/build/scripts/test-monitoring.js +0 -91
  147. package/build/scripts/test-observability.js +0 -106
  148. package/build/scripts/test-packager.js +0 -55
  149. package/build/scripts/test-pipeline.js +0 -50
  150. package/build/scripts/test-planning.js +0 -64
  151. package/build/scripts/test-privacy.js +0 -38
  152. package/build/scripts/test-production-sync.js +0 -36
  153. package/build/scripts/test-quality.js +0 -43
  154. package/build/scripts/test-robust-ingestion.js +0 -41
  155. package/build/scripts/test-schema.js +0 -45
  156. package/build/scripts/test-split-validation.js +0 -40
  157. package/build/scripts/test-splitter.js +0 -93
  158. package/build/scripts/test-target-detector.js +0 -29
  159. package/build/scripts/test-uci.js +0 -27
  160. package/build/scripts/test-unified-quality.js +0 -86
  161. package/build/scripts/test-write.js +0 -14
  162. package/build/scripts/verify-integration.js +0 -57
  163. package/build/scripts/verify-priority.js +0 -33
  164. package/build/search/embedder.js +0 -34
  165. package/build/search/engine.js +0 -190
  166. package/build/search/jit-orchestrator.js +0 -262
  167. package/build/search/query-intent.js +0 -509
  168. package/build/search/vector-store.js +0 -123
  169. package/build/splitting/splitter.js +0 -82
  170. package/build/splitting/types.js +0 -1
  171. package/build/tools/formatter.js +0 -251
  172. package/build/utils/downloader.js +0 -52
  173. package/build/utils/python-runtime.js +0 -130
  174. package/build/utils/selector.js +0 -69
  175. package/mcp-config-template.json +0 -18
  176. package/scripts/postinstall.cjs +0 -170
  177. package/scripts/preindex_registry.cjs +0 -157
  178. package/scripts/refresh-index.cjs +0 -87
  179. package/scripts/wizard.cjs +0 -601
  180. package/src/python/__pycache__/config.cpython-312.pyc +0 -0
  181. package/src/python/__pycache__/export_engine.cpython-312.pyc +0 -0
  182. package/src/python/__pycache__/framework_adapters.cpython-312.pyc +0 -0
  183. package/src/python/__pycache__/fusion_engine.cpython-312.pyc +0 -0
  184. package/src/python/__pycache__/kaggle_engine.cpython-312.pyc +0 -0
  185. package/src/python/asset_downloader_engine.py +0 -94
  186. package/src/python/cleaner.py +0 -226
  187. package/src/python/config.py +0 -263
  188. package/src/python/convert_engine.py +0 -92
  189. package/src/python/dataworld_engine.py +0 -208
  190. package/src/python/export_engine.py +0 -288
  191. package/src/python/framework_adapters.py +0 -100
  192. package/src/python/fusion_engine.py +0 -368
  193. package/src/python/github_adapter.py +0 -106
  194. package/src/python/hf_fallback.py +0 -298
  195. package/src/python/image_engine.py +0 -86
  196. package/src/python/kaggle_engine.py +0 -295
  197. package/src/python/media_engine.py +0 -133
  198. package/src/python/nasa_adapter.py +0 -82
  199. package/src/python/normalize_engine.py +0 -83
  200. package/src/python/openml_engine.py +0 -146
  201. package/src/python/quality_engine.py +0 -267
  202. package/src/python/requirements.txt +0 -12
  203. package/src/python/row_count.py +0 -54
  204. package/src/python/splitter_engine.py +0 -283
  205. package/src/python/target_engine.py +0 -154
  206. package/src/python/test_framework_adapters.py +0 -61
  207. package/src/python/test_fusion_engine.py +0 -89
  208. package/src/python/uci_adapter.py +0 -94
  209. package/src/python/vesper/__init__.py +0 -1
  210. package/src/python/vesper/core/__init__.py +0 -1
  211. package/src/python/vesper/core/asset_downloader.py +0 -679
  212. package/src/python/vesper/core/download_recipe.py +0 -104
  213. package/src/python/worldbank_adapter.py +0 -99
  214. package/wizard.cjs +0 -3
package/README.md CHANGED
@@ -1,345 +1,60 @@
1
- # Vesper MCP Server 🚀
1
+ # vesper-wizard
2
2
 
3
- **AI-powered dataset discovery, quality analysis, and preparation** with multimodal support (text, image, audio, video).
3
+ Zero-friction setup wizard for [Vesper](https://github.com/vesper/mcp-server) — your local MCP-native dataset intelligence layer.
4
4
 
5
- Vesper is a Model Context Protocol (MCP) server that helps you find, analyze, and prepare high-quality datasets for machine learning projects. It integrates seamlessly with AI assistants like Claude, providing autonomous dataset workflows.
6
-
7
- ## ✨ Features
8
-
9
- ### 🔍 **Dataset Discovery**
10
- - Search across HuggingFace, Kaggle, UCI ML Repository, and more
11
- - Intelligent ranking based on quality, safety, and relevance
12
- - Automatic metadata extraction and enrichment
13
-
14
- ### 📊 **Quality Analysis**
15
- - **Text**: Missing data, duplicates, column profiling
16
- - **Images**: Resolution, corruption, blur detection
17
- - **Audio**: Sample rate, duration, silence detection
18
- - **Video**: FPS, frame validation, corruption risk
19
- - **Unified Reports**: Consolidated quality scores (0-100) with recommendations
20
-
21
- ### 🛠️ **Data Preparation**
22
- - Automated cleaning pipelines
23
- - Format conversion (CSV, JSON, Parquet)
24
- - Train/test/validation splitting
25
- - Automatic installation to project directories
26
-
27
- ### 🎯 **Multimodal Support**
28
- - Analyze mixed datasets (text + images + audio)
29
- - Media-specific quality metrics
30
- - Intelligent modality detection
31
-
32
- ## 📦 Installation
33
-
34
- ## 🚀 Quick Start (VS Code + Copilot)
35
-
36
- The fastest way to install Vesper and configure it for **GitHub Copilot Chat** or **Cursor** is to run the automated setup:
37
-
38
- ```bash
39
- npx -y -p @vespermcp/mcp-server@latest vespermcp --setup
40
- ```
41
-
42
- 1. Select **Visual Studio Code (Settings.json)** from the list.
43
- 2. Restart VS Code.
44
- 3. Open Copilot Chat and look for the **MCP Servers** section.
45
-
46
- ## 🛠️ Configuration
47
- Vesper supports:
48
- - **GitHub Copilot Chat**: Automated setup via `settings.json`.
49
- - **Cursor**: Automated setup via `mcp.json`.
50
- - **Claude Desktop**: Automated setup via `claude_desktop_config.json`.
51
-
52
- ### Manual Python Setup (if needed)
5
+ ## Install
53
6
 
54
7
  ```bash
55
- pip install opencv-python pillow numpy librosa soundfile
8
+ npx vesper-wizard@latest
56
9
  ```
57
10
 
58
- ## ⚙️ MCP Configuration
11
+ That's it. The wizard handles everything:
59
12
 
60
- ### For Cursor
61
- 1. Go to **Settings** > **Features** > **MCP**
62
- 2. Click **Add New MCP Server**
63
- 3. Enter:
64
- - **Name**: `vesper`
65
- - **Type**: `command`
66
- - **Command**: `vesper`
13
+ 1. Creates `~/.vesper/` directories and local API key
14
+ 2. Initializes a local credentials vault in unified-key mode (no external API keys required)
15
+ 3. Installs `@vespermcp/mcp-server` and auto-configures MCP for all detected agents (Claude, Cursor, VS Code, Codex, Gemini CLI)
16
+ 4. Verifies the installation
67
17
 
68
- ### For Claude Desktop
69
- Vesper attempts to auto-configure itself! Restart Claude and check. If not:
18
+ ## What you get
70
19
 
71
- ```json
72
- {
73
- "mcpServers": {
74
- "vesper": {
75
- "command": "vesper",
76
- "args": [],
77
- "env": {
78
- "HF_TOKEN": "your-huggingface-token"
79
- }
80
- }
81
- }
82
- }
83
- ```
84
-
85
- > **Note**: If the `vesper` command isn't found, you can stick to the absolute path method.
20
+ After the wizard finishes, your AI assistant can immediately use Vesper tools:
86
21
 
87
- ### Environment Variables (Optional)
22
+ | Tool | Description |
23
+ |------|-------------|
24
+ | `vesper_search` | Search 16,000+ datasets via natural language |
25
+ | `discover_datasets` | Discover from HuggingFace, Kaggle, OpenML, data.world |
26
+ | `download_dataset` | Download any dataset to local storage |
27
+ | `prepare_dataset` | Full pipeline: analyze → clean → split → export |
28
+ | `analyze_quality` | Deep quality analysis with recommendations |
29
+ | `export_dataset` | Export to parquet, csv, feather, jsonl, arrow |
30
+ | `fuse_datasets` | Combine multiple datasets with quality checks |
88
31
 
89
- - `KAGGLE_USERNAME` & `KAGGLE_KEY`: For Kaggle dataset access
90
- - `HF_TOKEN`: For private HuggingFace datasets
32
+ ## Security
91
33
 
92
- ### Optional Kaggle Setup (Not Required)
34
+ - **Local-only**: Uses one local key in `~/.vesper/config.toml`
35
+ - **Keyring-backed**: Uses OS keyring when available, falls back to local TOML
36
+ - **No cloud required**: Local-only mode makes zero external API calls during setup (browser sign-in, if chosen, contacts the Vesper API)
37
+ - **No external keys**: No HuggingFace/Kaggle/Nia key prompts during setup
93
38
 
94
- Core Vesper works without any API keys. Keys are only needed when you explicitly use Kaggle or gated Hugging Face.
39
+ ## Config file
95
40
 
96
- Install optional Kaggle client only if you need Kaggle source access:
41
+ The wizard generates `~/.vesper/config.toml`:
97
42
 
98
- ```bash
99
- pip install kaggle
43
+ ```toml
44
+ api_key = "vesper_sk_local_..."
45
+ auth_mode = "local_unified"
100
46
  ```
101
47
 
102
- ```bash
103
- vespermcp config keys
104
- ```
105
-
106
- The setup wizard supports skip and stores keys securely via OS keyring when available,
107
- with fallback to `~/.vesper/config.toml`.
108
-
109
- or use Kaggle's native file:
48
+ ## Post-setup
110
49
 
111
- - `~/.kaggle/kaggle.json`
50
+ Restart your IDE and try in your AI assistant:
112
51
 
113
- If credentials are missing and you run Kaggle commands, Vesper shows:
114
-
115
- `Kaggle support requires API key. Run 'vespermcp config keys' (30 seconds).`
116
-
117
- ### CLI Examples
118
-
119
- ```bash
120
- vespermcp discover --source kaggle "credit risk" --limit 10
121
- vespermcp discover --source huggingface "credit risk" --limit 10
122
- vespermcp download kaggle username/dataset-name
123
- vespermcp download kaggle https://www.kaggle.com/datasets/username/dataset-name --target-dir ./data
124
52
  ```
125
-
126
- ## 🚀 Quick Start
127
-
128
- After installation and configuration, restart your AI assistant and try:
129
-
53
+ vesper_search(query="sentiment analysis")
54
+ prepare_dataset(query="image classification cats dogs")
55
+ analyze_quality(dataset_id="imdb")
130
56
  ```
131
- search_datasets(query="sentiment analysis", limit=5)
132
- ```
133
-
134
- ```
135
- prepare_dataset(query="image classification cats vs dogs")
136
- ```
137
-
138
- ```
139
- generate_quality_report(
140
- dataset_id="huggingface:imdb",
141
- dataset_path="/path/to/data"
142
- )
143
- ```
144
-
145
- ## 📚 Available Tools
146
-
147
- ### Dataset Discovery
148
-
149
- #### `unified_dataset_api`
150
- Single facade over multiple dataset backends. Use one tool for provider capability inspection, dataset discovery, dataset download, and dataset info lookup. The gateway prefers public/keyless providers and can also use server-managed credentials for connectors like Kaggle or data.world when configured by the operator.
151
-
152
- **Parameters:**
153
- - `operation` (string): `providers`, `discover`, `download`, or `info`
154
- - `source` (string, optional): `auto`, `huggingface`, `openml`, `kaggle`, `dataworld`, `s3`, `bigquery`
155
- - `query` (string, required for `discover`)
156
- - `dataset_id` (string, required for `download`/`info`)
157
- - `limit` (number, optional)
158
- - `target_dir` (string, optional)
159
- - `public_only` (boolean, optional)
160
-
161
- **Examples:**
162
- ```
163
- unified_dataset_api(operation="providers")
164
- ```
165
-
166
- ```
167
- unified_dataset_api(operation="discover", query="credit risk", source="auto")
168
- ```
169
-
170
- ```
171
- unified_dataset_api(operation="download", dataset_id="huggingface:imdb")
172
- ```
173
-
174
- ---
175
-
176
- #### `search_datasets`
177
- Search for datasets across multiple sources.
178
-
179
- **Parameters:**
180
- - `query` (string): Search query
181
- - `limit` (number, optional): Max results (default: 10)
182
- - `min_quality_score` (number, optional): Minimum quality threshold
183
-
184
- **Example:**
185
- ```
186
- search_datasets(query="medical imaging", limit=5, min_quality_score=70)
187
- ```
188
-
189
- ---
190
-
191
- ### Data Preparation
192
-
193
- #### `prepare_dataset`
194
- Download, analyze, and prepare a dataset for use.
195
-
196
- **Parameters:**
197
- - `query` (string): Dataset search query or ID
198
-
199
- **Example:**
200
- ```
201
- prepare_dataset(query="squad")
202
- ```
203
-
204
- ---
205
-
206
- #### `export_dataset`
207
- Export a prepared dataset to a custom directory with format conversion.
208
-
209
- **Parameters:**
210
- - `dataset_id` (string): Dataset identifier
211
- - `target_dir` (string): Export directory
212
- - `format` (string, optional): Output format (csv, json, parquet)
213
-
214
- **Example:**
215
- ```
216
- export_dataset(
217
- dataset_id="huggingface:imdb",
218
- target_dir="./my-data",
219
- format="csv"
220
- )
221
- ```
222
-
223
- ---
224
-
225
- #### `vesper_download_assets`
226
- Download image/media assets to a user-controlled local directory.
227
-
228
- **Parameters:**
229
- - `dataset_id` (string): Dataset identifier
230
- - `source` (string): `huggingface`, `kaggle`, or `url`
231
- - `target_dir` (string, optional): Exact local directory where assets should be written
232
- - `output_dir` (string, optional): Alias for `target_dir`
233
- - `output_format` (string, optional): `webdataset`, `imagefolder`, or `parquet`
234
-
235
- **Example:**
236
- ```
237
- vesper_download_assets(
238
- dataset_id="cats_vs_dogs",
239
- source="kaggle",
240
- target_dir="./datasets/cats_dogs_100",
241
- output_format="imagefolder"
242
- )
243
- ```
244
-
245
- ---
246
-
247
- ### Quality Analysis
248
-
249
- #### `analyze_image_quality`
250
- Analyze image datasets for resolution, corruption, and blur.
251
-
252
- **Parameters:**
253
- - `path` (string): Path to image file or folder
254
-
255
- **Example:**
256
- ```
257
- analyze_image_quality(path="/path/to/images")
258
- ```
259
-
260
- ---
261
-
262
- #### `analyze_media_quality`
263
- Analyze audio/video files for quality metrics.
264
-
265
- **Parameters:**
266
- - `path` (string): Path to media file or folder
267
-
268
- **Example:**
269
- ```
270
- analyze_media_quality(path="/path/to/audio")
271
- ```
272
-
273
- ---
274
-
275
- #### `generate_quality_report`
276
- Generate a comprehensive unified quality report for multimodal datasets.
277
-
278
- **Parameters:**
279
- - `dataset_id` (string): Dataset identifier
280
- - `dataset_path` (string): Path to dataset directory
281
-
282
- **Example:**
283
- ```
284
- generate_quality_report(
285
- dataset_id="my-dataset",
286
- dataset_path="/path/to/data"
287
- )
288
- ```
289
-
290
- ---
291
-
292
- ### Data Splitting
293
-
294
- #### `split_dataset`
295
- Split a dataset into train/test/validation sets.
296
-
297
- **Parameters:**
298
- - `dataset_id` (string): Dataset identifier
299
- - `train_ratio` (number): Training set ratio (0-1)
300
- - `test_ratio` (number): Test set ratio (0-1)
301
- - `val_ratio` (number, optional): Validation set ratio (0-1)
302
-
303
- **Example:**
304
- ```
305
- split_dataset(
306
- dataset_id="my-dataset",
307
- train_ratio=0.7,
308
- test_ratio=0.2,
309
- val_ratio=0.1
310
- )
311
- ```
312
-
313
- ## 🏗️ Architecture
314
-
315
- Vesper is built with:
316
- - **TypeScript** for the MCP server
317
- - **Python** for image/audio/video processing
318
- - **SQLite** for metadata storage
319
- - **Transformers.js** for semantic search
320
-
321
- ## 🤝 Contributing
322
-
323
- Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
324
-
325
- ## 📄 License
326
-
327
- MIT License - see [LICENSE](LICENSE) for details.
328
-
329
- ## 🐛 Issues & Support
330
-
331
- - **Issues**: https://github.com/vesper/mcp-server/issues
332
- - **Discussions**: https://github.com/vesper/mcp-server/discussions
333
-
334
- ## 🌟 Acknowledgments
335
-
336
- Built with:
337
- - [Model Context Protocol](https://modelcontextprotocol.io/)
338
- - [HuggingFace Hub](https://huggingface.co/)
339
- - [Kaggle API](https://www.kaggle.com/docs/api)
340
- - [OpenCV](https://opencv.org/)
341
- - [librosa](https://librosa.org/)
342
57
 
343
- ---
58
+ ## License
344
59
 
345
- Made with ❤️ by the Vesper Team
60
+ MIT
package/package.json CHANGED
@@ -1,100 +1,34 @@
1
- {
2
- "name": "vesper-wizard",
3
- "version": "2.3.1",
4
- "description": "AI-powered dataset discovery, quality analysis, and preparation MCP server with multimodal support (text, image, audio, video)",
5
- "type": "module",
6
- "main": "build/index.js",
7
- "bin": {
8
- "vesper-wizard": "wizard.cjs"
9
- },
10
- "files": [
11
- "build/**/*",
12
- "src/python/**/*",
13
- "wizard.cjs",
14
- "scripts/**/*",
15
- "README.md",
16
- "LICENSE",
17
- "mcp-config-template.json"
18
- ],
19
- "scripts": {
20
- "build": "tsc && node -e \"const fs=require('fs');const path=require('path');const src='src/python';const dest='build/python';const walk=(d)=>fs.readdirSync(d,{withFileTypes:true}).flatMap(e=>e.isDirectory()?walk(path.join(d,e.name)):[path.join(d,e.name)]);if(!fs.existsSync(dest))fs.mkdirSync(dest,{recursive:true});if(fs.existsSync(src)){for(const f of walk(src)){if(!f.endsWith('.py'))continue;const rel=path.relative(src,f);const out=path.join(dest,rel);fs.mkdirSync(path.dirname(out),{recursive:true});fs.copyFileSync(f,out);}}console.log('Copied Python scripts to build/python');\"",
21
- "dev": "tsx watch src/index.ts",
22
- "postinstall": "node scripts/postinstall.cjs",
23
- "scrape": "tsx src/scripts/scrape-metadata.ts",
24
- "massive-scrape": "tsx src/scripts/massive-scrape.ts",
25
- "index": "tsx src/scripts/build-index.ts",
26
- "search-cli": "tsx src/scripts/search-cli.ts",
27
- "check-db": "tsx src/scripts/check-db.ts",
28
- "test-jit": "tsx src/scripts/test-jit.ts",
29
- "demo-ui": "tsx src/scripts/demo-ui.ts",
30
- "fuse": "node build/index.js fuse",
31
- "discover": "node build/index.js discover",
32
- "download": "node build/index.js download",
33
- "export": "node build/index.js export",
34
- "config": "node build/index.js config",
35
- "test-fusion-engine": "py src/python/test_fusion_engine.py",
36
- "setup": "node build/index.js --setup",
37
- "setup:silent": "node build/index.js --setup --silent",
38
- "refresh-index": "node scripts/refresh-index.cjs",
39
- "test": "vitest",
40
- "start": "node build/index.js"
41
- },
42
- "keywords": [
43
- "mcp",
44
- "model-context-protocol",
45
- "dataset",
46
- "machine-learning",
47
- "data-quality",
48
- "huggingface",
49
- "kaggle",
50
- "multimodal",
51
- "image-analysis",
52
- "audio-analysis",
53
- "video-analysis",
54
- "data-preparation",
55
- "ai",
56
- "ml"
57
- ],
58
- "author": "Vesper Team",
59
- "license": "MIT",
60
- "repository": {
61
- "type": "git",
62
- "url": "git+https://github.com/vesper/mcp-server.git"
63
- },
64
- "engines": {
65
- "node": ">=18.0.0",
66
- "npm": ">=8.0.0"
67
- },
68
- "dependencies": {
69
- "@huggingface/hub": "^2.7.1",
70
- "@modelcontextprotocol/sdk": "^1.25.2",
71
- "@polar-sh/nextjs": "^0.9.4",
72
- "@supabase/supabase-js": "^2.98.0",
73
- "@xenova/transformers": "^2.17.2",
74
- "adm-zip": "^0.5.16",
75
- "ajv": "^8.17.1",
76
- "ajv-formats": "^3.0.1",
77
- "better-sqlite3": "^12.6.0",
78
- "inquirer": "^13.3.0",
79
- "lodash": "^4.17.21",
80
- "uuid": "^13.0.0",
81
- "zod": "^4.3.5",
82
- "zod-to-json-schema": "^3.25.1"
83
- },
84
- "devDependencies": {
85
- "@types/adm-zip": "^0.5.7",
86
- "@types/better-sqlite3": "^7.6.13",
87
- "@types/lodash": "^4.17.23",
88
- "@types/node": "^25.0.9",
89
- "@types/uuid": "^10.0.0",
90
- "@typescript-eslint/eslint-plugin": "^8.53.0",
91
- "@typescript-eslint/parser": "^8.53.0",
92
- "eslint": "^9.39.2",
93
- "eslint-config-prettier": "^10.1.8",
94
- "prettier": "^3.8.0",
95
- "tsx": "^4.21.0",
96
- "typescript": "^5.9.3",
97
- "vitest": "^4.0.17"
98
- },
99
- "packageManager": "pnpm@10.18.1+sha512.77a884a165cbba2d8d1c19e3b4880eee6d2fcabd0d879121e282196b80042351d5eb3ca0935fa599da1dc51265cc68816ad2bddd2a2de5ea9fdf92adbec7cd34"
100
- }
1
+ {
2
+ "name": "vesper-wizard",
3
+ "version": "2.3.2",
4
+ "description": "Zero-friction setup wizard for Vesper local MCP server, unified dataset API, and agent auto-config in 60 seconds",
5
+ "bin": {
6
+ "vesper-wizard": "wizard.js"
7
+ },
8
+ "files": [
9
+ "wizard.js",
10
+ "README.md",
11
+ "vesper-mcp-config.json"
12
+ ],
13
+ "keywords": [
14
+ "vesper",
15
+ "mcp",
16
+ "wizard",
17
+ "setup",
18
+ "datasets",
19
+ "machine-learning",
20
+ "huggingface",
21
+ "kaggle",
22
+ "openml"
23
+ ],
24
+ "author": "Vesper Team",
25
+ "license": "MIT",
26
+ "repository": {
27
+ "type": "git",
28
+ "url": "https://github.com/vesper/mcp-server"
29
+ },
30
+ "engines": {
31
+ "node": ">=18.0.0"
32
+ },
33
+ "dependencies": {}
34
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "project": "vesper",
3
+ "dataDir": "./datasets",
4
+ "exportFormat": "parquet",
5
+ "tokens": {}
6
+ }
@@ -60,9 +60,9 @@ function magenta(text) { return `\x1b[35m${text}\x1b[0m`; }
60
60
  // ── Vesper API URL resolution ────────────────────────────────
61
61
  const VESPER_API_URL = process.env.VESPER_API_URL || '';
62
62
  const DEFAULT_VESPER_API_CANDIDATES = [
63
+ 'https://getvesper.dev',
63
64
  'http://localhost:3000',
64
65
  'http://127.0.0.1:3000',
65
- 'https://getvesper.dev',
66
66
  ];
67
67
 
68
68
  // ── Device Auth Helpers ──────────────────────────────────────
@@ -221,10 +221,14 @@ async function chooseAuthMode(existingKey, existingAuthMode) {
221
221
  }
222
222
 
223
223
  const choices = [];
224
- choices.push({ value: 'browser', label: 'Sign in through the browser' });
224
+ if (hasExistingKey) {
225
+ choices.push({ value: 'keep', label: 'Keep current key as-is' });
226
+ }
225
227
  choices.push({ value: 'manual', label: 'Provide Vesper API key manually' });
228
+ choices.push({ value: 'browser', label: 'Sign in through the browser' });
229
+ choices.push({ value: 'local', label: 'Use local-only key' });
226
230
 
227
- return await askChoice(`${cyan('→')} How do you want to authenticate Vesper?`, choices, 'browser');
231
+ return await askChoice(`${cyan('→')} How do you want to authenticate Vesper?`, choices, hasExistingKey ? 'keep' : 'browser');
228
232
  }
229
233
 
230
234
  async function deviceAuthFlow() {
@@ -236,7 +240,7 @@ async function deviceAuthFlow() {
236
240
  console.log(` ${red('✗')} ${red('Could not reach any Vesper auth endpoint.')}`);
237
241
  console.log(` ${dim('Tried:')} ${dim((VESPER_API_URL ? [VESPER_API_URL] : DEFAULT_VESPER_API_CANDIDATES).join(', '))}`);
238
242
  console.log(` ${dim('If your landing app is running locally, start it on http://localhost:3000 or set VESPER_API_URL.')}`);
239
- console.log(` ${dim('Falling back to manual key entry.\n')}`);
243
+ console.log(` ${dim('Falling back to local-only mode.\n')}`);
240
244
  return null;
241
245
  }
242
246
 
@@ -245,7 +249,7 @@ async function deviceAuthFlow() {
245
249
  console.log(` ${dim('Endpoint:')} ${dim(resolvedApiBaseUrl.baseUrl)}`);
246
250
  console.log(` ${dim('Reason:')} ${dim(resolvedApiBaseUrl.message || 'Apply Supabase migrations first.')}`);
247
251
  console.log(` ${dim('Run the SQL in supabase/migrations/001_device_auth.sql and 002_rate_limits.sql, then retry.')}`);
248
- console.log(` ${dim('Falling back to manual key entry.\n')}`);
252
+ console.log(` ${dim('Falling back to local-only mode.\n')}`);
249
253
  return null;
250
254
  }
251
255
 
@@ -259,7 +263,7 @@ async function deviceAuthFlow() {
259
263
  } catch (err) {
260
264
  console.log(` ${red('✗')}`);
261
265
  console.log(` ${red('Could not reach Vesper API at')} ${dim(resolvedApiBaseUrl.baseUrl)}`);
262
- console.log(` ${dim('Falling back to manual key entry.\n')}`);
266
+ console.log(` ${dim('Falling back to local-only mode.\n')}`);
263
267
  return null;
264
268
  }
265
269
 
@@ -459,7 +463,9 @@ async function main() {
459
463
 
460
464
  const authChoice = await chooseAuthMode(localKey, authMode);
461
465
 
462
- if (authChoice === 'manual') {
466
+ if (authChoice === 'keep' && localKey) {
467
+ console.log(` ${green('✓')} Keeping current key`);
468
+ } else if (authChoice === 'manual') {
463
469
  localKey = await promptForManualApiKey();
464
470
  authMode = 'cloud';
465
471
  console.log(` ${green('✓')} Cloud API key saved from manual input`);
@@ -469,10 +475,28 @@ async function main() {
469
475
  localKey = cloudKey;
470
476
  authMode = 'cloud';
471
477
  } else {
472
- console.log(`\n ${yellow('!')} Browser sign-in did not complete. Falling back to manual key entry.`);
473
- localKey = await promptForManualApiKey();
474
- authMode = 'cloud';
478
+ const fallbackChoice = await askChoice(`${yellow('!')} Browser sign-in did not complete. Choose a fallback:`, [
479
+ { value: 'manual', label: 'Provide Vesper API key manually' },
480
+ { value: 'local', label: 'Use local-only key' },
481
+ ], 'manual');
482
+
483
+ if (fallbackChoice === 'manual') {
484
+ localKey = await promptForManualApiKey();
485
+ authMode = 'cloud';
486
+ } else {
487
+ if (!localKey || isCloudApiKey(localKey)) {
488
+ localKey = generateLocalKey();
489
+ }
490
+ authMode = 'local_unified';
491
+ console.log(`\n ${yellow('⚠')} Using local-only key. Run the wizard again anytime to link an account.`);
492
+ }
493
+ }
494
+ } else {
495
+ if (!localKey || isCloudApiKey(localKey)) {
496
+ localKey = generateLocalKey();
475
497
  }
498
+ authMode = 'local_unified';
499
+ console.log(` ${green('✓')} Local-only key ready`);
476
500
  }
477
501
 
478
502
  const configData = { ...existing, api_key: localKey, auth_mode: authMode };
package/LICENSE DELETED
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright (c) 2026 Vesper Team
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
@@ -1,34 +0,0 @@
1
- import fs from "fs";
2
- import path from "path";
3
- export class CDNService {
4
- baseDir;
5
- baseUrl;
6
- constructor(baseDir = "data/cdn_mock", baseUrl = "https://cdn.vesper.ai") {
7
- this.baseDir = path.resolve(baseDir);
8
- this.baseUrl = baseUrl;
9
- if (!fs.existsSync(this.baseDir)) {
10
- fs.mkdirSync(this.baseDir, { recursive: true });
11
- }
12
- }
13
- /**
14
- * Uploads a file to the CDN.
15
- * @param fileName Name of the file (including extension)
16
- * @param content String or Buffer content
17
- * @returns The public URL of the file
18
- */
19
- async upload(fileName, content) {
20
- const filePath = path.join(this.baseDir, fileName);
21
- fs.writeFileSync(filePath, content);
22
- // Return a simulated cloud URL
23
- return `${this.baseUrl}/${fileName}`;
24
- }
25
- /**
26
- * Deletes a file from the CDN.
27
- */
28
- async delete(fileName) {
29
- const filePath = path.join(this.baseDir, fileName);
30
- if (fs.existsSync(filePath)) {
31
- fs.unlinkSync(filePath);
32
- }
33
- }
34
- }