vesper-wizard 2.3.0 → 2.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -322
- package/package.json +34 -100
- package/vesper-mcp-config.json +6 -0
- package/{scripts/wizard.js → wizard.js} +34 -10
- package/LICENSE +0 -21
- package/build/cache/cdn.js +0 -34
- package/build/cache/service.js +0 -63
- package/build/cleaning/cleaner.js +0 -81
- package/build/cleaning/evaluator.js +0 -89
- package/build/cleaning/executor.js +0 -62
- package/build/cleaning/exporter.js +0 -87
- package/build/cleaning/planner.js +0 -127
- package/build/cleaning/rules.js +0 -57
- package/build/cleaning/types.js +0 -1
- package/build/cloud/adapters/local.js +0 -37
- package/build/cloud/adapters/s3.js +0 -24
- package/build/cloud/adapters/supabase.js +0 -49
- package/build/cloud/storage-manager.js +0 -26
- package/build/cloud/types.js +0 -1
- package/build/compliance/service.js +0 -73
- package/build/compliance/store.js +0 -80
- package/build/compliance/types.js +0 -1
- package/build/config/config-manager.js +0 -221
- package/build/config/secure-keys.js +0 -51
- package/build/config/user-config.js +0 -48
- package/build/data/processing-worker.js +0 -23
- package/build/data/streaming.js +0 -38
- package/build/data/worker-pool.js +0 -39
- package/build/export/exporter.js +0 -82
- package/build/export/packager.js +0 -100
- package/build/export/types.js +0 -1
- package/build/fusion/aligner.js +0 -56
- package/build/fusion/deduplicator.js +0 -69
- package/build/fusion/engine.js +0 -69
- package/build/fusion/harmonizer.js +0 -39
- package/build/fusion/orchestrator.js +0 -86
- package/build/fusion/types.js +0 -1
- package/build/gateway/unified-dataset-gateway.js +0 -410
- package/build/index.js +0 -3068
- package/build/ingestion/hf-downloader.js +0 -171
- package/build/ingestion/ingestor.js +0 -271
- package/build/ingestion/kaggle-downloader.js +0 -102
- package/build/install/install-service.js +0 -46
- package/build/jobs/manager.js +0 -136
- package/build/jobs/queue.js +0 -59
- package/build/jobs/types.js +0 -1
- package/build/lib/supabase.js +0 -3
- package/build/metadata/dataworld-source.js +0 -89
- package/build/metadata/domain.js +0 -147
- package/build/metadata/github-scraper.js +0 -47
- package/build/metadata/institutional-scrapers.js +0 -49
- package/build/metadata/kaggle-scraper.js +0 -182
- package/build/metadata/kaggle-source.js +0 -70
- package/build/metadata/license.js +0 -68
- package/build/metadata/monitoring-service.js +0 -107
- package/build/metadata/monitoring-store.js +0 -78
- package/build/metadata/monitoring-types.js +0 -1
- package/build/metadata/openml-source.js +0 -87
- package/build/metadata/quality.js +0 -48
- package/build/metadata/rate-limiter.js +0 -128
- package/build/metadata/scraper.js +0 -448
- package/build/metadata/store.js +0 -340
- package/build/metadata/types.js +0 -1
- package/build/metadata/uci-scraper.js +0 -49
- package/build/monitoring/observability.js +0 -76
- package/build/preparation/target-detector.js +0 -75
- package/build/python/__pycache__/config.cpython-312.pyc +0 -0
- package/build/python/asset_downloader_engine.py +0 -94
- package/build/python/cleaner.py +0 -226
- package/build/python/config.py +0 -263
- package/build/python/convert_engine.py +0 -92
- package/build/python/dataworld_engine.py +0 -208
- package/build/python/export_engine.py +0 -288
- package/build/python/framework_adapters.py +0 -100
- package/build/python/fusion_engine.py +0 -368
- package/build/python/github_adapter.py +0 -106
- package/build/python/hf_fallback.py +0 -298
- package/build/python/image_engine.py +0 -86
- package/build/python/kaggle_engine.py +0 -295
- package/build/python/media_engine.py +0 -133
- package/build/python/nasa_adapter.py +0 -82
- package/build/python/normalize_engine.py +0 -83
- package/build/python/openml_engine.py +0 -146
- package/build/python/quality_engine.py +0 -267
- package/build/python/row_count.py +0 -54
- package/build/python/splitter_engine.py +0 -283
- package/build/python/target_engine.py +0 -154
- package/build/python/test_framework_adapters.py +0 -61
- package/build/python/test_fusion_engine.py +0 -89
- package/build/python/uci_adapter.py +0 -94
- package/build/python/vesper/__init__.py +0 -1
- package/build/python/vesper/__pycache__/__init__.cpython-312.pyc +0 -0
- package/build/python/vesper/core/__init__.py +0 -1
- package/build/python/vesper/core/__pycache__/__init__.cpython-312.pyc +0 -0
- package/build/python/vesper/core/__pycache__/asset_downloader.cpython-312.pyc +0 -0
- package/build/python/vesper/core/__pycache__/download_recipe.cpython-312.pyc +0 -0
- package/build/python/vesper/core/asset_downloader.py +0 -679
- package/build/python/vesper/core/download_recipe.py +0 -104
- package/build/python/worldbank_adapter.py +0 -99
- package/build/quality/analyzer.js +0 -93
- package/build/quality/image-analyzer.js +0 -114
- package/build/quality/media-analyzer.js +0 -115
- package/build/quality/quality-orchestrator.js +0 -162
- package/build/quality/types.js +0 -1
- package/build/scripts/build-index.js +0 -54
- package/build/scripts/check-db.js +0 -73
- package/build/scripts/check-jobs.js +0 -24
- package/build/scripts/check-naruto.js +0 -17
- package/build/scripts/cleanup-kaggle.js +0 -41
- package/build/scripts/demo-full-pipeline.js +0 -62
- package/build/scripts/demo-ui.js +0 -58
- package/build/scripts/e2e-demo.js +0 -72
- package/build/scripts/massive-scrape.js +0 -103
- package/build/scripts/ops-dashboard.js +0 -33
- package/build/scripts/repro-bug.js +0 -37
- package/build/scripts/repro-export-bug.js +0 -56
- package/build/scripts/scrape-metadata.js +0 -100
- package/build/scripts/search-cli.js +0 -26
- package/build/scripts/test-bias.js +0 -45
- package/build/scripts/test-caching.js +0 -51
- package/build/scripts/test-cleaning.js +0 -76
- package/build/scripts/test-cloud-storage.js +0 -48
- package/build/scripts/test-compliance.js +0 -58
- package/build/scripts/test-conversion.js +0 -64
- package/build/scripts/test-custom-rules.js +0 -58
- package/build/scripts/test-db-opt.js +0 -63
- package/build/scripts/test-export-custom.js +0 -33
- package/build/scripts/test-exporter.js +0 -53
- package/build/scripts/test-fusion.js +0 -61
- package/build/scripts/test-github.js +0 -27
- package/build/scripts/test-group-split.js +0 -52
- package/build/scripts/test-hf-download.js +0 -29
- package/build/scripts/test-holdout-manager.js +0 -61
- package/build/scripts/test-hybrid-search.js +0 -41
- package/build/scripts/test-image-analysis.js +0 -50
- package/build/scripts/test-ingestion-infra.js +0 -39
- package/build/scripts/test-install.js +0 -40
- package/build/scripts/test-institutional.js +0 -26
- package/build/scripts/test-integrity.js +0 -41
- package/build/scripts/test-jit.js +0 -42
- package/build/scripts/test-job-queue.js +0 -62
- package/build/scripts/test-kaggle-download.js +0 -34
- package/build/scripts/test-large-data.js +0 -50
- package/build/scripts/test-mcp-v5.js +0 -74
- package/build/scripts/test-media-analysis.js +0 -61
- package/build/scripts/test-monitoring.js +0 -91
- package/build/scripts/test-observability.js +0 -106
- package/build/scripts/test-packager.js +0 -55
- package/build/scripts/test-pipeline.js +0 -50
- package/build/scripts/test-planning.js +0 -64
- package/build/scripts/test-privacy.js +0 -38
- package/build/scripts/test-production-sync.js +0 -36
- package/build/scripts/test-quality.js +0 -43
- package/build/scripts/test-robust-ingestion.js +0 -41
- package/build/scripts/test-schema.js +0 -45
- package/build/scripts/test-split-validation.js +0 -40
- package/build/scripts/test-splitter.js +0 -93
- package/build/scripts/test-target-detector.js +0 -29
- package/build/scripts/test-uci.js +0 -27
- package/build/scripts/test-unified-quality.js +0 -86
- package/build/scripts/test-write.js +0 -14
- package/build/scripts/verify-integration.js +0 -57
- package/build/scripts/verify-priority.js +0 -33
- package/build/search/embedder.js +0 -34
- package/build/search/engine.js +0 -190
- package/build/search/jit-orchestrator.js +0 -262
- package/build/search/query-intent.js +0 -509
- package/build/search/vector-store.js +0 -123
- package/build/splitting/splitter.js +0 -82
- package/build/splitting/types.js +0 -1
- package/build/tools/formatter.js +0 -251
- package/build/utils/downloader.js +0 -52
- package/build/utils/python-runtime.js +0 -130
- package/build/utils/selector.js +0 -69
- package/mcp-config-template.json +0 -18
- package/scripts/postinstall.cjs +0 -170
- package/scripts/preindex_registry.cjs +0 -157
- package/scripts/refresh-index.cjs +0 -87
- package/scripts/wizard.cjs +0 -601
- package/src/python/__pycache__/config.cpython-312.pyc +0 -0
- package/src/python/__pycache__/export_engine.cpython-312.pyc +0 -0
- package/src/python/__pycache__/framework_adapters.cpython-312.pyc +0 -0
- package/src/python/__pycache__/fusion_engine.cpython-312.pyc +0 -0
- package/src/python/__pycache__/kaggle_engine.cpython-312.pyc +0 -0
- package/src/python/asset_downloader_engine.py +0 -94
- package/src/python/cleaner.py +0 -226
- package/src/python/config.py +0 -263
- package/src/python/convert_engine.py +0 -92
- package/src/python/dataworld_engine.py +0 -208
- package/src/python/export_engine.py +0 -288
- package/src/python/framework_adapters.py +0 -100
- package/src/python/fusion_engine.py +0 -368
- package/src/python/github_adapter.py +0 -106
- package/src/python/hf_fallback.py +0 -298
- package/src/python/image_engine.py +0 -86
- package/src/python/kaggle_engine.py +0 -295
- package/src/python/media_engine.py +0 -133
- package/src/python/nasa_adapter.py +0 -82
- package/src/python/normalize_engine.py +0 -83
- package/src/python/openml_engine.py +0 -146
- package/src/python/quality_engine.py +0 -267
- package/src/python/row_count.py +0 -54
- package/src/python/splitter_engine.py +0 -283
- package/src/python/target_engine.py +0 -154
- package/src/python/test_framework_adapters.py +0 -61
- package/src/python/test_fusion_engine.py +0 -89
- package/src/python/uci_adapter.py +0 -94
- package/src/python/vesper/__init__.py +0 -1
- package/src/python/vesper/core/__init__.py +0 -1
- package/src/python/vesper/core/asset_downloader.py +0 -679
- package/src/python/vesper/core/download_recipe.py +0 -104
- package/src/python/worldbank_adapter.py +0 -99
- package/wizard.cjs +0 -3
package/README.md
CHANGED
|
@@ -1,345 +1,60 @@
|
|
|
1
|
-
#
|
|
1
|
+
# vesper-wizard
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Zero-friction setup wizard for [Vesper](https://github.com/vesper/mcp-server) — your local MCP-native dataset intelligence layer.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
## ✨ Features
|
|
8
|
-
|
|
9
|
-
### 🔍 **Dataset Discovery**
|
|
10
|
-
- Search across HuggingFace, Kaggle, UCI ML Repository, and more
|
|
11
|
-
- Intelligent ranking based on quality, safety, and relevance
|
|
12
|
-
- Automatic metadata extraction and enrichment
|
|
13
|
-
|
|
14
|
-
### 📊 **Quality Analysis**
|
|
15
|
-
- **Text**: Missing data, duplicates, column profiling
|
|
16
|
-
- **Images**: Resolution, corruption, blur detection
|
|
17
|
-
- **Audio**: Sample rate, duration, silence detection
|
|
18
|
-
- **Video**: FPS, frame validation, corruption risk
|
|
19
|
-
- **Unified Reports**: Consolidated quality scores (0-100) with recommendations
|
|
20
|
-
|
|
21
|
-
### 🛠️ **Data Preparation**
|
|
22
|
-
- Automated cleaning pipelines
|
|
23
|
-
- Format conversion (CSV, JSON, Parquet)
|
|
24
|
-
- Train/test/validation splitting
|
|
25
|
-
- Automatic installation to project directories
|
|
26
|
-
|
|
27
|
-
### 🎯 **Multimodal Support**
|
|
28
|
-
- Analyze mixed datasets (text + images + audio)
|
|
29
|
-
- Media-specific quality metrics
|
|
30
|
-
- Intelligent modality detection
|
|
31
|
-
|
|
32
|
-
## 📦 Installation
|
|
33
|
-
|
|
34
|
-
## 🚀 Quick Start (VS Code + Copilot)
|
|
35
|
-
|
|
36
|
-
The fastest way to install Vesper and configure it for **GitHub Copilot Chat** or **Cursor** is to run the automated setup:
|
|
37
|
-
|
|
38
|
-
```bash
|
|
39
|
-
npx -y -p @vespermcp/mcp-server@latest vespermcp --setup
|
|
40
|
-
```
|
|
41
|
-
|
|
42
|
-
1. Select **Visual Studio Code (Settings.json)** from the list.
|
|
43
|
-
2. Restart VS Code.
|
|
44
|
-
3. Open Copilot Chat and look for the **MCP Servers** section.
|
|
45
|
-
|
|
46
|
-
## 🛠️ Configuration
|
|
47
|
-
Vesper supports:
|
|
48
|
-
- **GitHub Copilot Chat**: Automated setup via `settings.json`.
|
|
49
|
-
- **Cursor**: Automated setup via `mcp.json`.
|
|
50
|
-
- **Claude Desktop**: Automated setup via `claude_desktop_config.json`.
|
|
51
|
-
|
|
52
|
-
### Manual Python Setup (if needed)
|
|
5
|
+
## Install
|
|
53
6
|
|
|
54
7
|
```bash
|
|
55
|
-
|
|
8
|
+
npx vesper-wizard@latest
|
|
56
9
|
```
|
|
57
10
|
|
|
58
|
-
|
|
11
|
+
That's it. The wizard handles everything:
|
|
59
12
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
- **Name**: `vesper`
|
|
65
|
-
- **Type**: `command`
|
|
66
|
-
- **Command**: `vesper`
|
|
13
|
+
1. Creates `~/.vesper/` directories and local API key
|
|
14
|
+
2. Initializes a local credentials vault in unified-key mode (no external API keys required)
|
|
15
|
+
3. Installs `@vespermcp/mcp-server` and auto-configures MCP for all detected agents (Claude, Cursor, VS Code, Codex, Gemini CLI)
|
|
16
|
+
4. Verifies the installation
|
|
67
17
|
|
|
68
|
-
|
|
69
|
-
Vesper attempts to auto-configure itself! Restart Claude and check. If not:
|
|
18
|
+
## What you get
|
|
70
19
|
|
|
71
|
-
|
|
72
|
-
{
|
|
73
|
-
"mcpServers": {
|
|
74
|
-
"vesper": {
|
|
75
|
-
"command": "vesper",
|
|
76
|
-
"args": [],
|
|
77
|
-
"env": {
|
|
78
|
-
"HF_TOKEN": "your-huggingface-token"
|
|
79
|
-
}
|
|
80
|
-
}
|
|
81
|
-
}
|
|
82
|
-
}
|
|
83
|
-
```
|
|
84
|
-
|
|
85
|
-
> **Note**: If the `vesper` command isn't found, you can stick to the absolute path method.
|
|
20
|
+
After the wizard finishes, your AI assistant can immediately use Vesper tools:
|
|
86
21
|
|
|
87
|
-
|
|
22
|
+
| Tool | Description |
|
|
23
|
+
|------|-------------|
|
|
24
|
+
| `vesper_search` | Search 16,000+ datasets via natural language |
|
|
25
|
+
| `discover_datasets` | Discover from HuggingFace, Kaggle, OpenML, data.world |
|
|
26
|
+
| `download_dataset` | Download any dataset to local storage |
|
|
27
|
+
| `prepare_dataset` | Full pipeline: analyze → clean → split → export |
|
|
28
|
+
| `analyze_quality` | Deep quality analysis with recommendations |
|
|
29
|
+
| `export_dataset` | Export to parquet, csv, feather, jsonl, arrow |
|
|
30
|
+
| `fuse_datasets` | Combine multiple datasets with quality checks |
|
|
88
31
|
|
|
89
|
-
|
|
90
|
-
- `HF_TOKEN`: For private HuggingFace datasets
|
|
32
|
+
## Security
|
|
91
33
|
|
|
92
|
-
|
|
34
|
+
- **Local-only**: Uses one local key in `~/.vesper/config.toml`
|
|
35
|
+
- **Keyring-backed**: Uses OS keyring when available, falls back to local TOML
|
|
36
|
+
- **No cloud**: Zero external API calls during setup
|
|
37
|
+
- **No external keys**: No HuggingFace/Kaggle/Nia key prompts during setup
|
|
93
38
|
|
|
94
|
-
|
|
39
|
+
## Config file
|
|
95
40
|
|
|
96
|
-
|
|
41
|
+
The wizard generates `~/.vesper/config.toml`:
|
|
97
42
|
|
|
98
|
-
```
|
|
99
|
-
|
|
43
|
+
```toml
|
|
44
|
+
api_key = "vesper_sk_local_..."
|
|
45
|
+
auth_mode = "local_unified"
|
|
100
46
|
```
|
|
101
47
|
|
|
102
|
-
|
|
103
|
-
vespermcp config keys
|
|
104
|
-
```
|
|
105
|
-
|
|
106
|
-
The setup wizard supports skip and stores keys securely via OS keyring when available,
|
|
107
|
-
with fallback to `~/.vesper/config.toml`.
|
|
108
|
-
|
|
109
|
-
or use Kaggle's native file:
|
|
48
|
+
## Post-setup
|
|
110
49
|
|
|
111
|
-
|
|
50
|
+
Restart your IDE and try in your AI assistant:
|
|
112
51
|
|
|
113
|
-
If credentials are missing and you run Kaggle commands, Vesper shows:
|
|
114
|
-
|
|
115
|
-
`Kaggle support requires API key. Run 'vespermcp config keys' (30 seconds).`
|
|
116
|
-
|
|
117
|
-
### CLI Examples
|
|
118
|
-
|
|
119
|
-
```bash
|
|
120
|
-
vespermcp discover --source kaggle "credit risk" --limit 10
|
|
121
|
-
vespermcp discover --source huggingface "credit risk" --limit 10
|
|
122
|
-
vespermcp download kaggle username/dataset-name
|
|
123
|
-
vespermcp download kaggle https://www.kaggle.com/datasets/username/dataset-name --target-dir ./data
|
|
124
52
|
```
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
After installation and configuration, restart your AI assistant and try:
|
|
129
|
-
|
|
53
|
+
vesper_search(query="sentiment analysis")
|
|
54
|
+
prepare_dataset(query="image classification cats dogs")
|
|
55
|
+
analyze_quality(dataset_id="imdb")
|
|
130
56
|
```
|
|
131
|
-
search_datasets(query="sentiment analysis", limit=5)
|
|
132
|
-
```
|
|
133
|
-
|
|
134
|
-
```
|
|
135
|
-
prepare_dataset(query="image classification cats vs dogs")
|
|
136
|
-
```
|
|
137
|
-
|
|
138
|
-
```
|
|
139
|
-
generate_quality_report(
|
|
140
|
-
dataset_id="huggingface:imdb",
|
|
141
|
-
dataset_path="/path/to/data"
|
|
142
|
-
)
|
|
143
|
-
```
|
|
144
|
-
|
|
145
|
-
## 📚 Available Tools
|
|
146
|
-
|
|
147
|
-
### Dataset Discovery
|
|
148
|
-
|
|
149
|
-
#### `unified_dataset_api`
|
|
150
|
-
Single facade over multiple dataset backends. Use one tool for provider capability inspection, dataset discovery, dataset download, and dataset info lookup. The gateway prefers public/keyless providers and can also use server-managed credentials for connectors like Kaggle or data.world when configured by the operator.
|
|
151
|
-
|
|
152
|
-
**Parameters:**
|
|
153
|
-
- `operation` (string): `providers`, `discover`, `download`, or `info`
|
|
154
|
-
- `source` (string, optional): `auto`, `huggingface`, `openml`, `kaggle`, `dataworld`, `s3`, `bigquery`
|
|
155
|
-
- `query` (string, required for `discover`)
|
|
156
|
-
- `dataset_id` (string, required for `download`/`info`)
|
|
157
|
-
- `limit` (number, optional)
|
|
158
|
-
- `target_dir` (string, optional)
|
|
159
|
-
- `public_only` (boolean, optional)
|
|
160
|
-
|
|
161
|
-
**Examples:**
|
|
162
|
-
```
|
|
163
|
-
unified_dataset_api(operation="providers")
|
|
164
|
-
```
|
|
165
|
-
|
|
166
|
-
```
|
|
167
|
-
unified_dataset_api(operation="discover", query="credit risk", source="auto")
|
|
168
|
-
```
|
|
169
|
-
|
|
170
|
-
```
|
|
171
|
-
unified_dataset_api(operation="download", dataset_id="huggingface:imdb")
|
|
172
|
-
```
|
|
173
|
-
|
|
174
|
-
---
|
|
175
|
-
|
|
176
|
-
#### `search_datasets`
|
|
177
|
-
Search for datasets across multiple sources.
|
|
178
|
-
|
|
179
|
-
**Parameters:**
|
|
180
|
-
- `query` (string): Search query
|
|
181
|
-
- `limit` (number, optional): Max results (default: 10)
|
|
182
|
-
- `min_quality_score` (number, optional): Minimum quality threshold
|
|
183
|
-
|
|
184
|
-
**Example:**
|
|
185
|
-
```
|
|
186
|
-
search_datasets(query="medical imaging", limit=5, min_quality_score=70)
|
|
187
|
-
```
|
|
188
|
-
|
|
189
|
-
---
|
|
190
|
-
|
|
191
|
-
### Data Preparation
|
|
192
|
-
|
|
193
|
-
#### `prepare_dataset`
|
|
194
|
-
Download, analyze, and prepare a dataset for use.
|
|
195
|
-
|
|
196
|
-
**Parameters:**
|
|
197
|
-
- `query` (string): Dataset search query or ID
|
|
198
|
-
|
|
199
|
-
**Example:**
|
|
200
|
-
```
|
|
201
|
-
prepare_dataset(query="squad")
|
|
202
|
-
```
|
|
203
|
-
|
|
204
|
-
---
|
|
205
|
-
|
|
206
|
-
#### `export_dataset`
|
|
207
|
-
Export a prepared dataset to a custom directory with format conversion.
|
|
208
|
-
|
|
209
|
-
**Parameters:**
|
|
210
|
-
- `dataset_id` (string): Dataset identifier
|
|
211
|
-
- `target_dir` (string): Export directory
|
|
212
|
-
- `format` (string, optional): Output format (csv, json, parquet)
|
|
213
|
-
|
|
214
|
-
**Example:**
|
|
215
|
-
```
|
|
216
|
-
export_dataset(
|
|
217
|
-
dataset_id="huggingface:imdb",
|
|
218
|
-
target_dir="./my-data",
|
|
219
|
-
format="csv"
|
|
220
|
-
)
|
|
221
|
-
```
|
|
222
|
-
|
|
223
|
-
---
|
|
224
|
-
|
|
225
|
-
#### `vesper_download_assets`
|
|
226
|
-
Download image/media assets to a user-controlled local directory.
|
|
227
|
-
|
|
228
|
-
**Parameters:**
|
|
229
|
-
- `dataset_id` (string): Dataset identifier
|
|
230
|
-
- `source` (string): `huggingface`, `kaggle`, or `url`
|
|
231
|
-
- `target_dir` (string, optional): Exact local directory where assets should be written
|
|
232
|
-
- `output_dir` (string, optional): Alias for `target_dir`
|
|
233
|
-
- `output_format` (string, optional): `webdataset`, `imagefolder`, or `parquet`
|
|
234
|
-
|
|
235
|
-
**Example:**
|
|
236
|
-
```
|
|
237
|
-
vesper_download_assets(
|
|
238
|
-
dataset_id="cats_vs_dogs",
|
|
239
|
-
source="kaggle",
|
|
240
|
-
target_dir="./datasets/cats_dogs_100",
|
|
241
|
-
output_format="imagefolder"
|
|
242
|
-
)
|
|
243
|
-
```
|
|
244
|
-
|
|
245
|
-
---
|
|
246
|
-
|
|
247
|
-
### Quality Analysis
|
|
248
|
-
|
|
249
|
-
#### `analyze_image_quality`
|
|
250
|
-
Analyze image datasets for resolution, corruption, and blur.
|
|
251
|
-
|
|
252
|
-
**Parameters:**
|
|
253
|
-
- `path` (string): Path to image file or folder
|
|
254
|
-
|
|
255
|
-
**Example:**
|
|
256
|
-
```
|
|
257
|
-
analyze_image_quality(path="/path/to/images")
|
|
258
|
-
```
|
|
259
|
-
|
|
260
|
-
---
|
|
261
|
-
|
|
262
|
-
#### `analyze_media_quality`
|
|
263
|
-
Analyze audio/video files for quality metrics.
|
|
264
|
-
|
|
265
|
-
**Parameters:**
|
|
266
|
-
- `path` (string): Path to media file or folder
|
|
267
|
-
|
|
268
|
-
**Example:**
|
|
269
|
-
```
|
|
270
|
-
analyze_media_quality(path="/path/to/audio")
|
|
271
|
-
```
|
|
272
|
-
|
|
273
|
-
---
|
|
274
|
-
|
|
275
|
-
#### `generate_quality_report`
|
|
276
|
-
Generate a comprehensive unified quality report for multimodal datasets.
|
|
277
|
-
|
|
278
|
-
**Parameters:**
|
|
279
|
-
- `dataset_id` (string): Dataset identifier
|
|
280
|
-
- `dataset_path` (string): Path to dataset directory
|
|
281
|
-
|
|
282
|
-
**Example:**
|
|
283
|
-
```
|
|
284
|
-
generate_quality_report(
|
|
285
|
-
dataset_id="my-dataset",
|
|
286
|
-
dataset_path="/path/to/data"
|
|
287
|
-
)
|
|
288
|
-
```
|
|
289
|
-
|
|
290
|
-
---
|
|
291
|
-
|
|
292
|
-
### Data Splitting
|
|
293
|
-
|
|
294
|
-
#### `split_dataset`
|
|
295
|
-
Split a dataset into train/test/validation sets.
|
|
296
|
-
|
|
297
|
-
**Parameters:**
|
|
298
|
-
- `dataset_id` (string): Dataset identifier
|
|
299
|
-
- `train_ratio` (number): Training set ratio (0-1)
|
|
300
|
-
- `test_ratio` (number): Test set ratio (0-1)
|
|
301
|
-
- `val_ratio` (number, optional): Validation set ratio (0-1)
|
|
302
|
-
|
|
303
|
-
**Example:**
|
|
304
|
-
```
|
|
305
|
-
split_dataset(
|
|
306
|
-
dataset_id="my-dataset",
|
|
307
|
-
train_ratio=0.7,
|
|
308
|
-
test_ratio=0.2,
|
|
309
|
-
val_ratio=0.1
|
|
310
|
-
)
|
|
311
|
-
```
|
|
312
|
-
|
|
313
|
-
## 🏗️ Architecture
|
|
314
|
-
|
|
315
|
-
Vesper is built with:
|
|
316
|
-
- **TypeScript** for the MCP server
|
|
317
|
-
- **Python** for image/audio/video processing
|
|
318
|
-
- **SQLite** for metadata storage
|
|
319
|
-
- **Transformers.js** for semantic search
|
|
320
|
-
|
|
321
|
-
## 🤝 Contributing
|
|
322
|
-
|
|
323
|
-
Contributions are welcome! Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
|
|
324
|
-
|
|
325
|
-
## 📄 License
|
|
326
|
-
|
|
327
|
-
MIT License - see [LICENSE](LICENSE) for details.
|
|
328
|
-
|
|
329
|
-
## 🐛 Issues & Support
|
|
330
|
-
|
|
331
|
-
- **Issues**: https://github.com/vesper/mcp-server/issues
|
|
332
|
-
- **Discussions**: https://github.com/vesper/mcp-server/discussions
|
|
333
|
-
|
|
334
|
-
## 🌟 Acknowledgments
|
|
335
|
-
|
|
336
|
-
Built with:
|
|
337
|
-
- [Model Context Protocol](https://modelcontextprotocol.io/)
|
|
338
|
-
- [HuggingFace Hub](https://huggingface.co/)
|
|
339
|
-
- [Kaggle API](https://www.kaggle.com/docs/api)
|
|
340
|
-
- [OpenCV](https://opencv.org/)
|
|
341
|
-
- [librosa](https://librosa.org/)
|
|
342
57
|
|
|
343
|
-
|
|
58
|
+
## License
|
|
344
59
|
|
|
345
|
-
|
|
60
|
+
MIT
|
package/package.json
CHANGED
|
@@ -1,100 +1,34 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "vesper-wizard",
|
|
3
|
-
"version": "2.3.0",
|
|
4
|
-
"description": "
|
|
5
|
-
"
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
"
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
"
|
|
15
|
-
"
|
|
16
|
-
"
|
|
17
|
-
"
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
"
|
|
21
|
-
"
|
|
22
|
-
"
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
"
|
|
28
|
-
"
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
"
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
"test-fusion-engine": "py src/python/test_fusion_engine.py",
|
|
36
|
-
"setup": "node build/index.js --setup",
|
|
37
|
-
"setup:silent": "node build/index.js --setup --silent",
|
|
38
|
-
"refresh-index": "node scripts/refresh-index.cjs",
|
|
39
|
-
"test": "vitest",
|
|
40
|
-
"start": "node build/index.js"
|
|
41
|
-
},
|
|
42
|
-
"keywords": [
|
|
43
|
-
"mcp",
|
|
44
|
-
"model-context-protocol",
|
|
45
|
-
"dataset",
|
|
46
|
-
"machine-learning",
|
|
47
|
-
"data-quality",
|
|
48
|
-
"huggingface",
|
|
49
|
-
"kaggle",
|
|
50
|
-
"multimodal",
|
|
51
|
-
"image-analysis",
|
|
52
|
-
"audio-analysis",
|
|
53
|
-
"video-analysis",
|
|
54
|
-
"data-preparation",
|
|
55
|
-
"ai",
|
|
56
|
-
"ml"
|
|
57
|
-
],
|
|
58
|
-
"author": "Vesper Team",
|
|
59
|
-
"license": "MIT",
|
|
60
|
-
"repository": {
|
|
61
|
-
"type": "git",
|
|
62
|
-
"url": "git+https://github.com/vesper/mcp-server.git"
|
|
63
|
-
},
|
|
64
|
-
"engines": {
|
|
65
|
-
"node": ">=18.0.0",
|
|
66
|
-
"npm": ">=8.0.0"
|
|
67
|
-
},
|
|
68
|
-
"dependencies": {
|
|
69
|
-
"@huggingface/hub": "^2.7.1",
|
|
70
|
-
"@modelcontextprotocol/sdk": "^1.25.2",
|
|
71
|
-
"@polar-sh/nextjs": "^0.9.4",
|
|
72
|
-
"@supabase/supabase-js": "^2.98.0",
|
|
73
|
-
"@xenova/transformers": "^2.17.2",
|
|
74
|
-
"adm-zip": "^0.5.16",
|
|
75
|
-
"ajv": "^8.17.1",
|
|
76
|
-
"ajv-formats": "^3.0.1",
|
|
77
|
-
"better-sqlite3": "^12.6.0",
|
|
78
|
-
"inquirer": "^13.3.0",
|
|
79
|
-
"lodash": "^4.17.21",
|
|
80
|
-
"uuid": "^13.0.0",
|
|
81
|
-
"zod": "^4.3.5",
|
|
82
|
-
"zod-to-json-schema": "^3.25.1"
|
|
83
|
-
},
|
|
84
|
-
"devDependencies": {
|
|
85
|
-
"@types/adm-zip": "^0.5.7",
|
|
86
|
-
"@types/better-sqlite3": "^7.6.13",
|
|
87
|
-
"@types/lodash": "^4.17.23",
|
|
88
|
-
"@types/node": "^25.0.9",
|
|
89
|
-
"@types/uuid": "^10.0.0",
|
|
90
|
-
"@typescript-eslint/eslint-plugin": "^8.53.0",
|
|
91
|
-
"@typescript-eslint/parser": "^8.53.0",
|
|
92
|
-
"eslint": "^9.39.2",
|
|
93
|
-
"eslint-config-prettier": "^10.1.8",
|
|
94
|
-
"prettier": "^3.8.0",
|
|
95
|
-
"tsx": "^4.21.0",
|
|
96
|
-
"typescript": "^5.9.3",
|
|
97
|
-
"vitest": "^4.0.17"
|
|
98
|
-
},
|
|
99
|
-
"packageManager": "pnpm@10.18.1+sha512.77a884a165cbba2d8d1c19e3b4880eee6d2fcabd0d879121e282196b80042351d5eb3ca0935fa599da1dc51265cc68816ad2bddd2a2de5ea9fdf92adbec7cd34"
|
|
100
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"name": "vesper-wizard",
|
|
3
|
+
"version": "2.3.2",
|
|
4
|
+
"description": "Zero-friction setup wizard for Vesper — local MCP server, unified dataset API, and agent auto-config in 60 seconds",
|
|
5
|
+
"bin": {
|
|
6
|
+
"vesper-wizard": "wizard.js"
|
|
7
|
+
},
|
|
8
|
+
"files": [
|
|
9
|
+
"wizard.js",
|
|
10
|
+
"README.md",
|
|
11
|
+
"vesper-mcp-config.json"
|
|
12
|
+
],
|
|
13
|
+
"keywords": [
|
|
14
|
+
"vesper",
|
|
15
|
+
"mcp",
|
|
16
|
+
"wizard",
|
|
17
|
+
"setup",
|
|
18
|
+
"datasets",
|
|
19
|
+
"machine-learning",
|
|
20
|
+
"huggingface",
|
|
21
|
+
"kaggle",
|
|
22
|
+
"openml"
|
|
23
|
+
],
|
|
24
|
+
"author": "Vesper Team",
|
|
25
|
+
"license": "MIT",
|
|
26
|
+
"repository": {
|
|
27
|
+
"type": "git",
|
|
28
|
+
"url": "https://github.com/vesper/mcp-server"
|
|
29
|
+
},
|
|
30
|
+
"engines": {
|
|
31
|
+
"node": ">=18.0.0"
|
|
32
|
+
},
|
|
33
|
+
"dependencies": {}
|
|
34
|
+
}
|
|
@@ -60,9 +60,9 @@ function magenta(text) { return `\x1b[35m${text}\x1b[0m`; }
|
|
|
60
60
|
// ── Vesper API URL resolution ────────────────────────────────
|
|
61
61
|
const VESPER_API_URL = process.env.VESPER_API_URL || '';
|
|
62
62
|
const DEFAULT_VESPER_API_CANDIDATES = [
|
|
63
|
+
'https://getvesper.dev',
|
|
63
64
|
'http://localhost:3000',
|
|
64
65
|
'http://127.0.0.1:3000',
|
|
65
|
-
'https://vesper.dev',
|
|
66
66
|
];
|
|
67
67
|
|
|
68
68
|
// ── Device Auth Helpers ──────────────────────────────────────
|
|
@@ -221,10 +221,14 @@ async function chooseAuthMode(existingKey, existingAuthMode) {
|
|
|
221
221
|
}
|
|
222
222
|
|
|
223
223
|
const choices = [];
|
|
224
|
-
|
|
224
|
+
if (hasExistingKey) {
|
|
225
|
+
choices.push({ value: 'keep', label: 'Keep current key as-is' });
|
|
226
|
+
}
|
|
225
227
|
choices.push({ value: 'manual', label: 'Provide Vesper API key manually' });
|
|
228
|
+
choices.push({ value: 'browser', label: 'Sign in through the browser' });
|
|
229
|
+
choices.push({ value: 'local', label: 'Use local-only key' });
|
|
226
230
|
|
|
227
|
-
return await askChoice(`${cyan('→')} How do you want to authenticate Vesper?`, choices, 'browser');
|
|
231
|
+
return await askChoice(`${cyan('→')} How do you want to authenticate Vesper?`, choices, hasExistingKey ? 'keep' : 'browser');
|
|
228
232
|
}
|
|
229
233
|
|
|
230
234
|
async function deviceAuthFlow() {
|
|
@@ -236,7 +240,7 @@ async function deviceAuthFlow() {
|
|
|
236
240
|
console.log(` ${red('✗')} ${red('Could not reach any Vesper auth endpoint.')}`);
|
|
237
241
|
console.log(` ${dim('Tried:')} ${dim((VESPER_API_URL ? [VESPER_API_URL] : DEFAULT_VESPER_API_CANDIDATES).join(', '))}`);
|
|
238
242
|
console.log(` ${dim('If your landing app is running locally, start it on http://localhost:3000 or set VESPER_API_URL.')}`);
|
|
239
|
-
console.log(` ${dim('Falling back to
|
|
243
|
+
console.log(` ${dim('Falling back to local-only mode.\n')}`);
|
|
240
244
|
return null;
|
|
241
245
|
}
|
|
242
246
|
|
|
@@ -245,7 +249,7 @@ async function deviceAuthFlow() {
|
|
|
245
249
|
console.log(` ${dim('Endpoint:')} ${dim(resolvedApiBaseUrl.baseUrl)}`);
|
|
246
250
|
console.log(` ${dim('Reason:')} ${dim(resolvedApiBaseUrl.message || 'Apply Supabase migrations first.')}`);
|
|
247
251
|
console.log(` ${dim('Run the SQL in supabase/migrations/001_device_auth.sql and 002_rate_limits.sql, then retry.')}`);
|
|
248
|
-
console.log(` ${dim('Falling back to
|
|
252
|
+
console.log(` ${dim('Falling back to local-only mode.\n')}`);
|
|
249
253
|
return null;
|
|
250
254
|
}
|
|
251
255
|
|
|
@@ -259,7 +263,7 @@ async function deviceAuthFlow() {
|
|
|
259
263
|
} catch (err) {
|
|
260
264
|
console.log(` ${red('✗')}`);
|
|
261
265
|
console.log(` ${red('Could not reach Vesper API at')} ${dim(resolvedApiBaseUrl.baseUrl)}`);
|
|
262
|
-
console.log(` ${dim('Falling back to
|
|
266
|
+
console.log(` ${dim('Falling back to local-only mode.\n')}`);
|
|
263
267
|
return null;
|
|
264
268
|
}
|
|
265
269
|
|
|
@@ -459,7 +463,9 @@ async function main() {
|
|
|
459
463
|
|
|
460
464
|
const authChoice = await chooseAuthMode(localKey, authMode);
|
|
461
465
|
|
|
462
|
-
if (authChoice === 'manual') {
|
|
466
|
+
if (authChoice === 'keep' && localKey) {
|
|
467
|
+
console.log(` ${green('✓')} Keeping current key`);
|
|
468
|
+
} else if (authChoice === 'manual') {
|
|
463
469
|
localKey = await promptForManualApiKey();
|
|
464
470
|
authMode = 'cloud';
|
|
465
471
|
console.log(` ${green('✓')} Cloud API key saved from manual input`);
|
|
@@ -469,10 +475,28 @@ async function main() {
|
|
|
469
475
|
localKey = cloudKey;
|
|
470
476
|
authMode = 'cloud';
|
|
471
477
|
} else {
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
478
|
+
const fallbackChoice = await askChoice(`${yellow('!')} Browser sign-in did not complete. Choose a fallback:`, [
|
|
479
|
+
{ value: 'manual', label: 'Provide Vesper API key manually' },
|
|
480
|
+
{ value: 'local', label: 'Use local-only key' },
|
|
481
|
+
], 'manual');
|
|
482
|
+
|
|
483
|
+
if (fallbackChoice === 'manual') {
|
|
484
|
+
localKey = await promptForManualApiKey();
|
|
485
|
+
authMode = 'cloud';
|
|
486
|
+
} else {
|
|
487
|
+
if (!localKey || isCloudApiKey(localKey)) {
|
|
488
|
+
localKey = generateLocalKey();
|
|
489
|
+
}
|
|
490
|
+
authMode = 'local_unified';
|
|
491
|
+
console.log(`\n ${yellow('⚠')} Using local-only key. Run the wizard again anytime to link an account.`);
|
|
492
|
+
}
|
|
493
|
+
}
|
|
494
|
+
} else {
|
|
495
|
+
if (!localKey || isCloudApiKey(localKey)) {
|
|
496
|
+
localKey = generateLocalKey();
|
|
475
497
|
}
|
|
498
|
+
authMode = 'local_unified';
|
|
499
|
+
console.log(` ${green('✓')} Local-only key ready`);
|
|
476
500
|
}
|
|
477
501
|
|
|
478
502
|
const configData = { ...existing, api_key: localKey, auth_mode: authMode };
|
package/LICENSE
DELETED
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
MIT License
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2026 Vesper Team
|
|
4
|
-
|
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
-
in the Software without restriction, including without limitation the rights
|
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
-
furnished to do so, subject to the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be included in all
|
|
13
|
-
copies or substantial portions of the Software.
|
|
14
|
-
|
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
package/build/cache/cdn.js
DELETED
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
import fs from "fs";
|
|
2
|
-
import path from "path";
|
|
3
|
-
export class CDNService {
|
|
4
|
-
baseDir;
|
|
5
|
-
baseUrl;
|
|
6
|
-
constructor(baseDir = "data/cdn_mock", baseUrl = "https://cdn.vesper.ai") {
|
|
7
|
-
this.baseDir = path.resolve(baseDir);
|
|
8
|
-
this.baseUrl = baseUrl;
|
|
9
|
-
if (!fs.existsSync(this.baseDir)) {
|
|
10
|
-
fs.mkdirSync(this.baseDir, { recursive: true });
|
|
11
|
-
}
|
|
12
|
-
}
|
|
13
|
-
/**
|
|
14
|
-
* Uploads a file to the CDN.
|
|
15
|
-
* @param fileName Name of the file (including extension)
|
|
16
|
-
* @param content String or Buffer content
|
|
17
|
-
* @returns The public URL of the file
|
|
18
|
-
*/
|
|
19
|
-
async upload(fileName, content) {
|
|
20
|
-
const filePath = path.join(this.baseDir, fileName);
|
|
21
|
-
fs.writeFileSync(filePath, content);
|
|
22
|
-
// Return a simulated cloud URL
|
|
23
|
-
return `${this.baseUrl}/${fileName}`;
|
|
24
|
-
}
|
|
25
|
-
/**
|
|
26
|
-
* Deletes a file from the CDN.
|
|
27
|
-
*/
|
|
28
|
-
async delete(fileName) {
|
|
29
|
-
const filePath = path.join(this.baseDir, fileName);
|
|
30
|
-
if (fs.existsSync(filePath)) {
|
|
31
|
-
fs.unlinkSync(filePath);
|
|
32
|
-
}
|
|
33
|
-
}
|
|
34
|
-
}
|