@vespermcp/mcp-server 1.2.21 → 1.2.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/README.md +49 -0
  2. package/build/cloud/adapters/supabase.js +49 -0
  3. package/build/cloud/storage-manager.js +6 -0
  4. package/build/export/exporter.js +22 -9
  5. package/build/gateway/unified-dataset-gateway.js +410 -0
  6. package/build/index.js +1587 -845
  7. package/build/ingestion/ingestor.js +7 -4
  8. package/build/install/install-service.js +11 -6
  9. package/build/lib/supabase.js +3 -0
  10. package/build/metadata/scraper.js +85 -14
  11. package/build/python/asset_downloader_engine.py +2 -0
  12. package/build/python/convert_engine.py +92 -0
  13. package/build/python/export_engine.py +45 -0
  14. package/build/python/kaggle_engine.py +77 -5
  15. package/build/python/normalize_engine.py +83 -0
  16. package/build/python/vesper/core/asset_downloader.py +5 -1
  17. package/build/search/engine.js +43 -5
  18. package/build/search/jit-orchestrator.js +18 -14
  19. package/build/search/query-intent.js +509 -0
  20. package/build/tools/formatter.js +6 -3
  21. package/build/utils/python-runtime.js +130 -0
  22. package/package.json +7 -5
  23. package/scripts/postinstall.cjs +87 -31
  24. package/scripts/wizard.cjs +601 -0
  25. package/scripts/wizard.js +306 -12
  26. package/src/python/__pycache__/config.cpython-312.pyc +0 -0
  27. package/src/python/__pycache__/kaggle_engine.cpython-312.pyc +0 -0
  28. package/src/python/asset_downloader_engine.py +2 -0
  29. package/src/python/convert_engine.py +92 -0
  30. package/src/python/export_engine.py +45 -0
  31. package/src/python/kaggle_engine.py +77 -5
  32. package/src/python/normalize_engine.py +83 -0
  33. package/src/python/requirements.txt +12 -0
  34. package/src/python/vesper/core/asset_downloader.py +5 -1
  35. package/wizard.cjs +3 -0
@@ -0,0 +1,83 @@
1
+ """
2
+ Normalize any supported dataset file to parquet format.
3
+ Usage: normalize_engine.py <input_path> <output_path>
4
+ Outputs JSON: {"ok": true, "output_path": "...", "rows": N} or {"ok": false, "error": "..."}
5
+ """
6
+ import sys
7
+ import json
8
+ import os
9
+
10
+ try:
11
+ import polars as pl
12
+ except Exception:
13
+ print(json.dumps({"ok": False, "error": "polars is required"}))
14
+ sys.exit(1)
15
+
16
+
17
def _load(src: str) -> "pl.DataFrame":
    """Read a dataset file into a polars DataFrame, choosing a reader by extension.

    The extension is compared case-insensitively:

    * ``.csv`` -> comma-separated text
    * ``.tsv`` / ``.tab`` -> tab-separated text
    * ``.parquet`` / ``.pq`` -> parquet
    * ``.feather`` / ``.ftr`` / ``.arrow`` / ``.ipc`` -> Arrow IPC
    * ``.jsonl`` / ``.ndjson`` -> newline-delimited JSON
    * ``.json`` -> a JSON array, a wrapper object holding a list of records,
      or newline-delimited JSON stored under a ``.json`` name
    * anything else (including ``.txt``) -> attempted as comma-separated text

    Args:
        src: Path of the file to read.

    Returns:
        The loaded DataFrame.

    Raises:
        json.JSONDecodeError: If a ``.json`` file is neither a valid JSON
            document nor something that looks like newline-delimited JSON.
        Exception: Whatever the underlying polars reader raises for
            unreadable or malformed input.
    """
    ext = os.path.splitext(src)[1].lower()

    if ext == ".csv":
        return pl.read_csv(src, ignore_errors=True, infer_schema_length=10000)
    if ext in (".tsv", ".tab"):
        return pl.read_csv(src, separator="\t", ignore_errors=True, infer_schema_length=10000)
    if ext in (".parquet", ".pq"):
        return pl.read_parquet(src)
    if ext in (".feather", ".ftr", ".arrow", ".ipc"):
        return pl.read_ipc(src)
    if ext in (".jsonl", ".ndjson"):
        return pl.read_ndjson(src)
    if ext == ".json":
        # Use a context manager so the handle is closed even if read() fails.
        with open(src, "r", encoding="utf-8") as fh:
            raw = fh.read().strip()

        if raw.startswith("["):
            return pl.read_json(src)

        # Parse the text as ONE document first. Checking only whether the
        # first line starts with "{" would misclassify a pretty-printed
        # wrapper object ("{\n  \"data\": [...]}") as NDJSON.
        try:
            obj = json.loads(raw)
        except json.JSONDecodeError:
            # Several objects back to back is not a single JSON document;
            # when the text starts with an object, treat it as NDJSON.
            if raw.startswith("{"):
                return pl.read_ndjson(src)
            raise

        if isinstance(obj, dict):
            # Well-known wrapper keys take priority over positional guessing.
            for key in ("data", "rows", "items", "records", "results", "entries", "samples"):
                if key in obj and isinstance(obj[key], list):
                    return pl.DataFrame(obj[key])
            # Last resort - take the first non-empty list of dicts.
            for v in obj.values():
                if isinstance(v, list) and len(v) > 0 and isinstance(v[0], dict):
                    return pl.DataFrame(v)
        return pl.read_json(src)

    # Fallback: ".txt" and unrecognised extensions are attempted as CSV.
    return pl.read_csv(src, ignore_errors=True, infer_schema_length=10000)
53
+
54
+
55
def normalize(input_path: str, output_path: str) -> int:
    """Load ``input_path`` and write it to ``output_path`` as parquet.

    Args:
        input_path: Path of the dataset file to read (format chosen by
            ``_load`` from the file extension).
        output_path: Destination parquet file. Missing parent directories
            are created.

    Returns:
        The number of rows written (the DataFrame's ``height``).
    """
    df = _load(input_path)

    # os.path.dirname() returns "" for a bare filename such as "out.parquet",
    # and os.makedirs("") raises FileNotFoundError, so only create the parent
    # when the path actually has a directory component.
    parent_dir = os.path.dirname(output_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    df.write_parquet(output_path)
    return df.height
60
+
61
+
62
def main():
    """Command-line entry point.

    Expects two positional arguments, the input path and the output path.
    Prints exactly one JSON object to stdout: ``{"ok": true, ...}`` with the
    output path and row count on success, or ``{"ok": false, "error": ...}``
    followed by exit status 1 on any failure.
    """

    def _fail(message):
        # Report the failure as JSON on stdout, then stop with status 1.
        print(json.dumps({"ok": False, "error": message}))
        sys.exit(1)

    cli_args = sys.argv[1:]
    if len(cli_args) < 2:
        _fail("Usage: normalize_engine.py <input> <output>")

    input_path, output_path = cli_args[0], cli_args[1]

    if not os.path.exists(input_path):
        _fail(f"File not found: {input_path}")

    try:
        row_count = normalize(input_path, output_path)
        # Printed inside the try so a failure while reporting success is
        # also surfaced through the JSON error path.
        print(json.dumps({"ok": True, "output_path": output_path, "rows": row_count}))
    except Exception as exc:
        _fail(str(exc))
80
+
81
+
82
+ if __name__ == "__main__":
83
+ main()
@@ -0,0 +1,12 @@
1
+ polars==1.2.0
2
+ pandas==2.2.0
3
+ numpy==1.26.0
4
+ scikit-learn==1.4.0
5
+ # Optional source/download extras:
6
+ kaggle>=1.6.17
7
+ aiohttp>=3.9.0
8
+ aiofiles>=24.1.0
9
+ datasets>=2.20.0
10
+ webdataset>=0.2.86
11
+ # Optional for secure key storage (preferred over file fallback):
12
+ # keyring>=24.0.0
@@ -191,6 +191,7 @@ class AssetDownloader:
191
191
  kaggle_ref: Optional[str] = None,
192
192
  urls: Optional[List[str]] = None,
193
193
  output_format: str = "webdataset",
194
+ output_dir: Optional[str] = None,
194
195
  max_items: Optional[int] = None,
195
196
  image_column: Optional[str] = None,
196
197
  ) -> Dict[str, Any]:
@@ -231,7 +232,10 @@ class AssetDownloader:
231
232
  raise ValueError("urls are required for source=url")
232
233
 
233
234
  # --- Now safe to create directories ---
234
- dataset_dir = self.output_root / dataset_id.replace("/", "_").replace(":", "_")
235
+ if output_dir:
236
+ dataset_dir = Path(output_dir).expanduser().resolve()
237
+ else:
238
+ dataset_dir = self.output_root / dataset_id.replace("/", "_").replace(":", "_")
235
239
  images_dir = dataset_dir / "images"
236
240
  dataset_dir.mkdir(parents=True, exist_ok=True)
237
241
  images_dir.mkdir(parents=True, exist_ok=True)
package/wizard.cjs ADDED
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+
3
+ require('./scripts/wizard.cjs');