vesper-wizard 2.3.1 → 2.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (214)
  1. package/README.md +37 -322
  2. package/package.json +34 -100
  3. package/vesper-mcp-config.json +6 -0
  4. package/{scripts/wizard.js → wizard.js} +34 -10
  5. package/LICENSE +0 -21
  6. package/build/cache/cdn.js +0 -34
  7. package/build/cache/service.js +0 -63
  8. package/build/cleaning/cleaner.js +0 -81
  9. package/build/cleaning/evaluator.js +0 -89
  10. package/build/cleaning/executor.js +0 -62
  11. package/build/cleaning/exporter.js +0 -87
  12. package/build/cleaning/planner.js +0 -127
  13. package/build/cleaning/rules.js +0 -57
  14. package/build/cleaning/types.js +0 -1
  15. package/build/cloud/adapters/local.js +0 -37
  16. package/build/cloud/adapters/s3.js +0 -24
  17. package/build/cloud/adapters/supabase.js +0 -49
  18. package/build/cloud/storage-manager.js +0 -26
  19. package/build/cloud/types.js +0 -1
  20. package/build/compliance/service.js +0 -73
  21. package/build/compliance/store.js +0 -80
  22. package/build/compliance/types.js +0 -1
  23. package/build/config/config-manager.js +0 -221
  24. package/build/config/secure-keys.js +0 -51
  25. package/build/config/user-config.js +0 -48
  26. package/build/data/processing-worker.js +0 -23
  27. package/build/data/streaming.js +0 -38
  28. package/build/data/worker-pool.js +0 -39
  29. package/build/export/exporter.js +0 -82
  30. package/build/export/packager.js +0 -100
  31. package/build/export/types.js +0 -1
  32. package/build/fusion/aligner.js +0 -56
  33. package/build/fusion/deduplicator.js +0 -69
  34. package/build/fusion/engine.js +0 -69
  35. package/build/fusion/harmonizer.js +0 -39
  36. package/build/fusion/orchestrator.js +0 -86
  37. package/build/fusion/types.js +0 -1
  38. package/build/gateway/unified-dataset-gateway.js +0 -410
  39. package/build/index.js +0 -3068
  40. package/build/ingestion/hf-downloader.js +0 -171
  41. package/build/ingestion/ingestor.js +0 -271
  42. package/build/ingestion/kaggle-downloader.js +0 -102
  43. package/build/install/install-service.js +0 -46
  44. package/build/jobs/manager.js +0 -136
  45. package/build/jobs/queue.js +0 -59
  46. package/build/jobs/types.js +0 -1
  47. package/build/lib/supabase.js +0 -3
  48. package/build/metadata/dataworld-source.js +0 -89
  49. package/build/metadata/domain.js +0 -147
  50. package/build/metadata/github-scraper.js +0 -47
  51. package/build/metadata/institutional-scrapers.js +0 -49
  52. package/build/metadata/kaggle-scraper.js +0 -182
  53. package/build/metadata/kaggle-source.js +0 -70
  54. package/build/metadata/license.js +0 -68
  55. package/build/metadata/monitoring-service.js +0 -107
  56. package/build/metadata/monitoring-store.js +0 -78
  57. package/build/metadata/monitoring-types.js +0 -1
  58. package/build/metadata/openml-source.js +0 -87
  59. package/build/metadata/quality.js +0 -48
  60. package/build/metadata/rate-limiter.js +0 -128
  61. package/build/metadata/scraper.js +0 -448
  62. package/build/metadata/store.js +0 -340
  63. package/build/metadata/types.js +0 -1
  64. package/build/metadata/uci-scraper.js +0 -49
  65. package/build/monitoring/observability.js +0 -76
  66. package/build/preparation/target-detector.js +0 -75
  67. package/build/python/__pycache__/config.cpython-312.pyc +0 -0
  68. package/build/python/asset_downloader_engine.py +0 -94
  69. package/build/python/cleaner.py +0 -226
  70. package/build/python/config.py +0 -263
  71. package/build/python/convert_engine.py +0 -92
  72. package/build/python/dataworld_engine.py +0 -208
  73. package/build/python/export_engine.py +0 -288
  74. package/build/python/framework_adapters.py +0 -100
  75. package/build/python/fusion_engine.py +0 -368
  76. package/build/python/github_adapter.py +0 -106
  77. package/build/python/hf_fallback.py +0 -298
  78. package/build/python/image_engine.py +0 -86
  79. package/build/python/kaggle_engine.py +0 -295
  80. package/build/python/media_engine.py +0 -133
  81. package/build/python/nasa_adapter.py +0 -82
  82. package/build/python/normalize_engine.py +0 -83
  83. package/build/python/openml_engine.py +0 -146
  84. package/build/python/quality_engine.py +0 -267
  85. package/build/python/row_count.py +0 -54
  86. package/build/python/splitter_engine.py +0 -283
  87. package/build/python/target_engine.py +0 -154
  88. package/build/python/test_framework_adapters.py +0 -61
  89. package/build/python/test_fusion_engine.py +0 -89
  90. package/build/python/uci_adapter.py +0 -94
  91. package/build/python/vesper/__init__.py +0 -1
  92. package/build/python/vesper/__pycache__/__init__.cpython-312.pyc +0 -0
  93. package/build/python/vesper/core/__init__.py +0 -1
  94. package/build/python/vesper/core/__pycache__/__init__.cpython-312.pyc +0 -0
  95. package/build/python/vesper/core/__pycache__/asset_downloader.cpython-312.pyc +0 -0
  96. package/build/python/vesper/core/__pycache__/download_recipe.cpython-312.pyc +0 -0
  97. package/build/python/vesper/core/asset_downloader.py +0 -679
  98. package/build/python/vesper/core/download_recipe.py +0 -104
  99. package/build/python/worldbank_adapter.py +0 -99
  100. package/build/quality/analyzer.js +0 -93
  101. package/build/quality/image-analyzer.js +0 -114
  102. package/build/quality/media-analyzer.js +0 -115
  103. package/build/quality/quality-orchestrator.js +0 -162
  104. package/build/quality/types.js +0 -1
  105. package/build/scripts/build-index.js +0 -54
  106. package/build/scripts/check-db.js +0 -73
  107. package/build/scripts/check-jobs.js +0 -24
  108. package/build/scripts/check-naruto.js +0 -17
  109. package/build/scripts/cleanup-kaggle.js +0 -41
  110. package/build/scripts/demo-full-pipeline.js +0 -62
  111. package/build/scripts/demo-ui.js +0 -58
  112. package/build/scripts/e2e-demo.js +0 -72
  113. package/build/scripts/massive-scrape.js +0 -103
  114. package/build/scripts/ops-dashboard.js +0 -33
  115. package/build/scripts/repro-bug.js +0 -37
  116. package/build/scripts/repro-export-bug.js +0 -56
  117. package/build/scripts/scrape-metadata.js +0 -100
  118. package/build/scripts/search-cli.js +0 -26
  119. package/build/scripts/test-bias.js +0 -45
  120. package/build/scripts/test-caching.js +0 -51
  121. package/build/scripts/test-cleaning.js +0 -76
  122. package/build/scripts/test-cloud-storage.js +0 -48
  123. package/build/scripts/test-compliance.js +0 -58
  124. package/build/scripts/test-conversion.js +0 -64
  125. package/build/scripts/test-custom-rules.js +0 -58
  126. package/build/scripts/test-db-opt.js +0 -63
  127. package/build/scripts/test-export-custom.js +0 -33
  128. package/build/scripts/test-exporter.js +0 -53
  129. package/build/scripts/test-fusion.js +0 -61
  130. package/build/scripts/test-github.js +0 -27
  131. package/build/scripts/test-group-split.js +0 -52
  132. package/build/scripts/test-hf-download.js +0 -29
  133. package/build/scripts/test-holdout-manager.js +0 -61
  134. package/build/scripts/test-hybrid-search.js +0 -41
  135. package/build/scripts/test-image-analysis.js +0 -50
  136. package/build/scripts/test-ingestion-infra.js +0 -39
  137. package/build/scripts/test-install.js +0 -40
  138. package/build/scripts/test-institutional.js +0 -26
  139. package/build/scripts/test-integrity.js +0 -41
  140. package/build/scripts/test-jit.js +0 -42
  141. package/build/scripts/test-job-queue.js +0 -62
  142. package/build/scripts/test-kaggle-download.js +0 -34
  143. package/build/scripts/test-large-data.js +0 -50
  144. package/build/scripts/test-mcp-v5.js +0 -74
  145. package/build/scripts/test-media-analysis.js +0 -61
  146. package/build/scripts/test-monitoring.js +0 -91
  147. package/build/scripts/test-observability.js +0 -106
  148. package/build/scripts/test-packager.js +0 -55
  149. package/build/scripts/test-pipeline.js +0 -50
  150. package/build/scripts/test-planning.js +0 -64
  151. package/build/scripts/test-privacy.js +0 -38
  152. package/build/scripts/test-production-sync.js +0 -36
  153. package/build/scripts/test-quality.js +0 -43
  154. package/build/scripts/test-robust-ingestion.js +0 -41
  155. package/build/scripts/test-schema.js +0 -45
  156. package/build/scripts/test-split-validation.js +0 -40
  157. package/build/scripts/test-splitter.js +0 -93
  158. package/build/scripts/test-target-detector.js +0 -29
  159. package/build/scripts/test-uci.js +0 -27
  160. package/build/scripts/test-unified-quality.js +0 -86
  161. package/build/scripts/test-write.js +0 -14
  162. package/build/scripts/verify-integration.js +0 -57
  163. package/build/scripts/verify-priority.js +0 -33
  164. package/build/search/embedder.js +0 -34
  165. package/build/search/engine.js +0 -190
  166. package/build/search/jit-orchestrator.js +0 -262
  167. package/build/search/query-intent.js +0 -509
  168. package/build/search/vector-store.js +0 -123
  169. package/build/splitting/splitter.js +0 -82
  170. package/build/splitting/types.js +0 -1
  171. package/build/tools/formatter.js +0 -251
  172. package/build/utils/downloader.js +0 -52
  173. package/build/utils/python-runtime.js +0 -130
  174. package/build/utils/selector.js +0 -69
  175. package/mcp-config-template.json +0 -18
  176. package/scripts/postinstall.cjs +0 -170
  177. package/scripts/preindex_registry.cjs +0 -157
  178. package/scripts/refresh-index.cjs +0 -87
  179. package/scripts/wizard.cjs +0 -601
  180. package/src/python/__pycache__/config.cpython-312.pyc +0 -0
  181. package/src/python/__pycache__/export_engine.cpython-312.pyc +0 -0
  182. package/src/python/__pycache__/framework_adapters.cpython-312.pyc +0 -0
  183. package/src/python/__pycache__/fusion_engine.cpython-312.pyc +0 -0
  184. package/src/python/__pycache__/kaggle_engine.cpython-312.pyc +0 -0
  185. package/src/python/asset_downloader_engine.py +0 -94
  186. package/src/python/cleaner.py +0 -226
  187. package/src/python/config.py +0 -263
  188. package/src/python/convert_engine.py +0 -92
  189. package/src/python/dataworld_engine.py +0 -208
  190. package/src/python/export_engine.py +0 -288
  191. package/src/python/framework_adapters.py +0 -100
  192. package/src/python/fusion_engine.py +0 -368
  193. package/src/python/github_adapter.py +0 -106
  194. package/src/python/hf_fallback.py +0 -298
  195. package/src/python/image_engine.py +0 -86
  196. package/src/python/kaggle_engine.py +0 -295
  197. package/src/python/media_engine.py +0 -133
  198. package/src/python/nasa_adapter.py +0 -82
  199. package/src/python/normalize_engine.py +0 -83
  200. package/src/python/openml_engine.py +0 -146
  201. package/src/python/quality_engine.py +0 -267
  202. package/src/python/requirements.txt +0 -12
  203. package/src/python/row_count.py +0 -54
  204. package/src/python/splitter_engine.py +0 -283
  205. package/src/python/target_engine.py +0 -154
  206. package/src/python/test_framework_adapters.py +0 -61
  207. package/src/python/test_fusion_engine.py +0 -89
  208. package/src/python/uci_adapter.py +0 -94
  209. package/src/python/vesper/__init__.py +0 -1
  210. package/src/python/vesper/core/__init__.py +0 -1
  211. package/src/python/vesper/core/asset_downloader.py +0 -679
  212. package/src/python/vesper/core/download_recipe.py +0 -104
  213. package/src/python/worldbank_adapter.py +0 -99
  214. package/wizard.cjs +0 -3
@@ -1,154 +0,0 @@
1
- import sys
2
- import json
3
- import pandas as pd
4
- import numpy as np
5
-
6
- # Common names for target variables in datasets
7
- TARGET_CANDIDATES = [
8
- 'target', 'label', 'class', 'outcome', 'y',
9
- 'price', 'saleprice', 'sales', 'cost', 'value', 'total',
10
- 'diagnosis', 'species', 'churn', 'survived', 'credit_risk'
11
- ]
12
-
13
- def load_data(file_path):
14
- if file_path.endswith('.csv'):
15
- return pd.read_csv(file_path)
16
- elif file_path.endswith('.parquet'):
17
- return pd.read_parquet(file_path)
18
- else:
19
- raise ValueError("Unsupported file format")
20
-
21
- def detect_target(file_path):
22
- try:
23
- df = load_data(file_path)
24
- columns = [c.lower() for c in df.columns]
25
- candidates = []
26
-
27
- # 1. Exact Name Match
28
- for col_original in df.columns:
29
- col_lower = col_original.lower()
30
- confidence = 0.0
31
- reasons = []
32
-
33
- if col_lower in TARGET_CANDIDATES:
34
- confidence += 0.6
35
- reasons.append(f"Matches common target name '{col_lower}'")
36
-
37
- # Boost if exact match 'target' or 'label'
38
- if col_lower in ['target', 'label', 'class']:
39
- confidence += 0.2
40
-
41
- # 2. Position Heuristic (Last column is often target)
42
- if col_original == df.columns[-1]:
43
- confidence += 0.3
44
- reasons.append("Is the last column")
45
-
46
- # 3. Completeness
47
- missing_rate = df[col_original].isnull().mean()
48
- if missing_rate > 0.5:
49
- confidence -= 0.5
50
- reasons.append(f"High missing rate ({missing_rate:.1%})")
51
- elif missing_rate > 0:
52
- confidence -= 0.1
53
- reasons.append(f"Has missing values ({missing_rate:.1%})")
54
-
55
- # 4. Cardinality / Unique Values
56
- # If regression-like (many unique numeric values) or class-like (few unique values)
57
- # This is hard to score generally, but extremes are bad for targets (e.g. all unique = ID usually)
58
- n_unique = df[col_original].nunique()
59
- if n_unique == len(df):
60
- confidence -= 0.8
61
- reasons.append("All values are unique (likely ID)")
62
-
63
- if confidence > 0.3:
64
- candidates.append({
65
- "column": col_original,
66
- "confidence": min(confidence, 1.0),
67
- "reason": reasons
68
- })
69
-
70
- # Sort by confidence
71
- candidates.sort(key=lambda x: x['confidence'], reverse=True)
72
-
73
- best_target = None
74
- best_conf = 0.0
75
-
76
- if candidates:
77
- best_target = candidates[0]['column']
78
- best_conf = candidates[0]['confidence']
79
-
80
- return {
81
- "target_column": best_target,
82
- "confidence": best_conf,
83
- "candidates": candidates,
84
- "is_unified": False # Wrapper will handle unification logic
85
- }
86
-
87
- except Exception as e:
88
- return {"error": str(e)}
89
-
90
- def validate_target(file_path, target_column):
91
- try:
92
- df = load_data(file_path)
93
- if target_column not in df.columns:
94
- return {"error": f"Column '{target_column}' not found in dataset."}
95
-
96
- series = df[target_column]
97
- total_rows = len(df)
98
- missing_count = series.isnull().sum()
99
-
100
- # Determine type
101
- is_numeric = pd.api.types.is_numeric_dtype(series)
102
- n_unique = series.nunique()
103
-
104
- problem_type = "unknown"
105
- if is_numeric and n_unique > 20:
106
- problem_type = "regression"
107
- elif n_unique < 50: # String or few numeric values
108
- problem_type = "classification"
109
- else:
110
- # Heuristic fallback
111
- problem_type = "regression" if is_numeric else "classification"
112
-
113
- warnings = []
114
- if missing_count > 0:
115
- warnings.append(f"Target has {missing_count} missing values.")
116
-
117
- # Imbalance check for classification
118
- if problem_type == "classification":
119
- counts = series.value_counts(normalize=True)
120
- if counts.iloc[0] > 0.9: # Dominant class > 90%
121
- warnings.append(f"Highly imbalanced target: Class '{counts.index[0]}' is {counts.iloc[0]:.1%}")
122
-
123
- return {
124
- "valid": True,
125
- "problem_type": problem_type,
126
- "missing_count": int(missing_count),
127
- "total_rows": total_rows,
128
- "warnings": warnings
129
- }
130
-
131
- except Exception as e:
132
- return {"error": str(e)}
133
-
134
- if __name__ == "__main__":
135
- if len(sys.argv) < 3:
136
- print(json.dumps({"error": "Usage: target_engine.py <action> <file_path> [args]"}));
137
- sys.exit(1)
138
-
139
- action = sys.argv[1]
140
- file_path = sys.argv[2]
141
-
142
- result = {}
143
- if action == "detect":
144
- result = detect_target(file_path)
145
- elif action == "validate":
146
- target_col = sys.argv[3] if len(sys.argv) > 3 else None
147
- if target_col:
148
- result = validate_target(file_path, target_col)
149
- else:
150
- result = {"error": "Target column required for validation"}
151
- else:
152
- result = {"error": f"Unknown action: {action}"}
153
-
154
- print(json.dumps(result))
@@ -1,61 +0,0 @@
1
-
2
- import sys
3
- import os
4
- import polars as pl
5
- import numpy as np
6
-
7
- # Mock data creation
8
- def create_mock_data():
9
- df = pl.DataFrame({
10
- "feature1": np.random.rand(100),
11
- "feature2": np.random.rand(100),
12
- "label": np.random.randint(0, 2, 100)
13
- })
14
- os.makedirs("test_adapters", exist_ok=True)
15
- df.write_parquet("test_adapters/data.parquet")
16
- df.write_csv("test_adapters/data.csv")
17
- print("Created mock data in test_adapters/")
18
-
19
- def test_pytorch():
20
- print("\n--- Testing PyTorch Adapter ---")
21
- try:
22
- from framework_adapters import VesperPyTorchDataset
23
- import torch
24
- from torch.utils.data import DataLoader
25
-
26
- dataset = VesperPyTorchDataset("test_adapters/data.parquet", target_col="label")
27
- loader = DataLoader(dataset, batch_size=10, shuffle=True)
28
-
29
- batch = next(iter(loader))
30
- print(f"Loaded batch: {batch}")
31
- print("PASS: PyTorch DataLoader works")
32
-
33
- except ImportError:
34
- print("SKIP: PyTorch not installed")
35
- except Exception as e:
36
- print(f"FAIL: PyTorch test failed: {e}")
37
-
38
- def test_huggingface():
39
- print("\n--- Testing HuggingFace Adapter ---")
40
- try:
41
- from framework_adapters import load_vesper_dataset
42
- ds = load_vesper_dataset("test_adapters/data.csv")
43
- print(f"Loaded dataset: {ds}")
44
- print("PASS: HuggingFace Dataset works")
45
-
46
- except ImportError:
47
- print("SKIP: HuggingFace datasets not installed")
48
- except Exception as e:
49
- print(f"FAIL: HuggingFace test failed: {e}")
50
-
51
- if __name__ == "__main__":
52
- create_mock_data()
53
- # Add src/python to path to import adapters
54
- sys.path.append(os.path.join(os.getcwd(), "src", "python"))
55
-
56
- test_pytorch()
57
- test_huggingface()
58
-
59
- # Cleanup
60
- import shutil
61
- shutil.rmtree("test_adapters")
@@ -1,89 +0,0 @@
1
- import os
2
- import tempfile
3
- import polars as pl
4
- from fusion_engine import fuse_datasets
5
-
6
-
7
- def run_basic_tests():
8
- tmp = tempfile.gettempdir()
9
-
10
- # ----- Test 1: concat -----
11
- p1 = os.path.join(tmp, "fuse_test_a.csv")
12
- p2 = os.path.join(tmp, "fuse_test_b.csv")
13
- out_concat = os.path.join(tmp, "fuse_test_concat.feather")
14
-
15
- df1 = pl.DataFrame({
16
- "id": [1, 2, 3],
17
- "text": ["a", "b", "c"],
18
- "price": [10.0, 20.0, 30.0],
19
- })
20
- df2 = pl.DataFrame({
21
- "id": [4, 5, 3],
22
- "text": ["d", "e", "c"],
23
- "price": [40.0, 50.0, 30.0],
24
- "image_path": ["img1.jpg", "img2.jpg", "img3.jpg"],
25
- })
26
-
27
- df1.write_csv(p1)
28
- df2.write_csv(p2)
29
-
30
- concat_res = fuse_datasets(
31
- sources=[p1, p2],
32
- strategy="concat",
33
- dedup=True,
34
- run_quality_after=False,
35
- leakage_check=True,
36
- output_path=out_concat,
37
- output_format="feather",
38
- compression="lz4",
39
- preview=True,
40
- id_column="id",
41
- )
42
-
43
- assert concat_res.get("success") is True, f"Concat failed: {concat_res}"
44
- assert os.path.exists(out_concat), "Concat output file missing"
45
-
46
- # ----- Test 2: join with conflicting column names -----
47
- p3 = os.path.join(tmp, "fuse_test_c.csv")
48
- p4 = os.path.join(tmp, "fuse_test_d.csv")
49
- out_join = os.path.join(tmp, "fuse_test_join.parquet")
50
-
51
- left = pl.DataFrame({
52
- "id": [1, 2, 3],
53
- "price": [100, 200, 300],
54
- "text": ["x", "y", "z"],
55
- })
56
- right = pl.DataFrame({
57
- "id": [2, 3, 4],
58
- "price": [999, 888, 777],
59
- "caption": ["two", "three", "four"],
60
- })
61
-
62
- left.write_csv(p3)
63
- right.write_csv(p4)
64
-
65
- join_res = fuse_datasets(
66
- sources=[p3, p4],
67
- strategy="join",
68
- join_on="id",
69
- how="inner",
70
- dedup=True,
71
- run_quality_after=False,
72
- leakage_check=False,
73
- output_path=out_join,
74
- output_format="parquet",
75
- compression="snappy",
76
- preview=True,
77
- )
78
-
79
- assert join_res.get("success") is True, f"Join failed: {join_res}"
80
- assert os.path.exists(out_join), "Join output file missing"
81
- assert len(join_res.get("stats", {}).get("conflict_renames", [])) >= 1, "Expected conflict rename for price column"
82
-
83
- print("✅ Fusion tests passed")
84
- print("Concat:", concat_res["stats"])
85
- print("Join:", join_res["stats"])
86
-
87
-
88
- if __name__ == "__main__":
89
- run_basic_tests()
@@ -1,94 +0,0 @@
1
- import sys
2
- import json
3
- import argparse
4
- import urllib.request
5
- import urllib.parse
6
- from datetime import datetime
7
-
8
- # API Endpoint found in network inspection of UCI website
9
- UCI_API_URL = "https://archive.ics.uci.edu/api/datasets/list"
10
-
11
- def search_uci(query: str, limit: int = 10):
12
- """
13
- Search UCI datasets using their internal API.
14
- """
15
- try:
16
- # Fetch data dictionary from API
17
- # Only fetching first 100 to filter locally
18
- params = {
19
- "skip": 0,
20
- "take": 100,
21
- "sort": "desc",
22
- "orderBy": "NumHits",
23
- "search": query
24
- }
25
-
26
- query_string = urllib.parse.urlencode(params)
27
- url = f"{UCI_API_URL}?{query_string}"
28
-
29
- req = urllib.request.Request(url)
30
- with urllib.request.urlopen(req) as response:
31
- data = json.load(response)
32
-
33
- datasets = data.get('data', [])
34
- if not datasets:
35
- datasets = []
36
-
37
- results = []
38
- count = 0
39
-
40
- # We trust the API search mostly, but can do extra filtering if needed
41
- # The API "search" param is supported
42
-
43
- for ds in datasets:
44
- # Normalize to Vesper schema
45
- # API fields: id, name, abstract, numHits, area, task, dateDonated
46
-
47
- metadata = {
48
- "id": f"uci:{ds.get('id')}",
49
- "source": "uci",
50
- "name": ds.get('name'),
51
- "description": ds.get('abstract') or "No description available.",
52
- "downloads": ds.get('numHits') or 0,
53
- "likes": 0,
54
- "last_updated": ds.get('dateDonated') or datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
55
- "quality_score": 80,
56
- "license": {
57
- "id": "other",
58
- "category": "open",
59
- "usage_restrictions": [],
60
- "warnings": []
61
- },
62
- "tags": [t for t in [ds.get('area'), ds.get('task')] if t],
63
- "total_examples": ds.get('numInstances'),
64
- "is_safe_source": True,
65
- "is_structured": True,
66
- "metadata_url": f"https://archive.ics.uci.edu/dataset/{ds.get('id')}/{ds.get('name').replace(' ', '+')}"
67
- }
68
-
69
- results.append(metadata)
70
- count += 1
71
- if count >= limit:
72
- break
73
-
74
- return results
75
-
76
- except Exception as e:
77
- # Fallback empty or specific error
78
- return {"error": str(e)}
79
-
80
- def main():
81
- parser = argparse.ArgumentParser(description="UCI Adapter")
82
- parser.add_argument("--action", required=True, choices=["search"])
83
- parser.add_argument("--query", required=True)
84
- parser.add_argument("--limit", type=int, default=10)
85
-
86
- args = parser.parse_args()
87
-
88
- if args.action == "search":
89
- results = search_uci(args.query, args.limit)
90
- # JSON dump print for stdout capture
91
- print(json.dumps(results))
92
-
93
- if __name__ == "__main__":
94
- main()
@@ -1 +0,0 @@
1
- """Vesper Python runtime package."""
@@ -1 +0,0 @@
1
- """Core data engines for Vesper."""