@vespermcp/mcp-server 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131)
  1. package/LICENSE +21 -0
  2. package/README.md +259 -0
  3. package/build/cache/cdn.js +34 -0
  4. package/build/cache/service.js +63 -0
  5. package/build/cleaning/cleaner.js +50 -0
  6. package/build/cleaning/evaluator.js +89 -0
  7. package/build/cleaning/executor.js +60 -0
  8. package/build/cleaning/exporter.js +87 -0
  9. package/build/cleaning/planner.js +111 -0
  10. package/build/cleaning/rules.js +57 -0
  11. package/build/cleaning/types.js +1 -0
  12. package/build/cloud/adapters/local.js +37 -0
  13. package/build/cloud/adapters/s3.js +24 -0
  14. package/build/cloud/storage-manager.js +20 -0
  15. package/build/cloud/types.js +1 -0
  16. package/build/compliance/service.js +73 -0
  17. package/build/compliance/store.js +80 -0
  18. package/build/compliance/types.js +1 -0
  19. package/build/data/processing-worker.js +23 -0
  20. package/build/data/streaming.js +38 -0
  21. package/build/data/worker-pool.js +39 -0
  22. package/build/export/exporter.js +45 -0
  23. package/build/export/packager.js +100 -0
  24. package/build/export/types.js +1 -0
  25. package/build/fusion/aligner.js +56 -0
  26. package/build/fusion/deduplicator.js +69 -0
  27. package/build/fusion/harmonizer.js +39 -0
  28. package/build/fusion/orchestrator.js +86 -0
  29. package/build/fusion/types.js +1 -0
  30. package/build/index.js +632 -0
  31. package/build/ingestion/hf-downloader.js +64 -0
  32. package/build/ingestion/ingestor.js +96 -0
  33. package/build/ingestion/kaggle-downloader.js +79 -0
  34. package/build/install/install-service.js +41 -0
  35. package/build/jobs/manager.js +129 -0
  36. package/build/jobs/queue.js +59 -0
  37. package/build/jobs/types.js +1 -0
  38. package/build/metadata/domain.js +147 -0
  39. package/build/metadata/github-scraper.js +47 -0
  40. package/build/metadata/institutional-scrapers.js +49 -0
  41. package/build/metadata/kaggle-scraper.js +182 -0
  42. package/build/metadata/license.js +68 -0
  43. package/build/metadata/monitoring-service.js +107 -0
  44. package/build/metadata/monitoring-store.js +78 -0
  45. package/build/metadata/monitoring-types.js +1 -0
  46. package/build/metadata/quality.js +48 -0
  47. package/build/metadata/rate-limiter.js +128 -0
  48. package/build/metadata/scraper.js +353 -0
  49. package/build/metadata/store.js +325 -0
  50. package/build/metadata/types.js +1 -0
  51. package/build/metadata/uci-scraper.js +49 -0
  52. package/build/monitoring/observability.js +76 -0
  53. package/build/quality/analyzer.js +57 -0
  54. package/build/quality/image-analyzer.js +46 -0
  55. package/build/quality/media-analyzer.js +46 -0
  56. package/build/quality/quality-orchestrator.js +162 -0
  57. package/build/quality/types.js +1 -0
  58. package/build/scripts/build-index.js +54 -0
  59. package/build/scripts/check-db.js +73 -0
  60. package/build/scripts/check-jobs.js +24 -0
  61. package/build/scripts/check-naruto.js +17 -0
  62. package/build/scripts/demo-full-pipeline.js +62 -0
  63. package/build/scripts/demo-ui.js +58 -0
  64. package/build/scripts/e2e-demo.js +72 -0
  65. package/build/scripts/massive-scrape.js +103 -0
  66. package/build/scripts/ops-dashboard.js +33 -0
  67. package/build/scripts/scrape-metadata.js +100 -0
  68. package/build/scripts/search-cli.js +26 -0
  69. package/build/scripts/test-bias.js +45 -0
  70. package/build/scripts/test-caching.js +51 -0
  71. package/build/scripts/test-cleaning.js +76 -0
  72. package/build/scripts/test-cloud-storage.js +48 -0
  73. package/build/scripts/test-compliance.js +58 -0
  74. package/build/scripts/test-conversion.js +64 -0
  75. package/build/scripts/test-custom-rules.js +58 -0
  76. package/build/scripts/test-db-opt.js +63 -0
  77. package/build/scripts/test-export-custom.js +33 -0
  78. package/build/scripts/test-exporter.js +53 -0
  79. package/build/scripts/test-fusion.js +61 -0
  80. package/build/scripts/test-github.js +27 -0
  81. package/build/scripts/test-group-split.js +52 -0
  82. package/build/scripts/test-hf-download.js +29 -0
  83. package/build/scripts/test-holdout-manager.js +61 -0
  84. package/build/scripts/test-hybrid-search.js +41 -0
  85. package/build/scripts/test-image-analysis.js +50 -0
  86. package/build/scripts/test-ingestion-infra.js +39 -0
  87. package/build/scripts/test-install.js +40 -0
  88. package/build/scripts/test-institutional.js +26 -0
  89. package/build/scripts/test-integrity.js +41 -0
  90. package/build/scripts/test-jit.js +42 -0
  91. package/build/scripts/test-job-queue.js +62 -0
  92. package/build/scripts/test-kaggle-download.js +34 -0
  93. package/build/scripts/test-large-data.js +50 -0
  94. package/build/scripts/test-mcp-v5.js +73 -0
  95. package/build/scripts/test-media-analysis.js +61 -0
  96. package/build/scripts/test-monitoring.js +91 -0
  97. package/build/scripts/test-observability.js +106 -0
  98. package/build/scripts/test-packager.js +55 -0
  99. package/build/scripts/test-pipeline.js +50 -0
  100. package/build/scripts/test-planning.js +64 -0
  101. package/build/scripts/test-privacy.js +38 -0
  102. package/build/scripts/test-quality.js +43 -0
  103. package/build/scripts/test-robust-ingestion.js +41 -0
  104. package/build/scripts/test-schema.js +45 -0
  105. package/build/scripts/test-split-validation.js +40 -0
  106. package/build/scripts/test-splitter.js +93 -0
  107. package/build/scripts/test-uci.js +27 -0
  108. package/build/scripts/test-unified-quality.js +86 -0
  109. package/build/search/embedder.js +34 -0
  110. package/build/search/engine.js +129 -0
  111. package/build/search/jit-orchestrator.js +232 -0
  112. package/build/search/vector-store.js +105 -0
  113. package/build/splitting/splitter.js +57 -0
  114. package/build/splitting/types.js +1 -0
  115. package/build/tools/formatter.js +227 -0
  116. package/build/utils/downloader.js +52 -0
  117. package/mcp-config-template.json +15 -0
  118. package/package.json +84 -0
  119. package/src/python/__pycache__/framework_adapters.cpython-312.pyc +0 -0
  120. package/src/python/cleaner.py +196 -0
  121. package/src/python/export_engine.py +112 -0
  122. package/src/python/framework_adapters.py +100 -0
  123. package/src/python/github_adapter.py +106 -0
  124. package/src/python/image_engine.py +86 -0
  125. package/src/python/media_engine.py +133 -0
  126. package/src/python/nasa_adapter.py +82 -0
  127. package/src/python/quality_engine.py +243 -0
  128. package/src/python/splitter_engine.py +283 -0
  129. package/src/python/test_framework_adapters.py +61 -0
  130. package/src/python/uci_adapter.py +94 -0
  131. package/src/python/worldbank_adapter.py +99 -0
package/src/python/framework_adapters.py
@@ -0,0 +1,100 @@
+ import os
+ import json
+ import warnings
+
+ # --- PyTorch Adapter ---
+ try:
+     import torch
+     from torch.utils.data import Dataset
+     import polars as pl
+
+     class VesperPyTorchDataset(Dataset):
+         """
+         PyTorch Dataset wrapper for Vesper exported files (Parquet/CSV/Arrow).
+         Efficiently loads data using Polars and converts to Tensors on demand.
+         """
+         def __init__(self, file_path, target_col=None, transform=None):
+             self.file_path = file_path
+             self.target_col = target_col
+             self.transform = transform
+
+             # Auto-detect format
+             if file_path.endswith(".parquet"):
+                 self.df = pl.read_parquet(file_path)
+             elif file_path.endswith(".csv"):
+                 self.df = pl.read_csv(file_path, ignore_errors=True)
+             elif file_path.endswith(".arrow"):
+                 self.df = pl.read_ipc(file_path)
+             else:
+                 raise ValueError(f"Unsupported file format for PyTorch loader: {file_path}")
+
+             self.data = self.df.to_pandas()  # Convert to pandas for easier row access in __getitem__ (row-wise Polars slicing can be slow)
+
+         def __len__(self):
+             return len(self.data)
+
+         def __getitem__(self, idx):
+             row = self.data.iloc[idx]
+
+             # Simple assumption: all numeric columns except the target are features.
+             # In production, metadata would tell us which columns are features.
+             if self.target_col and self.target_col in row:
+                 y = row[self.target_col]
+                 x = row.drop(self.target_col).values
+
+                 # Convert to tensors
+                 x = torch.tensor(x, dtype=torch.float32)
+                 # Auto-detect target type (scalar vs class index)
+                 if isinstance(y, (int, float)):
+                     y = torch.tensor(y, dtype=torch.float32)  # Regression/Binary
+                 else:
+                     # TODO: Label encoding if string
+                     pass
+
+                 sample = (x, y)
+             else:
+                 # Unsupervised
+                 x = torch.tensor(row.values, dtype=torch.float32)
+                 sample = x
+
+             if self.transform:
+                 sample = self.transform(sample)
+
+             return sample
+
+ except ImportError:
+     class VesperPyTorchDataset:
+         def __init__(self, *args, **kwargs):
+             raise ImportError("PyTorch or Polars not installed.")
+
+ # --- HuggingFace Adapter ---
+ try:
+     from datasets import load_dataset as hf_load_dataset
+
+     def load_vesper_dataset(file_path):
+         """
+         Loads a Vesper export into a Hugging Face Dataset.
+         Supported: Parquet, CSV, JSONL, Arrow.
+         """
+         output_format = "parquet"  # Default fallback
+         if file_path.endswith(".csv"): output_format = "csv"
+         elif file_path.endswith(".jsonl"): output_format = "json"
+         elif file_path.endswith(".arrow"): output_format = "arrow"
+
+         # 'arrow' format in HF might need a custom script, but parquet/csv/json are native
+         if output_format == "arrow":
+             # Use polars to read, then convert to an HF Dataset
+             import polars as pl
+             from datasets import Dataset
+             df = pl.read_ipc(file_path).to_pandas()
+             return Dataset.from_pandas(df)
+
+         return hf_load_dataset(output_format, data_files=file_path, split="train")
+
+ except ImportError:
+     def load_vesper_dataset(*args, **kwargs):
+         raise ImportError("HuggingFace 'datasets' library not installed.")
+
+ if __name__ == "__main__":
+     print("Vesper Framework Adapters Library")
+     print("Usage: import this module in your training script.")
package/src/python/github_adapter.py
@@ -0,0 +1,106 @@
+ import sys
+ import json
+ import argparse
+ import urllib.request
+ import urllib.parse
+ import urllib.error
+ import os
+ from datetime import datetime
+
+ GITHUB_API_URL = "https://api.github.com/search/repositories"
+
+ def search_github(query: str, limit: int = 10):
+     """
+     Search GitHub for dataset repositories.
+     """
+     try:
+         # Construct refined query:
+         # User query + topic:dataset qualifier
+         # We also filter for repositories with > 5 stars to ensure some relevance
+         refined_query = f"{query} topic:dataset stars:>5"
+
+         params = {
+             "q": refined_query,
+             "sort": "stars",
+             "order": "desc",
+             "per_page": limit
+         }
+
+         query_string = urllib.parse.urlencode(params)
+         url = f"{GITHUB_API_URL}?{query_string}"
+
+         req = urllib.request.Request(url)
+
+         # Add User-Agent (required by GitHub)
+         req.add_header("User-Agent", "Vesper-Dataset-Search")
+
+         # Add Authorization if a token exists
+         token = os.environ.get("GITHUB_TOKEN")
+         if token:
+             req.add_header("Authorization", f"token {token}")
+
+         with urllib.request.urlopen(req) as response:
+             data = json.load(response)
+
+         items = data.get('items', [])
+         results = []
+
+         for item in items:
+             # Map GitHub fields to Vesper schema
+             # repo: owner/name
+             repo_id = item.get("full_name")
+
+             # Simple licensing map
+             license_info = item.get("license") or {}
+             license_key = license_info.get("key", "unknown")
+             license_category = "safe" if license_key in ["mit", "apache-2.0", "cc0-1.0", "bsd-3-clause"] else "unknown"
+
+             metadata = {
+                 "id": f"github:{repo_id}",
+                 "source": "github",
+                 "name": item.get("name"),
+                 "description": item.get("description") or "No description provided.",
+                 "downloads": (item.get("forks_count") or 0) * 10,  # Proxy estimation
+                 "likes": item.get("stargazers_count"),
+                 "stars": item.get("stargazers_count"),
+                 "last_updated": item.get("updated_at"),
+                 "quality_score": min(100, 50 + (item.get("stargazers_count", 0))),  # Rough heuristic
+                 "license": {
+                     "id": license_key,
+                     "name": license_info.get("name", "Unknown"),
+                     "category": license_category,
+                     "usage_restrictions": [],
+                     "warnings": []
+                 },
+                 "tags": item.get("topics", []),
+                 "total_examples": 0,  # Unknown without drilling deeper
+                 "is_safe_source": True,  # GitHub is generally safe code, content varies
+                 "is_structured": False,  # Often contains code + data
+                 "metadata_url": item.get("html_url"),
+                 "domain": "general"
+             }
+
+             results.append(metadata)
+
+         return results
+
+     except urllib.error.HTTPError as e:
+         if e.code == 403:
+             return {"error": "Rate limit exceeded. Set GITHUB_TOKEN environment variable."}
+         return {"error": f"HTTP Error {e.code}: {e.reason}"}
+     except Exception as e:
+         return {"error": str(e)}
+
+ def main():
+     parser = argparse.ArgumentParser(description="GitHub Adapter")
+     parser.add_argument("--action", required=True, choices=["search"])
+     parser.add_argument("--query", required=True)
+     parser.add_argument("--limit", type=int, default=10)
+
+     args = parser.parse_args()
+
+     if args.action == "search":
+         results = search_github(args.query, args.limit)
+         print(json.dumps(results))
+
+ if __name__ == "__main__":
+     main()
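The adapter is a standalone CLI (python github_adapter.py --action search --query "..." --limit N), but search_github can also be called directly. A minimal sketch, assuming the module is on the Python path and network access is available; the query string is illustrative and GITHUB_TOKEN is optional:

import json
from github_adapter import search_github

# Unauthenticated requests hit GitHub's search rate limit quickly; export GITHUB_TOKEN if possible.
results = search_github("air quality", limit=5)
print(json.dumps(results, indent=2))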
package/src/python/image_engine.py
@@ -0,0 +1,86 @@
+ import sys
+ import json
+ import os
+ from PIL import Image
+ import cv2
+ import numpy as np
+
+ def analyze_image(image_path):
+     stats = {
+         "path": image_path,
+         "filename": os.path.basename(image_path),
+         "status": "ok",
+         "error": None
+     }
+
+     try:
+         # 1. Basic metadata with Pillow
+         img = Image.open(image_path)
+         stats["width"], stats["height"] = img.size
+         stats["format"] = img.format
+         stats["mode"] = img.mode
+
+         # 2. Advanced analysis with OpenCV
+         cv_img = cv2.imread(image_path)
+         if cv_img is None:
+             stats["status"] = "corrupted"
+             stats["error"] = "OpenCV failed to decode image"
+             return stats
+
+         # Blur detection (Laplacian variance); cast NumPy scalars to native types for JSON
+         gray = cv2.cvtColor(cv_img, cv2.COLOR_BGR2GRAY)
+         laplacian_var = cv2.Laplacian(gray, cv2.CV_64F).var()
+         stats["blur_score"] = float(laplacian_var)
+         stats["is_blurry"] = bool(laplacian_var < 100)  # Rule-of-thumb threshold
+
+         # Brightness
+         stats["brightness"] = float(np.mean(gray))
+
+         # Aspect ratio
+         stats["aspect_ratio"] = stats["width"] / stats["height"]
+
+     except Exception as e:
+         stats["status"] = "failed"
+         stats["error"] = str(e)
+
+     return stats
+
+ def main():
+     if len(sys.argv) < 2:
+         print(json.dumps({"error": "No path provided"}))
+         sys.exit(1)
+
+     input_path = sys.argv[1]
+     results = []
+
+     if os.path.isfile(input_path):
+         results.append(analyze_image(input_path))
+     elif os.path.isdir(input_path):
+         # Analyze first 50 images for performance in this demo
+         valid_exts = (".jpg", ".jpeg", ".png", ".bmp", ".webp")
+         files = [os.path.join(input_path, f) for f in os.listdir(input_path) if f.lower().endswith(valid_exts)]
+         for f in files[:50]:
+             results.append(analyze_image(f))
+     else:
+         print(json.dumps({"error": "Invalid path"}))
+         sys.exit(1)
+
+     # Aggregate stats
+     if not results:
+         print(json.dumps({"error": "No images found"}))
+         sys.exit(1)
+
+     report = {
+         "total_images": len(results),
+         "corrupted_count": len([r for r in results if r["status"] == "corrupted"]),
+         "failed_count": len([r for r in results if r["status"] == "failed"]),
+         "average_width": float(np.mean([r["width"] for r in results if "width" in r])),
+         "average_height": float(np.mean([r["height"] for r in results if "height" in r])),
+         "blurry_count": len([r for r in results if r.get("is_blurry")]),
+         "individual_results": results
+     }
+
+     print(json.dumps(report))
+
+ if __name__ == "__main__":
+     main()
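Since the engine takes a single positional path argument and prints a JSON report to stdout, it can be driven as a subprocess (which is presumably how the Node side calls it). A sketch under the assumption that it runs from the package root and that ./samples/images is a hypothetical directory of images:

import json
import subprocess

# Run the engine on a directory (it samples at most 50 images) and parse the JSON report.
proc = subprocess.run(
    ["python", "src/python/image_engine.py", "./samples/images"],
    capture_output=True, text=True, check=True,
)
report = json.loads(proc.stdout)
print(report["total_images"], "images,", report["blurry_count"], "flagged blurry")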
package/src/python/media_engine.py
@@ -0,0 +1,133 @@
+ import sys
+ import json
+ import os
+ import cv2
+ import numpy as np
+
+ # Audio analysis depends on librosa/soundfile. Fall back if not available.
+ try:
+     import librosa
+     AUDIO_SUPPORT = True
+ except ImportError:
+     AUDIO_SUPPORT = False
+
+ def analyze_audio(path):
+     if not AUDIO_SUPPORT:
+         return {"status": "error", "error": "librosa not installed"}
+
+     try:
+         # Load audio (mono, native sample rate)
+         y, sr = librosa.load(path, sr=None)
+         duration = librosa.get_duration(y=y, sr=sr)
+
+         # Audio metrics
+         rms = librosa.feature.rms(y=y)
+         avg_rms = float(np.mean(rms))
+
+         return {
+             "status": "ok",
+             "type": "audio",
+             "filename": os.path.basename(path),
+             "sample_rate": int(sr),
+             "duration": float(duration),
+             "avg_volume_rms": avg_rms,
+             "is_silent": avg_rms < 0.001
+         }
+     except Exception as e:
+         return {"status": "error", "error": str(e)}
+
+ def analyze_video(path):
+     try:
+         cap = cv2.VideoCapture(path)
+         if not cap.isOpened():
+             return {"status": "error", "error": "Could not open video file"}
+
+         width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+         height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+         fps = cap.get(cv2.CAP_PROP_FPS)
+         frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+         duration = frame_count / fps if fps > 0 else 0
+
+         # Check integrity by reading a few frames
+         test_frame_indices = [0, frame_count // 2, frame_count - 1] if frame_count > 0 else []
+         failed_frames = 0
+
+         for idx in test_frame_indices:
+             cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
+             ret, frame = cap.read()
+             if not ret or frame is None:
+                 failed_frames += 1
+
+         cap.release()
+
+         return {
+             "status": "ok",
+             "type": "video",
+             "filename": os.path.basename(path),
+             "width": width,
+             "height": height,
+             "fps": float(fps),
+             "duration": float(duration),
+             "frame_count": frame_count,
+             "corruption_risk": "high" if failed_frames > 0 else "low"
+         }
+     except Exception as e:
+         return {"status": "error", "error": str(e)}
+
+ def main():
+     if len(sys.argv) < 2:
+         print(json.dumps({"error": "No path provided"}))
+         sys.exit(1)
+
+     input_path = sys.argv[1]
+     results = []
+
+     # Supported extensions
+     AUDIO_EXTS = (".wav", ".mp3", ".flac", ".ogg", ".m4a")
+     VIDEO_EXTS = (".mp4", ".avi", ".mkv", ".mov", ".wmv")
+
+     if os.path.isfile(input_path):
+         ext = os.path.splitext(input_path.lower())[1]
+         if ext in AUDIO_EXTS:
+             results.append(analyze_audio(input_path))
+         elif ext in VIDEO_EXTS:
+             results.append(analyze_video(input_path))
+         else:
+             results.append({"status": "error", "error": f"Unsupported file type: {ext}"})
+     elif os.path.isdir(input_path):
+         files = [os.path.join(input_path, f) for f in os.listdir(input_path)]
+         for f in files[:50]:  # Limit for demo
+             ext = os.path.splitext(f.lower())[1]
+             if ext in AUDIO_EXTS:
+                 results.append(analyze_audio(f))
+             elif ext in VIDEO_EXTS:
+                 results.append(analyze_video(f))
+     else:
+         print(json.dumps({"error": "Invalid path"}))
+         sys.exit(1)
+
+     # Filter out failed results for report aggregation
+     ok_results = [r for r in results if r.get("status") == "ok"]
+
+     report = {
+         "total_files": len(results),
+         "ok_files": len(ok_results),
+         "failed_files": len(results) - len(ok_results),
+         "details": results
+     }
+
+     # Calculate some averages if files were found
+     if ok_results:
+         audio_files = [r for r in ok_results if r["type"] == "audio"]
+         video_files = [r for r in ok_results if r["type"] == "video"]
+
+         if audio_files:
+             report["avg_audio_duration"] = float(np.mean([r["duration"] for r in audio_files]))
+         if video_files:
+             report["avg_video_duration"] = float(np.mean([r["duration"] for r in video_files]))
+             report["avg_fps"] = float(np.mean([r["fps"] for r in video_files]))
+
+     print(json.dumps(report))
+
+ if __name__ == "__main__":
+     main()
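The two analyzers can also be imported directly rather than going through main(). A small sketch, assuming librosa and OpenCV are installed; the clip paths are hypothetical:

import json
from media_engine import analyze_audio, analyze_video

# Each call returns a plain dict; on failure it carries status="error" and the message.
print(json.dumps(analyze_audio("clips/speech.wav"), indent=2))
print(json.dumps(analyze_video("clips/demo.mp4"), indent=2))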
package/src/python/nasa_adapter.py
@@ -0,0 +1,82 @@
+ import sys
+ import json
+ import argparse
+ import urllib.request
+ import urllib.parse
+ from datetime import datetime
+
+ # NASA Data Portal uses Socrata
+ NASA_API_URL = "https://api.us.socrata.com/api/catalog/v1"
+ NASA_DOMAIN = "data.nasa.gov"
+
+ def search_nasa(query: str, limit: int = 10):
+     """
+     Search the NASA data portal.
+     """
+     try:
+         params = {
+             "q": query,
+             "limit": limit,
+             "domains": NASA_DOMAIN,
+             "search_context": NASA_DOMAIN
+         }
+
+         query_string = urllib.parse.urlencode(params)
+         url = f"{NASA_API_URL}?{query_string}"
+
+         req = urllib.request.Request(url)
+         with urllib.request.urlopen(req) as response:
+             data = json.load(response)
+
+         results = []
+         # Socrata catalog results are in 'results'
+         items = data.get('results', [])
+
+         for item in items:
+             ds = item.get('resource', {})
+
+             metadata = {
+                 "id": f"nasa:{ds.get('id')}",
+                 "source": "nasa",
+                 "name": ds.get('name'),
+                 "description": ds.get('description') or "No description available.",
+                 "downloads": ds.get('download_count', 0),
+                 "likes": ds.get('view_count', 0) // 10,
+                 "last_updated": ds.get('updatedAt') or datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
+                 "quality_score": 90,
+                 "license": {
+                     "id": "public_domain",
+                     "name": "Public Domain",
+                     "category": "safe",
+                     "usage_restrictions": [],
+                     "warnings": []
+                 },
+                 "tags": ds.get('tags', []),
+                 "total_examples": 0,
+                 "is_safe_source": True,
+                 "is_structured": True,
+                 "metadata_url": f"https://data.nasa.gov/d/{ds.get('id')}",
+                 "domain": "science"
+             }
+
+             results.append(metadata)
+
+         return results
+
+     except Exception as e:
+         return {"error": str(e)}
+
+ def main():
+     parser = argparse.ArgumentParser(description="NASA Adapter")
+     parser.add_argument("--action", required=True, choices=["search"])
+     parser.add_argument("--query", required=True)
+     parser.add_argument("--limit", type=int, default=10)
+
+     args = parser.parse_args()
+
+     if args.action == "search":
+         results = search_nasa(args.query, args.limit)
+         print(json.dumps(results))
+
+ if __name__ == "__main__":
+     main()
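Like the GitHub adapter, this one exposes an argparse CLI and is presumably invoked as a subprocess by the Node server. A hedged invocation sketch, assuming it runs from the package root with network access; the query is illustrative:

import json
import subprocess

proc = subprocess.run(
    ["python", "src/python/nasa_adapter.py", "--action", "search", "--query", "mars rover", "--limit", "3"],
    capture_output=True, text=True, check=True,
)
results = json.loads(proc.stdout)
if isinstance(results, list):  # search_nasa returns {"error": ...} on failure
    for entry in results:
        print(entry["id"], "-", entry["name"])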