@vespermcp/mcp-server 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52):
  1. package/README.md +6 -4
  2. package/build/cleaning/cleaner.js +27 -2
  3. package/build/cleaning/executor.js +7 -6
  4. package/build/cleaning/planner.js +16 -4
  5. package/build/config/config-manager.js +199 -0
  6. package/build/export/exporter.js +26 -2
  7. package/build/index.js +272 -72
  8. package/build/ingestion/ingestor.js +17 -16
  9. package/build/ingestion/kaggle-downloader.js +25 -2
  10. package/build/install/install-service.js +1 -1
  11. package/build/jobs/manager.js +17 -10
  12. package/build/metadata/monitoring-service.js +2 -2
  13. package/build/metadata/scraper.js +8 -8
  14. package/build/metadata/store.js +17 -2
  15. package/build/monitoring/observability.js +2 -2
  16. package/build/preparation/target-detector.js +75 -0
  17. package/build/python/cleaner.py +226 -0
  18. package/build/python/export_engine.py +131 -0
  19. package/build/python/framework_adapters.py +100 -0
  20. package/build/python/github_adapter.py +106 -0
  21. package/build/python/image_engine.py +86 -0
  22. package/build/python/media_engine.py +133 -0
  23. package/build/python/nasa_adapter.py +82 -0
  24. package/build/python/quality_engine.py +243 -0
  25. package/build/python/splitter_engine.py +283 -0
  26. package/build/python/target_engine.py +154 -0
  27. package/build/python/test_framework_adapters.py +61 -0
  28. package/build/python/uci_adapter.py +94 -0
  29. package/build/python/worldbank_adapter.py +99 -0
  30. package/build/quality/analyzer.js +40 -4
  31. package/build/quality/image-analyzer.js +73 -5
  32. package/build/quality/media-analyzer.js +74 -5
  33. package/build/scripts/cleanup-kaggle.js +41 -0
  34. package/build/scripts/repro-bug.js +37 -0
  35. package/build/scripts/repro-export-bug.js +56 -0
  36. package/build/scripts/test-mcp-v5.js +12 -11
  37. package/build/scripts/test-production-sync.js +36 -0
  38. package/build/scripts/test-target-detector.js +29 -0
  39. package/build/scripts/test-write.js +14 -0
  40. package/build/scripts/verify-integration.js +57 -0
  41. package/build/scripts/verify-priority.js +33 -0
  42. package/build/search/engine.js +13 -2
  43. package/build/search/jit-orchestrator.js +6 -40
  44. package/build/search/vector-store.js +18 -0
  45. package/build/splitting/splitter.js +27 -2
  46. package/build/tools/formatter.js +23 -8
  47. package/build/utils/downloader.js +2 -2
  48. package/build/utils/selector.js +69 -0
  49. package/package.json +8 -4
  50. package/src/python/cleaner.py +33 -3
  51. package/src/python/export_engine.py +19 -0
  52. package/src/python/target_engine.py +154 -0
@@ -1,12 +1,44 @@
1
1
  import { spawn } from "child_process";
2
2
  import path from "path";
3
+ import fs from "fs";
3
4
  export class QualityAnalyzer {
4
5
  cache;
5
6
  pythonPath = "python"; // Assumes python is in PATH
6
7
  scriptPath;
7
- constructor(cache, projectRoot = process.cwd()) {
8
+ constructor(cache, buildDir = process.cwd()) {
9
+ // buildDir is the directory containing the compiled JS (e.g., build/)
10
+ // Priority:
11
+ // 1. ~/.vesper/python (stable synced location)
12
+ // 2. build/python (production)
13
+ // 3. src/python (development)
8
14
  this.cache = cache;
9
- this.scriptPath = path.join(projectRoot, "src", "python", "quality_engine.py");
15
+ const homeDir = process.env.HOME || process.env.USERPROFILE || buildDir;
16
+ const dataRoot = path.join(homeDir, ".vesper");
17
+ const scriptPath0 = path.resolve(dataRoot, "python", "quality_engine.py");
18
+ const scriptPath1 = path.resolve(buildDir, "python", "quality_engine.py");
19
+ const scriptPath2 = path.resolve(buildDir, "..", "src", "python", "quality_engine.py");
20
+ const scriptPath3 = path.resolve(buildDir, "..", "python", "quality_engine.py");
21
+ if (fs.existsSync(scriptPath0)) {
22
+ this.scriptPath = scriptPath0;
23
+ }
24
+ else if (fs.existsSync(scriptPath1)) {
25
+ this.scriptPath = scriptPath1;
26
+ }
27
+ else if (fs.existsSync(scriptPath2)) {
28
+ this.scriptPath = scriptPath2;
29
+ }
30
+ else if (fs.existsSync(scriptPath3)) {
31
+ this.scriptPath = scriptPath3;
32
+ }
33
+ else {
34
+ // Fallback to stable data path, error will be caught during execution
35
+ this.scriptPath = scriptPath0;
36
+ console.error(`[QualityAnalyzer] WARNING: Python script not found!`);
37
+ }
38
+ // Detect Python command (Windows may use 'py' instead of 'python')
39
+ if (process.platform === "win32") {
40
+ this.pythonPath = "py";
41
+ }
10
42
  }
11
43
  /**
12
44
  * Run quality analysis on a local file (CSV/Parquet/JSON)
@@ -16,7 +48,7 @@ export class QualityAnalyzer {
16
48
  if (this.cache && datasetId) {
17
49
  const cached = await this.cache.getReport(datasetId);
18
50
  if (cached) {
19
- console.log(`[QualityAnalyzer] Cache hit for ${datasetId}`);
51
+ console.error(`[QualityAnalyzer] Cache hit for ${datasetId}`);
20
52
  return cached;
21
53
  }
22
54
  }
@@ -32,7 +64,11 @@ export class QualityAnalyzer {
32
64
  });
33
65
  process.on("close", (code) => {
34
66
  if (code !== 0) {
35
- reject(new Error(`Quality Analyzer failed (code ${code}): ${stderr}`));
67
+ const errorDetails = `Quality Analyzer failed (code ${code})
68
+ Command: ${this.pythonPath} ${this.scriptPath} ${filePath}
69
+ Script path exists: ${fs.existsSync(this.scriptPath)}
70
+ Error output: ${stderr}`;
71
+ reject(new Error(errorDetails));
36
72
  return;
37
73
  }
38
74
  try {
@@ -1,10 +1,36 @@
1
1
  import { spawn } from "child_process";
2
2
  import path from "path";
3
+ import fs from "fs";
3
4
  export class ImageAnalyzer {
4
5
  pythonPath = "python";
5
6
  scriptPath;
6
- constructor(projectRoot = process.cwd()) {
7
- this.scriptPath = path.join(projectRoot, "src", "python", "image_engine.py");
7
+ constructor(buildDir = process.cwd()) {
8
+ // buildDir is the directory containing the compiled JS (e.g., build/)
9
+ const homeDir = process.env.HOME || process.env.USERPROFILE || buildDir;
10
+ const dataRoot = path.join(homeDir, ".vesper");
11
+ const scriptPath0 = path.resolve(dataRoot, "python", "image_engine.py");
12
+ const scriptPath1 = path.resolve(buildDir, "python", "image_engine.py");
13
+ const scriptPath2 = path.resolve(buildDir, "..", "src", "python", "image_engine.py");
14
+ const scriptPath3 = path.resolve(buildDir, "..", "python", "image_engine.py");
15
+ if (fs.existsSync(scriptPath0)) {
16
+ this.scriptPath = scriptPath0;
17
+ }
18
+ else if (fs.existsSync(scriptPath1)) {
19
+ this.scriptPath = scriptPath1;
20
+ }
21
+ else if (fs.existsSync(scriptPath2)) {
22
+ this.scriptPath = scriptPath2;
23
+ }
24
+ else if (fs.existsSync(scriptPath3)) {
25
+ this.scriptPath = scriptPath3;
26
+ }
27
+ else {
28
+ this.scriptPath = scriptPath0;
29
+ }
30
+ // Detect Python command (Windows may use 'py' instead of 'python')
31
+ if (process.platform === "win32") {
32
+ this.pythonPath = "py";
33
+ }
8
34
  }
9
35
  /**
10
36
  * Analyze image quality for a single file or a directory
@@ -23,22 +49,64 @@ export class ImageAnalyzer {
23
49
  process.stderr.on("data", (data) => {
24
50
  stderr += data.toString();
25
51
  });
52
+ process.on("error", (err) => {
53
+ if (err.code === "ENOENT") {
54
+ // Python not found - return a graceful failure report
55
+ resolve({
56
+ total_images: 1,
57
+ ok_images: 0,
58
+ failed_images: 1,
59
+ details: [{
60
+ status: "error",
61
+ error: "Python not installed or not in PATH. Please install Python to use image analysis features."
62
+ }]
63
+ });
64
+ }
65
+ else {
66
+ reject(new Error(`Failed to start image analysis process: ${err.message}`));
67
+ }
68
+ });
26
69
  process.on("close", (code) => {
27
70
  if (code !== 0) {
28
- reject(new Error(`Image Analyzer failed (code ${code}): ${stderr}`));
71
+ // Handle case where script fails
72
+ resolve({
73
+ total_images: 1,
74
+ ok_images: 0,
75
+ failed_images: 1,
76
+ details: [{
77
+ status: "error",
78
+ error: `Image Analyzer process failed (code ${code}): ${stderr || "Unknown error"}`
79
+ }]
80
+ });
29
81
  return;
30
82
  }
31
83
  try {
32
84
  const result = JSON.parse(stdout);
33
85
  if (result.error) {
34
- reject(new Error(result.error));
86
+ resolve({
87
+ total_images: 1,
88
+ ok_images: 0,
89
+ failed_images: 1,
90
+ details: [{
91
+ status: "error",
92
+ error: result.error
93
+ }]
94
+ });
35
95
  }
36
96
  else {
37
97
  resolve(result);
38
98
  }
39
99
  }
40
100
  catch (e) {
41
- reject(new Error(`Failed to parse image analyzer output: ${stdout}`));
101
+ resolve({
102
+ total_images: 1,
103
+ ok_images: 0,
104
+ failed_images: 1,
105
+ details: [{
106
+ status: "error",
107
+ error: `Failed to parse image analyzer output: ${stdout}`
108
+ }]
109
+ });
42
110
  }
43
111
  });
44
112
  });
@@ -1,10 +1,36 @@
1
1
  import { spawn } from "child_process";
2
2
  import path from "path";
3
+ import fs from "fs";
3
4
  export class MediaAnalyzer {
4
5
  pythonPath = "python";
5
6
  scriptPath;
6
- constructor(projectRoot = process.cwd()) {
7
- this.scriptPath = path.join(projectRoot, "src", "python", "media_engine.py");
7
+ constructor(buildDir = process.cwd()) {
8
+ // buildDir is the directory containing the compiled JS (e.g., build/)
9
+ const homeDir = process.env.HOME || process.env.USERPROFILE || buildDir;
10
+ const dataRoot = path.join(homeDir, ".vesper");
11
+ const scriptPath0 = path.resolve(dataRoot, "python", "media_engine.py");
12
+ const scriptPath1 = path.resolve(buildDir, "python", "media_engine.py");
13
+ const scriptPath2 = path.resolve(buildDir, "..", "src", "python", "media_engine.py");
14
+ const scriptPath3 = path.resolve(buildDir, "..", "python", "media_engine.py");
15
+ if (fs.existsSync(scriptPath0)) {
16
+ this.scriptPath = scriptPath0;
17
+ }
18
+ else if (fs.existsSync(scriptPath1)) {
19
+ this.scriptPath = scriptPath1;
20
+ }
21
+ else if (fs.existsSync(scriptPath2)) {
22
+ this.scriptPath = scriptPath2;
23
+ }
24
+ else if (fs.existsSync(scriptPath3)) {
25
+ this.scriptPath = scriptPath3;
26
+ }
27
+ else {
28
+ this.scriptPath = scriptPath0;
29
+ }
30
+ // Detect Python command (Windows may use 'py' instead of 'python')
31
+ if (process.platform === "win32") {
32
+ this.pythonPath = "py";
33
+ }
8
34
  }
9
35
  /**
10
36
  * Analyze audio/video quality for a single file or a directory
@@ -23,22 +49,65 @@ export class MediaAnalyzer {
23
49
  process.stderr.on("data", (data) => {
24
50
  stderr += data.toString();
25
51
  });
52
+ process.on("error", (err) => {
53
+ if (err.code === "ENOENT") {
54
+ // Python not found - return a graceful failure report
55
+ resolve({
56
+ total_files: 1,
57
+ ok_files: 0,
58
+ failed_files: 1,
59
+ details: [{
60
+ status: "error",
61
+ error: "Python not installed or not in PATH. Please install Python to use media analysis features."
62
+ }]
63
+ });
64
+ }
65
+ else {
66
+ reject(new Error(`Failed to start media analysis process: ${err.message}`));
67
+ }
68
+ });
26
69
  process.on("close", (code) => {
27
70
  if (code !== 0) {
28
- reject(new Error(`Media Analyzer failed (code ${code}): ${stderr}`));
71
+ // Handle case where python exists but script fails
72
+ resolve({
73
+ total_files: 1,
74
+ ok_files: 0,
75
+ failed_files: 1,
76
+ details: [{
77
+ status: "error",
78
+ error: `Media Analyzer process failed (code ${code}): ${stderr || "Unknown error"}`
79
+ }]
80
+ });
29
81
  return;
30
82
  }
31
83
  try {
32
84
  const result = JSON.parse(stdout);
33
85
  if (result.error) {
34
- reject(new Error(result.error));
86
+ // Return error as part of report instead of rejecting
87
+ resolve({
88
+ total_files: 1,
89
+ ok_files: 0,
90
+ failed_files: 1,
91
+ details: [{
92
+ status: "error",
93
+ error: result.error
94
+ }]
95
+ });
35
96
  }
36
97
  else {
37
98
  resolve(result);
38
99
  }
39
100
  }
40
101
  catch (e) {
41
- reject(new Error(`Failed to parse media analyzer output: ${stdout}`));
102
+ resolve({
103
+ total_files: 1,
104
+ ok_files: 0,
105
+ failed_files: 1,
106
+ details: [{
107
+ status: "error",
108
+ error: `Failed to parse media analyzer output: ${stdout}`
109
+ }]
110
+ });
42
111
  }
43
112
  });
44
113
  });
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Cleanup script to remove all Kaggle datasets from Vesper
4
+ */
5
+ import { fileURLToPath } from "url";
6
+ import path from "path";
7
+ import { MetadataStore } from "../metadata/store.js";
8
+ import { VectorStore } from "../search/vector-store.js";
9
+ const __filename = fileURLToPath(import.meta.url);
10
+ const __dirname = path.dirname(__filename);
11
+ const homeDir = process.env.HOME || process.env.USERPROFILE || path.join(__dirname, "..");
12
+ const dataRoot = path.join(homeDir, ".vesper");
13
+ const dbPath = path.join(dataRoot, "data", "metadata.db");
14
+ const vectorPath = path.join(dataRoot, "data", "vectors.json");
15
+ console.log("🧹 Vesper Kaggle Cleanup");
16
+ console.log("========================\n");
17
+ try {
18
+ const metadataStore = new MetadataStore(dbPath);
19
+ const vectorStore = new VectorStore(vectorPath);
20
+ // Get all Kaggle dataset IDs
21
+ const kaggleIds = metadataStore.getDatasetIdsBySource("kaggle");
22
+ console.log(`Found ${kaggleIds.length} Kaggle datasets in database`);
23
+ if (kaggleIds.length === 0) {
24
+ console.log("✅ No Kaggle datasets to remove");
25
+ process.exit(0);
26
+ }
27
+ // Delete from vector store
28
+ const vectorsDeleted = vectorStore.deleteMany(kaggleIds);
29
+ console.log(`🗑️ Deleted ${vectorsDeleted} vectors from vector store`);
30
+ vectorStore.save();
31
+ // Delete from metadata database
32
+ const datasetsDeleted = metadataStore.deleteBySource("kaggle");
33
+ console.log(`🗑️ Deleted ${datasetsDeleted} datasets from metadata database`);
34
+ metadataStore.close();
35
+ console.log("\n✅ Cleanup complete! Kaggle datasets have been removed.");
36
+ console.log(" You can now search without seeing Kaggle results.");
37
+ }
38
+ catch (error) {
39
+ console.error("❌ Cleanup failed:", error.message);
40
+ process.exit(1);
41
+ }
@@ -0,0 +1,37 @@
1
+ import { JobManager } from "../jobs/manager.js";
2
+ import { MetadataStore } from "../metadata/store.js";
3
+ import fs from "fs";
4
+ async function repro() {
5
+ const dbPath = "repro_test.db";
6
+ if (fs.existsSync(dbPath))
7
+ fs.unlinkSync(dbPath);
8
+ const store = new MetadataStore(dbPath);
9
+ const jobManager = JobManager.getInstance(store);
10
+ console.log("Setting up listener...");
11
+ jobManager.on("processJob", async (job, execute) => {
12
+ console.log(`Listener received job ${job.id}`);
13
+ const task = async () => {
14
+ console.log("Running task...");
15
+ return "success";
16
+ };
17
+ try {
18
+ await execute(task);
19
+ console.log("Execute finished");
20
+ }
21
+ catch (e) {
22
+ console.error("Execute failed in listener:", e.message);
23
+ }
24
+ });
25
+ console.log("Creating job...");
26
+ const job = jobManager.createJob("prepare", 0, { query: "test" });
27
+ console.log(`Job created: ${job.id}`);
28
+ // Wait for a bit
29
+ await new Promise(r => setTimeout(r, 2000));
30
+ const finalJob = store.getJob(job.id);
31
+ console.log("Final job status:", finalJob?.status);
32
+ console.log("Final job status text:", finalJob?.status_text);
33
+ store.close();
34
+ if (fs.existsSync(dbPath))
35
+ fs.unlinkSync(dbPath);
36
+ }
37
+ repro().catch(console.error);
@@ -0,0 +1,56 @@
1
+ import path from "path";
2
+ import fs from "fs";
3
+ import { spawnSync } from "child_process";
4
+ const pythonPath = "python";
5
+ const scriptPath = path.join(process.cwd(), "src", "python", "cleaner.py");
6
+ const testDir = path.join(process.cwd(), "test_repro");
7
+ if (!fs.existsSync(testDir))
8
+ fs.mkdirSync(testDir);
9
+ async function runRepro() {
10
+ console.log("=== Reproducing CSV Export Bug ===\n");
11
+ const parquetFile = path.join(testDir, "test_nested.parquet");
12
+ const csvOutput = path.join(testDir, "test_nested_cleaned.csv");
13
+ // 1. Create a Parquet file with nested data (Lists/Structs) using Python
14
+ console.log("Creating nested Parquet file...");
15
+ const createScript = `
16
+ import polars as pl
17
+ df = pl.DataFrame({
18
+ "id": [1, 2, 3],
19
+ "tags": [["a", "b"], ["c"], []],
20
+ "meta": [{"score": 0.9, "safe": True}, {"score": 0.4, "safe": False}, {"score": 0.1, "safe": True}]
21
+ })
22
+ df.write_parquet(r"${parquetFile}")
23
+ `;
24
+ fs.writeFileSync(path.join(testDir, "create_data.py"), createScript);
25
+ spawnSync(pythonPath, [path.join(testDir, "create_data.py")], { stdio: 'inherit' });
26
+ // 2. Call cleaner.py to convert to CSV
27
+ console.log("Calling cleaner.py to convert to CSV...");
28
+ const result = spawnSync(pythonPath, [
29
+ scriptPath,
30
+ parquetFile,
31
+ "[]",
32
+ "csv"
33
+ ]);
34
+ console.log("Exit Code:", result.status);
35
+ console.log("Stdout:", result.stdout?.toString());
36
+ console.log("Stderr:", result.stderr?.toString());
37
+ if (result.status === 0) {
38
+ try {
39
+ const data = JSON.parse(result.stdout.toString());
40
+ if (data.success) {
41
+ console.log("SUCCESS! Output file:", data.output_path);
42
+ if (fs.existsSync(data.output_path)) {
43
+ console.log("File exists on disk.");
44
+ }
45
+ }
46
+ else {
47
+ console.error("cleaner.py reported failure:", data.error);
48
+ }
49
+ }
50
+ catch (e) {
51
+ console.error("Failed to parse JSON output:", e);
52
+ }
53
+ }
54
+ }
55
+ runRepro().catch(console.error);
56
+ runRepro().catch(console.error);
@@ -47,16 +47,18 @@ async function testPhase5Tools() {
47
47
  // Create job (Logic from index.ts)
48
48
  const job = jobManager.createJob("prepare", 0, { query });
49
49
  console.log(` - Job Created: ${job.id}`);
50
- // Start background task
51
- console.log(" - Starting autonomous preparation...");
52
- const jobPromise = jobManager.runJob(job.id, async (update) => {
53
- update({ progress: 20, status_text: "Searching..." });
54
- await new Promise(r => setTimeout(r, 1000));
55
- update({ progress: 50, status_text: "Analyzing quality..." });
56
- await new Promise(r => setTimeout(r, 1000));
57
- update({ progress: 80, status_text: "Formatting export..." });
58
- await new Promise(r => setTimeout(r, 1000));
59
- return "data/exports/prepared_dataset.parquet";
50
+ // Register listener for simulated work (Logic from index.ts)
51
+ jobManager.on("processJob", async (currJob, execute) => {
52
+ if (currJob.id !== job.id)
53
+ return;
54
+ await execute(async () => {
55
+ console.log(" - [Worker] Starting autonomous preparation task...");
56
+ await new Promise(r => setTimeout(r, 1000));
57
+ console.log(" - [Worker] Phase 1: Search complete");
58
+ await new Promise(r => setTimeout(r, 1000));
59
+ console.log(" - [Worker] Phase 2: Quality analysis complete");
60
+ return "data/exports/prepared_dataset.parquet";
61
+ });
60
62
  });
61
63
  // 4. Test check_job_status (Polling)
62
64
  console.log("\nStep 4: Polling Job Status (Simulating UI Check)...");
@@ -67,7 +69,6 @@ async function testPhase5Tools() {
67
69
  break;
68
70
  await new Promise(r => setTimeout(r, 800));
69
71
  }
70
- await jobPromise;
71
72
  console.log("\n Phase 5 tools logic verified.");
72
73
  }
73
74
  testPhase5Tools().catch(console.error);
@@ -0,0 +1,36 @@
1
+ import { JobManager } from "../jobs/manager.js";
2
+ import { MetadataStore } from "../metadata/store.js";
3
+ import fs from "fs";
4
+ async function testSync() {
5
+ console.log("Starting Production Sync Test...");
6
+ const dbPath = "prod_sync.db";
7
+ if (fs.existsSync(dbPath))
8
+ fs.unlinkSync(dbPath);
9
+ const store = new MetadataStore(dbPath);
10
+ const jobManager = JobManager.getInstance(store);
11
+ console.log("Attaching listener (same as index.ts)...");
12
+ jobManager.on("processJob", async (job, execute) => {
13
+ console.log(`[Listener 1] Received job ${job.id}`);
14
+ if (typeof execute !== 'function') {
15
+ console.error(`[Listener 1] ERROR: execute is not a function! It is: ${typeof execute}`);
16
+ return;
17
+ }
18
+ const task = async () => {
19
+ console.log("[Listener 1] Task running...");
20
+ return "ok";
21
+ };
22
+ await execute(task);
23
+ console.log("[Listener 1] Task finished.");
24
+ });
25
+ console.log("Emitting job...");
26
+ const job = jobManager.createJob("prepare", 0, { query: "test" });
27
+ // Wait for the background loop
28
+ await new Promise(r => setTimeout(r, 1000));
29
+ const finalJob = store.getJob(job.id);
30
+ console.log(`Job Result: ${finalJob?.status} - ${finalJob?.status_text}`);
31
+ store.close();
32
+ if (fs.existsSync(dbPath))
33
+ fs.unlinkSync(dbPath);
34
+ console.log("Test Complete.");
35
+ }
36
+ testSync().catch(console.error);
@@ -0,0 +1,29 @@
1
+ import { TargetDetector } from "../preparation/target-detector.js";
2
+ import path from "path";
3
+ async function testDetector() {
4
+ // 1. Test existing build dir
5
+ const detector = new TargetDetector(path.join(process.cwd(), "build"));
6
+ // 2. Create a dummy CSV for testing
7
+ const testFile = path.join(process.cwd(), "test_target.csv");
8
+ const fs = (await import("fs")).default;
9
+ // Test Case 1: SalePrice (Regression)
10
+ console.log("--- Test Case 1: SalePrice ---");
11
+ fs.writeFileSync(testFile, "id,feature1,feature2,SalePrice\n1,10,20,100000\n2,11,21,120000\n3,12,22,110000");
12
+ let result = await detector.detectTarget(testFile);
13
+ console.log("Detection:", result.target_column, result.confidence);
14
+ if (result.target_column) {
15
+ let val = await detector.validateTarget(testFile, result.target_column);
16
+ console.log("Validation:", val.problem_type, val.valid);
17
+ }
18
+ // Test Case 2: diagnosis (Classification)
19
+ console.log("\n--- Test Case 2: diagnosis ---");
20
+ fs.writeFileSync(testFile, "id,age,diagnosis\n1,50,M\n2,60,B\n3,45,M");
21
+ result = await detector.detectTarget(testFile);
22
+ console.log("Detection:", result.target_column, result.confidence);
23
+ if (result.target_column) {
24
+ let val = await detector.validateTarget(testFile, result.target_column);
25
+ console.log("Validation:", val.problem_type, val.valid);
26
+ }
27
+ fs.unlinkSync(testFile);
28
+ }
29
+ testDetector().catch(console.error);
@@ -0,0 +1,14 @@
1
+ import fs from 'fs';
2
+ const target = "C:\\Users\\нурбулан\\AppData\\Roaming\\Code\\User\\mcp.json";
3
+ const content = JSON.stringify({ mcpServers: { test: { command: "node" } } }, null, 2);
4
+ try {
5
+ console.log(`Testing write to: ${target}`);
6
+ fs.writeFileSync(target, content, "utf8");
7
+ const stat = fs.statSync(target);
8
+ console.log(`Success! File size: ${stat.size} bytes`);
9
+ const readBack = fs.readFileSync(target, "utf8");
10
+ console.log("Read back content:", readBack);
11
+ }
12
+ catch (e) {
13
+ console.error("Failed to write:", e);
14
+ }
@@ -0,0 +1,57 @@
1
+ import { TargetDetector } from "../preparation/target-detector.js";
2
+ import path from "path";
3
+ import fs from "fs";
4
+ async function verifyIntegration() {
5
+ // 1. Create a dummy CSV with a clear target
6
+ const testFile = path.join(process.cwd(), "data", "raw", "integration_test.csv");
7
+ const testId = "integration_test";
8
+ if (!fs.existsSync(path.dirname(testFile))) {
9
+ fs.mkdirSync(path.dirname(testFile), { recursive: true });
10
+ }
11
+ console.log("Creating test file:", testFile);
12
+ fs.writeFileSync(testFile, "id,feature1,feature2,SalePrice\n1,10,20,100000\n2,11,21,100000\n3,12,22,110000");
13
+ // 2. Call the preview_cleaning tool (simulated by calling valid request handler logic or via MCP client if possible)
14
+ // Since we can't easily call the MCP server from here without a client, we will simulate
15
+ // the logic we added to index.ts to ensure it runs without error.
16
+ try {
17
+ const { QualityAnalyzer } = await import("../quality/analyzer.js");
18
+ const { CleaningPlanner } = await import("../cleaning/planner.js");
19
+ const { CacheService, MockRedisProvider } = await import("../cache/service.js");
20
+ console.log("Initializing services...");
21
+ const cacheService = new CacheService(new MockRedisProvider());
22
+ // Use build/ directory to simulate runtime environment
23
+ const buildDir = path.join(process.cwd(), "build");
24
+ const qualityAnalyzer = new QualityAnalyzer(cacheService, buildDir);
25
+ const cleaningPlanner = new CleaningPlanner(cacheService, buildDir);
26
+ console.log("Running analysis...");
27
+ const report = await qualityAnalyzer.analyze(testFile);
28
+ console.log("Running target detection...");
29
+ const detector = new TargetDetector(buildDir);
30
+ const targetResult = await detector.detectTarget(testFile);
31
+ console.log("Detected:", targetResult);
32
+ const targetInfo = targetResult.target_column ? {
33
+ target: targetResult.target_column,
34
+ confidence: targetResult.confidence
35
+ } : undefined;
36
+ console.log("Generating plan...");
37
+ const plan = await cleaningPlanner.generatePlan(testId, report, undefined, targetInfo);
38
+ console.log("Plan Operations:", JSON.stringify(plan.operations, null, 2));
39
+ const hasRename = plan.operations.some(op => op.type === "RenameTarget");
40
+ if (hasRename) {
41
+ console.log("✅ SUCCESS: RenameTarget operation found in plan!");
42
+ }
43
+ else {
44
+ console.error("❌ FAILURE: RenameTarget operation NOT found.");
45
+ process.exit(1);
46
+ }
47
+ }
48
+ catch (e) {
49
+ console.error("Error during verification:", e);
50
+ process.exit(1);
51
+ }
52
+ finally {
53
+ if (fs.existsSync(testFile))
54
+ fs.unlinkSync(testFile);
55
+ }
56
+ }
57
+ verifyIntegration();
@@ -0,0 +1,33 @@
1
+ import path from "path";
2
+ import { MetadataStore } from "../metadata/store.js";
3
+ import { VectorStore } from "../search/vector-store.js";
4
+ import { Embedder } from "../search/embedder.js";
5
+ import { SearchEngine } from "../search/engine.js";
6
+ import { formatSearchResults } from "../tools/formatter.js";
7
+ import fs from "fs";
8
+ const query = process.argv[2] || "anime";
9
+ // Use the actual .vesper data path if it exists, otherwise use local data/
10
+ const homeDir = process.env.HOME || process.env.USERPROFILE || process.cwd();
11
+ const vesperDataRoot = path.join(homeDir, ".vesper");
12
+ let dbPath = path.join(vesperDataRoot, "data", "metadata.db");
13
+ let vectorPath = path.join(vesperDataRoot, "data", "vectors.json");
14
+ if (!fs.existsSync(dbPath)) {
15
+ console.error("Using local project data directory as fallback...");
16
+ dbPath = path.join(process.cwd(), "data", "metadata.db");
17
+ vectorPath = path.join(process.cwd(), "data", "vectors.json");
18
+ }
19
+ const metadataStore = new MetadataStore(dbPath);
20
+ const vectorStore = new VectorStore(vectorPath);
21
+ const embedder = Embedder.getInstance();
22
+ const searchEngine = new SearchEngine(metadataStore, vectorStore, embedder);
23
+ async function run() {
24
+ console.log(`\n=== VERIFYING SOURCE PRIORITIZATION [Query: "${query}"] ===\n`);
25
+ const results = await searchEngine.search(query, { limit: 5 });
26
+ if (results.length === 0) {
27
+ console.log("No results found. Run a search that triggers JIT first!");
28
+ return;
29
+ }
30
+ // Print formatted results to show badges
31
+ console.log(formatSearchResults(results));
32
+ }
33
+ run().catch(console.error);
@@ -96,12 +96,23 @@ export class SearchEngine {
96
96
  if (lexicalScore === 0 && positiveKeywords.length > 1) {
97
97
  penalty += 0.2;
98
98
  }
99
+ // D. Accessibility Bonuses (Prioritize low-friction sources)
100
+ let bonus = 0;
101
+ const sourceBonuses = {
102
+ "huggingface": 0.1,
103
+ "uci": 0.1,
104
+ "github": 0.1,
105
+ "worldbank": 0.1,
106
+ "nasa": 0.1
107
+ };
108
+ bonus = sourceBonuses[metadata.source] || 0;
99
109
  // Final Combined Score
100
- // 70% Vector, 30% Lexical, minus Penalties
101
- const finalScore = (vectorScore * 0.7) + (lexicalScore * 0.3) - penalty;
110
+ // 70% Vector, 30% Lexical, minus Penalties, plus Bonuses
111
+ const finalScore = (vectorScore * 0.7) + (lexicalScore * 0.3) - penalty + bonus;
102
112
  metadata.relevance_score = Math.round(finalScore * 100) / 100;
103
113
  metadata.vector_score = Math.round(vectorScore * 100) / 100;
104
114
  metadata.lexical_score = Math.round(lexicalScore * 100) / 100;
115
+ metadata.accessibility_bonus = bonus;
105
116
  results.push(metadata);
106
117
  }
107
118
  // Sort by final score and limit