@vespermcp/mcp-server 1.0.4 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -4
- package/build/cleaning/cleaner.js +27 -2
- package/build/cleaning/executor.js +7 -6
- package/build/cleaning/planner.js +16 -4
- package/build/config/config-manager.js +199 -0
- package/build/export/exporter.js +26 -2
- package/build/index.js +272 -72
- package/build/ingestion/ingestor.js +17 -16
- package/build/ingestion/kaggle-downloader.js +25 -2
- package/build/install/install-service.js +1 -1
- package/build/jobs/manager.js +17 -10
- package/build/metadata/monitoring-service.js +2 -2
- package/build/metadata/scraper.js +8 -8
- package/build/metadata/store.js +17 -2
- package/build/monitoring/observability.js +2 -2
- package/build/preparation/target-detector.js +75 -0
- package/build/python/cleaner.py +226 -0
- package/build/python/export_engine.py +131 -0
- package/build/python/framework_adapters.py +100 -0
- package/build/python/github_adapter.py +106 -0
- package/build/python/image_engine.py +86 -0
- package/build/python/media_engine.py +133 -0
- package/build/python/nasa_adapter.py +82 -0
- package/build/python/quality_engine.py +243 -0
- package/build/python/splitter_engine.py +283 -0
- package/build/python/target_engine.py +154 -0
- package/build/python/test_framework_adapters.py +61 -0
- package/build/python/uci_adapter.py +94 -0
- package/build/python/worldbank_adapter.py +99 -0
- package/build/quality/analyzer.js +40 -4
- package/build/quality/image-analyzer.js +73 -5
- package/build/quality/media-analyzer.js +74 -5
- package/build/scripts/cleanup-kaggle.js +41 -0
- package/build/scripts/repro-bug.js +37 -0
- package/build/scripts/repro-export-bug.js +56 -0
- package/build/scripts/test-mcp-v5.js +12 -11
- package/build/scripts/test-production-sync.js +36 -0
- package/build/scripts/test-target-detector.js +29 -0
- package/build/scripts/test-write.js +14 -0
- package/build/scripts/verify-integration.js +57 -0
- package/build/scripts/verify-priority.js +33 -0
- package/build/search/engine.js +13 -2
- package/build/search/jit-orchestrator.js +6 -40
- package/build/search/vector-store.js +18 -0
- package/build/splitting/splitter.js +27 -2
- package/build/tools/formatter.js +23 -8
- package/build/utils/downloader.js +2 -2
- package/build/utils/selector.js +69 -0
- package/package.json +8 -4
- package/src/python/cleaner.py +33 -3
- package/src/python/export_engine.py +19 -0
- package/src/python/target_engine.py +154 -0
|
@@ -1,12 +1,44 @@
|
|
|
1
1
|
import { spawn } from "child_process";
|
|
2
2
|
import path from "path";
|
|
3
|
+
import fs from "fs";
|
|
3
4
|
export class QualityAnalyzer {
|
|
4
5
|
cache;
|
|
5
6
|
pythonPath = "python"; // Assumes python is in PATH
|
|
6
7
|
scriptPath;
|
|
7
|
-
constructor(cache,
|
|
8
|
+
constructor(cache, buildDir = process.cwd()) {
|
|
9
|
+
// buildDir is the directory containing the compiled JS (e.g., build/)
|
|
10
|
+
// Priority:
|
|
11
|
+
// 1. ~/.vesper/python (stable synced location)
|
|
12
|
+
// 2. build/python (production)
|
|
13
|
+
// 3. src/python (development)
|
|
8
14
|
this.cache = cache;
|
|
9
|
-
|
|
15
|
+
const homeDir = process.env.HOME || process.env.USERPROFILE || buildDir;
|
|
16
|
+
const dataRoot = path.join(homeDir, ".vesper");
|
|
17
|
+
const scriptPath0 = path.resolve(dataRoot, "python", "quality_engine.py");
|
|
18
|
+
const scriptPath1 = path.resolve(buildDir, "python", "quality_engine.py");
|
|
19
|
+
const scriptPath2 = path.resolve(buildDir, "..", "src", "python", "quality_engine.py");
|
|
20
|
+
const scriptPath3 = path.resolve(buildDir, "..", "python", "quality_engine.py");
|
|
21
|
+
if (fs.existsSync(scriptPath0)) {
|
|
22
|
+
this.scriptPath = scriptPath0;
|
|
23
|
+
}
|
|
24
|
+
else if (fs.existsSync(scriptPath1)) {
|
|
25
|
+
this.scriptPath = scriptPath1;
|
|
26
|
+
}
|
|
27
|
+
else if (fs.existsSync(scriptPath2)) {
|
|
28
|
+
this.scriptPath = scriptPath2;
|
|
29
|
+
}
|
|
30
|
+
else if (fs.existsSync(scriptPath3)) {
|
|
31
|
+
this.scriptPath = scriptPath3;
|
|
32
|
+
}
|
|
33
|
+
else {
|
|
34
|
+
// Fallback to stable data path, error will be caught during execution
|
|
35
|
+
this.scriptPath = scriptPath0;
|
|
36
|
+
console.error(`[QualityAnalyzer] WARNING: Python script not found!`);
|
|
37
|
+
}
|
|
38
|
+
// Detect Python command (Windows may use 'py' instead of 'python')
|
|
39
|
+
if (process.platform === "win32") {
|
|
40
|
+
this.pythonPath = "py";
|
|
41
|
+
}
|
|
10
42
|
}
|
|
11
43
|
/**
|
|
12
44
|
* Run quality analysis on a local file (CSV/Parquet/JSON)
|
|
@@ -16,7 +48,7 @@ export class QualityAnalyzer {
|
|
|
16
48
|
if (this.cache && datasetId) {
|
|
17
49
|
const cached = await this.cache.getReport(datasetId);
|
|
18
50
|
if (cached) {
|
|
19
|
-
console.
|
|
51
|
+
console.error(`[QualityAnalyzer] Cache hit for ${datasetId}`);
|
|
20
52
|
return cached;
|
|
21
53
|
}
|
|
22
54
|
}
|
|
@@ -32,7 +64,11 @@ export class QualityAnalyzer {
|
|
|
32
64
|
});
|
|
33
65
|
process.on("close", (code) => {
|
|
34
66
|
if (code !== 0) {
|
|
35
|
-
|
|
67
|
+
const errorDetails = `Quality Analyzer failed (code ${code})
|
|
68
|
+
Command: ${this.pythonPath} ${this.scriptPath} ${filePath}
|
|
69
|
+
Script path exists: ${fs.existsSync(this.scriptPath)}
|
|
70
|
+
Error output: ${stderr}`;
|
|
71
|
+
reject(new Error(errorDetails));
|
|
36
72
|
return;
|
|
37
73
|
}
|
|
38
74
|
try {
|
|
@@ -1,10 +1,36 @@
|
|
|
1
1
|
import { spawn } from "child_process";
|
|
2
2
|
import path from "path";
|
|
3
|
+
import fs from "fs";
|
|
3
4
|
export class ImageAnalyzer {
|
|
4
5
|
pythonPath = "python";
|
|
5
6
|
scriptPath;
|
|
6
|
-
constructor(
|
|
7
|
-
|
|
7
|
+
constructor(buildDir = process.cwd()) {
|
|
8
|
+
// buildDir is the directory containing the compiled JS (e.g., build/)
|
|
9
|
+
const homeDir = process.env.HOME || process.env.USERPROFILE || buildDir;
|
|
10
|
+
const dataRoot = path.join(homeDir, ".vesper");
|
|
11
|
+
const scriptPath0 = path.resolve(dataRoot, "python", "image_engine.py");
|
|
12
|
+
const scriptPath1 = path.resolve(buildDir, "python", "image_engine.py");
|
|
13
|
+
const scriptPath2 = path.resolve(buildDir, "..", "src", "python", "image_engine.py");
|
|
14
|
+
const scriptPath3 = path.resolve(buildDir, "..", "python", "image_engine.py");
|
|
15
|
+
if (fs.existsSync(scriptPath0)) {
|
|
16
|
+
this.scriptPath = scriptPath0;
|
|
17
|
+
}
|
|
18
|
+
else if (fs.existsSync(scriptPath1)) {
|
|
19
|
+
this.scriptPath = scriptPath1;
|
|
20
|
+
}
|
|
21
|
+
else if (fs.existsSync(scriptPath2)) {
|
|
22
|
+
this.scriptPath = scriptPath2;
|
|
23
|
+
}
|
|
24
|
+
else if (fs.existsSync(scriptPath3)) {
|
|
25
|
+
this.scriptPath = scriptPath3;
|
|
26
|
+
}
|
|
27
|
+
else {
|
|
28
|
+
this.scriptPath = scriptPath0;
|
|
29
|
+
}
|
|
30
|
+
// Detect Python command (Windows may use 'py' instead of 'python')
|
|
31
|
+
if (process.platform === "win32") {
|
|
32
|
+
this.pythonPath = "py";
|
|
33
|
+
}
|
|
8
34
|
}
|
|
9
35
|
/**
|
|
10
36
|
* Analyze image quality for a single file or a directory
|
|
@@ -23,22 +49,64 @@ export class ImageAnalyzer {
|
|
|
23
49
|
process.stderr.on("data", (data) => {
|
|
24
50
|
stderr += data.toString();
|
|
25
51
|
});
|
|
52
|
+
process.on("error", (err) => {
|
|
53
|
+
if (err.code === "ENOENT") {
|
|
54
|
+
// Python not found - return a graceful failure report
|
|
55
|
+
resolve({
|
|
56
|
+
total_images: 1,
|
|
57
|
+
ok_images: 0,
|
|
58
|
+
failed_images: 1,
|
|
59
|
+
details: [{
|
|
60
|
+
status: "error",
|
|
61
|
+
error: "Python not installed or not in PATH. Please install Python to use image analysis features."
|
|
62
|
+
}]
|
|
63
|
+
});
|
|
64
|
+
}
|
|
65
|
+
else {
|
|
66
|
+
reject(new Error(`Failed to start image analysis process: ${err.message}`));
|
|
67
|
+
}
|
|
68
|
+
});
|
|
26
69
|
process.on("close", (code) => {
|
|
27
70
|
if (code !== 0) {
|
|
28
|
-
|
|
71
|
+
// Handle case where script fails
|
|
72
|
+
resolve({
|
|
73
|
+
total_images: 1,
|
|
74
|
+
ok_images: 0,
|
|
75
|
+
failed_images: 1,
|
|
76
|
+
details: [{
|
|
77
|
+
status: "error",
|
|
78
|
+
error: `Image Analyzer process failed (code ${code}): ${stderr || "Unknown error"}`
|
|
79
|
+
}]
|
|
80
|
+
});
|
|
29
81
|
return;
|
|
30
82
|
}
|
|
31
83
|
try {
|
|
32
84
|
const result = JSON.parse(stdout);
|
|
33
85
|
if (result.error) {
|
|
34
|
-
|
|
86
|
+
resolve({
|
|
87
|
+
total_images: 1,
|
|
88
|
+
ok_images: 0,
|
|
89
|
+
failed_images: 1,
|
|
90
|
+
details: [{
|
|
91
|
+
status: "error",
|
|
92
|
+
error: result.error
|
|
93
|
+
}]
|
|
94
|
+
});
|
|
35
95
|
}
|
|
36
96
|
else {
|
|
37
97
|
resolve(result);
|
|
38
98
|
}
|
|
39
99
|
}
|
|
40
100
|
catch (e) {
|
|
41
|
-
|
|
101
|
+
resolve({
|
|
102
|
+
total_images: 1,
|
|
103
|
+
ok_images: 0,
|
|
104
|
+
failed_images: 1,
|
|
105
|
+
details: [{
|
|
106
|
+
status: "error",
|
|
107
|
+
error: `Failed to parse image analyzer output: ${stdout}`
|
|
108
|
+
}]
|
|
109
|
+
});
|
|
42
110
|
}
|
|
43
111
|
});
|
|
44
112
|
});
|
|
@@ -1,10 +1,36 @@
|
|
|
1
1
|
import { spawn } from "child_process";
|
|
2
2
|
import path from "path";
|
|
3
|
+
import fs from "fs";
|
|
3
4
|
export class MediaAnalyzer {
|
|
4
5
|
pythonPath = "python";
|
|
5
6
|
scriptPath;
|
|
6
|
-
constructor(
|
|
7
|
-
|
|
7
|
+
constructor(buildDir = process.cwd()) {
|
|
8
|
+
// buildDir is the directory containing the compiled JS (e.g., build/)
|
|
9
|
+
const homeDir = process.env.HOME || process.env.USERPROFILE || buildDir;
|
|
10
|
+
const dataRoot = path.join(homeDir, ".vesper");
|
|
11
|
+
const scriptPath0 = path.resolve(dataRoot, "python", "media_engine.py");
|
|
12
|
+
const scriptPath1 = path.resolve(buildDir, "python", "media_engine.py");
|
|
13
|
+
const scriptPath2 = path.resolve(buildDir, "..", "src", "python", "media_engine.py");
|
|
14
|
+
const scriptPath3 = path.resolve(buildDir, "..", "python", "media_engine.py");
|
|
15
|
+
if (fs.existsSync(scriptPath0)) {
|
|
16
|
+
this.scriptPath = scriptPath0;
|
|
17
|
+
}
|
|
18
|
+
else if (fs.existsSync(scriptPath1)) {
|
|
19
|
+
this.scriptPath = scriptPath1;
|
|
20
|
+
}
|
|
21
|
+
else if (fs.existsSync(scriptPath2)) {
|
|
22
|
+
this.scriptPath = scriptPath2;
|
|
23
|
+
}
|
|
24
|
+
else if (fs.existsSync(scriptPath3)) {
|
|
25
|
+
this.scriptPath = scriptPath3;
|
|
26
|
+
}
|
|
27
|
+
else {
|
|
28
|
+
this.scriptPath = scriptPath0;
|
|
29
|
+
}
|
|
30
|
+
// Detect Python command (Windows may use 'py' instead of 'python')
|
|
31
|
+
if (process.platform === "win32") {
|
|
32
|
+
this.pythonPath = "py";
|
|
33
|
+
}
|
|
8
34
|
}
|
|
9
35
|
/**
|
|
10
36
|
* Analyze audio/video quality for a single file or a directory
|
|
@@ -23,22 +49,65 @@ export class MediaAnalyzer {
|
|
|
23
49
|
process.stderr.on("data", (data) => {
|
|
24
50
|
stderr += data.toString();
|
|
25
51
|
});
|
|
52
|
+
process.on("error", (err) => {
|
|
53
|
+
if (err.code === "ENOENT") {
|
|
54
|
+
// Python not found - return a graceful failure report
|
|
55
|
+
resolve({
|
|
56
|
+
total_files: 1,
|
|
57
|
+
ok_files: 0,
|
|
58
|
+
failed_files: 1,
|
|
59
|
+
details: [{
|
|
60
|
+
status: "error",
|
|
61
|
+
error: "Python not installed or not in PATH. Please install Python to use media analysis features."
|
|
62
|
+
}]
|
|
63
|
+
});
|
|
64
|
+
}
|
|
65
|
+
else {
|
|
66
|
+
reject(new Error(`Failed to start media analysis process: ${err.message}`));
|
|
67
|
+
}
|
|
68
|
+
});
|
|
26
69
|
process.on("close", (code) => {
|
|
27
70
|
if (code !== 0) {
|
|
28
|
-
|
|
71
|
+
// Handle case where python exists but script fails
|
|
72
|
+
resolve({
|
|
73
|
+
total_files: 1,
|
|
74
|
+
ok_files: 0,
|
|
75
|
+
failed_files: 1,
|
|
76
|
+
details: [{
|
|
77
|
+
status: "error",
|
|
78
|
+
error: `Media Analyzer process failed (code ${code}): ${stderr || "Unknown error"}`
|
|
79
|
+
}]
|
|
80
|
+
});
|
|
29
81
|
return;
|
|
30
82
|
}
|
|
31
83
|
try {
|
|
32
84
|
const result = JSON.parse(stdout);
|
|
33
85
|
if (result.error) {
|
|
34
|
-
|
|
86
|
+
// Return error as part of report instead of rejecting
|
|
87
|
+
resolve({
|
|
88
|
+
total_files: 1,
|
|
89
|
+
ok_files: 0,
|
|
90
|
+
failed_files: 1,
|
|
91
|
+
details: [{
|
|
92
|
+
status: "error",
|
|
93
|
+
error: result.error
|
|
94
|
+
}]
|
|
95
|
+
});
|
|
35
96
|
}
|
|
36
97
|
else {
|
|
37
98
|
resolve(result);
|
|
38
99
|
}
|
|
39
100
|
}
|
|
40
101
|
catch (e) {
|
|
41
|
-
|
|
102
|
+
resolve({
|
|
103
|
+
total_files: 1,
|
|
104
|
+
ok_files: 0,
|
|
105
|
+
failed_files: 1,
|
|
106
|
+
details: [{
|
|
107
|
+
status: "error",
|
|
108
|
+
error: `Failed to parse media analyzer output: ${stdout}`
|
|
109
|
+
}]
|
|
110
|
+
});
|
|
42
111
|
}
|
|
43
112
|
});
|
|
44
113
|
});
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Cleanup script to remove all Kaggle datasets from Vesper
|
|
4
|
+
*/
|
|
5
|
+
import { fileURLToPath } from "url";
|
|
6
|
+
import path from "path";
|
|
7
|
+
import { MetadataStore } from "../metadata/store.js";
|
|
8
|
+
import { VectorStore } from "../search/vector-store.js";
|
|
9
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
10
|
+
const __dirname = path.dirname(__filename);
|
|
11
|
+
const homeDir = process.env.HOME || process.env.USERPROFILE || path.join(__dirname, "..");
|
|
12
|
+
const dataRoot = path.join(homeDir, ".vesper");
|
|
13
|
+
const dbPath = path.join(dataRoot, "data", "metadata.db");
|
|
14
|
+
const vectorPath = path.join(dataRoot, "data", "vectors.json");
|
|
15
|
+
console.log("🧹 Vesper Kaggle Cleanup");
|
|
16
|
+
console.log("========================\n");
|
|
17
|
+
try {
|
|
18
|
+
const metadataStore = new MetadataStore(dbPath);
|
|
19
|
+
const vectorStore = new VectorStore(vectorPath);
|
|
20
|
+
// Get all Kaggle dataset IDs
|
|
21
|
+
const kaggleIds = metadataStore.getDatasetIdsBySource("kaggle");
|
|
22
|
+
console.log(`Found ${kaggleIds.length} Kaggle datasets in database`);
|
|
23
|
+
if (kaggleIds.length === 0) {
|
|
24
|
+
console.log("✅ No Kaggle datasets to remove");
|
|
25
|
+
process.exit(0);
|
|
26
|
+
}
|
|
27
|
+
// Delete from vector store
|
|
28
|
+
const vectorsDeleted = vectorStore.deleteMany(kaggleIds);
|
|
29
|
+
console.log(`🗑️ Deleted ${vectorsDeleted} vectors from vector store`);
|
|
30
|
+
vectorStore.save();
|
|
31
|
+
// Delete from metadata database
|
|
32
|
+
const datasetsDeleted = metadataStore.deleteBySource("kaggle");
|
|
33
|
+
console.log(`🗑️ Deleted ${datasetsDeleted} datasets from metadata database`);
|
|
34
|
+
metadataStore.close();
|
|
35
|
+
console.log("\n✅ Cleanup complete! Kaggle datasets have been removed.");
|
|
36
|
+
console.log(" You can now search without seeing Kaggle results.");
|
|
37
|
+
}
|
|
38
|
+
catch (error) {
|
|
39
|
+
console.error("❌ Cleanup failed:", error.message);
|
|
40
|
+
process.exit(1);
|
|
41
|
+
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { JobManager } from "../jobs/manager.js";
|
|
2
|
+
import { MetadataStore } from "../metadata/store.js";
|
|
3
|
+
import fs from "fs";
|
|
4
|
+
async function repro() {
|
|
5
|
+
const dbPath = "repro_test.db";
|
|
6
|
+
if (fs.existsSync(dbPath))
|
|
7
|
+
fs.unlinkSync(dbPath);
|
|
8
|
+
const store = new MetadataStore(dbPath);
|
|
9
|
+
const jobManager = JobManager.getInstance(store);
|
|
10
|
+
console.log("Setting up listener...");
|
|
11
|
+
jobManager.on("processJob", async (job, execute) => {
|
|
12
|
+
console.log(`Listener received job ${job.id}`);
|
|
13
|
+
const task = async () => {
|
|
14
|
+
console.log("Running task...");
|
|
15
|
+
return "success";
|
|
16
|
+
};
|
|
17
|
+
try {
|
|
18
|
+
await execute(task);
|
|
19
|
+
console.log("Execute finished");
|
|
20
|
+
}
|
|
21
|
+
catch (e) {
|
|
22
|
+
console.error("Execute failed in listener:", e.message);
|
|
23
|
+
}
|
|
24
|
+
});
|
|
25
|
+
console.log("Creating job...");
|
|
26
|
+
const job = jobManager.createJob("prepare", 0, { query: "test" });
|
|
27
|
+
console.log(`Job created: ${job.id}`);
|
|
28
|
+
// Wait for a bit
|
|
29
|
+
await new Promise(r => setTimeout(r, 2000));
|
|
30
|
+
const finalJob = store.getJob(job.id);
|
|
31
|
+
console.log("Final job status:", finalJob?.status);
|
|
32
|
+
console.log("Final job status text:", finalJob?.status_text);
|
|
33
|
+
store.close();
|
|
34
|
+
if (fs.existsSync(dbPath))
|
|
35
|
+
fs.unlinkSync(dbPath);
|
|
36
|
+
}
|
|
37
|
+
repro().catch(console.error);
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import path from "path";
|
|
2
|
+
import fs from "fs";
|
|
3
|
+
import { spawnSync } from "child_process";
|
|
4
|
+
const pythonPath = "python";
|
|
5
|
+
const scriptPath = path.join(process.cwd(), "src", "python", "cleaner.py");
|
|
6
|
+
const testDir = path.join(process.cwd(), "test_repro");
|
|
7
|
+
if (!fs.existsSync(testDir))
|
|
8
|
+
fs.mkdirSync(testDir);
|
|
9
|
+
async function runRepro() {
|
|
10
|
+
console.log("=== Reproducing CSV Export Bug ===\n");
|
|
11
|
+
const parquetFile = path.join(testDir, "test_nested.parquet");
|
|
12
|
+
const csvOutput = path.join(testDir, "test_nested_cleaned.csv");
|
|
13
|
+
// 1. Create a Parquet file with nested data (Lists/Structs) using Python
|
|
14
|
+
console.log("Creating nested Parquet file...");
|
|
15
|
+
const createScript = `
|
|
16
|
+
import polars as pl
|
|
17
|
+
df = pl.DataFrame({
|
|
18
|
+
"id": [1, 2, 3],
|
|
19
|
+
"tags": [["a", "b"], ["c"], []],
|
|
20
|
+
"meta": [{"score": 0.9, "safe": True}, {"score": 0.4, "safe": False}, {"score": 0.1, "safe": True}]
|
|
21
|
+
})
|
|
22
|
+
df.write_parquet(r"${parquetFile}")
|
|
23
|
+
`;
|
|
24
|
+
fs.writeFileSync(path.join(testDir, "create_data.py"), createScript);
|
|
25
|
+
spawnSync(pythonPath, [path.join(testDir, "create_data.py")], { stdio: 'inherit' });
|
|
26
|
+
// 2. Call cleaner.py to convert to CSV
|
|
27
|
+
console.log("Calling cleaner.py to convert to CSV...");
|
|
28
|
+
const result = spawnSync(pythonPath, [
|
|
29
|
+
scriptPath,
|
|
30
|
+
parquetFile,
|
|
31
|
+
"[]",
|
|
32
|
+
"csv"
|
|
33
|
+
]);
|
|
34
|
+
console.log("Exit Code:", result.status);
|
|
35
|
+
console.log("Stdout:", result.stdout?.toString());
|
|
36
|
+
console.log("Stderr:", result.stderr?.toString());
|
|
37
|
+
if (result.status === 0) {
|
|
38
|
+
try {
|
|
39
|
+
const data = JSON.parse(result.stdout.toString());
|
|
40
|
+
if (data.success) {
|
|
41
|
+
console.log("SUCCESS! Output file:", data.output_path);
|
|
42
|
+
if (fs.existsSync(data.output_path)) {
|
|
43
|
+
console.log("File exists on disk.");
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
else {
|
|
47
|
+
console.error("cleaner.py reported failure:", data.error);
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
catch (e) {
|
|
51
|
+
console.error("Failed to parse JSON output:", e);
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
runRepro().catch(console.error);
|
|
56
|
+
runRepro().catch(console.error);
|
|
@@ -47,16 +47,18 @@ async function testPhase5Tools() {
|
|
|
47
47
|
// Create job (Logic from index.ts)
|
|
48
48
|
const job = jobManager.createJob("prepare", 0, { query });
|
|
49
49
|
console.log(` - Job Created: ${job.id}`);
|
|
50
|
-
//
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
await
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
50
|
+
// Register listener for simulated work (Logic from index.ts)
|
|
51
|
+
jobManager.on("processJob", async (currJob, execute) => {
|
|
52
|
+
if (currJob.id !== job.id)
|
|
53
|
+
return;
|
|
54
|
+
await execute(async () => {
|
|
55
|
+
console.log(" - [Worker] Starting autonomous preparation task...");
|
|
56
|
+
await new Promise(r => setTimeout(r, 1000));
|
|
57
|
+
console.log(" - [Worker] Phase 1: Search complete");
|
|
58
|
+
await new Promise(r => setTimeout(r, 1000));
|
|
59
|
+
console.log(" - [Worker] Phase 2: Quality analysis complete");
|
|
60
|
+
return "data/exports/prepared_dataset.parquet";
|
|
61
|
+
});
|
|
60
62
|
});
|
|
61
63
|
// 4. Test check_job_status (Polling)
|
|
62
64
|
console.log("\nStep 4: Polling Job Status (Simulating UI Check)...");
|
|
@@ -67,7 +69,6 @@ async function testPhase5Tools() {
|
|
|
67
69
|
break;
|
|
68
70
|
await new Promise(r => setTimeout(r, 800));
|
|
69
71
|
}
|
|
70
|
-
await jobPromise;
|
|
71
72
|
console.log("\n Phase 5 tools logic verified.");
|
|
72
73
|
}
|
|
73
74
|
testPhase5Tools().catch(console.error);
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { JobManager } from "../jobs/manager.js";
|
|
2
|
+
import { MetadataStore } from "../metadata/store.js";
|
|
3
|
+
import fs from "fs";
|
|
4
|
+
async function testSync() {
|
|
5
|
+
console.log("Starting Production Sync Test...");
|
|
6
|
+
const dbPath = "prod_sync.db";
|
|
7
|
+
if (fs.existsSync(dbPath))
|
|
8
|
+
fs.unlinkSync(dbPath);
|
|
9
|
+
const store = new MetadataStore(dbPath);
|
|
10
|
+
const jobManager = JobManager.getInstance(store);
|
|
11
|
+
console.log("Attaching listener (same as index.ts)...");
|
|
12
|
+
jobManager.on("processJob", async (job, execute) => {
|
|
13
|
+
console.log(`[Listener 1] Received job ${job.id}`);
|
|
14
|
+
if (typeof execute !== 'function') {
|
|
15
|
+
console.error(`[Listener 1] ERROR: execute is not a function! It is: ${typeof execute}`);
|
|
16
|
+
return;
|
|
17
|
+
}
|
|
18
|
+
const task = async () => {
|
|
19
|
+
console.log("[Listener 1] Task running...");
|
|
20
|
+
return "ok";
|
|
21
|
+
};
|
|
22
|
+
await execute(task);
|
|
23
|
+
console.log("[Listener 1] Task finished.");
|
|
24
|
+
});
|
|
25
|
+
console.log("Emitting job...");
|
|
26
|
+
const job = jobManager.createJob("prepare", 0, { query: "test" });
|
|
27
|
+
// Wait for the background loop
|
|
28
|
+
await new Promise(r => setTimeout(r, 1000));
|
|
29
|
+
const finalJob = store.getJob(job.id);
|
|
30
|
+
console.log(`Job Result: ${finalJob?.status} - ${finalJob?.status_text}`);
|
|
31
|
+
store.close();
|
|
32
|
+
if (fs.existsSync(dbPath))
|
|
33
|
+
fs.unlinkSync(dbPath);
|
|
34
|
+
console.log("Test Complete.");
|
|
35
|
+
}
|
|
36
|
+
testSync().catch(console.error);
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import { TargetDetector } from "../preparation/target-detector.js";
|
|
2
|
+
import path from "path";
|
|
3
|
+
async function testDetector() {
|
|
4
|
+
// 1. Test existing build dir
|
|
5
|
+
const detector = new TargetDetector(path.join(process.cwd(), "build"));
|
|
6
|
+
// 2. Create a dummy CSV for testing
|
|
7
|
+
const testFile = path.join(process.cwd(), "test_target.csv");
|
|
8
|
+
const fs = (await import("fs")).default;
|
|
9
|
+
// Test Case 1: SalePrice (Regression)
|
|
10
|
+
console.log("--- Test Case 1: SalePrice ---");
|
|
11
|
+
fs.writeFileSync(testFile, "id,feature1,feature2,SalePrice\n1,10,20,100000\n2,11,21,120000\n3,12,22,110000");
|
|
12
|
+
let result = await detector.detectTarget(testFile);
|
|
13
|
+
console.log("Detection:", result.target_column, result.confidence);
|
|
14
|
+
if (result.target_column) {
|
|
15
|
+
let val = await detector.validateTarget(testFile, result.target_column);
|
|
16
|
+
console.log("Validation:", val.problem_type, val.valid);
|
|
17
|
+
}
|
|
18
|
+
// Test Case 2: diagnosis (Classification)
|
|
19
|
+
console.log("\n--- Test Case 2: diagnosis ---");
|
|
20
|
+
fs.writeFileSync(testFile, "id,age,diagnosis\n1,50,M\n2,60,B\n3,45,M");
|
|
21
|
+
result = await detector.detectTarget(testFile);
|
|
22
|
+
console.log("Detection:", result.target_column, result.confidence);
|
|
23
|
+
if (result.target_column) {
|
|
24
|
+
let val = await detector.validateTarget(testFile, result.target_column);
|
|
25
|
+
console.log("Validation:", val.problem_type, val.valid);
|
|
26
|
+
}
|
|
27
|
+
fs.unlinkSync(testFile);
|
|
28
|
+
}
|
|
29
|
+
testDetector().catch(console.error);
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
const target = "C:\\Users\\нурбулан\\AppData\\Roaming\\Code\\User\\mcp.json";
|
|
3
|
+
const content = JSON.stringify({ mcpServers: { test: { command: "node" } } }, null, 2);
|
|
4
|
+
try {
|
|
5
|
+
console.log(`Testing write to: ${target}`);
|
|
6
|
+
fs.writeFileSync(target, content, "utf8");
|
|
7
|
+
const stat = fs.statSync(target);
|
|
8
|
+
console.log(`Success! File size: ${stat.size} bytes`);
|
|
9
|
+
const readBack = fs.readFileSync(target, "utf8");
|
|
10
|
+
console.log("Read back content:", readBack);
|
|
11
|
+
}
|
|
12
|
+
catch (e) {
|
|
13
|
+
console.error("Failed to write:", e);
|
|
14
|
+
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { TargetDetector } from "../preparation/target-detector.js";
|
|
2
|
+
import path from "path";
|
|
3
|
+
import fs from "fs";
|
|
4
|
+
async function verifyIntegration() {
|
|
5
|
+
// 1. Create a dummy CSV with a clear target
|
|
6
|
+
const testFile = path.join(process.cwd(), "data", "raw", "integration_test.csv");
|
|
7
|
+
const testId = "integration_test";
|
|
8
|
+
if (!fs.existsSync(path.dirname(testFile))) {
|
|
9
|
+
fs.mkdirSync(path.dirname(testFile), { recursive: true });
|
|
10
|
+
}
|
|
11
|
+
console.log("Creating test file:", testFile);
|
|
12
|
+
fs.writeFileSync(testFile, "id,feature1,feature2,SalePrice\n1,10,20,100000\n2,11,21,100000\n3,12,22,110000");
|
|
13
|
+
// 2. Call the preview_cleaning tool (simulated by calling valid request handler logic or via MCP client if possible)
|
|
14
|
+
// Since we can't easily call the MCP server from here without a client, we will simulate
|
|
15
|
+
// the logic we added to index.ts to ensure it runs without error.
|
|
16
|
+
try {
|
|
17
|
+
const { QualityAnalyzer } = await import("../quality/analyzer.js");
|
|
18
|
+
const { CleaningPlanner } = await import("../cleaning/planner.js");
|
|
19
|
+
const { CacheService, MockRedisProvider } = await import("../cache/service.js");
|
|
20
|
+
console.log("Initializing services...");
|
|
21
|
+
const cacheService = new CacheService(new MockRedisProvider());
|
|
22
|
+
// Use build/ directory to simulate runtime environment
|
|
23
|
+
const buildDir = path.join(process.cwd(), "build");
|
|
24
|
+
const qualityAnalyzer = new QualityAnalyzer(cacheService, buildDir);
|
|
25
|
+
const cleaningPlanner = new CleaningPlanner(cacheService, buildDir);
|
|
26
|
+
console.log("Running analysis...");
|
|
27
|
+
const report = await qualityAnalyzer.analyze(testFile);
|
|
28
|
+
console.log("Running target detection...");
|
|
29
|
+
const detector = new TargetDetector(buildDir);
|
|
30
|
+
const targetResult = await detector.detectTarget(testFile);
|
|
31
|
+
console.log("Detected:", targetResult);
|
|
32
|
+
const targetInfo = targetResult.target_column ? {
|
|
33
|
+
target: targetResult.target_column,
|
|
34
|
+
confidence: targetResult.confidence
|
|
35
|
+
} : undefined;
|
|
36
|
+
console.log("Generating plan...");
|
|
37
|
+
const plan = await cleaningPlanner.generatePlan(testId, report, undefined, targetInfo);
|
|
38
|
+
console.log("Plan Operations:", JSON.stringify(plan.operations, null, 2));
|
|
39
|
+
const hasRename = plan.operations.some(op => op.type === "RenameTarget");
|
|
40
|
+
if (hasRename) {
|
|
41
|
+
console.log("✅ SUCCESS: RenameTarget operation found in plan!");
|
|
42
|
+
}
|
|
43
|
+
else {
|
|
44
|
+
console.error("❌ FAILURE: RenameTarget operation NOT found.");
|
|
45
|
+
process.exit(1);
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
catch (e) {
|
|
49
|
+
console.error("Error during verification:", e);
|
|
50
|
+
process.exit(1);
|
|
51
|
+
}
|
|
52
|
+
finally {
|
|
53
|
+
if (fs.existsSync(testFile))
|
|
54
|
+
fs.unlinkSync(testFile);
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
verifyIntegration();
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import path from "path";
|
|
2
|
+
import { MetadataStore } from "../metadata/store.js";
|
|
3
|
+
import { VectorStore } from "../search/vector-store.js";
|
|
4
|
+
import { Embedder } from "../search/embedder.js";
|
|
5
|
+
import { SearchEngine } from "../search/engine.js";
|
|
6
|
+
import { formatSearchResults } from "../tools/formatter.js";
|
|
7
|
+
import fs from "fs";
|
|
8
|
+
const query = process.argv[2] || "anime";
|
|
9
|
+
// Use the actual .vesper data path if it exists, otherwise use local data/
|
|
10
|
+
const homeDir = process.env.HOME || process.env.USERPROFILE || process.cwd();
|
|
11
|
+
const vesperDataRoot = path.join(homeDir, ".vesper");
|
|
12
|
+
let dbPath = path.join(vesperDataRoot, "data", "metadata.db");
|
|
13
|
+
let vectorPath = path.join(vesperDataRoot, "data", "vectors.json");
|
|
14
|
+
if (!fs.existsSync(dbPath)) {
|
|
15
|
+
console.error("Using local project data directory as fallback...");
|
|
16
|
+
dbPath = path.join(process.cwd(), "data", "metadata.db");
|
|
17
|
+
vectorPath = path.join(process.cwd(), "data", "vectors.json");
|
|
18
|
+
}
|
|
19
|
+
const metadataStore = new MetadataStore(dbPath);
|
|
20
|
+
const vectorStore = new VectorStore(vectorPath);
|
|
21
|
+
const embedder = Embedder.getInstance();
|
|
22
|
+
const searchEngine = new SearchEngine(metadataStore, vectorStore, embedder);
|
|
23
|
+
async function run() {
|
|
24
|
+
console.log(`\n=== VERIFYING SOURCE PRIORITIZATION [Query: "${query}"] ===\n`);
|
|
25
|
+
const results = await searchEngine.search(query, { limit: 5 });
|
|
26
|
+
if (results.length === 0) {
|
|
27
|
+
console.log("No results found. Run a search that triggers JIT first!");
|
|
28
|
+
return;
|
|
29
|
+
}
|
|
30
|
+
// Print formatted results to show badges
|
|
31
|
+
console.log(formatSearchResults(results));
|
|
32
|
+
}
|
|
33
|
+
run().catch(console.error);
|
package/build/search/engine.js
CHANGED
|
@@ -96,12 +96,23 @@ export class SearchEngine {
|
|
|
96
96
|
if (lexicalScore === 0 && positiveKeywords.length > 1) {
|
|
97
97
|
penalty += 0.2;
|
|
98
98
|
}
|
|
99
|
+
// D. Accessibility Bonuses (Prioritize low-friction sources)
|
|
100
|
+
let bonus = 0;
|
|
101
|
+
const sourceBonuses = {
|
|
102
|
+
"huggingface": 0.1,
|
|
103
|
+
"uci": 0.1,
|
|
104
|
+
"github": 0.1,
|
|
105
|
+
"worldbank": 0.1,
|
|
106
|
+
"nasa": 0.1
|
|
107
|
+
};
|
|
108
|
+
bonus = sourceBonuses[metadata.source] || 0;
|
|
99
109
|
// Final Combined Score
|
|
100
|
-
// 70% Vector, 30% Lexical, minus Penalties
|
|
101
|
-
const finalScore = (vectorScore * 0.7) + (lexicalScore * 0.3) - penalty;
|
|
110
|
+
// 70% Vector, 30% Lexical, minus Penalties, plus Bonuses
|
|
111
|
+
const finalScore = (vectorScore * 0.7) + (lexicalScore * 0.3) - penalty + bonus;
|
|
102
112
|
metadata.relevance_score = Math.round(finalScore * 100) / 100;
|
|
103
113
|
metadata.vector_score = Math.round(vectorScore * 100) / 100;
|
|
104
114
|
metadata.lexical_score = Math.round(lexicalScore * 100) / 100;
|
|
115
|
+
metadata.accessibility_bonus = bonus;
|
|
105
116
|
results.push(metadata);
|
|
106
117
|
}
|
|
107
118
|
// Sort by final score and limit
|