@vespermcp/mcp-server 1.1.3 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +34 -0
- package/build/config/secure-keys.js +51 -0
- package/build/config/user-config.js +48 -0
- package/build/fusion/engine.js +69 -0
- package/build/index.js +900 -50
- package/build/ingestion/hf-downloader.js +12 -3
- package/build/ingestion/ingestor.js +33 -9
- package/build/ingestion/kaggle-downloader.js +2 -2
- package/build/metadata/kaggle-source.js +70 -0
- package/build/metadata/scraper.js +34 -10
- package/build/python/config.py +259 -0
- package/build/python/export_engine.py +148 -52
- package/build/python/fusion_engine.py +368 -0
- package/build/python/kaggle_engine.py +204 -0
- package/build/python/row_count.py +54 -0
- package/build/python/test_fusion_engine.py +89 -0
- package/build/scripts/build-index.js +5 -5
- package/build/search/jit-orchestrator.js +72 -12
- package/build/tools/formatter.js +14 -14
- package/package.json +9 -3
- package/scripts/refresh-index.cjs +87 -0
- package/src/python/__pycache__/export_engine.cpython-312.pyc +0 -0
- package/src/python/__pycache__/fusion_engine.cpython-312.pyc +0 -0
- package/src/python/config.py +259 -0
- package/src/python/export_engine.py +148 -52
- package/src/python/fusion_engine.py +368 -0
- package/src/python/kaggle_engine.py +204 -0
- package/src/python/row_count.py +54 -0
- package/src/python/test_fusion_engine.py +89 -0
|
@@ -8,12 +8,16 @@ export class HFDownloader {
|
|
|
8
8
|
this.hfToken = token || process.env.HF_TOKEN || process.env.HUGGINGFACE_TOKEN;
|
|
9
9
|
this.downloader = new RobustDownloader();
|
|
10
10
|
}
|
|
11
|
+
getToken() {
|
|
12
|
+
return this.hfToken || process.env.HF_TOKEN || process.env.HUGGINGFACE_TOKEN;
|
|
13
|
+
}
|
|
11
14
|
/**
|
|
12
15
|
* Finds the most suitable data file in a repository
|
|
13
16
|
* Returns the relative path within the repo
|
|
14
17
|
*/
|
|
15
18
|
async findBestFile(repoId) {
|
|
16
19
|
try {
|
|
20
|
+
const token = this.getToken();
|
|
17
21
|
const files = [];
|
|
18
22
|
const blacklist = [
|
|
19
23
|
".gitattributes",
|
|
@@ -28,7 +32,7 @@ export class HFDownloader {
|
|
|
28
32
|
for await (const file of listFiles({
|
|
29
33
|
repo: { type: "dataset", name: repoId },
|
|
30
34
|
recursive: true,
|
|
31
|
-
...(
|
|
35
|
+
...(token ? { accessToken: token } : {})
|
|
32
36
|
})) {
|
|
33
37
|
if (file.type === "file") {
|
|
34
38
|
const fileName = path.basename(file.path);
|
|
@@ -62,7 +66,11 @@ export class HFDownloader {
|
|
|
62
66
|
return fallback || null;
|
|
63
67
|
}
|
|
64
68
|
catch (error) {
|
|
65
|
-
|
|
69
|
+
const msg = String(error?.message || error);
|
|
70
|
+
if (msg.includes("401") || msg.includes("403") || msg.toLowerCase().includes("unauthorized")) {
|
|
71
|
+
throw new Error("Hugging Face gated/private dataset requires token. Run 'vespermcp config keys' to set HF token.");
|
|
72
|
+
}
|
|
73
|
+
console.error(`[HF] Failed to list files for ${repoId}:`, msg);
|
|
66
74
|
return null;
|
|
67
75
|
}
|
|
68
76
|
}
|
|
@@ -70,9 +78,10 @@ export class HFDownloader {
|
|
|
70
78
|
* Downloads a file from HF to local path
|
|
71
79
|
*/
|
|
72
80
|
async download(repoId, filePath, targetPath, onProgress) {
|
|
81
|
+
const token = this.getToken();
|
|
73
82
|
const url = `https://huggingface.co/datasets/${repoId}/resolve/main/${filePath}`;
|
|
74
83
|
await this.downloader.download(url, targetPath, {
|
|
75
|
-
headers:
|
|
84
|
+
headers: token ? { 'Authorization': `Bearer ${token}` } : {},
|
|
76
85
|
resume: true,
|
|
77
86
|
onProgress: (bytes, total) => {
|
|
78
87
|
if (total > 0 && onProgress) {
|
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
import path from "path";
|
|
2
2
|
import fs from "fs";
|
|
3
3
|
import { HFDownloader } from "./hf-downloader.js";
|
|
4
|
-
import {
|
|
4
|
+
import { KaggleSource } from "../metadata/kaggle-source.js";
|
|
5
|
+
import { SecureKeysManager } from "../config/secure-keys.js";
|
|
5
6
|
export class DataIngestor {
|
|
6
7
|
projectRoot;
|
|
7
8
|
store;
|
|
8
9
|
rawDataDir;
|
|
9
10
|
hfDownloader;
|
|
10
|
-
|
|
11
|
+
kaggleSource;
|
|
12
|
+
secureKeys;
|
|
11
13
|
constructor(projectRoot, store) {
|
|
12
14
|
this.projectRoot = projectRoot;
|
|
13
15
|
this.store = store;
|
|
@@ -16,19 +18,26 @@ export class DataIngestor {
|
|
|
16
18
|
fs.mkdirSync(this.rawDataDir, { recursive: true });
|
|
17
19
|
}
|
|
18
20
|
this.hfDownloader = new HFDownloader();
|
|
19
|
-
this.
|
|
21
|
+
this.kaggleSource = new KaggleSource();
|
|
22
|
+
this.secureKeys = new SecureKeysManager();
|
|
20
23
|
}
|
|
21
24
|
/**
|
|
22
25
|
* Check if Kaggle credentials are available
|
|
23
26
|
*/
|
|
24
27
|
hasKaggleCredentials() {
|
|
25
|
-
|
|
28
|
+
if (process.env.KAGGLE_USERNAME && process.env.KAGGLE_KEY)
|
|
29
|
+
return true;
|
|
30
|
+
const keys = this.secureKeys.getAll();
|
|
31
|
+
if (keys.kaggle_username && keys.kaggle_key)
|
|
32
|
+
return true;
|
|
33
|
+
const kaggleJsonPath = path.join(process.env.HOME || process.env.USERPROFILE || "", ".kaggle", "kaggle.json");
|
|
34
|
+
return !!(kaggleJsonPath && fs.existsSync(kaggleJsonPath));
|
|
26
35
|
}
|
|
27
36
|
/**
|
|
28
37
|
* Get helpful error message if Kaggle credentials are missing
|
|
29
38
|
*/
|
|
30
39
|
getKaggleCredentialError() {
|
|
31
|
-
return
|
|
40
|
+
return "Kaggle support requires API key. Run 'vespermcp config keys' (30 seconds), or provide ~/.kaggle/kaggle.json.";
|
|
32
41
|
}
|
|
33
42
|
/**
|
|
34
43
|
* Ensures a dataset is available locally
|
|
@@ -67,10 +76,25 @@ export class DataIngestor {
|
|
|
67
76
|
}
|
|
68
77
|
}
|
|
69
78
|
else if (source === "kaggle") {
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
79
|
+
if (!this.hasKaggleCredentials()) {
|
|
80
|
+
const errorMsg = this.getKaggleCredentialError();
|
|
81
|
+
this.failDownload(datasetId, errorMsg);
|
|
82
|
+
throw new Error(errorMsg);
|
|
83
|
+
}
|
|
84
|
+
const targetDir = path.join(this.rawDataDir, datasetId.replace(/\//g, "_"));
|
|
85
|
+
this.store.registerDownload(datasetId, targetDir, "downloading");
|
|
86
|
+
try {
|
|
87
|
+
onProgress?.("Downloading from Kaggle...");
|
|
88
|
+
const result = await this.kaggleSource.download(datasetId, targetDir);
|
|
89
|
+
const stats = fs.statSync(result.local_path);
|
|
90
|
+
this.completeDownload(datasetId, result.local_path, stats.size);
|
|
91
|
+
onProgress?.("Kaggle download complete", 100);
|
|
92
|
+
return result.local_path;
|
|
93
|
+
}
|
|
94
|
+
catch (e) {
|
|
95
|
+
this.failDownload(datasetId, e.message);
|
|
96
|
+
throw e;
|
|
97
|
+
}
|
|
74
98
|
}
|
|
75
99
|
throw new Error(`Download logic for ${source} not yet implemented`);
|
|
76
100
|
}
|
|
@@ -23,8 +23,8 @@ export class KaggleDownloader {
|
|
|
23
23
|
getCredentialError() {
|
|
24
24
|
if (!this.username && !this.key) {
|
|
25
25
|
return "Kaggle credentials missing. Please set KAGGLE_USERNAME and KAGGLE_KEY environment variables.\n" +
|
|
26
|
-
"
|
|
27
|
-
"
|
|
26
|
+
"Tip: Get your API token from https://www.kaggle.com/settings -> API -> Create New Token\n" +
|
|
27
|
+
"Alternative: Download the dataset manually and use analyze_quality() on local files.";
|
|
28
28
|
}
|
|
29
29
|
if (!this.username) {
|
|
30
30
|
return "KAGGLE_USERNAME is missing. Please set it in your MCP config or environment variables.";
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
import { spawn } from "child_process";
|
|
2
|
+
import path from "path";
|
|
3
|
+
import fs from "fs";
|
|
4
|
+
import os from "os";
|
|
5
|
+
export class KaggleSource {
|
|
6
|
+
pythonPath = "python";
|
|
7
|
+
scriptPath;
|
|
8
|
+
constructor(buildDir = process.cwd()) {
|
|
9
|
+
const homeDir = os.homedir() || process.env.HOME || process.env.USERPROFILE || buildDir;
|
|
10
|
+
const dataRoot = path.join(homeDir, ".vesper");
|
|
11
|
+
const scriptPath0 = path.resolve(dataRoot, "python", "kaggle_engine.py");
|
|
12
|
+
const scriptPath1 = path.resolve(buildDir, "python", "kaggle_engine.py");
|
|
13
|
+
const scriptPath2 = path.resolve(buildDir, "..", "src", "python", "kaggle_engine.py");
|
|
14
|
+
if (fs.existsSync(scriptPath0)) {
|
|
15
|
+
this.scriptPath = scriptPath0;
|
|
16
|
+
}
|
|
17
|
+
else if (fs.existsSync(scriptPath1)) {
|
|
18
|
+
this.scriptPath = scriptPath1;
|
|
19
|
+
}
|
|
20
|
+
else if (fs.existsSync(scriptPath2)) {
|
|
21
|
+
this.scriptPath = scriptPath2;
|
|
22
|
+
}
|
|
23
|
+
else {
|
|
24
|
+
this.scriptPath = scriptPath0;
|
|
25
|
+
}
|
|
26
|
+
if (process.platform === "win32")
|
|
27
|
+
this.pythonPath = "py";
|
|
28
|
+
}
|
|
29
|
+
async discover(query, limit = 20) {
|
|
30
|
+
const result = await this.run(["discover", query, String(limit)]);
|
|
31
|
+
if (!result.ok) {
|
|
32
|
+
throw new Error(result.error || "Kaggle discover failed");
|
|
33
|
+
}
|
|
34
|
+
return (result.results || []);
|
|
35
|
+
}
|
|
36
|
+
async download(datasetRef, targetDir) {
|
|
37
|
+
const args = ["download", datasetRef];
|
|
38
|
+
if (targetDir)
|
|
39
|
+
args.push(targetDir);
|
|
40
|
+
const result = await this.run(args);
|
|
41
|
+
if (!result.ok) {
|
|
42
|
+
throw new Error(result.error || "Kaggle download failed");
|
|
43
|
+
}
|
|
44
|
+
return {
|
|
45
|
+
local_path: result.local_path,
|
|
46
|
+
target_dir: result.target_dir,
|
|
47
|
+
};
|
|
48
|
+
}
|
|
49
|
+
async run(args) {
|
|
50
|
+
return new Promise((resolve, reject) => {
|
|
51
|
+
const processRef = spawn(this.pythonPath, [this.scriptPath, ...args]);
|
|
52
|
+
let stdout = "";
|
|
53
|
+
let stderr = "";
|
|
54
|
+
processRef.stdout.on("data", (d) => (stdout += d.toString()));
|
|
55
|
+
processRef.stderr.on("data", (d) => (stderr += d.toString()));
|
|
56
|
+
processRef.on("close", (code) => {
|
|
57
|
+
if (code !== 0) {
|
|
58
|
+
reject(new Error(stderr || stdout || `kaggle_engine exited with code ${code}`));
|
|
59
|
+
return;
|
|
60
|
+
}
|
|
61
|
+
try {
|
|
62
|
+
resolve(JSON.parse(stdout));
|
|
63
|
+
}
|
|
64
|
+
catch {
|
|
65
|
+
reject(new Error(`Failed to parse kaggle_engine output: ${stdout}`));
|
|
66
|
+
}
|
|
67
|
+
});
|
|
68
|
+
});
|
|
69
|
+
}
|
|
70
|
+
}
|
|
@@ -93,15 +93,23 @@ export class HuggingFaceScraper {
|
|
|
93
93
|
const results = [];
|
|
94
94
|
let processed = 0;
|
|
95
95
|
let skippedMVP = 0;
|
|
96
|
+
let rateLimitHits = 0;
|
|
97
|
+
let otherErrors = 0;
|
|
96
98
|
try {
|
|
97
99
|
// Fetch more datasets to account for filtering
|
|
98
100
|
const fetchLimit = applyMVPFilters ? limit * 30 : limit * 10;
|
|
99
|
-
const CONCURRENCY = 25; // Increased for high-volume indexing
|
|
100
|
-
const queue = [];
|
|
101
101
|
// Support HuggingFace token from environment variable
|
|
102
102
|
const hfToken = process.env.HF_TOKEN || process.env.HUGGINGFACE_TOKEN;
|
|
103
|
+
// CRITICAL: Low concurrency without token to avoid rate limits
|
|
104
|
+
// With token: 10 concurrent (HF allows more)
|
|
105
|
+
// Without token: 2 concurrent (stay under ~30 req/min limit)
|
|
106
|
+
const CONCURRENCY = hfToken ? 10 : 2;
|
|
107
|
+
const queue = [];
|
|
108
|
+
if (!hfToken) {
|
|
109
|
+
console.error(`[HF Scraper] ⚠️ No HF_TOKEN set - using conservative rate limits`);
|
|
110
|
+
}
|
|
103
111
|
// Add delay between batches to avoid rate limits
|
|
104
|
-
const BATCH_DELAY =
|
|
112
|
+
const BATCH_DELAY = hfToken ? 500 : 2000;
|
|
105
113
|
for await (const ds of listDatasets({
|
|
106
114
|
limit: fetchLimit,
|
|
107
115
|
additionalFields: ["description", "tags"],
|
|
@@ -242,11 +250,20 @@ export class HuggingFaceScraper {
|
|
|
242
250
|
results.push(metadata);
|
|
243
251
|
}
|
|
244
252
|
catch (e) {
|
|
245
|
-
//
|
|
253
|
+
// Track all errors for user feedback
|
|
246
254
|
if (e?.status === 429 || e?.message?.includes('rate limit')) {
|
|
247
|
-
|
|
255
|
+
rateLimitHits++;
|
|
256
|
+
if (rateLimitHits <= 3) {
|
|
257
|
+
console.error(`[HF Scraper] Rate limit hit for ${repoId}`);
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
else {
|
|
261
|
+
otherErrors++;
|
|
262
|
+
// Log first few non-rate-limit errors for debugging
|
|
263
|
+
if (otherErrors <= 2) {
|
|
264
|
+
console.error(`[HF Scraper] Error for ${repoId}: ${e.message?.slice(0, 80)}`);
|
|
265
|
+
}
|
|
248
266
|
}
|
|
249
|
-
// Silently skip other errors
|
|
250
267
|
}
|
|
251
268
|
})();
|
|
252
269
|
queue.push(processTask);
|
|
@@ -265,14 +282,21 @@ export class HuggingFaceScraper {
|
|
|
265
282
|
catch (e) {
|
|
266
283
|
// Handle rate limit errors with better messaging
|
|
267
284
|
if (e?.status === 429 || e?.message?.includes('rate limit')) {
|
|
268
|
-
console.error("Scraping failed due to rate limit:", e.message);
|
|
269
|
-
console.error("
|
|
285
|
+
console.error("[HF Scraper] ❌ Scraping failed due to rate limit:", e.message);
|
|
286
|
+
console.error("[HF Scraper] 💡 Set HF_TOKEN environment variable for unlimited access");
|
|
270
287
|
}
|
|
271
288
|
else {
|
|
272
|
-
console.error("Scraping failed
|
|
289
|
+
console.error("[HF Scraper] ❌ Scraping failed:", e.message);
|
|
273
290
|
}
|
|
274
291
|
}
|
|
275
|
-
|
|
292
|
+
// User-friendly summary
|
|
293
|
+
console.error(`[HF Scraper] ✅ Complete: ${results.length} datasets found`);
|
|
294
|
+
if (rateLimitHits > 0) {
|
|
295
|
+
console.error(`[HF Scraper] ⚠️ ${rateLimitHits} requests rate-limited. Set HF_TOKEN for better results.`);
|
|
296
|
+
}
|
|
297
|
+
if (otherErrors > 0) {
|
|
298
|
+
console.error(`[HF Scraper] ⚠️ ${otherErrors} datasets skipped due to errors`);
|
|
299
|
+
}
|
|
276
300
|
// Sort by downloads descending
|
|
277
301
|
return results.sort((a, b) => b.downloads - a.downloads);
|
|
278
302
|
}
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import sys
|
|
3
|
+
import json
|
|
4
|
+
import base64
|
|
5
|
+
import hashlib
|
|
6
|
+
import secrets
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Dict, Optional
|
|
9
|
+
|
|
10
|
+
# Service name passed to keyring.get_password / keyring.set_password.
SERVICE_NAME = "vesper"

# Logical key name -> environment variables consulted for it, in priority order.
KEY_ALIASES = {
    "hf_token": ["HF_TOKEN", "HUGGINGFACE_TOKEN"],
    "kaggle_username": ["KAGGLE_USERNAME"],
    "kaggle_key": ["KAGGLE_KEY"],
}

# keyring is optional; any failure while importing it disables that backend.
try:
    import keyring  # type: ignore
    HAS_KEYRING = True
except Exception:
    HAS_KEYRING = False

# cryptography is optional; without it the xor fallback is used.
try:
    from cryptography.fernet import Fernet, InvalidToken  # type: ignore
    HAS_FERNET = True
except Exception:
    HAS_FERNET = False
    Fernet = None  # type: ignore

    class InvalidToken(Exception):  # type: ignore
        """Stand-in so `except InvalidToken` stays valid without cryptography."""
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _config_path() -> Path:
|
|
32
|
+
return Path.home() / ".vesper" / "config.toml"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _secret_path() -> Path:
|
|
36
|
+
return Path.home() / ".vesper" / ".config_key"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _ensure_parent(path: Path) -> None:
|
|
40
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _read_fallback_toml() -> Dict[str, str]:
|
|
44
|
+
path = _config_path()
|
|
45
|
+
if not path.exists():
|
|
46
|
+
return {}
|
|
47
|
+
|
|
48
|
+
values: Dict[str, str] = {}
|
|
49
|
+
in_keys = False
|
|
50
|
+
method = ""
|
|
51
|
+
|
|
52
|
+
for raw in path.read_text(encoding="utf-8").splitlines():
|
|
53
|
+
line = raw.strip()
|
|
54
|
+
if not line or line.startswith("#"):
|
|
55
|
+
continue
|
|
56
|
+
if line.startswith("[") and line.endswith("]"):
|
|
57
|
+
in_keys = (line == "[keys]")
|
|
58
|
+
continue
|
|
59
|
+
if line.startswith("method") and "=" in line:
|
|
60
|
+
method = line.split("=", 1)[1].strip().strip('"').strip("'")
|
|
61
|
+
continue
|
|
62
|
+
if not in_keys or "=" not in line:
|
|
63
|
+
continue
|
|
64
|
+
|
|
65
|
+
key, val = line.split("=", 1)
|
|
66
|
+
key = key.strip()
|
|
67
|
+
val = val.strip().strip('"').strip("'")
|
|
68
|
+
values[key] = val
|
|
69
|
+
|
|
70
|
+
if method:
|
|
71
|
+
values["__method__"] = method
|
|
72
|
+
|
|
73
|
+
return values
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _get_or_create_local_secret() -> str:
|
|
77
|
+
secret_file = _secret_path()
|
|
78
|
+
_ensure_parent(secret_file)
|
|
79
|
+
|
|
80
|
+
if secret_file.exists():
|
|
81
|
+
return secret_file.read_text(encoding="utf-8").strip()
|
|
82
|
+
|
|
83
|
+
secret = base64.urlsafe_b64encode(secrets.token_bytes(32)).decode("utf-8")
|
|
84
|
+
secret_file.write_text(secret, encoding="utf-8")
|
|
85
|
+
try:
|
|
86
|
+
os.chmod(secret_file, 0o600)
|
|
87
|
+
except Exception:
|
|
88
|
+
pass
|
|
89
|
+
return secret
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def _xor_encrypt(plain: str, secret: str) -> str:
|
|
93
|
+
key = hashlib.sha256(secret.encode("utf-8")).digest()
|
|
94
|
+
data = plain.encode("utf-8")
|
|
95
|
+
out = bytes([data[i] ^ key[i % len(key)] for i in range(len(data))])
|
|
96
|
+
return base64.urlsafe_b64encode(out).decode("utf-8")
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _xor_decrypt(cipher_text: str, secret: str) -> str:
|
|
100
|
+
key = hashlib.sha256(secret.encode("utf-8")).digest()
|
|
101
|
+
data = base64.urlsafe_b64decode(cipher_text.encode("utf-8"))
|
|
102
|
+
out = bytes([data[i] ^ key[i % len(key)] for i in range(len(data))])
|
|
103
|
+
return out.decode("utf-8")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def _encrypt_value(value: str, secret: str) -> Dict[str, str]:
|
|
107
|
+
if HAS_FERNET:
|
|
108
|
+
token = Fernet(secret.encode("utf-8")).encrypt(value.encode("utf-8")).decode("utf-8")
|
|
109
|
+
return {"method": "fernet", "value": token}
|
|
110
|
+
# fallback encryption (weaker than fernet, but still not plaintext)
|
|
111
|
+
return {"method": "xor", "value": _xor_encrypt(value, secret)}
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _decrypt_value(value: str, method: str, secret: str) -> Optional[str]:
|
|
115
|
+
try:
|
|
116
|
+
if method == "fernet" and HAS_FERNET:
|
|
117
|
+
return Fernet(secret.encode("utf-8")).decrypt(value.encode("utf-8")).decode("utf-8")
|
|
118
|
+
if method == "xor":
|
|
119
|
+
return _xor_decrypt(value, secret)
|
|
120
|
+
return None
|
|
121
|
+
except InvalidToken:
|
|
122
|
+
return None
|
|
123
|
+
except Exception:
|
|
124
|
+
return None
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _write_fallback_toml(values: Dict[str, str]) -> None:
|
|
128
|
+
path = _config_path()
|
|
129
|
+
_ensure_parent(path)
|
|
130
|
+
|
|
131
|
+
method = values.get("__method__", "fernet" if HAS_FERNET else "xor")
|
|
132
|
+
lines = [
|
|
133
|
+
"# Vesper optional API keys fallback storage",
|
|
134
|
+
"# Encrypted fallback (keyring is preferred)",
|
|
135
|
+
"[meta]",
|
|
136
|
+
f'method = "{method}"',
|
|
137
|
+
"[keys]",
|
|
138
|
+
]
|
|
139
|
+
for key in sorted(values.keys()):
|
|
140
|
+
if key.startswith("__"):
|
|
141
|
+
continue
|
|
142
|
+
val = str(values[key]).replace('"', '\\"')
|
|
143
|
+
lines.append(f'{key} = "{val}"')
|
|
144
|
+
|
|
145
|
+
path.write_text("\n".join(lines) + "\n", encoding="utf-8")
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def _get_from_env(name: str) -> Optional[str]:
|
|
149
|
+
for env_key in KEY_ALIASES.get(name, []):
|
|
150
|
+
val = os.getenv(env_key)
|
|
151
|
+
if val:
|
|
152
|
+
return val
|
|
153
|
+
return None
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def get_key(name: str) -> Optional[str]:
    """Look up a key, trying each source in priority order.

    Order: environment variables, then keyring (when importable), then the
    encrypted fallback file.

    Returns:
        The key value, or None when no source provides it or the stored
        value cannot be decrypted.
    """
    # 1) env vars (highest priority)
    from_env = _get_from_env(name)
    if from_env:
        return from_env

    # 2) keyring (secure); backend errors fall through to the file store
    if HAS_KEYRING:
        try:
            stored = keyring.get_password(SERVICE_NAME, name)
        except Exception:
            stored = None
        if stored:
            return stored

    # 3) encrypted fallback config.toml
    fallback = _read_fallback_toml()
    encrypted = fallback.get(name)
    if not encrypted:
        return None

    secret = _get_or_create_local_secret()
    default_method = "fernet" if HAS_FERNET else "xor"
    return _decrypt_value(encrypted, fallback.get("__method__", default_method), secret)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def set_key(name: str, value: str) -> Dict[str, str]:
    """Store a key in keyring when possible, otherwise in the fallback file.

    Args:
        name: Logical key name.
        value: Secret value; must be non-empty.

    Returns:
        A dict of strings: "ok" ("true"/"false"), "method" ("keyring",
        "toml:<cipher>" or "none") and, on failure, "error".
    """
    if not value:
        return {"ok": "false", "method": "none", "error": "Empty value"}

    if HAS_KEYRING:
        try:
            keyring.set_password(SERVICE_NAME, name, value)
            return {"ok": "true", "method": "keyring"}
        except Exception:
            # Best effort: an unusable keyring backend falls back to the file.
            pass

    fallback = _read_fallback_toml()
    secret = _get_or_create_local_secret()
    enc = _encrypt_value(value, secret)

    # The file records one cipher for all entries. When the cipher in use
    # differs from the recorded one, re-encrypt the entries that can still be
    # read so that updating the method does not make them undecryptable.
    previous_method = fallback.get("__method__")
    if previous_method and previous_method != enc["method"]:
        for other in list(fallback):
            if other == name or other.startswith("__"):
                continue
            plain = _decrypt_value(fallback[other], previous_method, secret)
            if plain is not None:
                fallback[other] = _encrypt_value(plain, secret)["value"]

    fallback["__method__"] = enc["method"]
    fallback[name] = enc["value"]
    _write_fallback_toml(fallback)
    return {"ok": "true", "method": f'toml:{enc["method"]}'}
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def has_key(name: str) -> bool:
    """Tell whether `get_key(name)` yields a non-empty value."""
    return True if get_key(name) else False
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def get_all() -> Dict[str, Optional[str]]:
    """Return the resolved value (or None) of every supported key."""
    names = ("hf_token", "kaggle_username", "kaggle_key")
    return {key_name: get_key(key_name) for key_name in names}
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def _print_json(data):
|
|
214
|
+
print(json.dumps(data))
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def main() -> None:
    """Command-line entry point: `config.py <get|set|has|all> [name] [value]`.

    Prints exactly one JSON object on stdout. Exits with status 1 on usage
    errors (missing arguments or an unknown command).
    """
    argv = sys.argv
    if len(argv) < 2:
        _print_json({
            "ok": False,
            "error": "Usage: config.py <get|set|has|all> [name] [value]",
        })
        sys.exit(1)

    cmd = argv[1].lower()

    # "all" is the only command that takes no key name.
    if cmd == "all":
        _print_json({"ok": True, "data": get_all()})
        return

    if len(argv) < 3:
        _print_json({"ok": False, "error": "Missing key name"})
        sys.exit(1)

    name = argv[2]

    if cmd in ("get", "has"):
        lookup = get_key if cmd == "get" else has_key
        _print_json({"ok": True, "name": name, "value": lookup(name)})
    elif cmd == "set":
        if len(argv) < 4:
            _print_json({"ok": False, "error": "Missing value for set"})
            sys.exit(1)
        result = set_key(name, argv[3])
        # set_key reports "ok" as the string "true"/"false"; emit a real bool.
        _print_json({
            "ok": result.get("ok") == "true",
            "name": name,
            "method": result.get("method"),
            "error": result.get("error"),
        })
    else:
        _print_json({"ok": False, "error": f"Unknown command: {cmd}"})
        sys.exit(1)
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
if __name__ == "__main__":
|
|
259
|
+
main()
|