thumbgate 1.12.0 → 1.12.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/.well-known/mcp/server-card.json +1 -1
- package/adapters/README.md +1 -1
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/mcp/server-stdio.js +1 -1
- package/adapters/opencode/opencode.json +1 -1
- package/package.json +9 -3
- package/public/index.html +2 -2
- package/scripts/context-engine.js +710 -0
- package/scripts/durability/step.js +171 -0
- package/scripts/gates-engine.js +81 -2
- package/scripts/hf-papers.js +317 -0
- package/scripts/mcp-config.js +3 -3
- package/scripts/session-report.js +120 -0
- package/scripts/swarm-coordinator.js +81 -0
- package/scripts/token-savings.js +179 -0
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* step.js — lightweight durable-step helper.
|
|
5
|
+
*
|
|
6
|
+
* Inspired by the "use step" pattern in Vercel Workflows, without adopting
|
|
7
|
+
* the full durable-execution runtime. Gives each external call (HTTP,
|
|
8
|
+
* LanceDB, LLM) a uniform retry + idempotency wrapper:
|
|
9
|
+
*
|
|
10
|
+
* const result = await runStep('zernio.publishPost', {
|
|
11
|
+
* retries: 3,
|
|
12
|
+
* idempotencyKey: idempotencyKey(content, platforms),
|
|
13
|
+
* }, async ({ attempt }) => {
|
|
14
|
+
* return zernioFetch('POST', '/posts', body, { idempotencyKey: ... });
|
|
15
|
+
* });
|
|
16
|
+
*
|
|
17
|
+
* Why a custom helper instead of Vercel Workflows / Temporal / Inngest?
|
|
18
|
+
* - We run on Railway, not Vercel.
|
|
19
|
+
* - SQLite + existing workflow tables already cover the durable state
|
|
20
|
+
* we need; the gap is per-call retry/idempotency, not orchestration.
|
|
21
|
+
* - A 60-line helper captures ~70% of the reliability benefit without
|
|
22
|
+
* the platform migration or new ops surface.
|
|
23
|
+
*
|
|
24
|
+
* Error classification:
|
|
25
|
+
* - Errors with `retryable: true` or a `code` in TRANSIENT_CODES retry.
|
|
26
|
+
* - Errors with `nonRetryable: true` bail immediately.
|
|
27
|
+
* - HTTP status (from `err.status` or parsed from message):
|
|
28
|
+
* * 429 or 5xx → retry
|
|
29
|
+
* * 4xx → fail (no point retrying validation errors)
|
|
30
|
+
* - Unknown errors → retry (capped by `retries` count — fail-open on
|
|
31
|
+
* uncertainty, but bounded).
|
|
32
|
+
*/
|
|
33
|
+
|
|
34
|
+
const crypto = require('node:crypto');
|
|
35
|
+
|
|
36
|
+
const TRANSIENT_CODES = new Set([
|
|
37
|
+
'ECONNRESET',
|
|
38
|
+
'ETIMEDOUT',
|
|
39
|
+
'ENOTFOUND',
|
|
40
|
+
'EAI_AGAIN',
|
|
41
|
+
'ECONNREFUSED',
|
|
42
|
+
'EPIPE',
|
|
43
|
+
'UND_ERR_SOCKET',
|
|
44
|
+
'UND_ERR_CONNECT_TIMEOUT',
|
|
45
|
+
]);
|
|
46
|
+
|
|
47
|
+
const DEFAULT_BACKOFF_MS = Object.freeze([250, 1000, 4000]);
|
|
48
|
+
|
|
49
|
+
/**
 * Default retry classifier for failed steps.
 *
 * Precedence: explicit `nonRetryable` / `retryable` flags, then transient
 * error codes, then an HTTP status taken from `err.status` or — failing
 * that — the first 4xx/5xx-looking number in `err.message`.
 *
 * @param {*} err The value thrown by the step.
 * @returns {'retry'|'fail'} 'fail' for falsy errors, `nonRetryable` errors
 *   and 4xx statuses other than 429; 'retry' for everything else.
 */
function defaultClassify(err) {
  if (!err || err.nonRetryable === true) return 'fail';
  if (err.retryable === true) return 'retry';
  if (err.code && TRANSIENT_CODES.has(err.code)) return 'retry';

  // Prefer the explicit status property; fall back to scanning the message.
  const propStatus = Number.isFinite(err.status) ? err.status : null;
  const text = typeof err.message === 'string' ? err.message : '';
  const found = /\b(5\d{2}|4\d{2})\b/.exec(text);
  let status = propStatus;
  if (!status) {
    status = found ? Number(found[1]) : null;
  }

  // Only a non-429 client error is a definitive failure; 429, 5xx and
  // anything unrecognised are retried (bounded by the caller's retry count).
  const isClientError = Boolean(status) && status >= 400 && status < 500;
  if (isClientError && status !== 429) return 'fail';
  return 'retry';
}
|
|
68
|
+
|
|
69
|
+
/**
 * Promise-based delay.
 *
 * @param {number} ms Delay handed to `setTimeout`.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(() => done(), ms);
  });
}
|
|
72
|
+
|
|
73
|
+
/**
 * Build a stable 32-hex-char idempotency key from arbitrary inputs.
 * Same inputs → same key. Intended for use as an Idempotency-Key HTTP
 * header, a row id, or a cache key for mid-flight deduplication.
 *
 * Encoding of each part (unchanged from earlier releases, so existing keys
 * stay valid):
 *   - null / undefined → empty string
 *   - string           → used verbatim
 *   - anything else    → JSON.stringify(part)
 * Each part is followed by a NUL separator so that ("a", "b") and ("ab")
 * hash differently.
 *
 * Values that JSON cannot represent at the top level (functions, symbols,
 * BigInt) previously made this function throw; they now fall back to
 * String(part).
 *
 * Usage:
 *   idempotencyKey(content, platformList, scheduledFor)
 *
 * @param {...*} parts Values that identify the operation.
 * @returns {string} First 32 hex characters of the SHA-256 digest.
 */
function idempotencyKey(...parts) {
  const h = crypto.createHash('sha256');
  for (const p of parts) {
    let encoded;
    if (p == null) {
      encoded = '';
    } else if (typeof p === 'string') {
      encoded = p;
    } else if (typeof p === 'bigint') {
      // JSON.stringify throws on BigInt.
      encoded = String(p);
    } else {
      // JSON.stringify yields undefined for functions and symbols.
      encoded = JSON.stringify(p) ?? String(p);
    }
    h.update(encoded);
    h.update('\0'); // field separator — prevents ("a","b") colliding with ("ab")
  }
  return h.digest('hex').slice(0, 32);
}
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* Execute `fn` with retry + backoff + classification. Returns the value
|
|
98
|
+
* `fn` resolves to, or throws the last error after exhausting retries /
|
|
99
|
+
* hitting a non-retryable verdict.
|
|
100
|
+
*
|
|
101
|
+
* @param {string} name Step name, used in logs. e.g. 'zernio.publishPost'.
|
|
102
|
+
* @param {object|function} options { retries, backoffMs, classify, onRetry, onFail, logger }
|
|
103
|
+
* (may be passed directly as the callback shorthand)
|
|
104
|
+
* @param {function({attempt:number}):Promise} fn The actual work.
|
|
105
|
+
*/
|
|
106
|
+
/**
 * Pick the most useful loggable form of a thrown value.
 *
 * @param {*} err Thrown value (Error, string, anything).
 * @returns {*} `err.message` when it is neither null nor undefined,
 *   otherwise `err` itself.
 */
function errMessage(err) {
  if (err !== null && err !== undefined) {
    const { message } = err;
    if (message !== null && message !== undefined) {
      return message;
    }
  }
  return err;
}
|
|
109
|
+
|
|
110
|
+
/**
 * Resolve the delay before the next attempt.
 *
 * Accepts either a schedule array (indexed by attempt, last entry reused
 * once the schedule is exhausted) or a single number. Anything that does
 * not resolve to a finite, positive number becomes 0 so callers never
 * receive `undefined`/`NaN` as a delay.
 */
function resolveStepBackoffMs(backoffMs, attempt) {
  const schedule = Array.isArray(backoffMs) ? backoffMs : [backoffMs];
  const picked = Number(schedule[Math.min(attempt, schedule.length - 1)]);
  return Number.isFinite(picked) && picked > 0 ? picked : 0;
}

/**
 * Decide what happens after a failed attempt and notify observers.
 *
 * @param {object} ctx
 * @param {*} ctx.err The error thrown by the attempt.
 * @param {number} ctx.attempt Zero-based index of the attempt that failed.
 * @param {number} ctx.retries Maximum number of retries allowed.
 * @param {function(*): string} ctx.classify Returns 'retry' or 'fail'.
 * @param {number[]|number} ctx.backoffMs Delay schedule (or a single delay).
 * @param {string} ctx.name Step name, used in callbacks and log lines.
 * @param {function} [ctx.onRetry] Called with { name, attempt, err, waitMs, verdict }.
 * @param {function} [ctx.onFail] Called with { name, attempt, err, verdict }.
 * @param {function} [ctx.logger] Called with a single formatted string.
 * @returns {{terminal: true}|{terminal: false, waitMs: number}}
 */
function handleStepError({ err, attempt, retries, classify, backoffMs, name, onRetry, onFail, logger }) {
  const verdict = classify(err);
  const log = typeof logger === 'function' ? logger : null;

  // Stop when the classifier says so or when the retry budget is spent.
  if (verdict === 'fail' || attempt >= retries) {
    if (typeof onFail === 'function') onFail({ name, attempt, err, verdict });
    if (log) {
      log(`[step:${name}] FAIL attempt=${attempt} verdict=${verdict} err=${errMessage(err)}`);
    }
    return { terminal: true };
  }

  const waitMs = resolveStepBackoffMs(backoffMs, attempt);
  if (typeof onRetry === 'function') onRetry({ name, attempt, err, waitMs, verdict });
  if (log) {
    log(`[step:${name}] RETRY attempt=${attempt} waitMs=${waitMs} err=${errMessage(err)}`);
  }
  return { terminal: false, waitMs };
}
|
|
127
|
+
|
|
128
|
+
/**
 * Execute `fn` with retry + backoff + classification. Resolves to the value
 * `fn` resolves to, or rejects with the last error once retries are
 * exhausted or the classifier returns a non-retryable verdict.
 *
 * `fn` always runs at least once: a negative `retries` is treated as 0,
 * a fractional value is rounded down, and NaN falls back to the default.
 *
 * @param {string} name Step name, used in logs. e.g. 'zernio.publishPost'.
 * @param {object|function} options Either the options object or, as a
 *   shorthand, the work function itself (in which case defaults apply).
 * @param {number} [options.retries=3] Maximum retries after the first attempt.
 * @param {number[]} [options.backoffMs] Delay schedule indexed by attempt.
 * @param {function(*): string} [options.classify] Returns 'retry' or 'fail'.
 * @param {function} [options.onAttempt] Called with { name, attempt } before each attempt.
 * @param {function} [options.onRetry] Forwarded to the error handler.
 * @param {function} [options.onFail] Forwarded to the error handler.
 * @param {function} [options.logger] Forwarded to the error handler.
 * @param {function(number): Promise} [options.sleepFn] Delay implementation (injectable for tests).
 * @param {function({attempt:number}):Promise} fn The actual work.
 * @returns {Promise<*>} Whatever `fn` resolves to.
 * @throws {TypeError} When no work function is supplied.
 */
async function runStep(name, options, fn) {
  const DEFAULT_RETRIES = 3;

  // Shorthand: runStep(name, fn) — resolved into locals instead of
  // reassigning the parameters.
  const usesShorthand = typeof options === 'function';
  const work = usesShorthand ? options : fn;
  const settings = usesShorthand ? {} : (options || {});

  const {
    retries = DEFAULT_RETRIES,
    backoffMs = DEFAULT_BACKOFF_MS,
    classify = defaultClassify,
    onAttempt,
    onRetry,
    onFail,
    logger,
    sleepFn = sleep,
  } = settings;

  if (typeof work !== 'function') {
    throw new TypeError(`runStep(${name}): fn must be a function`);
  }

  // Normalise the retry budget so the loop below always runs at least once
  // and never sleeps after the final attempt.
  const requestedRetries = Number(retries);
  const maxRetries = Number.isNaN(requestedRetries)
    ? DEFAULT_RETRIES
    : Math.max(0, Math.floor(requestedRetries));

  let lastErr;
  for (let attempt = 0; attempt <= maxRetries; attempt++) {
    if (typeof onAttempt === 'function') onAttempt({ name, attempt });
    try {
      return await work({ attempt });
    } catch (err) {
      lastErr = err;
      const outcome = handleStepError({
        err, attempt, retries: maxRetries, classify, backoffMs, name, onRetry, onFail, logger,
      });
      if (outcome.terminal) throw err;
      await sleepFn(outcome.waitMs);
    }
  }
  // Defensive: the final attempt is always terminal, so this is not expected
  // to be reached.
  throw lastErr;
}
|
|
164
|
+
|
|
165
|
+
module.exports = {
|
|
166
|
+
runStep,
|
|
167
|
+
idempotencyKey,
|
|
168
|
+
defaultClassify,
|
|
169
|
+
TRANSIENT_CODES,
|
|
170
|
+
DEFAULT_BACKOFF_MS,
|
|
171
|
+
};
|
package/scripts/gates-engine.js
CHANGED
|
@@ -83,6 +83,7 @@ const DEFAULT_PROTECTED_FILE_GLOBS = [
|
|
|
83
83
|
];
|
|
84
84
|
const EDIT_LIKE_TOOLS = new Set(['Edit', 'Write', 'MultiEdit']);
|
|
85
85
|
const HIGH_RISK_BASH_PATTERN = /\b(?:git\s+(?:add|commit|push)|gh\s+pr\s+(?:create|merge)|npm\s+publish|yarn\s+publish|pnpm\s+publish|rm\s+-rf)\b/i;
|
|
86
|
+
const REMOTE_SIDE_EFFECT_BASH_PATTERN = /\b(?:git\s+push\b|gh\s+pr\s+(?:create|merge|close|reopen|ready|edit)\b|gh\s+release\s+(?:create|delete|edit|upload)\b|npm\s+publish\b|yarn\s+publish\b|pnpm\s+publish\b)\b/i;
|
|
86
87
|
const BOOSTED_RISK_BLOCK_SCORE = 0.8;
|
|
87
88
|
const BOOSTED_RISK_MIN_EXAMPLES = 3;
|
|
88
89
|
const PR_THREAD_RESOLUTION_ACTION = 'pr_thread_resolution_verified_after_commit';
|
|
@@ -826,6 +827,59 @@ function evaluatePendingPrThreadResolutionGate(toolName, toolInput = {}) {
|
|
|
826
827
|
};
|
|
827
828
|
}
|
|
828
829
|
|
|
830
|
+
/**
 * List which inputs currently mark the work as local-only.
 *
 * Tolerates `null` for either argument (default parameters only cover
 * `undefined`), so a missing governance state or constraint set yields an
 * empty list instead of a TypeError.
 *
 * @param {object|null} [governanceState] Read for `taskScope.localOnly` and
 *   `branchGovernance.localOnly` (truthy values count).
 * @param {object|null} [constraints] Read for `local_only.value === true`.
 * @returns {string[]} Human-readable labels, in a fixed order:
 *   'task scope', 'branch governance', 'local_only constraint'.
 */
function getLocalOnlyScopeSources(governanceState = {}, constraints = {}) {
  const checks = [
    ['task scope', Boolean(governanceState?.taskScope?.localOnly)],
    ['branch governance', Boolean(governanceState?.branchGovernance?.localOnly)],
    // The constraint must be strictly `true`; truthy strings do not count.
    ['local_only constraint', constraints?.local_only?.value === true],
  ];
  return checks.filter(([, active]) => active).map(([label]) => label);
}
|
|
843
|
+
|
|
844
|
+
/**
 * Report whether a tool call is a Bash command that matches
 * REMOTE_SIDE_EFFECT_BASH_PATTERN.
 *
 * A `null` toolInput (not covered by the default parameter) is treated as
 * an empty command rather than raising a TypeError.
 *
 * @param {string} toolName Only 'Bash' can match.
 * @param {object|null} [toolInput] Its `command` is coerced to a string.
 * @returns {boolean} true when the command text matches the pattern.
 */
function isRemoteSideEffectCommand(toolName, toolInput = {}) {
  if (toolName !== 'Bash') return false;
  const command = String(toolInput?.command || '');
  return REMOTE_SIDE_EFFECT_BASH_PATTERN.test(command);
}
|
|
848
|
+
|
|
849
|
+
/**
 * Structural gate: deny remote side-effect commands while any local-only
 * source is active.
 *
 * @param {string} toolName Tool being invoked.
 * @param {object} [toolInput] Tool input; `command` is quoted in the result.
 * @param {object} [governanceState] Passed to getLocalOnlyScopeSources.
 * @param {object} [constraints] Passed to getLocalOnlyScopeSources.
 * @returns {object|null} A deny result ({ decision, gate, message, severity,
 *   reasoning }) or null when the command is not a remote side effect or no
 *   local-only source is active.
 */
function evaluateLocalOnlyRemoteSideEffectGate(toolName, toolInput = {}, governanceState = {}, constraints = {}) {
  // Local-only sources are only consulted for remote side-effect commands.
  const localOnlySources = isRemoteSideEffectCommand(toolName, toolInput)
    ? getLocalOnlyScopeSources(governanceState, constraints)
    : [];
  if (localOnlySources.length === 0) {
    return null;
  }

  // Quote at most 160 characters of the trimmed command.
  const blockedCommand = String(toolInput.command || '').trim().slice(0, 160);
  const reasoning = [
    `Local-only source: ${localOnlySources.join(', ')}`,
    `Blocked command: ${blockedCommand}`,
    'Remote side effects are denied before configurable gates so wrapped commands cannot bypass local-only work boundaries',
  ];

  return {
    decision: 'deny',
    gate: 'local-only-remote-side-effect',
    message: 'Task scope is local-only; remote git, PR, release, and publish actions are blocked until the local-only scope is cleared or explicitly changed.',
    severity: 'critical',
    reasoning,
  };
}
|
|
867
|
+
|
|
868
|
+
/**
 * Record a structural gate denial: bump the gate's 'block' stat, write an
 * audit event, pass the audit record to auditToFeedback, and hand the
 * result back unchanged.
 *
 * @param {string} toolName Tool that was blocked.
 * @param {object} toolInput Input of the blocked call.
 * @param {object} result Gate result; `gate`, `message`, `severity` are read.
 * @returns {object} The same `result` object.
 */
function recordStructuralGateBlock(toolName, toolInput, result) {
  recordStat(result.gate, 'block');

  const auditEvent = {
    toolName,
    toolInput,
    decision: 'deny',
    gateId: result.gate,
    message: result.message,
    severity: result.severity,
    source: 'gates-engine',
  };
  auditToFeedback(recordAuditEvent(auditEvent));

  return result;
}
|
|
882
|
+
|
|
829
883
|
function isScopeEnforcedAction(toolName, toolInput = {}, affectedFiles = []) {
|
|
830
884
|
if (EDIT_LIKE_TOOLS.has(toolName) && affectedFiles.length > 0) return true;
|
|
831
885
|
if (toolName !== 'Bash') return false;
|
|
@@ -1333,7 +1387,18 @@ async function evaluateGatesAsync(toolName, toolInput, configPath) {
|
|
|
1333
1387
|
}
|
|
1334
1388
|
|
|
1335
1389
|
const constraints = loadConstraints();
|
|
1390
|
+
const governanceState = loadGovernanceState();
|
|
1336
1391
|
registerPrThreadResolutionClaimGate(toolName, toolInput);
|
|
1392
|
+
const localOnlyRemoteSideEffectGate = evaluateLocalOnlyRemoteSideEffectGate(
|
|
1393
|
+
toolName,
|
|
1394
|
+
toolInput,
|
|
1395
|
+
governanceState,
|
|
1396
|
+
constraints,
|
|
1397
|
+
);
|
|
1398
|
+
if (localOnlyRemoteSideEffectGate) {
|
|
1399
|
+
return recordStructuralGateBlock(toolName, toolInput, localOnlyRemoteSideEffectGate);
|
|
1400
|
+
}
|
|
1401
|
+
|
|
1337
1402
|
const pendingThreadResolutionGate = evaluatePendingPrThreadResolutionGate(toolName, toolInput);
|
|
1338
1403
|
if (pendingThreadResolutionGate) {
|
|
1339
1404
|
recordStat(pendingThreadResolutionGate.gate, 'block');
|
|
@@ -1445,7 +1510,7 @@ async function evaluateGatesAsync(toolName, toolInput, configPath) {
|
|
|
1445
1510
|
}
|
|
1446
1511
|
|
|
1447
1512
|
const sentinelReport = evaluateWorkflowSentinel(toolName, toolInput, {
|
|
1448
|
-
governanceState
|
|
1513
|
+
governanceState,
|
|
1449
1514
|
});
|
|
1450
1515
|
const sentinelDecision = recordSentinelDecision(sentinelReport, toolName, toolInput);
|
|
1451
1516
|
const memoryGuard = evaluateMemoryGuard(toolName, toolInput);
|
|
@@ -1503,7 +1568,18 @@ function evaluateGates(toolName, toolInput, configPath) {
|
|
|
1503
1568
|
}
|
|
1504
1569
|
|
|
1505
1570
|
const constraints = loadConstraints();
|
|
1571
|
+
const governanceState = loadGovernanceState();
|
|
1506
1572
|
registerPrThreadResolutionClaimGate(toolName, toolInput);
|
|
1573
|
+
const localOnlyRemoteSideEffectGate = evaluateLocalOnlyRemoteSideEffectGate(
|
|
1574
|
+
toolName,
|
|
1575
|
+
toolInput,
|
|
1576
|
+
governanceState,
|
|
1577
|
+
constraints,
|
|
1578
|
+
);
|
|
1579
|
+
if (localOnlyRemoteSideEffectGate) {
|
|
1580
|
+
return recordStructuralGateBlock(toolName, toolInput, localOnlyRemoteSideEffectGate);
|
|
1581
|
+
}
|
|
1582
|
+
|
|
1507
1583
|
const pendingThreadResolutionGate = evaluatePendingPrThreadResolutionGate(toolName, toolInput);
|
|
1508
1584
|
if (pendingThreadResolutionGate) {
|
|
1509
1585
|
recordStat(pendingThreadResolutionGate.gate, 'block');
|
|
@@ -1587,7 +1663,7 @@ function evaluateGates(toolName, toolInput, configPath) {
|
|
|
1587
1663
|
}
|
|
1588
1664
|
|
|
1589
1665
|
const sentinelReport = evaluateWorkflowSentinel(toolName, toolInput, {
|
|
1590
|
-
governanceState
|
|
1666
|
+
governanceState,
|
|
1591
1667
|
});
|
|
1592
1668
|
const sentinelDecision = recordSentinelDecision(sentinelReport, toolName, toolInput);
|
|
1593
1669
|
const memoryGuard = evaluateMemoryGuard(toolName, toolInput);
|
|
@@ -2250,6 +2326,9 @@ module.exports = {
|
|
|
2250
2326
|
evaluateBoostedRiskTagGuard,
|
|
2251
2327
|
registerPrThreadResolutionClaimGate,
|
|
2252
2328
|
evaluatePendingPrThreadResolutionGate,
|
|
2329
|
+
getLocalOnlyScopeSources,
|
|
2330
|
+
isRemoteSideEffectCommand,
|
|
2331
|
+
evaluateLocalOnlyRemoteSideEffectGate,
|
|
2253
2332
|
PR_THREAD_RESOLUTION_ACTION,
|
|
2254
2333
|
};
|
|
2255
2334
|
|
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const { URL, URLSearchParams } = require('node:url');
|
|
4
|
+
const {
|
|
5
|
+
NAMESPACES,
|
|
6
|
+
upsertContextObject,
|
|
7
|
+
recordProvenance,
|
|
8
|
+
constructTemplatedPack,
|
|
9
|
+
} = require('./contextfs');
|
|
10
|
+
|
|
11
|
+
const DEFAULT_HF_PAPERS_API_BASE = process.env.HF_PAPERS_API_BASE || 'https://huggingface.co/api';
|
|
12
|
+
const DEFAULT_LIMIT = 5;
|
|
13
|
+
|
|
14
|
+
/**
 * Reduce an author list to trimmed, non-empty name strings.
 *
 * @param {*} authors Array of strings and/or objects with a string `name`;
 *   any non-array input yields [].
 * @returns {string[]} Names in input order; unusable entries are dropped.
 */
function normalizeAuthors(authors) {
  if (!Array.isArray(authors)) return [];

  const names = [];
  for (const entry of authors) {
    let name = '';
    if (typeof entry === 'string') {
      name = entry.trim();
    } else if (entry && typeof entry.name === 'string') {
      name = entry.name.trim();
    }
    if (name) names.push(name);
  }
  return names;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Reduce a tag list to unique, trimmed, non-empty strings.
 *
 * @param {*} tags Array of strings and/or objects carrying a string `label`
 *   (preferred) or `name`; any non-array input yields [].
 * @returns {string[]} De-duplicated tags in first-seen order.
 */
function normalizeTags(tags) {
  if (!Array.isArray(tags)) return [];

  const unique = new Set();
  for (const tag of tags) {
    let text = '';
    if (typeof tag === 'string') {
      text = tag.trim();
    } else if (tag && typeof tag.label === 'string') {
      text = tag.label.trim();
    } else if (tag && typeof tag.name === 'string') {
      text = tag.name.trim();
    }
    if (text) unique.add(text);
  }
  return Array.from(unique);
}
|
|
36
|
+
|
|
37
|
+
/**
 * Normalise one paper record into a flat, predictable shape.
 *
 * Accepts either a flat record or a wrapper of the form `{ paper: {...} }`.
 * Fields are looked up on the nested paper first, then on the outer record.
 * A `null`/non-object record, or a wrapper whose `paper` is `null`, is
 * handled without throwing (`typeof null === 'object'` previously let a
 * null `paper` through and crashed on the first property read).
 *
 * @param {object} [record] Raw record.
 * @returns {{paperId: string, title: string, summary: string,
 *   url: (string|null), authors: string[], tags: string[],
 *   publishedAt: *, source: string}} Normalised paper. `title` falls back
 *   to `Paper <id>` or 'Untitled paper'; `url` falls back to an arxiv.org
 *   abs link built from the id, or null.
 */
function normalizePaper(record = {}) {
  const isObject = (value) => value !== null && typeof value === 'object';
  const outer = isObject(record) ? record : {};
  const paper = isObject(outer.paper) ? outer.paper : outer;

  // First truthy candidate, stringified and trimmed ('' when none).
  const pickText = (...candidates) => String(candidates.find(Boolean) || '').trim();

  const paperId = pickText(
    paper.id,
    paper.paper_id,
    paper.paperId,
    paper.arxiv_id,
    paper.arxivId,
    outer.id,
    outer.paper_id,
    outer.paperId,
    outer.arxiv_id,
    outer.arxivId,
  );
  const title = pickText(
    paper.title,
    outer.title,
    paperId ? `Paper ${paperId}` : 'Untitled paper',
  );
  const summary = pickText(paper.summary, paper.abstract, outer.summary, outer.abstract);
  const url = pickText(
    paper.url,
    paper.paper_url,
    outer.url,
    outer.paper_url,
    paperId ? `https://arxiv.org/abs/${paperId}` : '',
  );

  return {
    paperId,
    title,
    summary,
    url: url || null,
    authors: normalizeAuthors(paper.authors || outer.authors),
    tags: normalizeTags(paper.tags || paper.categories || outer.tags || outer.categories),
    publishedAt: paper.publishedAt || paper.published_at || outer.publishedAt || outer.published_at || null,
    source: 'huggingface-papers',
  };
}
|
|
83
|
+
|
|
84
|
+
/**
 * Pull the list of paper records out of an API payload.
 *
 * @param {*} payload Either an array (returned as is), or an object holding
 *   the list under `papers`, `items`, `results` or `dailyPapers` (checked in
 *   that order), or an object with a single `paper` object.
 * @returns {Array} The list found, a one-element list for a single `paper`,
 *   or [] when nothing usable is present.
 */
function extractPaperItems(payload) {
  if (Array.isArray(payload)) return payload;
  if (payload === null || typeof payload !== 'object') return [];

  const listKey = ['papers', 'items', 'results', 'dailyPapers']
    .find((key) => Array.isArray(payload[key]));
  if (listKey) return payload[listKey];

  const single = payload.paper;
  return single && typeof single === 'object' ? [single] : [];
}
|
|
94
|
+
|
|
95
|
+
/**
 * Build the candidate search URLs, in the order they should be tried.
 *
 * @param {object} params
 * @param {string} params.query Search text (sent as `query` or `q`).
 * @param {number|string} [params.limit=DEFAULT_LIMIT] Sent as `limit`.
 * @param {string} [params.baseUrl=DEFAULT_HF_PAPERS_API_BASE] API root;
 *   trailing slashes are stripped.
 * @returns {string[]} URLs for `/daily_papers`, `/papers/search`, `/papers`.
 */
function buildSearchUrls({ query, limit = DEFAULT_LIMIT, baseUrl = DEFAULT_HF_PAPERS_API_BASE }) {
  const root = String(baseUrl || DEFAULT_HF_PAPERS_API_BASE).replace(/\/+$/, '');
  const limitParam = String(limit);
  const endpoints = [
    { path: '/daily_papers', params: { query, limit: limitParam } },
    { path: '/papers/search', params: { q: query, limit: limitParam } },
    { path: '/papers', params: { query, limit: limitParam } },
  ];

  const urls = [];
  for (const { path, params } of endpoints) {
    const target = new URL(`${root}${path}`);
    target.search = new URLSearchParams(params).toString();
    urls.push(target.toString());
  }
  return urls;
}
|
|
109
|
+
|
|
110
|
+
/**
 * GET a URL and parse the response body as JSON.
 *
 * @param {string} url Target URL.
 * @param {function} [fetchImpl=global.fetch] fetch-compatible function.
 * @returns {Promise<*>} Result of `response.json()`.
 * @throws {Error} When `fetchImpl` is not a function, or when the response
 *   is not OK (the error carries the HTTP code in `status`).
 */
async function readJson(url, fetchImpl = global.fetch) {
  if (typeof fetchImpl !== 'function') {
    throw new Error('A fetch implementation is required');
  }

  const requestInit = { headers: { accept: 'application/json' } };
  const response = await fetchImpl(url, requestInit);
  if (response.ok) {
    return response.json();
  }

  const failure = new Error(`HF papers request failed: ${response.status} ${response.statusText}`);
  failure.status = response.status;
  throw failure;
}
|
|
129
|
+
|
|
130
|
+
/**
 * Search for papers, trying each candidate route until one yields results.
 *
 * The limit is normalised once (non-numeric or zero → DEFAULT_LIMIT,
 * minimum 1) and that same value is used both in the request URLs and to
 * cap the returned list. Previously the raw `limit` capped the list, so
 * `limit: 0` or a non-numeric limit returned [] and a negative limit
 * dropped trailing results even though papers were found.
 *
 * @param {object} [params]
 * @param {string} params.query Search text (required, trimmed).
 * @param {number|string} [params.limit=DEFAULT_LIMIT] Maximum results.
 * @param {string} [params.baseUrl=DEFAULT_HF_PAPERS_API_BASE] API root.
 * @param {function} [params.fetchImpl=global.fetch] fetch-compatible function.
 * @returns {Promise<object[]>} Normalised papers from the first route that
 *   returns any; [] when every route responded without usable papers.
 * @throws {Error} When `query` is empty, or — if no route produced papers —
 *   the most recent request error.
 */
async function searchPapers({
  query,
  limit = DEFAULT_LIMIT,
  baseUrl = DEFAULT_HF_PAPERS_API_BASE,
  fetchImpl = global.fetch,
} = {}) {
  const normalizedQuery = String(query || '').trim();
  if (!normalizedQuery) {
    throw new Error('searchPapers requires query');
  }

  const normalizedLimit = Math.max(1, Number(limit) || DEFAULT_LIMIT);
  const urls = buildSearchUrls({
    query: normalizedQuery,
    limit: normalizedLimit,
    baseUrl,
  });

  let lastError = null;
  for (const url of urls) {
    try {
      const payload = await readJson(url, fetchImpl);
      const papers = extractPaperItems(payload)
        .map((item) => normalizePaper(item))
        .filter((paper) => paper.paperId || paper.title);

      if (papers.length > 0) {
        return papers.slice(0, normalizedLimit);
      }
    } catch (error) {
      // Every failure (404 or otherwise) is remembered and the next route is
      // tried; the error only surfaces if no route yields papers.
      // NOTE(review): the earlier 404-only `continue` had no effect; confirm
      // whether non-404 errors were meant to abort immediately.
      lastError = error;
    }
  }

  if (lastError) throw lastError;
  return [];
}
|
|
169
|
+
|
|
170
|
+
/**
 * Render a paper as a small Markdown document: title heading, metadata
 * lines (optional ones only when present) and an Abstract section.
 *
 * @param {object} paper Raw or already-normalised paper; it is passed
 *   through normalizePaper first.
 * @returns {string} Markdown text ending with a trailing newline.
 */
function paperToMarkdown(paper) {
  const {
    title, paperId, source, url, publishedAt, authors, tags, summary,
  } = normalizePaper(paper);

  const optionalLines = [
    url ? `URL: ${url}` : null,
    publishedAt ? `Published: ${publishedAt}` : null,
    authors.length > 0 ? `Authors: ${authors.join(', ')}` : null,
    tags.length > 0 ? `Tags: ${tags.join(', ')}` : null,
  ].filter((line) => line !== null);

  return [
    `# ${title}`,
    '',
    `Paper ID: ${paperId || 'unknown'}`,
    `Source: ${source}`,
    ...optionalLines,
    '',
    '## Abstract',
    '',
    summary || 'No abstract available.',
    '',
  ].join('\n');
}
|
|
195
|
+
|
|
196
|
+
/**
 * Reduce a normalised paper to the fields used in a citation.
 *
 * @param {object} paper Paper with `paperId`, `title`, `url`.
 * @returns {{paperId: *, title: *, url: *}} `paperId` is null when falsy;
 *   `title` and `url` are copied unchanged.
 */
function buildCitation(paper) {
  const { paperId, title, url } = paper;
  return { paperId: paperId || null, title, url };
}
|
|
203
|
+
|
|
204
|
+
/**
 * Store already-normalised papers as context objects in the research
 * namespace and record one provenance event for the batch.
 *
 * @param {object[]} papers Normalised papers (each must have a `tags` array).
 * @param {string} query The search query that produced them; stored in the
 *   metadata of every object and in the provenance event.
 * @returns {Array} One upsertContextObject result per paper, in order.
 */
function ingestNormalizedPapers(papers, query) {
  const baseTags = ['research', 'paper', 'hf-papers'];

  // Builds the upsert payload for a single paper.
  const toContextObject = (paper) => {
    const tagSet = new Set(baseTags);
    for (const tag of paper.tags) {
      tagSet.add(String(tag));
    }
    return {
      namespace: NAMESPACES.research,
      title: `Paper: ${paper.title}`,
      content: paperToMarkdown(paper),
      tags: Array.from(tagSet).sort(),
      source: 'hf-papers',
      metadata: {
        provider: 'huggingface',
        paperId: paper.paperId || null,
        url: paper.url,
        authors: paper.authors,
        publishedAt: paper.publishedAt,
        query,
      },
    };
  };

  const ingested = papers.map((paper) => upsertContextObject(toContextObject(paper)));

  const dedupedCount = ingested.reduce((total, entry) => (entry.deduped ? total + 1 : total), 0);
  const paperIds = [];
  for (const { paperId } of papers) {
    if (paperId) paperIds.push(paperId);
  }

  recordProvenance({
    type: 'hf_papers_ingested',
    query,
    count: ingested.length,
    dedupedCount,
    paperIds,
  });

  return ingested;
}
|
|
240
|
+
|
|
241
|
+
/**
 * Search for papers and ingest whatever comes back.
 *
 * @param {object} [params]
 * @param {string} params.query Search text.
 * @param {number} [params.limit=DEFAULT_LIMIT] Maximum results.
 * @param {string} [params.baseUrl=DEFAULT_HF_PAPERS_API_BASE] API root.
 * @param {function} [params.fetchImpl=global.fetch] fetch-compatible function.
 * @param {function} [params.searchPapersImpl=searchPapers] Search function
 *   (injectable); receives { query, limit, baseUrl, fetchImpl }.
 * @returns {Promise<{query: *, limit: *, papers: Array, ingested: Array}>}
 */
async function ingestPaperSearch({
  query,
  limit = DEFAULT_LIMIT,
  baseUrl = DEFAULT_HF_PAPERS_API_BASE,
  fetchImpl = global.fetch,
  searchPapersImpl = searchPapers,
} = {}) {
  const searchArgs = { query, limit, baseUrl, fetchImpl };
  const papers = await searchPapersImpl(searchArgs);
  return {
    query,
    limit,
    papers,
    ingested: ingestNormalizedPapers(papers, query),
  };
}
|
|
263
|
+
|
|
264
|
+
/**
 * Ingest a paper search, build a templated context pack for the same
 * query, and summarise the pack as a numbered brief.
 *
 * Each brief line is "<n>. <item title> <digest>", where the digest is the
 * first six lines of the item's `structuredContext.rawContent` joined by
 * spaces (empty when absent).
 *
 * @param {object} [params]
 * @param {string} params.query Search text.
 * @param {number} [params.limit=DEFAULT_LIMIT] Maximum results.
 * @param {string} [params.template='research-brief'] Pack template name.
 * @param {string} [params.baseUrl=DEFAULT_HF_PAPERS_API_BASE] API root.
 * @param {function} [params.fetchImpl=global.fetch] fetch-compatible function.
 * @param {function} [params.searchPapersImpl=searchPapers] Search function.
 * @returns {Promise<object>} { query, limit, source, template,
 *   ingestedCount, packId, citations, brief, pack }.
 */
async function buildResearchBrief({
  query,
  limit = DEFAULT_LIMIT,
  template = 'research-brief',
  baseUrl = DEFAULT_HF_PAPERS_API_BASE,
  fetchImpl = global.fetch,
  searchPapersImpl = searchPapers,
} = {}) {
  const searchResult = await ingestPaperSearch({
    query, limit, baseUrl, fetchImpl, searchPapersImpl,
  });
  const pack = constructTemplatedPack({ template, query });
  const citations = searchResult.papers.map(buildCitation);

  const briefLines = [];
  pack.items.forEach((item, index) => {
    const rawContent = String(item.structuredContext && item.structuredContext.rawContent || '');
    const digest = rawContent.split('\n').slice(0, 6).join(' ').trim();
    briefLines.push(`${index + 1}. ${item.title} ${digest}`.trim());
  });

  return {
    query,
    limit,
    source: 'huggingface-papers',
    template,
    ingestedCount: searchResult.ingested.length,
    packId: pack.packId,
    citations,
    brief: briefLines.join('\n'),
    pack,
  };
}
|
|
304
|
+
|
|
305
|
+
module.exports = {
|
|
306
|
+
DEFAULT_HF_PAPERS_API_BASE,
|
|
307
|
+
buildResearchBrief,
|
|
308
|
+
buildSearchUrls,
|
|
309
|
+
extractPaperItems,
|
|
310
|
+
ingestNormalizedPapers,
|
|
311
|
+
ingestPaperSearch,
|
|
312
|
+
normalizeAuthors,
|
|
313
|
+
normalizePaper,
|
|
314
|
+
normalizeTags,
|
|
315
|
+
paperToMarkdown,
|
|
316
|
+
searchPapers,
|
|
317
|
+
};
|
package/scripts/mcp-config.js
CHANGED
|
@@ -189,13 +189,13 @@ function publishedCliAvailable(pkgVersion) {
|
|
|
189
189
|
|
|
190
190
|
/**
 * Choose the MCP entry for an install target.
 *
 * The auto-updating entry is used when the package is not running from a
 * source checkout, or when a published CLI is available and the target is
 * either the home scope or a project outside this checkout family.
 * Otherwise the local entry for `pkgRoot` is used.
 *
 * @param {object} params
 * @param {string} params.pkgRoot Package root directory.
 * @param {string} params.pkgVersion Package version passed to publishedCliAvailable.
 * @param {string} [params.scope='project'] 'project' or 'home'.
 * @param {string} [params.targetDir=pkgRoot] Directory being configured.
 * @returns {*} Result of codexAutoUpdateMcpEntry() or localMcpEntry(pkgRoot, scope).
 */
function resolveMcpEntry({ pkgRoot, pkgVersion, scope = 'project', targetDir = pkgRoot }) {
  // Short-circuit order matters: publishedCliAvailable is only consulted
  // when the cheaper checks before it have not already decided.
  const usePublishedEntry = !isSourceCheckout(pkgRoot)
    || (scope === 'home' && publishedCliAvailable(pkgVersion))
    || (scope === 'project' && !isSameCheckoutFamily(pkgRoot, targetDir) && publishedCliAvailable(pkgVersion));

  return usePublishedEntry ? codexAutoUpdateMcpEntry() : localMcpEntry(pkgRoot, scope);
}
|