thumbgate 1.8.0 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/.well-known/llms.txt +4 -0
- package/.well-known/mcp/server-card.json +9 -226
- package/adapters/README.md +1 -1
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/mcp/server-stdio.js +46 -1
- package/adapters/opencode/opencode.json +1 -1
- package/config/mcp-allowlists.json +5 -0
- package/package.json +3 -1
- package/public/index.html +2 -2
- package/scripts/agent-readiness.js +1 -0
- package/scripts/autoresearch-runner.js +228 -0
- package/scripts/multimodal-retrieval-plan.js +110 -0
- package/scripts/tool-registry.js +37 -0
- package/src/api/server.js +246 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "thumbgate-marketplace",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.9.0",
|
|
4
4
|
"owner": {
|
|
5
5
|
"name": "Igor Ganapolsky",
|
|
6
6
|
"email": "ig5973700@gmail.com"
|
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
"source": "npm",
|
|
14
14
|
"package": "thumbgate"
|
|
15
15
|
},
|
|
16
|
-
"version": "1.
|
|
16
|
+
"version": "1.9.0",
|
|
17
17
|
"author": {
|
|
18
18
|
"name": "Igor Ganapolsky"
|
|
19
19
|
},
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "thumbgate",
|
|
3
3
|
"description": "Type 👍 or 👎 on any agent action. ThumbGate captures it, distills a lesson, and blocks the pattern from repeating. One thumbs-down = the agent physically cannot make that mistake again. 33 pre-action gates, budget enforcement, self-protection, and NIST/SOC2 compliance tags.",
|
|
4
|
-
"version": "1.
|
|
4
|
+
"version": "1.9.0",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Igor Ganapolsky"
|
|
7
7
|
},
|
package/.well-known/llms.txt
CHANGED
|
@@ -46,6 +46,10 @@ npx thumbgate init --agent claude-code
|
|
|
46
46
|
|
|
47
47
|
## Links
|
|
48
48
|
|
|
49
|
+
- Agent discovery: https://thumbgate-production.up.railway.app/.well-known/mcp.json
|
|
50
|
+
- Progressive tool index: https://thumbgate-production.up.railway.app/.well-known/mcp/tools.json
|
|
51
|
+
- Agent skills: https://thumbgate-production.up.railway.app/.well-known/mcp/skills.json
|
|
52
|
+
- MCP applications: https://thumbgate-production.up.railway.app/.well-known/mcp/applications.json
|
|
49
53
|
- Documentation: https://thumbgate-production.up.railway.app/guide
|
|
50
54
|
- Dashboard: https://thumbgate-production.up.railway.app/dashboard
|
|
51
55
|
- GitHub: https://github.com/IgorGanapolsky/ThumbGate
|
|
@@ -1,231 +1,14 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "thumbgate",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.9.0",
|
|
4
4
|
"description": "ThumbGate — 👍👎 feedback that teaches your AI agent. Thumbs down a mistake, it never happens again.",
|
|
5
|
-
"homepage": "https://
|
|
5
|
+
"homepage": "https://thumbgate-production.up.railway.app",
|
|
6
6
|
"transport": "stdio",
|
|
7
|
-
"
|
|
8
|
-
"
|
|
9
|
-
"
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
}
|
|
15
|
-
},
|
|
16
|
-
"required": []
|
|
17
|
-
},
|
|
18
|
-
"tools": [
|
|
19
|
-
{
|
|
20
|
-
"name": "capture_feedback",
|
|
21
|
-
"description": "Capture thumbs up/down feedback and promote actionable memory",
|
|
22
|
-
"inputSchema": {
|
|
23
|
-
"type": "object",
|
|
24
|
-
"required": [
|
|
25
|
-
"signal",
|
|
26
|
-
"context"
|
|
27
|
-
],
|
|
28
|
-
"properties": {
|
|
29
|
-
"signal": {
|
|
30
|
-
"type": "string",
|
|
31
|
-
"enum": [
|
|
32
|
-
"up",
|
|
33
|
-
"down"
|
|
34
|
-
]
|
|
35
|
-
},
|
|
36
|
-
"context": {
|
|
37
|
-
"type": "string"
|
|
38
|
-
},
|
|
39
|
-
"whatWentWrong": {
|
|
40
|
-
"type": "string"
|
|
41
|
-
},
|
|
42
|
-
"whatToChange": {
|
|
43
|
-
"type": "string"
|
|
44
|
-
},
|
|
45
|
-
"whatWorked": {
|
|
46
|
-
"type": "string"
|
|
47
|
-
},
|
|
48
|
-
"tags": {
|
|
49
|
-
"type": "array",
|
|
50
|
-
"items": {
|
|
51
|
-
"type": "string"
|
|
52
|
-
}
|
|
53
|
-
},
|
|
54
|
-
"skill": {
|
|
55
|
-
"type": "string"
|
|
56
|
-
}
|
|
57
|
-
}
|
|
58
|
-
}
|
|
59
|
-
},
|
|
60
|
-
{
|
|
61
|
-
"name": "feedback_summary",
|
|
62
|
-
"description": "Get summary of recent feedback",
|
|
63
|
-
"inputSchema": {
|
|
64
|
-
"type": "object",
|
|
65
|
-
"properties": {
|
|
66
|
-
"recent": {
|
|
67
|
-
"type": "number"
|
|
68
|
-
}
|
|
69
|
-
}
|
|
70
|
-
}
|
|
71
|
-
},
|
|
72
|
-
{
|
|
73
|
-
"name": "feedback_stats",
|
|
74
|
-
"description": "Get feedback stats and recommendations",
|
|
75
|
-
"inputSchema": {
|
|
76
|
-
"type": "object",
|
|
77
|
-
"properties": {}
|
|
78
|
-
}
|
|
79
|
-
},
|
|
80
|
-
{
|
|
81
|
-
"name": "list_intents",
|
|
82
|
-
"description": "List available intent plans and whether each requires human approval in the active profile",
|
|
83
|
-
"inputSchema": {
|
|
84
|
-
"type": "object",
|
|
85
|
-
"properties": {
|
|
86
|
-
"mcpProfile": {
|
|
87
|
-
"type": "string"
|
|
88
|
-
},
|
|
89
|
-
"bundleId": {
|
|
90
|
-
"type": "string"
|
|
91
|
-
}
|
|
92
|
-
}
|
|
93
|
-
}
|
|
94
|
-
},
|
|
95
|
-
{
|
|
96
|
-
"name": "plan_intent",
|
|
97
|
-
"description": "Generate an intent execution plan with policy checkpoints",
|
|
98
|
-
"inputSchema": {
|
|
99
|
-
"type": "object",
|
|
100
|
-
"required": [
|
|
101
|
-
"intentId"
|
|
102
|
-
],
|
|
103
|
-
"properties": {
|
|
104
|
-
"intentId": {
|
|
105
|
-
"type": "string"
|
|
106
|
-
},
|
|
107
|
-
"context": {
|
|
108
|
-
"type": "string"
|
|
109
|
-
},
|
|
110
|
-
"mcpProfile": {
|
|
111
|
-
"type": "string"
|
|
112
|
-
},
|
|
113
|
-
"bundleId": {
|
|
114
|
-
"type": "string"
|
|
115
|
-
},
|
|
116
|
-
"approved": {
|
|
117
|
-
"type": "boolean"
|
|
118
|
-
}
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
},
|
|
122
|
-
{
|
|
123
|
-
"name": "prevention_rules",
|
|
124
|
-
"description": "Generate prevention rules from repeated mistake patterns",
|
|
125
|
-
"inputSchema": {
|
|
126
|
-
"type": "object",
|
|
127
|
-
"properties": {
|
|
128
|
-
"minOccurrences": {
|
|
129
|
-
"type": "number"
|
|
130
|
-
},
|
|
131
|
-
"outputPath": {
|
|
132
|
-
"type": "string"
|
|
133
|
-
}
|
|
134
|
-
}
|
|
135
|
-
}
|
|
136
|
-
},
|
|
137
|
-
{
|
|
138
|
-
"name": "export_dpo_pairs",
|
|
139
|
-
"description": "Export DPO preference pairs from local memory log",
|
|
140
|
-
"inputSchema": {
|
|
141
|
-
"type": "object",
|
|
142
|
-
"properties": {
|
|
143
|
-
"memoryLogPath": {
|
|
144
|
-
"type": "string"
|
|
145
|
-
}
|
|
146
|
-
}
|
|
147
|
-
}
|
|
148
|
-
},
|
|
149
|
-
{
|
|
150
|
-
"name": "construct_context_pack",
|
|
151
|
-
"description": "Construct a bounded context pack from contextfs",
|
|
152
|
-
"inputSchema": {
|
|
153
|
-
"type": "object",
|
|
154
|
-
"properties": {
|
|
155
|
-
"query": {
|
|
156
|
-
"type": "string"
|
|
157
|
-
},
|
|
158
|
-
"maxItems": {
|
|
159
|
-
"type": "number"
|
|
160
|
-
},
|
|
161
|
-
"maxChars": {
|
|
162
|
-
"type": "number"
|
|
163
|
-
},
|
|
164
|
-
"namespaces": {
|
|
165
|
-
"type": "array",
|
|
166
|
-
"items": {
|
|
167
|
-
"type": "string"
|
|
168
|
-
}
|
|
169
|
-
}
|
|
170
|
-
}
|
|
171
|
-
}
|
|
172
|
-
},
|
|
173
|
-
{
|
|
174
|
-
"name": "evaluate_context_pack",
|
|
175
|
-
"description": "Record evaluation outcome for a context pack",
|
|
176
|
-
"inputSchema": {
|
|
177
|
-
"type": "object",
|
|
178
|
-
"required": [
|
|
179
|
-
"packId",
|
|
180
|
-
"outcome"
|
|
181
|
-
],
|
|
182
|
-
"properties": {
|
|
183
|
-
"packId": {
|
|
184
|
-
"type": "string"
|
|
185
|
-
},
|
|
186
|
-
"outcome": {
|
|
187
|
-
"type": "string"
|
|
188
|
-
},
|
|
189
|
-
"signal": {
|
|
190
|
-
"type": "string"
|
|
191
|
-
},
|
|
192
|
-
"notes": {
|
|
193
|
-
"type": "string"
|
|
194
|
-
}
|
|
195
|
-
}
|
|
196
|
-
}
|
|
197
|
-
},
|
|
198
|
-
{
|
|
199
|
-
"name": "context_provenance",
|
|
200
|
-
"description": "Get recent context/provenance events",
|
|
201
|
-
"inputSchema": {
|
|
202
|
-
"type": "object",
|
|
203
|
-
"properties": {
|
|
204
|
-
"limit": {
|
|
205
|
-
"type": "number"
|
|
206
|
-
}
|
|
207
|
-
}
|
|
208
|
-
}
|
|
209
|
-
},
|
|
210
|
-
{
|
|
211
|
-
"name": "recall",
|
|
212
|
-
"description": "Recall relevant past feedback, memories, and prevention rules for the current task",
|
|
213
|
-
"inputSchema": {
|
|
214
|
-
"type": "object",
|
|
215
|
-
"required": [
|
|
216
|
-
"query"
|
|
217
|
-
],
|
|
218
|
-
"properties": {
|
|
219
|
-
"query": {
|
|
220
|
-
"type": "string",
|
|
221
|
-
"description": "Describe the current task or context to find relevant past feedback"
|
|
222
|
-
},
|
|
223
|
-
"limit": {
|
|
224
|
-
"type": "number",
|
|
225
|
-
"description": "Max memories to return (default 5)"
|
|
226
|
-
}
|
|
227
|
-
}
|
|
228
|
-
}
|
|
229
|
-
}
|
|
230
|
-
]
|
|
7
|
+
"discovery": {
|
|
8
|
+
"manifestUrl": "https://thumbgate-production.up.railway.app/.well-known/mcp.json",
|
|
9
|
+
"toolIndexUrl": "https://thumbgate-production.up.railway.app/.well-known/mcp/tools.json",
|
|
10
|
+
"toolSchemaUrlTemplate": "https://thumbgate-production.up.railway.app/.well-known/mcp/tools/{name}.json",
|
|
11
|
+
"skillsUrl": "https://thumbgate-production.up.railway.app/.well-known/mcp/skills.json",
|
|
12
|
+
"applicationsUrl": "https://thumbgate-production.up.railway.app/.well-known/mcp/applications.json"
|
|
13
|
+
}
|
|
231
14
|
}
|
package/adapters/README.md
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
- `chatgpt/openapi.yaml`: import into GPT Actions.
|
|
4
4
|
- `gemini/function-declarations.json`: Gemini function-calling definitions.
|
|
5
5
|
- `mcp/server-stdio.js`: underlying local MCP stdio server implementation.
|
|
6
|
-
- `claude/.mcp.json`: example Claude Code MCP config using `npx --yes --package thumbgate@1.8.0 thumbgate serve`.
|
|
6
|
+
- `claude/.mcp.json`: example Claude Code MCP config using `npx --yes --package thumbgate@1.9.0 thumbgate serve`.
|
|
7
7
|
- `codex/config.toml`: example Codex MCP profile section using the same version-pinned portable launcher.
|
|
8
8
|
- `amp/skills/thumbgate-feedback/SKILL.md`: Amp skill template.
|
|
9
9
|
- `opencode/opencode.json`: portable OpenCode MCP profile using the same version-pinned portable launcher.
|
|
@@ -2,13 +2,13 @@
|
|
|
2
2
|
"mcpServers": {
|
|
3
3
|
"thumbgate": {
|
|
4
4
|
"command": "npx",
|
|
5
|
-
"args": ["--yes", "--package", "thumbgate@1.
|
|
5
|
+
"args": ["--yes", "--package", "thumbgate@1.9.0", "thumbgate", "serve"]
|
|
6
6
|
}
|
|
7
7
|
},
|
|
8
8
|
"hooks": {
|
|
9
9
|
"preToolUse": {
|
|
10
10
|
"command": "npx",
|
|
11
|
-
"args": ["--yes", "--package", "thumbgate@1.
|
|
11
|
+
"args": ["--yes", "--package", "thumbgate@1.9.0", "thumbgate", "gate-check"]
|
|
12
12
|
}
|
|
13
13
|
}
|
|
14
14
|
}
|
|
@@ -106,6 +106,9 @@ const {
|
|
|
106
106
|
const {
|
|
107
107
|
searchThumbgate,
|
|
108
108
|
} = require('../../scripts/thumbgate-search');
|
|
109
|
+
const {
|
|
110
|
+
buildMultimodalRetrievalPlan,
|
|
111
|
+
} = require('../../scripts/multimodal-retrieval-plan');
|
|
109
112
|
const {
|
|
110
113
|
importDocument,
|
|
111
114
|
listImportedDocuments,
|
|
@@ -117,6 +120,7 @@ const {
|
|
|
117
120
|
listHarnesses,
|
|
118
121
|
runHarness,
|
|
119
122
|
} = require('../../scripts/natural-language-harness');
|
|
123
|
+
const { runLoop: runAutoresearchLoop } = require('../../scripts/autoresearch-runner');
|
|
120
124
|
const { TOOLS } = require('../../scripts/tool-registry');
|
|
121
125
|
const { reflect: reflectOnFeedback } = require('../../scripts/reflector-agent');
|
|
122
126
|
const { submitProductIssue } = require('../../scripts/product-feedback');
|
|
@@ -148,7 +152,7 @@ const {
|
|
|
148
152
|
finalizeSession: finalizeFeedbackSession,
|
|
149
153
|
} = require('../../scripts/feedback-session');
|
|
150
154
|
|
|
151
|
-
const SERVER_INFO = { name: 'thumbgate-mcp', version: '1.8.0' };
|
|
155
|
+
const SERVER_INFO = { name: 'thumbgate-mcp', version: '1.9.0' };
|
|
152
156
|
const COMMERCE_CATEGORIES = [
|
|
153
157
|
'product_recommendation',
|
|
154
158
|
'brand_compliance',
|
|
@@ -196,6 +200,17 @@ function resolveImportDocumentPath(targetPath) {
|
|
|
196
200
|
return resolved;
|
|
197
201
|
}
|
|
198
202
|
|
|
203
|
+
function resolveWorkspaceCwd(targetPath) {
|
|
204
|
+
if (!targetPath) return undefined;
|
|
205
|
+
const workspaceRoot = path.resolve(process.cwd());
|
|
206
|
+
const resolved = path.resolve(workspaceRoot, String(targetPath));
|
|
207
|
+
const relative = path.relative(workspaceRoot, resolved);
|
|
208
|
+
if (relative.startsWith('..') || path.isAbsolute(relative)) {
|
|
209
|
+
throw new Error(`cwd must stay within ${workspaceRoot}`);
|
|
210
|
+
}
|
|
211
|
+
return resolved;
|
|
212
|
+
}
|
|
213
|
+
|
|
199
214
|
function toTextResult(payload) {
|
|
200
215
|
const text = typeof payload === 'string' ? payload : JSON.stringify(payload, null, 2);
|
|
201
216
|
return {
|
|
@@ -851,6 +866,36 @@ async function callToolInner(name, args) {
|
|
|
851
866
|
return toTextResult({ harnesses: listHarnesses({ tag: args.tag }) });
|
|
852
867
|
case 'run_harness':
|
|
853
868
|
return toTextResult(runHarness(args.harness, args.inputs || {}, { jobId: args.jobId }));
|
|
869
|
+
case 'plan_multimodal_retrieval':
|
|
870
|
+
return toTextResult(buildMultimodalRetrievalPlan(args));
|
|
871
|
+
case 'run_autoresearch': {
|
|
872
|
+
const iterations = Math.max(1, Math.min(5, Number(args.iterations || 1)));
|
|
873
|
+
const timeoutMs = Math.max(1000, Math.min(600000, Number(args.timeoutMs || 120000)));
|
|
874
|
+
const holdoutCommands = Array.isArray(args.holdoutCommands)
|
|
875
|
+
? args.holdoutCommands.filter((command) => typeof command === 'string' && command.trim())
|
|
876
|
+
: [];
|
|
877
|
+
const result = await runAutoresearchLoop({
|
|
878
|
+
iterations,
|
|
879
|
+
targetName: args.targetName || undefined,
|
|
880
|
+
nextValue: Number.isFinite(args.nextValue) ? args.nextValue : undefined,
|
|
881
|
+
testCommand: args.testCommand || 'npm test',
|
|
882
|
+
holdoutCommands,
|
|
883
|
+
timeoutMs,
|
|
884
|
+
cwd: resolveWorkspaceCwd(args.cwd),
|
|
885
|
+
researchQuery: args.researchQuery || null,
|
|
886
|
+
paperLimit: Math.max(1, Math.min(10, Number(args.paperLimit || 5))),
|
|
887
|
+
});
|
|
888
|
+
return toTextResult({
|
|
889
|
+
...result,
|
|
890
|
+
controls: {
|
|
891
|
+
iterations,
|
|
892
|
+
timeoutMs,
|
|
893
|
+
holdoutCommands,
|
|
894
|
+
maxIterationsPerCall: 5,
|
|
895
|
+
maxTimeoutMs: 600000,
|
|
896
|
+
},
|
|
897
|
+
});
|
|
898
|
+
}
|
|
854
899
|
case 'open_feedback_session':
|
|
855
900
|
return toTextResult(openFeedbackSession(args.feedbackEventId, args.signal, args.initialContext));
|
|
856
901
|
case 'append_feedback_context':
|
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
"search_lessons",
|
|
13
13
|
"retrieve_lessons",
|
|
14
14
|
"search_thumbgate",
|
|
15
|
+
"plan_multimodal_retrieval",
|
|
15
16
|
"reflect_on_feedback",
|
|
16
17
|
"feedback_stats",
|
|
17
18
|
"diagnose_failure",
|
|
@@ -45,6 +46,7 @@
|
|
|
45
46
|
"settings_status",
|
|
46
47
|
"list_harnesses",
|
|
47
48
|
"run_harness",
|
|
49
|
+
"run_autoresearch",
|
|
48
50
|
"estimate_uncertainty",
|
|
49
51
|
"get_business_metrics",
|
|
50
52
|
"describe_semantic_entity",
|
|
@@ -70,6 +72,7 @@
|
|
|
70
72
|
"search_lessons",
|
|
71
73
|
"retrieve_lessons",
|
|
72
74
|
"search_thumbgate",
|
|
75
|
+
"plan_multimodal_retrieval",
|
|
73
76
|
"reflect_on_feedback",
|
|
74
77
|
"prevention_rules",
|
|
75
78
|
"set_task_scope",
|
|
@@ -114,6 +117,7 @@
|
|
|
114
117
|
"search_lessons",
|
|
115
118
|
"retrieve_lessons",
|
|
116
119
|
"search_thumbgate",
|
|
120
|
+
"plan_multimodal_retrieval",
|
|
117
121
|
"feedback_stats",
|
|
118
122
|
"diagnose_failure",
|
|
119
123
|
"list_harnesses",
|
|
@@ -146,6 +150,7 @@
|
|
|
146
150
|
"search_lessons",
|
|
147
151
|
"retrieve_lessons",
|
|
148
152
|
"search_thumbgate",
|
|
153
|
+
"plan_multimodal_retrieval",
|
|
149
154
|
"feedback_stats",
|
|
150
155
|
"diagnose_failure",
|
|
151
156
|
"list_harnesses",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "thumbgate",
|
|
3
|
-
"version": "1.
|
|
3
|
+
"version": "1.9.0",
|
|
4
4
|
"description": "Self-improving agent governance: type thumbs-up or thumbs-down on any AI agent action. ThumbGate turns every mistake into a prevention rule and blocks the pattern from repeating. One thumbs-down, never again. 33 pre-action gates, budget enforcement, and self-protection for Claude Code, Cursor, Codex, Gemini CLI, and Amp.",
|
|
5
5
|
"homepage": "https://thumbgate-production.up.railway.app",
|
|
6
6
|
"repository": {
|
|
@@ -49,6 +49,7 @@
|
|
|
49
49
|
"scripts/analytics-report.js",
|
|
50
50
|
"scripts/analytics-window.js",
|
|
51
51
|
"scripts/autonomous-workflow.js",
|
|
52
|
+
"scripts/autoresearch-runner.js",
|
|
52
53
|
"scripts/async-job-runner.js",
|
|
53
54
|
"scripts/audit-trail.js",
|
|
54
55
|
"scripts/auto-promote-gates.js",
|
|
@@ -135,6 +136,7 @@
|
|
|
135
136
|
"scripts/mcp-policy.js",
|
|
136
137
|
"scripts/memory-firewall.js",
|
|
137
138
|
"scripts/meta-agent-loop.js",
|
|
139
|
+
"scripts/multimodal-retrieval-plan.js",
|
|
138
140
|
"scripts/natural-language-harness.js",
|
|
139
141
|
"scripts/obsidian-export.js",
|
|
140
142
|
"scripts/operational-dashboard.js",
|
package/public/index.html
CHANGED
|
@@ -974,7 +974,7 @@ __GA_BOOTSTRAP__
|
|
|
974
974
|
<!-- HOW IT WORKS -->
|
|
975
975
|
<section class="how-it-works" id="how-it-works">
|
|
976
976
|
<div class="container">
|
|
977
|
-
<div class="section-label">New in v1.8.0</div>
|
|
977
|
+
<div class="section-label">New in v1.9.0</div>
|
|
978
978
|
<h2 class="section-title">Three steps to stop repeated AI failures</h2>
|
|
979
979
|
<div class="steps">
|
|
980
980
|
<div class="step">
|
|
@@ -1330,7 +1330,7 @@ __GA_BOOTSTRAP__
|
|
|
1330
1330
|
<a href="https://www.linkedin.com/in/igorganapolsky" target="_blank" rel="noopener">LinkedIn</a>
|
|
1331
1331
|
<a href="/blog">Blog</a>
|
|
1332
1332
|
</div>
|
|
1333
|
-
<span class="footer-copy">© 2026 Max Smith KDP LLC · MIT License · v1.8.0</span>
|
|
1333
|
+
<span class="footer-copy">© 2026 Max Smith KDP LLC · MIT License · v1.9.0</span>
|
|
1334
1334
|
</div>
|
|
1335
1335
|
</footer>
|
|
1336
1336
|
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
/**
|
|
4
|
+
* Autoresearch Runner (AUTORESEARCH-02)
|
|
5
|
+
*
|
|
6
|
+
* Karpathy-inspired self-optimizing loop for the ThumbGate feedback studio.
|
|
7
|
+
* Each iteration: mutate local evolution state → run primary + holdout checks
|
|
8
|
+
* → measure score → keep/discard with rollback snapshots.
|
|
9
|
+
*
|
|
10
|
+
* The runner never rewrites tracked source files. It mutates the local
|
|
11
|
+
* evolution-state overlay, evaluates in place, and only persists accepted
|
|
12
|
+
* settings plus rollback snapshots.
|
|
13
|
+
*
|
|
14
|
+
* Mutation targets (in priority order):
|
|
15
|
+
* 1. Thompson Sampling priors (HALF_LIFE_DAYS, DECAY_FLOOR)
|
|
16
|
+
* 2. Prevention rule thresholds (minOccurrences)
|
|
17
|
+
* 3. Verification loop retries (MAX_RETRIES)
|
|
18
|
+
* 4. DPO temperature (DPO_BETA)
|
|
19
|
+
*
|
|
20
|
+
* Score function: command pass rate × approval weighting, with holdout gating.
|
|
21
|
+
*
|
|
22
|
+
* Zero external dependencies.
|
|
23
|
+
*
|
|
24
|
+
* Exports: runIteration, runLoop, scoreSuite, MUTATION_TARGETS
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
const {
|
|
28
|
+
getProgress,
|
|
29
|
+
} = require('./experiment-tracker');
|
|
30
|
+
const { buildResearchBrief } = require('./hf-papers');
|
|
31
|
+
const {
|
|
32
|
+
EVOLUTION_TARGETS,
|
|
33
|
+
parseCommandScore,
|
|
34
|
+
runWorkspaceEvolution,
|
|
35
|
+
} = require('./workspace-evolver');
|
|
36
|
+
|
|
37
|
+
// ---------------------------------------------------------------------------
|
|
38
|
+
// Mutation Targets
|
|
39
|
+
// ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
const MUTATION_TARGETS = EVOLUTION_TARGETS;
|
|
42
|
+
|
|
43
|
+
// ---------------------------------------------------------------------------
|
|
44
|
+
// Score Function
|
|
45
|
+
// ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
/**
|
|
48
|
+
* Score a test suite run. Returns a number in [0, 1].
|
|
49
|
+
*
|
|
50
|
+
* @param {object} params
|
|
51
|
+
* @param {string} params.testOutput - stdout from test run
|
|
52
|
+
* @param {number} [params.approvalRate] - Current approval rate from feedback
|
|
53
|
+
* @returns {{ score: number, testPassRate: number, details: object }}
|
|
54
|
+
*/
|
|
55
|
+
function scoreSuite(params) {
|
|
56
|
+
return parseCommandScore(params.testOutput || '', 0, typeof params.approvalRate === 'number' ? params.approvalRate : 0.5);
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
// ---------------------------------------------------------------------------
|
|
60
|
+
// Single Iteration
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
|
|
63
|
+
/**
|
|
64
|
+
* Run one autoresearch iteration.
|
|
65
|
+
*
|
|
66
|
+
* 1. Pick a random mutation target
|
|
67
|
+
* 2. Read current value, compute a random neighbor
|
|
68
|
+
* 3. Run the test suite in a tmp env with the mutation
|
|
69
|
+
* 4. Score and keep/discard via experiment tracker
|
|
70
|
+
*
|
|
71
|
+
* @param {object} [opts]
|
|
72
|
+
* @param {string} [opts.targetName] - Force a specific mutation target
|
|
73
|
+
* @param {number} [opts.nextValue] - Force the candidate value instead of a random neighbor
|
|
74
|
+
* @param {string} [opts.testCommand] - Override test command (default: npm test)
|
|
75
|
+
* @param {string[]} [opts.holdoutCommands] - Optional holdout commands required for acceptance
|
|
76
|
+
* @param {number} [opts.timeoutMs] - Test timeout in ms (default: 120000)
|
|
77
|
+
* @param {string} [opts.cwd] - Working directory for evaluation commands
|
|
78
|
+
* @param {string} [opts.researchQuery] - Optional external research query
|
|
79
|
+
* @param {number} [opts.paperLimit] - Max papers to ingest for research context
|
|
80
|
+
* @param {Function} [opts.fetchImpl] - Optional fetch implementation override
|
|
81
|
+
* @param {Function} [opts.searchPapersImpl] - Optional paper search override
|
|
82
|
+
* @returns {Promise<object>} experiment result
|
|
83
|
+
*/
|
|
84
|
+
async function runIteration(opts = {}) {
|
|
85
|
+
const options = opts || {};
|
|
86
|
+
const timeoutMs = options.timeoutMs || 120000;
|
|
87
|
+
const testCommand = options.testCommand || 'npm test';
|
|
88
|
+
const research = options.researchQuery
|
|
89
|
+
? await buildResearchBrief({
|
|
90
|
+
query: options.researchQuery,
|
|
91
|
+
limit: options.paperLimit,
|
|
92
|
+
fetchImpl: options.fetchImpl,
|
|
93
|
+
searchPapersImpl: options.searchPapersImpl,
|
|
94
|
+
template: 'autoresearch-brief',
|
|
95
|
+
})
|
|
96
|
+
: null;
|
|
97
|
+
|
|
98
|
+
const result = runWorkspaceEvolution({
|
|
99
|
+
targetName: options.targetName,
|
|
100
|
+
nextValue: options.nextValue,
|
|
101
|
+
primaryCommands: [testCommand],
|
|
102
|
+
holdoutCommands: options.holdoutCommands || [],
|
|
103
|
+
timeoutMs,
|
|
104
|
+
cwd: options.cwd,
|
|
105
|
+
hypothesisSuffix: research ? `Research query: ${research.query}` : null,
|
|
106
|
+
additionalMetrics: {
|
|
107
|
+
researchQuery: research ? research.query : null,
|
|
108
|
+
researchPackId: research ? research.packId : null,
|
|
109
|
+
researchPaperIds: research ? research.citations.map((citation) => citation.paperId).filter(Boolean) : [],
|
|
110
|
+
},
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
return result;
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
// ---------------------------------------------------------------------------
|
|
117
|
+
// Multi-Iteration Loop
|
|
118
|
+
// ---------------------------------------------------------------------------
|
|
119
|
+
|
|
120
|
+
/**
|
|
121
|
+
* Run N autoresearch iterations.
|
|
122
|
+
*
|
|
123
|
+
* @param {object} params
|
|
124
|
+
* @param {number} params.iterations - Number of experiments to run
|
|
125
|
+
* @param {string} [params.targetName] - Force a specific mutation target
|
|
126
|
+
* @param {number} [params.nextValue] - Force the candidate value instead of a random neighbor
|
|
127
|
+
* @param {string} [params.testCommand] - Override test command
|
|
128
|
+
* @param {string[]} [params.holdoutCommands] - Optional holdout commands required for acceptance
|
|
129
|
+
* @param {number} [params.timeoutMs] - Per-iteration timeout
|
|
130
|
+
* @param {string} [params.cwd] - Working directory for evaluation commands
|
|
131
|
+
* @param {string} [params.researchQuery] - Optional external research query
|
|
132
|
+
* @param {number} [params.paperLimit] - Max papers to ingest for research context
|
|
133
|
+
* @param {Function} [params.fetchImpl] - Optional fetch implementation override
|
|
134
|
+
* @param {Function} [params.searchPapersImpl] - Optional paper search override
|
|
135
|
+
* @returns {Promise<object>} { results, progress }
|
|
136
|
+
*/
|
|
137
|
+
async function runLoop(params) {
|
|
138
|
+
const iterations = params.iterations || 1;
|
|
139
|
+
const results = [];
|
|
140
|
+
|
|
141
|
+
for (let i = 0; i < iterations; i++) {
|
|
142
|
+
console.log(`\n[autoresearch] Iteration ${i + 1}/${iterations}`);
|
|
143
|
+
try {
|
|
144
|
+
const result = await runIteration({
|
|
145
|
+
targetName: params.targetName,
|
|
146
|
+
nextValue: Number.isFinite(params.nextValue) ? params.nextValue : undefined,
|
|
147
|
+
testCommand: params.testCommand,
|
|
148
|
+
holdoutCommands: params.holdoutCommands,
|
|
149
|
+
timeoutMs: params.timeoutMs,
|
|
150
|
+
cwd: params.cwd,
|
|
151
|
+
researchQuery: params.researchQuery,
|
|
152
|
+
paperLimit: params.paperLimit,
|
|
153
|
+
fetchImpl: params.fetchImpl,
|
|
154
|
+
searchPapersImpl: params.searchPapersImpl,
|
|
155
|
+
});
|
|
156
|
+
results.push(result);
|
|
157
|
+
if (result.kept) {
|
|
158
|
+
console.log(` ✓ KEPT: ${result.name} (delta: +${(result.delta || 0).toFixed(4)})`);
|
|
159
|
+
} else if (result.skipped) {
|
|
160
|
+
console.log(` ⊘ SKIPPED: ${result.reason}`);
|
|
161
|
+
} else {
|
|
162
|
+
console.log(` ✗ DISCARDED: ${result.reason}`);
|
|
163
|
+
}
|
|
164
|
+
} catch (err) {
|
|
165
|
+
console.error(` ✗ ERROR: ${err.message}`);
|
|
166
|
+
results.push({ error: err.message });
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
const progress = getProgress();
|
|
171
|
+
console.log(`\n[autoresearch] Progress: ${progress.completed} experiments, ${progress.kept} kept (${progress.keepRate}%)`);
|
|
172
|
+
return { results, progress };
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
// ---------------------------------------------------------------------------
|
|
176
|
+
// CLI
|
|
177
|
+
// ---------------------------------------------------------------------------
|
|
178
|
+
|
|
179
|
+
if (require.main === module) {
|
|
180
|
+
const args = {};
|
|
181
|
+
process.argv.slice(2).forEach((arg) => {
|
|
182
|
+
if (!arg.startsWith('--')) return;
|
|
183
|
+
const [key, ...rest] = arg.slice(2).split('=');
|
|
184
|
+
args[key] = rest.length > 0 ? rest.join('=') : true;
|
|
185
|
+
});
|
|
186
|
+
|
|
187
|
+
if (args.run) {
|
|
188
|
+
const iterations = Number(args.iterations || 1);
|
|
189
|
+
const testCommand = args['test-command'] || 'npm test';
|
|
190
|
+
const timeoutMs = Number(args.timeout || 120000);
|
|
191
|
+
const paperLimit = Number(args['paper-limit'] || 5);
|
|
192
|
+
const holdoutCommands = args.holdout ? [args.holdout] : [];
|
|
193
|
+
runLoop({
|
|
194
|
+
iterations,
|
|
195
|
+
targetName: args.target || null,
|
|
196
|
+
nextValue: args['next-value'] !== undefined ? Number(args['next-value']) : undefined,
|
|
197
|
+
testCommand,
|
|
198
|
+
holdoutCommands,
|
|
199
|
+
timeoutMs,
|
|
200
|
+
cwd: args.cwd || undefined,
|
|
201
|
+
researchQuery: args['research-query'] || null,
|
|
202
|
+
paperLimit,
|
|
203
|
+
}).catch((error) => {
|
|
204
|
+
console.error(error.message);
|
|
205
|
+
process.exit(1);
|
|
206
|
+
});
|
|
207
|
+
} else if (args.targets) {
|
|
208
|
+
console.log('Mutation targets:');
|
|
209
|
+
MUTATION_TARGETS.forEach((t) => {
|
|
210
|
+
console.log(` ${t.name} (${t.type}): range [${t.range.join(', ')}], step ${t.step}`);
|
|
211
|
+
});
|
|
212
|
+
} else {
|
|
213
|
+
console.log(`Usage:
|
|
214
|
+
node scripts/autoresearch-runner.js --run [--iterations=5] [--target=half_life_days] [--next-value=8] [--test-command="npm test"] [--holdout="npm run self-heal:check"] [--timeout=120000] [--research-query="rank fusion"] [--paper-limit=5]
|
|
215
|
+
node scripts/autoresearch-runner.js --targets`);
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
// ---------------------------------------------------------------------------
|
|
220
|
+
// Exports
|
|
221
|
+
// ---------------------------------------------------------------------------
|
|
222
|
+
|
|
223
|
+
module.exports = {
|
|
224
|
+
runIteration,
|
|
225
|
+
runLoop,
|
|
226
|
+
scoreSuite,
|
|
227
|
+
MUTATION_TARGETS,
|
|
228
|
+
};
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const DEFAULT_EVIDENCE_TYPES = ['screenshots', 'pdf_pages', 'proof_artifacts'];
|
|
4
|
+
const DEFAULT_DIMS = [1024, 512, 256, 128, 64];
|
|
5
|
+
|
|
6
|
+
function clampInteger(value, { min, max, fallback }) {
|
|
7
|
+
const parsed = Number(value);
|
|
8
|
+
if (!Number.isFinite(parsed)) return fallback;
|
|
9
|
+
return Math.max(min, Math.min(max, Math.floor(parsed)));
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
function normalizeEvidenceTypes(value) {
|
|
13
|
+
if (!Array.isArray(value)) return DEFAULT_EVIDENCE_TYPES;
|
|
14
|
+
const normalized = value
|
|
15
|
+
.map((item) => String(item || '').trim().toLowerCase().replace(/[^a-z0-9]+/g, '_'))
|
|
16
|
+
.filter(Boolean);
|
|
17
|
+
return normalized.length > 0 ? [...new Set(normalized)] : DEFAULT_EVIDENCE_TYPES;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
function dimensionPlan({ corpusItems, maxEmbeddingDim }) {
|
|
21
|
+
const dims = DEFAULT_DIMS.filter((dim) => dim <= maxEmbeddingDim);
|
|
22
|
+
const selected = dims.length > 0 ? dims : [maxEmbeddingDim];
|
|
23
|
+
return selected.map((dim) => ({
|
|
24
|
+
dim,
|
|
25
|
+
estimatedFloat32Mb: Number(((corpusItems * dim * 4) / (1024 * 1024)).toFixed(2)),
|
|
26
|
+
useWhen: dim >= 1024
|
|
27
|
+
? 'default quality pass for launch-critical retrieval'
|
|
28
|
+
: 'cost-down pass when storage or latency dominates',
|
|
29
|
+
}));
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/**
 * Produce a static rollout plan for multimodal (screenshot / PDF / proof
 * artifact) retrieval. Nothing is indexed or trained here; the function only
 * normalizes the inputs and returns a descriptive plan object.
 *
 * @param {object} [args] - Planner options; `null` is treated like `{}`.
 * @param {string} [args.goal] - Objective text; blank or missing values use the default goal.
 * @param {string[]} [args.evidenceTypes] - Evidence surfaces, normalized via normalizeEvidenceTypes.
 * @param {number} [args.corpusItems] - Estimated corpus size, clamped to 100..10000000 (fallback 5000).
 * @param {number} [args.maxEmbeddingDim] - Dimension budget, clamped to 64..2048 (fallback 1024).
 * @param {number} [args.latencyBudgetMs] - Latency target, clamped to 50..30000 (fallback 750).
 * @param {boolean} [args.useReranker] - Reranker stage is planned unless this is exactly `false`.
 * @returns {object} Plan with architecture, trainingData, evaluation, deployment,
 *   thumbgateUseCases, guardrails, and nextActions sections.
 */
function buildMultimodalRetrievalPlan(args = {}) {
  // A default parameter only covers `undefined`; guard against an explicit null too.
  const options = args || {};

  const evidenceTypes = normalizeEvidenceTypes(options.evidenceTypes);
  const corpusItems = clampInteger(options.corpusItems, {
    min: 100,
    max: 10000000,
    fallback: 5000,
  });
  const maxEmbeddingDim = clampInteger(options.maxEmbeddingDim, {
    min: 64,
    max: 2048,
    fallback: 1024,
  });
  const latencyBudgetMs = clampInteger(options.latencyBudgetMs, {
    min: 50,
    max: 30000,
    fallback: 750,
  });
  const useReranker = options.useReranker !== false;

  // Trim first, then fall back, so a whitespace-only goal does not become ''.
  const goal = String(options.goal || '').trim()
    || 'retrieve visual proof for agent-governance decisions';

  const dims = dimensionPlan({ corpusItems, maxEmbeddingDim });
  // Prefer 1024 when it is within budget; otherwise use the first planned dimension.
  const defaultDim = dims.some((entry) => entry.dim === 1024) ? 1024 : dims[0].dim;

  return {
    planVersion: '2026-04-20',
    sourcePattern: 'multimodal Sentence Transformers visual document retrieval',
    goal,
    evidenceTypes,
    architecture: {
      stage1: 'Index screenshots, PDF pages, dashboard captures, and proof artifacts with a multimodal embedding model.',
      stage2: useReranker
        ? 'Rerank the top candidates with a multimodal cross-encoder before using evidence in a gate, PR, or sales proof claim.'
        : 'Skip reranking for low-latency agent recall; require stronger holdout evaluation before shipping.',
      fallback: 'Keep text-only search as a fallback for code, logs, markdown, and plain policy docs.',
    },
    trainingData: {
      pilotSchema: ['query', 'image', 'negative_0'],
      hardNegativeStrategy: 'Pair each proof query with visually similar but wrong screenshots or PDF pages.',
      minimumPilot: 'Start with 300 labeled evaluation queries and at least one hard negative per query before finetuning.',
    },
    evaluation: {
      baseline: 'Measure current text-only retrieval before any model changes.',
      primaryMetric: 'NDCG@10',
      secondaryMetrics: ['Recall@5', 'MAP', 'false_positive_gate_rate'],
      holdoutSets: [
        'agent failure screenshots',
        'dashboard proof captures',
        'visual docs that contain tables or charts',
      ],
    },
    deployment: {
      latencyBudgetMs,
      defaultEmbeddingDim: defaultDim,
      matryoshkaDimensions: dims,
      compressionPath: 'Use Matryoshka truncation first, then quantization only after holdout quality is stable.',
    },
    thumbgateUseCases: [
      'Find the exact screenshot or proof artifact behind a completion claim.',
      'Retrieve visual evidence before approving a workflow-hardening sprint.',
      'Rank dashboard captures and PDF runbook pages for GEO/SEO evidence pages.',
      'Attach visual hard negatives to Autoresearch loops so agents cannot reward-hack by deleting hard cases.',
    ],
    guardrails: [
      'Never promote visual retrieval results into claims without a linked artifact URL or local path.',
      'Keep the multimodal index read-only for agent recall; gate training and index rebuilds behind explicit workflow checks.',
      'Evaluate retrieval on holdout screenshots/PDF pages before replacing text-only recall.',
    ],
    nextActions: [
      'Create a small visual proof corpus from existing public dashboard screenshots and proof artifacts.',
      'Log query -> correct artifact -> hard negative triples during workflow sprint reviews.',
      'Use Autoresearch to optimize NDCG@10 and latency only after the baseline corpus exists.',
    ],
  };
}
|
|
105
|
+
|
|
106
|
+
module.exports = {
|
|
107
|
+
buildMultimodalRetrievalPlan,
|
|
108
|
+
dimensionPlan,
|
|
109
|
+
normalizeEvidenceTypes,
|
|
110
|
+
};
|
package/scripts/tool-registry.js
CHANGED
|
@@ -134,6 +134,25 @@ const TOOLS = [
|
|
|
134
134
|
},
|
|
135
135
|
},
|
|
136
136
|
}),
|
|
137
|
+
readOnlyTool({
|
|
138
|
+
name: 'plan_multimodal_retrieval',
|
|
139
|
+
description: 'Plan a high-ROI multimodal retrieval rollout for screenshots, PDF pages, dashboard captures, and proof artifacts without starting GPU training.',
|
|
140
|
+
inputSchema: {
|
|
141
|
+
type: 'object',
|
|
142
|
+
properties: {
|
|
143
|
+
goal: { type: 'string', description: 'Business or workflow objective for visual/document retrieval.' },
|
|
144
|
+
evidenceTypes: {
|
|
145
|
+
type: 'array',
|
|
146
|
+
items: { type: 'string' },
|
|
147
|
+
description: 'Evidence surfaces to include, such as screenshots, pdf_pages, proof_artifacts, dashboards, or videos.',
|
|
148
|
+
},
|
|
149
|
+
corpusItems: { type: 'number', description: 'Estimated number of visual artifacts or document pages to index.' },
|
|
150
|
+
maxEmbeddingDim: { type: 'number', description: 'Maximum embedding dimension to budget for Matryoshka-style truncation planning.' },
|
|
151
|
+
latencyBudgetMs: { type: 'number', description: 'Target retrieval latency budget for agent recall.' },
|
|
152
|
+
useReranker: { type: 'boolean', description: 'Whether to include a multimodal reranker stage after initial embedding retrieval.' },
|
|
153
|
+
},
|
|
154
|
+
},
|
|
155
|
+
}),
|
|
137
156
|
destructiveTool({
|
|
138
157
|
name: 'import_document',
|
|
139
158
|
description: 'Import a local policy or runbook document into ThumbGate, normalize it for search, and propose provenance-backed gate candidates.',
|
|
@@ -872,6 +891,24 @@ const TOOLS = [
|
|
|
872
891
|
},
|
|
873
892
|
},
|
|
874
893
|
}),
|
|
894
|
+
destructiveTool({
|
|
895
|
+
name: 'run_autoresearch',
|
|
896
|
+
description: 'Run a bounded metric-improvement loop: measure a baseline, test a hypothesis, require primary and holdout checks, then keep or discard the candidate mutation with proof.',
|
|
897
|
+
inputSchema: {
|
|
898
|
+
type: 'object',
|
|
899
|
+
properties: {
|
|
900
|
+
iterations: { type: 'number', description: 'Number of iterations to run. Capped at 5 per call; default 1.' },
|
|
901
|
+
targetName: { type: 'string', enum: ['half_life_days', 'decay_floor', 'prevention_min_occurrences', 'verification_max_retries', 'dpo_beta'], description: 'Optional evolution target to mutate.' },
|
|
902
|
+
nextValue: { type: 'number', description: 'Optional explicit candidate value for the target.' },
|
|
903
|
+
testCommand: { type: 'string', description: 'Primary metric command. Defaults to npm test.' },
|
|
904
|
+
holdoutCommands: { type: 'array', items: { type: 'string' }, description: 'Additional checks required before a candidate can be kept.' },
|
|
905
|
+
timeoutMs: { type: 'number', description: 'Per-command timeout in milliseconds. Capped at 600000; default 120000.' },
|
|
906
|
+
cwd: { type: 'string', description: 'Optional workspace directory for the evaluation commands.' },
|
|
907
|
+
researchQuery: { type: 'string', description: 'Optional research query used to build an autoresearch context brief.' },
|
|
908
|
+
paperLimit: { type: 'number', description: 'Maximum research papers to ingest when researchQuery is set. Capped at 10; default 5.' },
|
|
909
|
+
},
|
|
910
|
+
},
|
|
911
|
+
}),
|
|
875
912
|
destructiveTool({
|
|
876
913
|
name: 'schedule',
|
|
877
914
|
description: 'Create, list, or delete scheduled tasks. Supports natural language scheduling like "daily 9:00", "weekly monday 8:30", "hourly". Installs as macOS LaunchAgent or Linux crontab.',
|
package/src/api/server.js
CHANGED
|
@@ -552,6 +552,169 @@ function getServerCardTools() {
|
|
|
552
552
|
}));
|
|
553
553
|
}
|
|
554
554
|
|
|
555
|
+
/**
 * Join the hosted app origin and a site-relative path into an absolute public URL.
 *
 * Trailing slashes on the origin are dropped and a missing leading slash on the
 * path is added, so exactly one slash separates the two parts. An empty path
 * returns the bare origin.
 *
 * Plain string joining is deliberate: callers pass template paths containing a
 * literal `{name}` placeholder, which URL parsing would percent-encode.
 *
 * @param {{ appOrigin: string }} hostedConfig - Hosted configuration carrying the public origin.
 * @param {string} pathname - Site-relative path, optionally with a `#fragment`.
 * @returns {string} The absolute URL.
 */
function buildPublicUrl(hostedConfig, pathname) {
  const origin = String(hostedConfig.appOrigin).replace(/\/+$/, '');
  const path = String(pathname);
  if (path === '' || path.startsWith('/')) {
    return `${origin}${path}`;
  }
  return `${origin}/${path}`;
}
|
|
558
|
+
|
|
559
|
+
// Fixed GitHub link to the project's verification-evidence document; the
// discovery manifests in this file expose it as their proof URL.
const VERIFICATION_EVIDENCE_URL = 'https://github.com/IgorGanapolsky/ThumbGate/blob/main/docs/VERIFICATION_EVIDENCE.md';
|
|
560
|
+
|
|
561
|
+
/**
 * Build the lightweight tool index served for progressive discovery.
 *
 * Each MCP_TOOLS entry is reduced to its name, description, and annotations
 * plus a URL where that tool's full schema can be fetched; the inputSchema
 * itself is intentionally left out of the index.
 *
 * @param {{ appOrigin: string }} hostedConfig - Hosted configuration used to build absolute URLs.
 * @returns {Array<{ name: string, description: string, annotations: object, schemaUrl: string }>}
 */
function getToolDiscoveryIndex(hostedConfig) {
  return MCP_TOOLS.map(({ name, description, annotations }) => {
    // Tool names are URI-encoded so they are safe as a single path segment.
    const schemaPath = `/.well-known/mcp/tools/${encodeURIComponent(name)}.json`;
    return {
      name,
      description,
      annotations: annotations || {},
      schemaUrl: buildPublicUrl(hostedConfig, schemaPath),
    };
  });
}
|
|
569
|
+
|
|
570
|
+
/**
 * Describe the skills advertised through MCP discovery.
 *
 * Returns three static manifests (core ThumbGate gates, the workflow hardening
 * sprint, and visual proof retrieval), each with trigger phrases, a recommended
 * flow, and links built from the hosted origin.
 *
 * @param {{ appOrigin: string }} hostedConfig - Hosted configuration used to build absolute URLs.
 * @returns {object[]} Skill manifests in a fixed order.
 */
function getMcpSkillManifests(hostedConfig) {
  const llmContextUrl = buildPublicUrl(hostedConfig, '/public/llm-context.md');
  const sprintIntakeUrl = buildPublicUrl(hostedConfig, '/#workflow-sprint-intake');

  const coreSkill = {
    name: 'thumbgate',
    title: 'ThumbGate Pre-Action Gates',
    description: 'Capture feedback, recall lessons, generate rules, and block repeated agent mistakes before tool execution.',
    triggers: ['thumbgate', 'pre-action gates', 'prevent repeated AI mistakes', 'agent feedback', 'PreToolUse hooks'],
    recommendedFlow: [
      'Recall lessons before risky work.',
      'Plan high-risk actions with checkpoints.',
      'Capture concrete thumbs-down/up feedback.',
      'Inspect prevention_rules after repeats.',
    ],
    installCommand: 'npx thumbgate init',
    contextUrl: llmContextUrl,
    proofUrl: VERIFICATION_EVIDENCE_URL,
  };

  const sprintSkill = {
    name: 'workflow-hardening-sprint',
    title: 'Workflow Hardening Sprint',
    description: 'Turn one repeated agent failure into an enforced gate with proof and rollout evidence.',
    triggers: ['workflow hardening', 'team rollout', 'agent governance', 'approval boundary', 'audit trail'],
    recommendedFlow: [
      'Pick one costly repeated failure.',
      'Import the policy or runbook.',
      'Ship the gate with dashboard proof.',
    ],
    intakeUrl: sprintIntakeUrl,
    proofUrl: VERIFICATION_EVIDENCE_URL,
  };

  const visualProofSkill = {
    name: 'visual-proof-retrieval',
    title: 'Visual Proof Retrieval',
    description: 'Use screenshots, PDF pages, dashboard captures, and proof artifacts as searchable evidence for agent-governance claims.',
    triggers: ['visual document retrieval', 'multimodal embeddings', 'screenshots', 'PDF evidence', 'proof artifacts'],
    recommendedFlow: [
      'Plan the corpus and Matryoshka dimension budget.',
      'Baseline text-only retrieval before finetuning.',
      'Evaluate NDCG@10 on visual hard negatives.',
      'Require artifact links before using retrieved evidence in claims.',
    ],
    contextUrl: llmContextUrl,
    proofUrl: VERIFICATION_EVIDENCE_URL,
  };

  return [coreSkill, sprintSkill, visualProofSkill];
}
|
|
616
|
+
|
|
617
|
+
/**
 * List the hosted web surfaces advertised through MCP discovery.
 *
 * @param {{ appOrigin: string }} hostedConfig - Hosted configuration used to build absolute URLs.
 * @returns {Array<{ name: string, title: string, description: string, url: string, useWhen: string }>}
 *   Dashboard, lessons, setup guide, and sprint intake entries, in that order.
 */
function getMcpApplications(hostedConfig) {
  // Rows are [name, title, description, site-relative path, useWhen].
  const rows = [
    [
      'dashboard',
      'ThumbGate Dashboard',
      'Review feedback, gates, blocked actions, funnel metrics, and proof.',
      '/dashboard',
      'Need proof before approving more autonomy.',
    ],
    [
      'lessons',
      'Lessons',
      'Browse promoted lessons and corrective actions.',
      '/lessons',
      'Need human-approved context before risk.',
    ],
    [
      'guide',
      'Setup Guide',
      'Install ThumbGate for Claude Code, Cursor, Codex, Gemini CLI, Amp, OpenCode, and MCP agents.',
      '/guide',
      'Need setup without searching the repo.',
    ],
    [
      'workflow-sprint-intake',
      'Workflow Hardening Sprint Intake',
      'Submit a repeated agent failure for a proof-backed sprint.',
      '/#workflow-sprint-intake',
      'Ready to convert mistakes into gates.',
    ],
  ];

  return rows.map(([name, title, description, path, useWhen]) => ({
    name,
    title,
    description,
    url: buildPublicUrl(hostedConfig, path),
    useWhen,
  }));
}
|
|
649
|
+
|
|
650
|
+
/**
 * Assemble the top-level MCP discovery manifest.
 *
 * Combines package metadata, the transport endpoint, links to the other
 * discovery documents, named tool flows, the skill and application manifests,
 * and proof links into one object.
 *
 * @param {{ appOrigin: string }} hostedConfig - Hosted configuration used to build absolute URLs.
 * @returns {object} The discovery manifest.
 */
function getMcpDiscoveryManifest(hostedConfig) {
  const publicUrl = (path) => buildPublicUrl(hostedConfig, path);

  const npmPackage = {
    registry: 'npm',
    name: 'thumbgate',
    installCommand: 'npx thumbgate init',
  };

  const transport = {
    type: 'streamable-http',
    endpoint: publicUrl('/mcp'),
    unauthenticatedDiscovery: ['initialize', 'tools/list'],
    authenticatedMethods: ['tools/call'],
  };

  const discovery = {
    serverCardUrl: publicUrl('/.well-known/mcp/server-card.json'),
    toolIndexUrl: publicUrl('/.well-known/mcp/tools.json'),
    // `{name}` is a literal placeholder for clients to substitute.
    toolSchemaUrlTemplate: publicUrl('/.well-known/mcp/tools/{name}.json'),
    skillsUrl: publicUrl('/.well-known/mcp/skills.json'),
    applicationsUrl: publicUrl('/.well-known/mcp/applications.json'),
    llmsTxtUrl: publicUrl('/.well-known/llms.txt'),
    progressive: {
      pattern: 'Load manifest, inspect tools.json, fetch one tool schema only when needed.',
      tokenStrategy: 'Do not preload every inputSchema. Use per-tool schema URLs.',
    },
  };

  const primaryFlows = [
    {
      name: 'capture-to-gate',
      description: 'Capture feedback, retrieve lessons, generate rules, enforce a gate.',
      tools: ['capture_feedback', 'search_lessons', 'prevention_rules', 'gate_stats'],
    },
    {
      name: 'safe-autonomous-work',
      description: 'Plan high-risk work, recall lessons, diagnose failures.',
      tools: ['plan_intent', 'recall', 'diagnose_failure', 'feedback_summary'],
    },
    {
      name: 'team-rollout-proof',
      description: 'Show dashboard evidence, metrics, and sprint proof.',
      tools: ['dashboard', 'get_business_metrics', 'construct_context_pack'],
    },
    {
      name: 'metric-autoresearch',
      description: 'Run bounded baseline -> hypothesis -> holdout loops with keep/discard proof.',
      tools: ['get_business_metrics', 'construct_context_pack', 'run_autoresearch', 'require_evidence_for_claim'],
    },
    {
      name: 'visual-proof-retrieval',
      description: 'Plan screenshot/PDF/proof-artifact retrieval before investing in multimodal finetuning.',
      tools: ['plan_multimodal_retrieval', 'search_thumbgate', 'construct_context_pack', 'require_evidence_for_claim'],
    },
  ];

  const proof = {
    verificationEvidenceUrl: VERIFICATION_EVIDENCE_URL,
    llmContextUrl: publicUrl('/public/llm-context.md'),
  };

  return {
    schemaVersion: '2026-04-20',
    name: 'thumbgate',
    title: 'ThumbGate',
    version: pkg.version,
    description: 'Pre-Action Gates for AI coding agents: feedback, recall, prevention rules, and tool-call blocking.',
    homepage: hostedConfig.appOrigin,
    repository: 'https://github.com/IgorGanapolsky/ThumbGate',
    package: npmPackage,
    transport,
    discovery,
    primaryFlows,
    skills: getMcpSkillManifests(hostedConfig),
    applications: getMcpApplications(hostedConfig),
    proof,
  };
}
|
|
717
|
+
|
|
555
718
|
function createHttpError(statusCode, message) {
|
|
556
719
|
const err = new Error(message);
|
|
557
720
|
err.statusCode = statusCode;
|
|
@@ -3904,7 +4067,85 @@ async function addContext(){
|
|
|
3904
4067
|
return;
|
|
3905
4068
|
}
|
|
3906
4069
|
|
|
4070
|
+
if (isGetLikeRequest && pathname === '/.well-known/mcp.json') {
|
|
4071
|
+
sendJson(res, 200, getMcpDiscoveryManifest(hostedConfig), {}, {
|
|
4072
|
+
headOnly: isHeadRequest,
|
|
4073
|
+
});
|
|
4074
|
+
return;
|
|
4075
|
+
}
|
|
4076
|
+
|
|
4077
|
+
if (isGetLikeRequest && pathname === '/.well-known/mcp/tools.json') {
|
|
4078
|
+
sendJson(res, 200, {
|
|
4079
|
+
name: 'thumbgate',
|
|
4080
|
+
version: pkg.version,
|
|
4081
|
+
count: MCP_TOOLS.length,
|
|
4082
|
+
tools: getToolDiscoveryIndex(hostedConfig),
|
|
4083
|
+
}, {}, {
|
|
4084
|
+
headOnly: isHeadRequest,
|
|
4085
|
+
});
|
|
4086
|
+
return;
|
|
4087
|
+
}
|
|
4088
|
+
|
|
4089
|
+
if (isGetLikeRequest && pathname.startsWith('/.well-known/mcp/tools/') && pathname.endsWith('.json')) {
|
|
4090
|
+
const encodedToolName = pathname.slice('/.well-known/mcp/tools/'.length, -'.json'.length);
|
|
4091
|
+
let toolName = encodedToolName;
|
|
4092
|
+
try {
|
|
4093
|
+
toolName = decodeURIComponent(encodedToolName);
|
|
4094
|
+
} catch (_err) {
|
|
4095
|
+
sendJson(res, 400, {
|
|
4096
|
+
error: 'invalid_tool_name',
|
|
4097
|
+
toolIndexUrl: buildPublicUrl(hostedConfig, '/.well-known/mcp/tools.json'),
|
|
4098
|
+
}, {}, {
|
|
4099
|
+
headOnly: isHeadRequest,
|
|
4100
|
+
});
|
|
4101
|
+
return;
|
|
4102
|
+
}
|
|
4103
|
+
const tool = MCP_TOOLS.find((candidate) => candidate.name === toolName);
|
|
4104
|
+
if (!tool) {
|
|
4105
|
+
sendJson(res, 404, {
|
|
4106
|
+
error: 'tool_not_found',
|
|
4107
|
+
toolName,
|
|
4108
|
+
toolIndexUrl: buildPublicUrl(hostedConfig, '/.well-known/mcp/tools.json'),
|
|
4109
|
+
}, {}, {
|
|
4110
|
+
headOnly: isHeadRequest,
|
|
4111
|
+
});
|
|
4112
|
+
return;
|
|
4113
|
+
}
|
|
4114
|
+
sendJson(res, 200, {
|
|
4115
|
+
name: tool.name,
|
|
4116
|
+
description: tool.description,
|
|
4117
|
+
annotations: tool.annotations || {},
|
|
4118
|
+
inputSchema: tool.inputSchema,
|
|
4119
|
+
}, {}, {
|
|
4120
|
+
headOnly: isHeadRequest,
|
|
4121
|
+
});
|
|
4122
|
+
return;
|
|
4123
|
+
}
|
|
4124
|
+
|
|
4125
|
+
if (isGetLikeRequest && pathname === '/.well-known/mcp/skills.json') {
|
|
4126
|
+
sendJson(res, 200, {
|
|
4127
|
+
name: 'thumbgate',
|
|
4128
|
+
version: pkg.version,
|
|
4129
|
+
skills: getMcpSkillManifests(hostedConfig),
|
|
4130
|
+
}, {}, {
|
|
4131
|
+
headOnly: isHeadRequest,
|
|
4132
|
+
});
|
|
4133
|
+
return;
|
|
4134
|
+
}
|
|
4135
|
+
|
|
4136
|
+
if (isGetLikeRequest && pathname === '/.well-known/mcp/applications.json') {
|
|
4137
|
+
sendJson(res, 200, {
|
|
4138
|
+
name: 'thumbgate',
|
|
4139
|
+
version: pkg.version,
|
|
4140
|
+
applications: getMcpApplications(hostedConfig),
|
|
4141
|
+
}, {}, {
|
|
4142
|
+
headOnly: isHeadRequest,
|
|
4143
|
+
});
|
|
4144
|
+
return;
|
|
4145
|
+
}
|
|
4146
|
+
|
|
3907
4147
|
if (isGetLikeRequest && pathname === '/.well-known/mcp/server-card.json') {
|
|
4148
|
+
const discoveryManifest = getMcpDiscoveryManifest(hostedConfig);
|
|
3908
4149
|
sendJson(res, 200, {
|
|
3909
4150
|
serverInfo: {
|
|
3910
4151
|
name: 'thumbgate',
|
|
@@ -3913,7 +4154,12 @@ async function addContext(){
|
|
|
3913
4154
|
name: 'thumbgate',
|
|
3914
4155
|
description: 'Pre-action gates that physically block AI coding agents from repeating known mistakes. Captures feedback, auto-promotes failures into prevention rules, and enforces them via PreToolUse hooks. Works with Claude Code, Codex, Gemini, Amp, Cursor, OpenCode, and any MCP-compatible agent.',
|
|
3915
4156
|
version: pkg.version,
|
|
4157
|
+
transport: discoveryManifest.transport,
|
|
4158
|
+
discovery: discoveryManifest.discovery,
|
|
3916
4159
|
tools: getServerCardTools(),
|
|
4160
|
+
skills: getMcpSkillManifests(hostedConfig),
|
|
4161
|
+
applications: getMcpApplications(hostedConfig),
|
|
4162
|
+
proof: discoveryManifest.proof,
|
|
3917
4163
|
repository: 'https://github.com/IgorGanapolsky/ThumbGate',
|
|
3918
4164
|
homepage: hostedConfig.appOrigin,
|
|
3919
4165
|
}, {}, {
|