thumbgate 1.7.0 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/.well-known/llms.txt +4 -0
- package/.well-known/mcp/server-card.json +9 -226
- package/adapters/README.md +1 -1
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/mcp/server-stdio.js +95 -1
- package/adapters/opencode/opencode.json +1 -1
- package/config/mcp-allowlists.json +15 -1
- package/package.json +13 -6
- package/public/index.html +2 -2
- package/scripts/agent-readiness.js +1 -0
- package/scripts/autonomous-workflow.js +377 -0
- package/scripts/autoresearch-runner.js +228 -0
- package/scripts/billing.js +4 -2
- package/scripts/mailer/resend-mailer.js +210 -40
- package/scripts/multimodal-retrieval-plan.js +110 -0
- package/scripts/statusline-context.js +207 -0
- package/scripts/statusline.sh +31 -14
- package/scripts/tool-registry.js +76 -0
- package/src/api/server.js +246 -0
- package/CHANGELOG.md +0 -702
|
@@ -0,0 +1,377 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
const fs = require('node:fs');
|
|
5
|
+
const path = require('node:path');
|
|
6
|
+
|
|
7
|
+
const { ensureDir } = require('./fs-utils');
|
|
8
|
+
const {
|
|
9
|
+
executeJob,
|
|
10
|
+
readJobState,
|
|
11
|
+
resumeJob,
|
|
12
|
+
} = require('./async-job-runner');
|
|
13
|
+
const {
|
|
14
|
+
createCheckpoint,
|
|
15
|
+
advanceCheckpoint,
|
|
16
|
+
loadCheckpoint,
|
|
17
|
+
saveCheckpoint,
|
|
18
|
+
} = require('./workflow-gate-checkpoint');
|
|
19
|
+
const { appendWorkflowRun } = require('./workflow-runs');
|
|
20
|
+
|
|
21
|
+
/**
 * Coerce any value into a trimmed string.
 *
 * @param {*} value - Value to normalize.
 * @returns {string} Empty string for null/undefined; otherwise `String(value)`
 *   with leading and trailing whitespace removed.
 */
function normalizeText(value) {
  // Loose `== null` matches exactly null and undefined.
  return value == null ? '' : String(value).trim();
}
|
|
25
|
+
|
|
26
|
+
/**
 * Turn arbitrary text into a lowercase, dash-separated identifier.
 *
 * @param {*} value - Text to slugify; it is passed through normalizeText first.
 * @param {string} [fallback='workflow'] - Returned when nothing usable remains.
 * @returns {string} Runs of [a-z0-9] joined by single dashes, or `fallback`.
 */
function slugify(value, fallback = 'workflow') {
  // Every run of non-alphanumerics collapses to exactly one dash, so at most a
  // single dash can sit at each edge. Drop those with plain string checks
  // rather than an edge-anchored `-+` regex (Sonar javascript:S5852 flags it).
  const dashed = normalizeText(value).toLowerCase().replace(/[^a-z0-9]+/g, '-');
  const from = dashed.startsWith('-') ? 1 : 0;
  const to = dashed.length > from && dashed.endsWith('-') ? dashed.length - 1 : dashed.length;
  const slug = dashed.slice(from, to);
  return slug || fallback;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Compute the on-disk locations used by one autonomous workflow.
 *
 * @param {string} workflowId - Directory name for the workflow.
 * @param {string} [cwd=process.cwd()] - Base directory that holds `.thumbgate`.
 * @returns {{rootDir: string, checkpointPath: string, reportJsonPath: string,
 *   reportMdPath: string, planPath: string}} Paths under
 *   `<cwd>/.thumbgate/autonomous-workflows/<workflowId>`.
 */
function getWorkflowPaths(workflowId, cwd = process.cwd()) {
  const rootDir = path.join(cwd, '.thumbgate', 'autonomous-workflows', workflowId);
  const within = (fileName) => path.join(rootDir, fileName);

  return {
    rootDir,
    checkpointPath: within('checkpoint.json'),
    reportJsonPath: within('report.json'),
    reportMdPath: within('report.md'),
    planPath: within('plan.json'),
  };
}
|
|
48
|
+
|
|
49
|
+
/**
 * Normalize a caller-supplied plan into `{ workflowId, summary, steps }`.
 *
 * Accepted inputs:
 *   - an array: each entry becomes a step description (blank entries dropped);
 *   - an object: `summary` and `steps` are read, where each step is either a
 *     string or an object with `id` and `description`/`summary`/`name`;
 *   - anything else: treated as a single summary text.
 *
 * @param {*} input - Raw plan value.
 * @param {string} workflowId - Identifier copied onto the result.
 * @returns {{workflowId: string, summary: string, steps: Array<{id: string, description: string}>}}
 */
function normalizePlan(input, workflowId) {
  const DEFAULT_SUMMARY = 'Execution plan ready';
  const SEPARATOR = ' | ';

  if (Array.isArray(input)) {
    const descriptions = input.map((entry) => normalizeText(entry));
    // Ids are assigned from the original position, before blanks are removed.
    const steps = descriptions
      .map((description, index) => ({ id: `step_${index + 1}`, description }))
      .filter((step) => step.description);

    return {
      workflowId,
      summary: descriptions.filter(Boolean).join(SEPARATOR) || DEFAULT_SUMMARY,
      steps,
    };
  }

  if (input && typeof input === 'object') {
    const toStep = (entry, index) => {
      const defaultId = `step_${index + 1}`;
      if (typeof entry === 'string') {
        return { id: defaultId, description: normalizeText(entry) };
      }
      if (entry && typeof entry === 'object') {
        return {
          id: normalizeText(entry.id) || defaultId,
          description: normalizeText(entry.description || entry.summary || entry.name),
        };
      }
      // Numbers, booleans, null, etc. are not valid steps.
      return null;
    };

    const steps = Array.isArray(input.steps)
      ? input.steps.map(toStep).filter(Boolean)
      : [];
    const joined = steps.map((step) => step.description).join(SEPARATOR);

    return {
      workflowId,
      summary: normalizeText(input.summary) || joined || DEFAULT_SUMMARY,
      steps,
    };
  }

  // Scalar input: the summary is never empty here because of the default, so
  // there is always exactly one step.
  const summary = normalizeText(input) || DEFAULT_SUMMARY;
  return {
    workflowId,
    summary,
    steps: [{ id: 'step_1', description: summary }],
  };
}
|
|
100
|
+
|
|
101
|
+
/**
 * Build the plan used when the spec does not supply one.
 *
 * The plan is: intent, plan, one `execute_N` step per entry in `spec.stages`,
 * then verify and report. The result is passed through normalizePlan.
 *
 * @param {object} spec - Workflow spec; reads `stages`, `planSummary`, `intent`.
 * @param {string} workflowId - Identifier copied onto the plan.
 * @returns {object} Normalized plan as returned by normalizePlan.
 */
function buildDefaultPlan(spec, workflowId) {
  // Label each stage by name, then context, then command; fall back to its position.
  const stageLabels = Array.isArray(spec.stages)
    ? spec.stages.map((stage, index) => {
      const raw = stage && (stage.name || stage.context || stage.command);
      return normalizeText(raw) || `Stage ${index + 1}`;
    })
    : [];

  const executionSteps = stageLabels.map((description, index) => ({
    id: `execute_${index + 1}`,
    description,
  }));

  const summary = normalizeText(spec.planSummary)
    || `Run ${stageLabels.length} execution stage(s) and verify output`;

  const rawPlan = {
    summary,
    steps: [
      { id: 'intent', description: normalizeText(spec.intent) || 'Intent captured' },
      { id: 'plan', description: 'Execution plan generated' },
      ...executionSteps,
      { id: 'verify', description: 'Verification loop completed' },
      { id: 'report', description: 'Evidence-backed report recorded' },
    ],
    workflowId,
  };

  return normalizePlan(rawPlan, workflowId);
}
|
|
121
|
+
|
|
122
|
+
/**
 * Pick the plan for a workflow run.
 *
 * @param {object} spec - Workflow spec. `spec.plan` may be a function (called
 *   as `spec.plan(spec)`), any other truthy plan value, or absent.
 * @param {string} workflowId - Identifier copied onto the plan.
 * @returns {object} Normalized plan; the default plan when `spec.plan` is falsy.
 */
function resolvePlan(spec, workflowId) {
  const candidate = spec.plan;
  if (!candidate) {
    return buildDefaultPlan(spec, workflowId);
  }

  // Invoke through `spec` so a plan function keeps `this === spec`.
  const rawPlan = typeof candidate === 'function' ? spec.plan(spec) : candidate;
  return normalizePlan(rawPlan, workflowId);
}
|
|
133
|
+
|
|
134
|
+
/**
 * Assemble the job descriptor for the execution phase.
 *
 * @param {object} spec - Workflow spec supplying optional overrides.
 * @param {string} workflowId - Used for the default job id and in metadata.
 * @param {{rootDir: string}} paths - Workflow paths; only `rootDir` is read.
 * @param {{summary: string}} plan - Resolved plan; only `summary` is read.
 * @returns {object} Job descriptor with id, tags, skill, partnerProfile,
 *   verificationMode, autoImprove, recordFeedback, stages and metadata.
 */
function buildExecutionJob(spec, workflowId, paths, plan) {
  const arrayOrEmpty = (candidate) => (Array.isArray(candidate) ? candidate : []);
  // Verification is on unless it is switched off with the exact value 'none'.
  const skipVerification = spec.verificationMode === 'none';

  return {
    id: spec.jobId || `${workflowId}-execution`,
    tags: arrayOrEmpty(spec.tags),
    skill: spec.skill || 'autonomous-workflow',
    partnerProfile: spec.partnerProfile || null,
    verificationMode: skipVerification ? 'none' : 'standard',
    // Both flags default to enabled; only an explicit `false` turns them off.
    autoImprove: !(spec.autoImprove === false),
    recordFeedback: !(spec.recordFeedback === false),
    stages: arrayOrEmpty(spec.stages),
    metadata: {
      workflowId,
      planSummary: plan.summary,
      workflowRoot: paths.rootDir,
    },
  };
}
|
|
151
|
+
|
|
152
|
+
/**
 * Persist the plan as pretty-printed JSON (2-space indent, trailing newline).
 *
 * @param {{rootDir: string, planPath: string}} paths - Workflow paths.
 * @param {object} plan - Plan to serialize.
 * @returns {string} The path the plan was written to.
 */
function writeWorkflowPlan(paths, plan) {
  const { rootDir, planPath } = paths;
  ensureDir(rootDir);

  const serialized = JSON.stringify(plan, null, 2);
  fs.writeFileSync(planPath, `${serialized}\n`, 'utf8');
  return planPath;
}
|
|
157
|
+
|
|
158
|
+
/**
 * List every artifact path that backs a workflow run.
 *
 * The result always starts with the checkpoint, plan, JSON report and markdown
 * report paths, followed by the job state path (when present) and any
 * caller-supplied extras. Falsy entries are removed.
 *
 * @param {{checkpointPath: string, planPath: string, reportJsonPath: string,
 *   reportMdPath: string}} paths - Workflow paths.
 * @param {{jobStatePath?: string}|null} [executionResult] - Execution details;
 *   only `jobStatePath` is read.
 * @param {string[]|string|null} [extraArtifacts=[]] - Additional artifact
 *   paths. An array is merged entry by entry, a single string is treated as
 *   one path, and any other value (including null) is ignored.
 * @returns {string[]} Artifact paths in the order described above.
 */
function collectEvidenceArtifacts(paths, executionResult, extraArtifacts = []) {
  // The default parameter only covers `undefined`. A spec loaded from JSON can
  // carry `null` (spreading it throws) or a bare string (spreading it would
  // yield one entry per character), so normalize before merging.
  let extras = [];
  if (Array.isArray(extraArtifacts)) {
    extras = extraArtifacts;
  } else if (typeof extraArtifacts === 'string') {
    extras = [extraArtifacts];
  }

  const jobStatePath = executionResult && executionResult.jobStatePath
    ? executionResult.jobStatePath
    : null;

  return [
    paths.checkpointPath,
    paths.planPath,
    paths.reportJsonPath,
    paths.reportMdPath,
    jobStatePath,
    ...extras,
  ].filter(Boolean);
}
|
|
168
|
+
|
|
169
|
+
/**
 * Write the workflow report twice: as pretty-printed JSON and as markdown.
 *
 * The JSON file is written first. The markdown then lists the headline fields,
 * the plan, the execution stage history and the evidence artifacts.
 *
 * @param {{rootDir: string, reportJsonPath: string, reportMdPath: string}} paths
 *   Workflow paths.
 * @param {object} report - Report object; reads workflowName, workflowId,
 *   status, intent, verification, evidenceArtifacts, plan and
 *   execution.stageHistory.
 * @returns {{json: string, markdown: string}} Paths of the two written files.
 */
function writeWorkflowReport(paths, report) {
  ensureDir(paths.rootDir);

  const serialized = JSON.stringify(report, null, 2);
  fs.writeFileSync(paths.reportJsonPath, `${serialized}\n`, 'utf8');

  const bullet = (text) => `- ${text}`;
  // A missing verification phase is reported as "skipped" rather than false.
  const verificationLabel = report.verification
    ? String(report.verification.accepted)
    : 'skipped';

  const headline = [
    `# ${report.workflowName}`,
    '',
    bullet(`Workflow ID: ${report.workflowId}`),
    bullet(`Status: ${report.status}`),
    bullet(`Intent: ${report.intent}`),
    bullet(`Verification accepted: ${verificationLabel}`),
    bullet(`Evidence artifacts: ${report.evidenceArtifacts.length}`),
  ];
  const planSection = [
    '## Plan',
    '',
    report.plan.summary,
    '',
    ...report.plan.steps.map((step) => bullet(`${step.id}: ${step.description}`)),
  ];
  const executionSection = [
    '## Execution',
    '',
    ...report.execution.stageHistory.map((stage) => bullet(`${stage.name} @ ${stage.completedAt}`)),
  ];
  const evidenceSection = [
    '## Evidence Artifacts',
    '',
    ...report.evidenceArtifacts.map((artifact) => bullet(artifact)),
  ];

  // Sections are separated by a single blank line.
  const markdown = [
    ...headline,
    '',
    ...planSection,
    '',
    ...executionSection,
    '',
    ...evidenceSection,
  ].join('\n');

  fs.writeFileSync(paths.reportMdPath, `${markdown}\n`, 'utf8');

  return {
    json: paths.reportJsonPath,
    markdown: paths.reportMdPath,
  };
}
|
|
203
|
+
|
|
204
|
+
/**
 * Append a workflow-run record for a finished autonomous workflow.
 *
 * A run counts as proof-backed when it completed, its verification (if any)
 * was accepted, and at least one evidence artifact exists. Proof-backed runs
 * are also marked as reviewed.
 *
 * @param {object} spec - Workflow spec; reads owner, customerType, teamId,
 *   reviewedBy and source.
 * @param {object} report - Workflow report; reads workflowId, workflowName,
 *   status, intent, verification, plan.summary and execution.jobId.
 * @param {string[]} evidenceArtifacts - Artifact paths backing the run.
 * @param {string} [feedbackDir] - Forwarded unchanged to appendWorkflowRun.
 * @returns {*} Whatever appendWorkflowRun returns.
 */
function recordAutonomousWorkflowRun(spec, report, evidenceArtifacts, feedbackDir) {
  const { verification } = report;
  // No verification phase is treated the same as an accepted one.
  const verificationOk = !verification || verification.accepted;
  const proofBacked = report.status === 'completed'
    && verificationOk
    && evidenceArtifacts.length > 0;

  const metadata = {
    intent: report.intent,
    planSummary: report.plan.summary,
    verificationAttempts: verification ? verification.attempts : 0,
    executionJobId: report.execution.jobId,
  };

  const entry = {
    workflowId: report.workflowId,
    workflowName: report.workflowName,
    owner: spec.owner || 'automation',
    runtime: 'node',
    status: report.status,
    customerType: spec.customerType || 'internal_dogfood',
    teamId: spec.teamId || null,
    reviewed: proofBacked,
    reviewedBy: proofBacked ? (spec.reviewedBy || 'automation') : null,
    proofBacked,
    proofArtifacts: evidenceArtifacts,
    source: spec.source || 'autonomous-workflow',
    metadata,
  };

  return appendWorkflowRun(entry, feedbackDir);
}
|
|
230
|
+
|
|
231
|
+
/**
 * Run one autonomous workflow end to end: intent → plan → execute → verify → report.
 *
 * In order, this writes the plan, creates and advances a checkpoint (saved
 * after every phase), runs or resumes the execution job, builds the report,
 * writes it as JSON and markdown, records the workflow run, and rewrites the
 * JSON report so it includes that record.
 *
 * NOTE(review): the job runner's return value is read synchronously
 * (`.status`, `.phases`), so executeJob/resumeJob are presumably synchronous.
 * Confirm against async-job-runner.
 *
 * @param {object} [spec={}] - Workflow spec (workflowId, name, intent, plan,
 *   stages, proofArtifacts, plus the fields read by the helper functions).
 * @param {object} [options={}]
 * @param {string} [options.cwd] - Base directory; defaults to process.cwd().
 * @param {boolean} [options.resume] - When exactly `true`, resume the job
 *   instead of starting it.
 * @param {string} [options.feedbackDir] - Feedback directory override.
 * @returns {object} The report, including the `workflowRun` record.
 */
function runAutonomousWorkflow(spec = {}, options = {}) {
  const cwd = options.cwd || process.cwd();
  const workflowId = normalizeText(spec.workflowId)
    || slugify(spec.name || spec.intent, 'autonomous-workflow');
  const workflowName = normalizeText(spec.name) || `Autonomous workflow ${workflowId}`;
  const intent = normalizeText(spec.intent) || 'Intent not provided';
  const paths = getWorkflowPaths(workflowId, cwd);
  const plan = resolvePlan(spec, workflowId);

  writeWorkflowPlan(paths, plan);

  // Track the latest checkpoint and persist it every time it changes.
  let checkpoint;
  const commit = (next) => {
    checkpoint = next;
    saveCheckpoint(checkpoint, paths.checkpointPath);
  };

  // Phase: intent.
  commit(createCheckpoint({
    workflowId,
    phase: 'intent',
    status: 'running',
    intent: { summary: intent },
    plan,
    evidence: [paths.planPath],
    metadata: {
      workflowName,
    },
  }));

  // Phase: plan.
  commit(advanceCheckpoint(checkpoint, {
    phase: 'plan',
    status: 'running',
    plan,
    evidence: [paths.planPath],
  }));

  // Phase: execute (fresh run or resume).
  const job = buildExecutionJob(spec, workflowId, paths, plan);
  let executionResult;
  if (options.resume === true) {
    executionResult = resumeJob(job.id, job);
  } else {
    executionResult = executeJob(job);
  }
  const jobState = readJobState(job.id);
  const executionStatus = executionResult.status;

  // Phase: verify.
  commit(advanceCheckpoint(checkpoint, {
    phase: 'verify',
    status: executionStatus,
    evidence: jobState && jobState.verification ? [paths.checkpointPath] : [],
    metadata: {
      executionJobId: job.id,
      executionStatus,
    },
  }));

  const report = {
    workflowId,
    workflowName,
    status: executionStatus,
    intent,
    plan,
    execution: {
      jobId: job.id,
      status: executionStatus,
      stageHistory: jobState && Array.isArray(jobState.stageHistory) ? jobState.stageHistory : [],
      checkpointCount: jobState && Array.isArray(jobState.checkpoints) ? jobState.checkpoints.length : 0,
      currentContext: (jobState && jobState.currentContext) || '',
      jobStatePath: jobState
        ? path.join(getFeedbackDir(options.feedbackDir), 'jobs', job.id, 'state.json')
        : null,
    },
    verification: executionResult.phases ? executionResult.phases.verification : null,
    phases: executionResult.phases || null,
    timestamp: new Date().toISOString(),
    evidenceArtifacts: [],
  };

  const evidenceArtifacts = collectEvidenceArtifacts(paths, report.execution, spec.proofArtifacts);
  report.evidenceArtifacts = evidenceArtifacts;

  // Phase: report.
  commit(advanceCheckpoint(checkpoint, {
    phase: 'report',
    status: executionStatus,
    report: {
      status: report.status,
      generatedAt: report.timestamp,
    },
    evidence: evidenceArtifacts,
  }));

  writeWorkflowReport(paths, report);
  report.workflowRun = recordAutonomousWorkflowRun(spec, report, evidenceArtifacts, options.feedbackDir);
  // Rewrite the JSON report so the stored copy also carries the run record.
  const finalJson = JSON.stringify(report, null, 2);
  fs.writeFileSync(paths.reportJsonPath, `${finalJson}\n`, 'utf8');

  return report;
}
|
|
319
|
+
|
|
320
|
+
/**
 * Resolve the feedback directory.
 *
 * Precedence: explicit argument, then the THUMBGATE_FEEDBACK_DIR environment
 * variable, then `<process.cwd()>/.thumbgate`.
 *
 * @param {string} [feedbackDir] - Explicit override; any falsy value is ignored.
 * @returns {string} The directory to use.
 */
function getFeedbackDir(feedbackDir) {
  return feedbackDir
    || process.env.THUMBGATE_FEEDBACK_DIR
    || path.join(process.cwd(), '.thumbgate');
}
|
|
324
|
+
|
|
325
|
+
/**
 * Resume a workflow: same as runAutonomousWorkflow with `resume` forced to true.
 *
 * @param {object} [spec={}] - Workflow spec.
 * @param {object} [options={}] - Run options; any `resume` value is overridden.
 * @returns {object} The report returned by runAutonomousWorkflow.
 */
function resumeAutonomousWorkflow(spec = {}, options = {}) {
  const resumeOptions = { ...options, resume: true };
  return runAutonomousWorkflow(spec, resumeOptions);
}
|
|
328
|
+
|
|
329
|
+
/**
 * Load the stored JSON report of a workflow.
 *
 * @param {string} workflowId - Workflow whose report to read.
 * @param {object} [options={}]
 * @param {string} [options.cwd] - Base directory; defaults to process.cwd().
 * @returns {object|null} Parsed report, or null when no report file exists.
 */
function readWorkflowReport(workflowId, options = {}) {
  const { reportJsonPath } = getWorkflowPaths(workflowId, options.cwd || process.cwd());
  return fs.existsSync(reportJsonPath)
    ? JSON.parse(fs.readFileSync(reportJsonPath, 'utf8'))
    : null;
}
|
|
334
|
+
|
|
335
|
+
/**
 * Tell whether this file is the script Node was asked to run.
 *
 * @param {string[]} [argv=process.argv] - Argument vector; `argv[1]` is the script path.
 * @returns {boolean} True when `argv[1]` resolves to this file's path.
 */
function isCliInvocation(argv = process.argv) {
  const invokedPath = argv[1];
  if (!invokedPath) {
    return false;
  }
  return path.resolve(invokedPath) === __filename;
}
|
|
339
|
+
|
|
340
|
+
/**
 * Parse `--key=value` and `--flag` arguments into an object.
 *
 * Arguments that do not start with `--` are ignored. A flag without `=` maps
 * to `true`; otherwise the value is everything after the first `=`.
 *
 * @param {string[]} [argv=process.argv.slice(2)] - Raw arguments.
 * @returns {Object<string, string|boolean>} Parsed arguments.
 */
function parseArgs(argv = process.argv.slice(2)) {
  const args = {};
  for (const arg of argv) {
    if (arg.startsWith('--')) {
      const body = arg.slice(2);
      const separator = body.indexOf('=');
      if (separator === -1) {
        args[body] = true;
      } else {
        // Split on the first '=' only, so values may contain '=' themselves.
        args[body.slice(0, separator)] = body.slice(separator + 1);
      }
    }
  }
  return args;
}
|
|
349
|
+
|
|
350
|
+
// CLI entry point: node scripts/autonomous-workflow.js --file=workflow.json [--resume]
// Prints the report as JSON and exits 0 only when the workflow completed.
if (isCliInvocation()) {
  const args = parseArgs();
  if (!args.file) {
    console.error('Usage: node scripts/autonomous-workflow.js --file=workflow.json [--resume]');
    process.exit(1);
  }

  const specPath = path.resolve(args.file);
  const spec = JSON.parse(fs.readFileSync(specPath, 'utf8'));
  const runner = args.resume ? resumeAutonomousWorkflow : runAutonomousWorkflow;
  const report = runner(spec);
  console.log(JSON.stringify(report, null, 2));

  const exitCode = report.status === 'completed' ? 0 : 1;
  process.exit(exitCode);
}
|
|
363
|
+
|
|
364
|
+
module.exports = {
|
|
365
|
+
buildDefaultPlan,
|
|
366
|
+
collectEvidenceArtifacts,
|
|
367
|
+
getWorkflowPaths,
|
|
368
|
+
normalizePlan,
|
|
369
|
+
parseArgs,
|
|
370
|
+
readWorkflowReport,
|
|
371
|
+
recordAutonomousWorkflowRun,
|
|
372
|
+
resumeAutonomousWorkflow,
|
|
373
|
+
runAutonomousWorkflow,
|
|
374
|
+
slugify,
|
|
375
|
+
writeWorkflowPlan,
|
|
376
|
+
writeWorkflowReport,
|
|
377
|
+
};
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
/**
|
|
4
|
+
* Autoresearch Runner (AUTORESEARCH-02)
|
|
5
|
+
*
|
|
6
|
+
* Karpathy-inspired self-optimizing loop for the ThumbGate feedback studio.
|
|
7
|
+
* Each iteration: mutate local evolution state → run primary + holdout checks
|
|
8
|
+
* → measure score → keep/discard with rollback snapshots.
|
|
9
|
+
*
|
|
10
|
+
* The runner never rewrites tracked source files. It mutates the local
|
|
11
|
+
* evolution-state overlay, evaluates in place, and only persists accepted
|
|
12
|
+
* settings plus rollback snapshots.
|
|
13
|
+
*
|
|
14
|
+
* Mutation targets (in priority order):
|
|
15
|
+
* 1. Thompson Sampling priors (HALF_LIFE_DAYS, DECAY_FLOOR)
|
|
16
|
+
* 2. Prevention rule thresholds (minOccurrences)
|
|
17
|
+
* 3. Verification loop retries (MAX_RETRIES)
|
|
18
|
+
* 4. DPO temperature (DPO_BETA)
|
|
19
|
+
*
|
|
20
|
+
* Score function: command pass rate × approval weighting, with holdout gating.
|
|
21
|
+
*
|
|
22
|
+
* Zero external dependencies.
|
|
23
|
+
*
|
|
24
|
+
* Exports: runIteration, runLoop, scoreSuite, MUTATION_TARGETS
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
const {
|
|
28
|
+
getProgress,
|
|
29
|
+
} = require('./experiment-tracker');
|
|
30
|
+
const { buildResearchBrief } = require('./hf-papers');
|
|
31
|
+
const {
|
|
32
|
+
EVOLUTION_TARGETS,
|
|
33
|
+
parseCommandScore,
|
|
34
|
+
runWorkspaceEvolution,
|
|
35
|
+
} = require('./workspace-evolver');
|
|
36
|
+
|
|
37
|
+
// ---------------------------------------------------------------------------
|
|
38
|
+
// Mutation Targets
|
|
39
|
+
// ---------------------------------------------------------------------------
|
|
40
|
+
|
|
41
|
+
const MUTATION_TARGETS = EVOLUTION_TARGETS;
|
|
42
|
+
|
|
43
|
+
// ---------------------------------------------------------------------------
|
|
44
|
+
// Score Function
|
|
45
|
+
// ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
/**
 * Score a test suite run by delegating to parseCommandScore.
 *
 * NOTE(review): the shape of the result is whatever parseCommandScore returns;
 * the previous doc described it as { score, testPassRate, details } with the
 * score in [0, 1]. Confirm against workspace-evolver.
 *
 * @param {object} params
 * @param {string} [params.testOutput] - stdout from the test run; defaults to ''.
 * @param {number} [params.approvalRate] - Current approval rate from feedback;
 *   0.5 is used when this is not a number.
 * @returns {*} The result of parseCommandScore.
 */
function scoreSuite(params) {
  const output = params.testOutput || '';
  const approvalRate = typeof params.approvalRate === 'number' ? params.approvalRate : 0.5;
  // The second argument is passed as a literal 0 — presumably an exit code; TODO confirm.
  return parseCommandScore(output, 0, approvalRate);
}
|
|
58
|
+
|
|
59
|
+
// ---------------------------------------------------------------------------
|
|
60
|
+
// Single Iteration
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
|
|
63
|
+
/**
 * Run one autoresearch iteration.
 *
 * When a research query is given, a research brief is built first and its
 * query, pack id and paper ids are attached to the evolution run as metrics.
 * The iteration itself is delegated to runWorkspaceEvolution, with the test
 * command as the only primary command.
 *
 * @param {object} [opts]
 * @param {string} [opts.targetName] - Force a specific mutation target
 * @param {number} [opts.nextValue] - Force the candidate value
 * @param {string} [opts.testCommand] - Test command (default: npm test)
 * @param {string[]} [opts.holdoutCommands] - Holdout commands (default: none)
 * @param {number} [opts.timeoutMs] - Timeout in ms (default: 120000)
 * @param {string} [opts.cwd] - Working directory for evaluation commands
 * @param {string} [opts.researchQuery] - Optional external research query
 * @param {number} [opts.paperLimit] - Max papers to ingest for research context
 * @param {Function} [opts.fetchImpl] - Optional fetch implementation override
 * @param {Function} [opts.searchPapersImpl] - Optional paper search override
 * @returns {Promise<object>} Result of runWorkspaceEvolution
 */
async function runIteration(opts = {}) {
  // The default parameter does not cover an explicit null.
  const options = opts || {};
  const timeoutMs = options.timeoutMs || 120000;
  const testCommand = options.testCommand || 'npm test';

  let research = null;
  if (options.researchQuery) {
    research = await buildResearchBrief({
      query: options.researchQuery,
      limit: options.paperLimit,
      fetchImpl: options.fetchImpl,
      searchPapersImpl: options.searchPapersImpl,
      template: 'autoresearch-brief',
    });
  }

  const additionalMetrics = research
    ? {
      researchQuery: research.query,
      researchPackId: research.packId,
      researchPaperIds: research.citations.map((citation) => citation.paperId).filter(Boolean),
    }
    : {
      researchQuery: null,
      researchPackId: null,
      researchPaperIds: [],
    };

  return runWorkspaceEvolution({
    targetName: options.targetName,
    nextValue: options.nextValue,
    primaryCommands: [testCommand],
    holdoutCommands: options.holdoutCommands || [],
    timeoutMs,
    cwd: options.cwd,
    hypothesisSuffix: research ? `Research query: ${research.query}` : null,
    additionalMetrics,
  });
}
|
|
115
|
+
|
|
116
|
+
// ---------------------------------------------------------------------------
|
|
117
|
+
// Multi-Iteration Loop
|
|
118
|
+
// ---------------------------------------------------------------------------
|
|
119
|
+
|
|
120
|
+
/**
 * Run N autoresearch iterations one after another.
 *
 * Each iteration's outcome is logged and collected. An iteration that throws
 * is logged and recorded as `{ error: message }` without stopping the loop.
 *
 * @param {object} params
 * @param {number} params.iterations - Number of experiments to run (falsy → 1)
 * @param {string} [params.targetName] - Force a specific mutation target
 * @param {number} [params.nextValue] - Forced candidate value; ignored unless finite
 * @param {string} [params.testCommand] - Override test command
 * @param {string[]} [params.holdoutCommands] - Optional holdout commands
 * @param {number} [params.timeoutMs] - Per-iteration timeout
 * @param {string} [params.cwd] - Working directory for evaluation commands
 * @param {string} [params.researchQuery] - Optional external research query
 * @param {number} [params.paperLimit] - Max papers to ingest for research context
 * @param {Function} [params.fetchImpl] - Optional fetch implementation override
 * @param {Function} [params.searchPapersImpl] - Optional paper search override
 * @returns {Promise<object>} { results, progress }
 */
async function runLoop(params) {
  const iterations = params.iterations || 1;
  const results = [];

  // Built fresh for every iteration so each run reads the current params.
  const iterationOptions = () => ({
    targetName: params.targetName,
    nextValue: Number.isFinite(params.nextValue) ? params.nextValue : undefined,
    testCommand: params.testCommand,
    holdoutCommands: params.holdoutCommands,
    timeoutMs: params.timeoutMs,
    cwd: params.cwd,
    researchQuery: params.researchQuery,
    paperLimit: params.paperLimit,
    fetchImpl: params.fetchImpl,
    searchPapersImpl: params.searchPapersImpl,
  });

  const describeOutcome = (result) => {
    if (result.kept) {
      return ` ✓ KEPT: ${result.name} (delta: +${(result.delta || 0).toFixed(4)})`;
    }
    if (result.skipped) {
      return ` ⊘ SKIPPED: ${result.reason}`;
    }
    return ` ✗ DISCARDED: ${result.reason}`;
  };

  let round = 0;
  while (round < iterations) {
    round += 1;
    console.log(`\n[autoresearch] Iteration ${round}/${iterations}`);
    try {
      const result = await runIteration(iterationOptions());
      results.push(result);
      console.log(describeOutcome(result));
    } catch (err) {
      console.error(` ✗ ERROR: ${err.message}`);
      results.push({ error: err.message });
    }
  }

  const progress = getProgress();
  console.log(`\n[autoresearch] Progress: ${progress.completed} experiments, ${progress.kept} kept (${progress.keepRate}%)`);
  return { results, progress };
}
|
|
174
|
+
|
|
175
|
+
// ---------------------------------------------------------------------------
|
|
176
|
+
// CLI
|
|
177
|
+
// ---------------------------------------------------------------------------
|
|
178
|
+
|
|
179
|
+
// CLI entry point: `--run` starts the loop, `--targets` lists the mutation
// targets, and anything else prints usage.
if (require.main === module) {
  const args = {};
  for (const arg of process.argv.slice(2)) {
    if (arg.startsWith('--')) {
      const body = arg.slice(2);
      const separator = body.indexOf('=');
      if (separator === -1) {
        args[body] = true;
      } else {
        // Split on the first '=' only, so values may contain '=' themselves.
        args[body.slice(0, separator)] = body.slice(separator + 1);
      }
    }
  }

  if (args.run) {
    const rawNextValue = args['next-value'];
    const loopParams = {
      iterations: Number(args.iterations || 1),
      targetName: args.target || null,
      nextValue: rawNextValue !== undefined ? Number(rawNextValue) : undefined,
      testCommand: args['test-command'] || 'npm test',
      holdoutCommands: args.holdout ? [args.holdout] : [],
      timeoutMs: Number(args.timeout || 120000),
      cwd: args.cwd || undefined,
      researchQuery: args['research-query'] || null,
      paperLimit: Number(args['paper-limit'] || 5),
    };

    runLoop(loopParams).catch((error) => {
      console.error(error.message);
      process.exit(1);
    });
  } else if (args.targets) {
    console.log('Mutation targets:');
    for (const t of MUTATION_TARGETS) {
      console.log(` ${t.name} (${t.type}): range [${t.range.join(', ')}], step ${t.step}`);
    }
  } else {
    console.log(`Usage:
node scripts/autoresearch-runner.js --run [--iterations=5] [--target=half_life_days] [--next-value=8] [--test-command="npm test"] [--holdout="npm run self-heal:check"] [--timeout=120000] [--research-query="rank fusion"] [--paper-limit=5]
node scripts/autoresearch-runner.js --targets`);
  }
}
|
|
218
|
+
|
|
219
|
+
// ---------------------------------------------------------------------------
|
|
220
|
+
// Exports
|
|
221
|
+
// ---------------------------------------------------------------------------
|
|
222
|
+
|
|
223
|
+
module.exports = {
|
|
224
|
+
runIteration,
|
|
225
|
+
runLoop,
|
|
226
|
+
scoreSuite,
|
|
227
|
+
MUTATION_TARGETS,
|
|
228
|
+
};
|
package/scripts/billing.js
CHANGED
|
@@ -511,6 +511,7 @@ function buildTrialActivationEmail({ customerEmail, apiKey, sessionId, planId, a
|
|
|
511
511
|
const origin = resolvePublicAppOrigin(appOrigin);
|
|
512
512
|
const dashboardUrl = joinPublicUrl(origin, '/dashboard');
|
|
513
513
|
const docsUrl = 'https://github.com/IgorGanapolsky/ThumbGate/blob/main/docs/VERIFICATION_EVIDENCE.md';
|
|
514
|
+
const supportEmail = process.env.THUMBGATE_SUPPORT_EMAIL || CONFIG.TRIAL_EMAIL_REPLY_TO || 'igor.ganapolsky@gmail.com';
|
|
514
515
|
const command = `npx thumbgate pro --activate --key=${apiKey || ''}`;
|
|
515
516
|
const subject = 'Your 7-day ThumbGate Pro trial is live';
|
|
516
517
|
const preheader = 'Activate Pro in one command, open the dashboard, and start blocking repeated AI coding mistakes.';
|
|
@@ -519,6 +520,7 @@ function buildTrialActivationEmail({ customerEmail, apiKey, sessionId, planId, a
|
|
|
519
520
|
const exampleFeedback = 'thumbs down: the answer skipped exact files and tests; next time include paths, commands, and verification evidence.';
|
|
520
521
|
const safeDashboardUrl = escapeHtml(dashboardUrl);
|
|
521
522
|
const safeDocsUrl = escapeHtml(docsUrl);
|
|
523
|
+
const safeSupportEmail = escapeHtml(supportEmail);
|
|
522
524
|
const safeCommand = escapeHtml(command);
|
|
523
525
|
const safeApiKey = escapeHtml(apiKey || '');
|
|
524
526
|
return {
|
|
@@ -544,7 +546,7 @@ function buildTrialActivationEmail({ customerEmail, apiKey, sessionId, planId, a
|
|
|
544
546
|
apiKey,
|
|
545
547
|
'',
|
|
546
548
|
`Verification evidence: ${docsUrl}`,
|
|
547
|
-
|
|
549
|
+
`Keep this key private. Questions? Reply to this email or write ${supportEmail}.`,
|
|
548
550
|
sessionId ? `Stripe session: ${sessionId}` : null,
|
|
549
551
|
planId ? `Plan: ${planId}` : null,
|
|
550
552
|
].filter(Boolean).join('\n'),
|
|
@@ -591,7 +593,7 @@ function buildTrialActivationEmail({ customerEmail, apiKey, sessionId, planId, a
|
|
|
591
593
|
|
|
592
594
|
<p style="margin:0;font-size:13px;line-height:1.6;color:#526273;">
|
|
593
595
|
Proof trail: <a href="${safeDocsUrl}" style="color:#087a91;">verification evidence</a>.
|
|
594
|
-
Keep this key private. Questions? Reply here or write <a href="mailto
|
|
596
|
+
Keep this key private. Questions? Reply here or write <a href="mailto:${safeSupportEmail}" style="color:#087a91;">${safeSupportEmail}</a>.
|
|
595
597
|
</p>
|
|
596
598
|
${sessionId ? `<p style="margin:12px 0 0;font-size:12px;line-height:1.5;color:#7a8790;">Stripe session: ${escapeHtml(sessionId)}</p>` : ''}
|
|
597
599
|
</td>
|