thumbgate 1.16.13 → 1.16.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +3 -1
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/mcp/server-stdio.js +26 -1
- package/adapters/opencode/opencode.json +1 -1
- package/bin/cli.js +420 -1
- package/bin/postinstall.js +2 -2
- package/config/gate-templates.json +372 -0
- package/config/mcp-allowlists.json +25 -0
- package/config/model-candidates.json +59 -2
- package/config/model-tiers.json +4 -1
- package/package.json +79 -22
- package/public/compare.html +6 -0
- package/public/index.html +153 -20
- package/public/numbers.html +6 -6
- package/public/pro.html +25 -27
- package/scripts/agent-design-governance.js +211 -0
- package/scripts/agent-reasoning-traces.js +683 -0
- package/scripts/agent-reward-model.js +438 -0
- package/scripts/agent-stack-survival-audit.js +231 -0
- package/scripts/ai-engineering-stack-guardrails.js +256 -0
- package/scripts/billing.js +33 -5
- package/scripts/chatgpt-ads-readiness-pack.js +195 -0
- package/scripts/cli-schema.js +277 -0
- package/scripts/code-graph-guardrails.js +176 -0
- package/scripts/commercial-offer.js +1 -1
- package/scripts/deepseek-v4-runtime-guardrails.js +253 -0
- package/scripts/gemini-embedding-policy.js +198 -0
- package/scripts/inference-cache-policy.js +39 -0
- package/scripts/judge-reward-function.js +396 -0
- package/scripts/llm-behavior-monitor.js +251 -0
- package/scripts/long-running-agent-context-guardrails.js +176 -0
- package/scripts/multimodal-retrieval-plan.js +31 -11
- package/scripts/oss-pr-opportunity-scout.js +240 -0
- package/scripts/proactive-agent-eval-guardrails.js +230 -0
- package/scripts/profile-router.js +5 -4
- package/scripts/prompting-operating-system.js +273 -0
- package/scripts/proxy-pointer-rag-guardrails.js +189 -0
- package/scripts/rag-precision-guardrails.js +202 -0
- package/scripts/rate-limiter.js +1 -1
- package/scripts/reasoning-efficiency-guardrails.js +176 -0
- package/scripts/reward-hacking-guardrails.js +251 -0
- package/scripts/seo-gsd.js +1201 -11
- package/scripts/single-use-credential-gate.js +182 -0
- package/scripts/structured-prompt-driven.js +226 -0
- package/scripts/telemetry-analytics.js +108 -6
- package/scripts/tool-registry.js +92 -0
- package/scripts/upstream-contribution-engine.js +379 -0
- package/scripts/vector-store.js +119 -4
- package/src/api/server.js +455 -143
- package/scripts/agents-sdk-sandbox-plan.js +0 -57
- package/scripts/ai-org-governance.js +0 -98
- package/scripts/artifact-agent-plan.js +0 -81
- package/scripts/enterprise-agent-rollout.js +0 -34
- package/scripts/experience-replay-governance.js +0 -69
- package/scripts/inference-economics.js +0 -53
- package/scripts/knowledge-layer-plan.js +0 -108
- package/scripts/memory-store-governance.js +0 -60
- package/scripts/post-training-governance.js +0 -34
- package/scripts/production-agent-readiness.js +0 -40
- package/scripts/scaling-law-claims.js +0 -60
- package/scripts/student-consistent-training.js +0 -73
|
@@ -1,60 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
'use strict';
|
|
3
|
-
|
|
4
|
-
/**
 * Coerce an arbitrary value to a whitespace-trimmed string.
 *
 * @param {*} value - Any value; `null` and `undefined` become the empty string.
 * @returns {string} `String(value)` with leading/trailing whitespace removed.
 */
function normalizeText(value) {
  // Loose equality against null deliberately covers both null and undefined.
  return value == null ? '' : String(value).trim();
}
|
|
8
|
-
|
|
9
|
-
/**
 * Bucket a scaling claim by the vocabulary it uses.
 *
 * The claim is normalized and lower-cased, then checked against the rules in
 * order; the first rule whose whole-word pattern matches wins.
 *
 * @param {*} claim - Claim text (any value; coerced via normalizeText).
 * @returns {string} 'pretraining_scaling', 'feedback_policy_scaling', or
 *   'general_scaling' when no rule matches.
 */
function classifyScalingClaim(claim) {
  const lowered = normalizeText(claim).toLowerCase();

  // Order matters: pretraining vocabulary takes precedence over feedback vocabulary.
  const rules = [
    ['pretraining_scaling', /\b(pretrain|pretraining|parameters|training tokens|flops|cross entropy|test loss)\b/],
    ['feedback_policy_scaling', /\b(rl|reinforcement|feedback|dpo|kto|reward|policy|thumbs[-\s]?(up|down)|gate|prevention rule)\b/],
  ];

  const matched = rules.find(([, pattern]) => pattern.test(lowered));
  return matched ? matched[0] : 'general_scaling';
}
|
|
19
|
-
|
|
20
|
-
/**
 * Check a scaling claim against the evidence supplied with it and report gaps.
 *
 * @param {Object} [input={}]
 * @param {*} [input.claim] - Claim text; coerced with normalizeText.
 * @param {Array<*>} [input.evidence] - Evidence descriptions. Non-arrays are
 *   treated as no evidence; falsy entries are dropped; each entry is matched
 *   as a string.
 * @returns {{claimType: string, decision: string, issues: string[], requiredEvidence: string[]}}
 *   `decision` is 'allow' when `issues` is empty and 'warn' otherwise.
 */
function evaluateScalingClaim(input = {}) {
  const claim = normalizeText(input.claim);
  const claimType = classifyScalingClaim(claim);
  const evidence = Array.isArray(input.evidence) ? input.evidence.filter(Boolean) : [];
  const hasEvidence = (pattern) => evidence.some((entry) => pattern.test(String(entry)));

  // `\beval` keeps "eval"/"evaluation" but no longer matches inside "retrieval".
  const heldout = hasEvidence(/held[-\s]?out|validation|\beval|ablation|backtest/i);
  const production = hasEvidence(/production|real user|workflow run|decision journal|blocked action/i);
  const rlCompute = hasEvidence(/sampling compute|rollout|trajectory|policy update|reward model|rl compute/i);
  const sampling = hasEvidence(/pass@|best-of-n|majority vote|sample budget|sampling/i);

  const isFeedbackClaim = claimType === 'feedback_policy_scaling';
  const issues = [];

  if (!claim) issues.push('missing_claim');
  if (isFeedbackClaim && !heldout) {
    issues.push('missing_heldout_feedback_eval');
  }
  // `rl` must be a whole word; a bare /rl/ also fires on "early", "world", "overly".
  if (isFeedbackClaim && /\brl\b|reinforcement|sampling/i.test(claim) && !rlCompute) {
    issues.push('missing_rl_compute_evidence');
  }
  if (isFeedbackClaim && /sampling|best-of|vote|pass@/i.test(claim) && !sampling) {
    issues.push('missing_sampling_budget_evidence');
  }
  if (claimType === 'pretraining_scaling' && evidence.length === 0) {
    issues.push('missing_model_scaling_evidence');
  }
  // "prove(s/d/n)" is anchored so that "improve"/"approve" are not read as absolute claims.
  if (/guarantee|always|never|100%|\bprove[sdn]?\b/i.test(claim) && !production) {
    issues.push('absolute_claim_without_production_evidence');
  }

  return {
    claimType,
    decision: issues.length === 0 ? 'allow' : 'warn',
    issues,
    requiredEvidence: isFeedbackClaim
      ? ['held-out eval', 'ablation or backtest', 'RL/sampling compute budget when claimed', 'decision-journal production sample']
      : ['source data', 'validation metric', 'scope limits'],
  };
}
|
|
56
|
-
|
|
57
|
-
// Public API of this module: the claim classifier and the claim evaluator.
module.exports = { classifyScalingClaim, evaluateScalingClaim };
|
|
@@ -1,73 +0,0 @@
|
|
|
1
|
-
'use strict';
|
|
2
|
-
|
|
3
|
-
/**
 * Assemble a static plan object describing a student-consistent synthetic SFT run.
 *
 * @param {Object} [options={}]
 * @param {string} [options.student] - Falls back to 'target-agent-policy' when falsy.
 * @param {string} [options.teacher] - Falls back to 'frontier-reviewer' when falsy.
 * @param {string} [options.dataset] - Falls back to 'thumbgate-feedback-lessons' when falsy.
 * @param {string} [options.holdout] - Falls back to 'feedback-gate-holdout' when falsy.
 * @returns {Object} Plan with `method`, `dataset`, `teacher`, `student`,
 *   `generationContract`, `requiredArtifacts`, and `evals`.
 */
function buildStudentConsistentTrainingPlan(options = {}) {
  const {
    student: studentOption,
    teacher: teacherOption,
    dataset: datasetOption,
    holdout: holdoutOption,
  } = options;

  // Rules the teacher/student generation step must follow, plus rejection criteria.
  const generationContract = {
    teacherRole: 'adds capability tokens: corrected decision, missing evidence, safer action',
    studentRole: 'preserves target agent style: terse format, tool discipline, gate vocabulary',
    rejectIf: [
      'teacher rewrites the answer into unsupported style',
      'lesson cannot be traced to source feedback',
      'sample contains secrets or private customer context',
      'sample teaches a shortcut that bypasses evidence gates',
    ],
  };

  const requiredArtifacts = [
    'source feedback id',
    'student baseline response',
    'teacher correction',
    'student-consistent final sample',
    'redaction report',
    'holdout eval result',
  ];

  const evals = {
    holdout: holdoutOption || 'feedback-gate-holdout',
    compareAgainst: ['raw_teacher_sft', 'self_distill_only', 'no_training_baseline'],
    metrics: ['gate_precision', 'gate_recall', 'unsupported_claim_rate', 'style_drift_rate'],
  };

  return {
    method: 'student_consistent_synthetic_sft',
    dataset: datasetOption || 'thumbgate-feedback-lessons',
    teacher: teacherOption || 'frontier-reviewer',
    student: studentOption || 'target-agent-policy',
    generationContract,
    requiredArtifacts,
    evals,
  };
}
|
|
39
|
-
|
|
40
|
-
/**
 * Validate one training sample and list what is missing or unsafe.
 *
 * @param {Object} [sample={}]
 * @param {*} [sample.sourceFeedbackId]
 * @param {*} [sample.studentBaseline]
 * @param {*} [sample.teacherCorrection]
 * @param {*} [sample.finalSample]
 * @param {*} [sample.redacted]
 * @param {*} [sample.holdoutEval]
 * @param {*} [sample.styleDriftRate] - Compared numerically against 0.15 when defined.
 * @returns {{decision: string, issues: string[], onPolicy: boolean}}
 *   `decision` is 'allow' with no issues, otherwise 'warn'. `onPolicy` is true
 *   only when style drift is within bounds and a student baseline is present.
 */
function evaluateStudentConsistentTrainingSample(sample = {}) {
  // Each falsy field contributes its issue code, in this order.
  const requiredFields = [
    ['sourceFeedbackId', 'missing_source_feedback_id'],
    ['studentBaseline', 'missing_student_baseline'],
    ['teacherCorrection', 'missing_teacher_correction'],
    ['finalSample', 'missing_final_sample'],
    ['redacted', 'redaction_required'],
    ['holdoutEval', 'holdout_eval_required'],
  ];
  const issues = requiredFields
    .filter(([field]) => !sample[field])
    .map(([, code]) => code);

  // Scan all present text fields together for key/value-looking credentials.
  const combinedText = [sample.studentBaseline, sample.teacherCorrection, sample.finalSample]
    .filter(Boolean)
    .join('\n');
  const looksLikeSecret = /(api[_-]?key|secret|token|password)\s*[:=]/i.test(combinedText);
  if (looksLikeSecret) {
    issues.push('secret_like_content');
  }

  const driftTooHigh = sample.styleDriftRate !== undefined && Number(sample.styleDriftRate) > 0.15;
  if (driftTooHigh) {
    issues.push('style_drift_too_high');
  }

  return {
    decision: issues.length === 0 ? 'allow' : 'warn',
    issues,
    onPolicy: !driftTooHigh && Boolean(sample.studentBaseline),
  };
}
|
|
69
|
-
|
|
70
|
-
// Public API of this module: the plan builder and the per-sample evaluator.
module.exports = { buildStudentConsistentTrainingPlan, evaluateStudentConsistentTrainingSample };
|