thumbgate 1.15.0 → 1.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +6 -6
- package/.claude-plugin/plugin.json +3 -3
- package/.well-known/llms.txt +5 -5
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +59 -35
- package/adapters/chatgpt/openapi.yaml +118 -2
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/mcp/server-stdio.js +210 -84
- package/adapters/opencode/opencode.json +1 -1
- package/bench/prompt-eval-suite.json +5 -1
- package/bin/cli.js +157 -8
- package/config/evals/agent-safety-eval.json +338 -22
- package/config/gates/routine.json +43 -0
- package/config/github-about.json +3 -3
- package/config/model-candidates.json +131 -0
- package/openapi/openapi.yaml +118 -2
- package/package.json +55 -48
- package/public/blog.html +7 -7
- package/public/codex-plugin.html +6 -6
- package/public/compare.html +29 -23
- package/public/dashboard.html +82 -10
- package/public/guide.html +28 -28
- package/public/index.html +216 -98
- package/public/learn.html +50 -22
- package/public/lessons.html +1 -1
- package/public/numbers.html +17 -17
- package/public/pro.html +82 -18
- package/scripts/agent-audit-trace.js +55 -0
- package/scripts/agent-memory-lifecycle.js +96 -0
- package/scripts/agent-readiness-plan.js +118 -0
- package/scripts/agentic-data-pipeline.js +21 -1
- package/scripts/agents-sdk-sandbox-plan.js +57 -0
- package/scripts/ai-org-governance.js +98 -0
- package/scripts/ai-search-distribution.js +43 -0
- package/scripts/artifact-agent-plan.js +81 -0
- package/scripts/billing.js +27 -8
- package/scripts/cli-schema.js +18 -2
- package/scripts/code-mode-mcp-plan.js +71 -0
- package/scripts/context-engine.js +1 -2
- package/scripts/context-manager.js +4 -1
- package/scripts/dashboard-render-spec.js +1 -1
- package/scripts/dashboard.js +275 -9
- package/scripts/decision-journal.js +13 -3
- package/scripts/document-workflow-governance.js +62 -0
- package/scripts/enterprise-agent-rollout.js +34 -0
- package/scripts/experience-replay-governance.js +69 -0
- package/scripts/export-hf-dataset.js +1 -1
- package/scripts/feedback-loop.js +92 -4
- package/scripts/feedback-to-rules.js +17 -23
- package/scripts/gates-engine.js +4 -6
- package/scripts/growth-campaigns.js +49 -0
- package/scripts/harness-selector.js +16 -4
- package/scripts/hybrid-supervisor-agent.js +64 -0
- package/scripts/inference-cache-policy.js +72 -0
- package/scripts/inference-economics.js +53 -0
- package/scripts/internal-agent-bootstrap.js +12 -2
- package/scripts/knowledge-layer-plan.js +108 -0
- package/scripts/lesson-inference.js +183 -44
- package/scripts/lesson-search.js +4 -1
- package/scripts/llm-client.js +157 -26
- package/scripts/mailer/resend-mailer.js +112 -1
- package/scripts/mcp-transport-strategy.js +66 -0
- package/scripts/memory-store-governance.js +60 -0
- package/scripts/meta-agent-loop.js +7 -13
- package/scripts/model-access-eligibility.js +38 -0
- package/scripts/model-migration-readiness.js +55 -0
- package/scripts/operational-integrity.js +96 -3
- package/scripts/otel-declarative-config.js +56 -0
- package/scripts/perplexity-client.js +1 -1
- package/scripts/post-training-governance.js +34 -0
- package/scripts/private-core-boundary.js +72 -0
- package/scripts/production-agent-readiness.js +40 -0
- package/scripts/prompt-eval.js +564 -32
- package/scripts/prompt-programs.js +93 -0
- package/scripts/provider-action-normalizer.js +585 -0
- package/scripts/scaling-law-claims.js +60 -0
- package/scripts/security-scanner.js +1 -1
- package/scripts/self-distill-agent.js +7 -32
- package/scripts/seo-gsd.js +232 -55
- package/scripts/skill-rag-router.js +53 -0
- package/scripts/spec-gate.js +1 -1
- package/scripts/student-consistent-training.js +73 -0
- package/scripts/synthetic-data-provenance.js +98 -0
- package/scripts/task-context-result.js +81 -0
- package/scripts/telemetry-analytics.js +149 -0
- package/scripts/thompson-sampling.js +2 -2
- package/scripts/token-savings.js +7 -6
- package/scripts/token-tco.js +46 -0
- package/scripts/tool-registry.js +63 -3
- package/scripts/verification-loop.js +10 -1
- package/scripts/verifier-scoring.js +71 -0
- package/scripts/workflow-sentinel.js +284 -28
- package/scripts/workspace-agent-routines.js +118 -0
- package/src/api/server.js +381 -120
- package/scripts/analytics-report.js +0 -328
- package/scripts/autonomous-workflow.js +0 -377
- package/scripts/billing-setup.js +0 -109
- package/scripts/creator-campaigns.js +0 -239
- package/scripts/cross-encoder-reranker.js +0 -235
- package/scripts/daemon-manager.js +0 -108
- package/scripts/decision-trace.js +0 -354
- package/scripts/delegation-runtime.js +0 -896
- package/scripts/dispatch-brief.js +0 -159
- package/scripts/distribution-surfaces.js +0 -110
- package/scripts/feedback-history-distiller.js +0 -382
- package/scripts/funnel-analytics.js +0 -35
- package/scripts/history-distiller.js +0 -200
- package/scripts/hosted-job-launcher.js +0 -256
- package/scripts/intent-router.js +0 -392
- package/scripts/lesson-reranker.js +0 -263
- package/scripts/lesson-retrieval.js +0 -148
- package/scripts/managed-lesson-agent.js +0 -183
- package/scripts/operational-dashboard.js +0 -103
- package/scripts/operational-summary.js +0 -129
- package/scripts/operator-artifacts.js +0 -608
- package/scripts/optimize-context.js +0 -17
- package/scripts/org-dashboard.js +0 -206
- package/scripts/partner-orchestration.js +0 -146
- package/scripts/predictive-insights.js +0 -356
- package/scripts/pulse.js +0 -80
- package/scripts/reflector-agent.js +0 -221
- package/scripts/sales-pipeline.js +0 -681
- package/scripts/session-episode-store.js +0 -329
- package/scripts/session-health-sensor.js +0 -242
- package/scripts/session-report.js +0 -120
- package/scripts/swarm-coordinator.js +0 -81
- package/scripts/tool-kpi-tracker.js +0 -12
- package/scripts/webhook-delivery.js +0 -62
- package/scripts/workflow-sprint-intake.js +0 -475
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
/**
 * Coerce an arbitrary value into a trimmed string.
 *
 * @param {*} value - Any value; `null` and `undefined` become the empty string.
 * @returns {string} `String(value)` with leading/trailing whitespace removed.
 */
function normalizeText(value) {
  // Loose comparison with null matches exactly `null` and `undefined`.
  return value == null ? '' : String(value).trim();
}
|
|
8
|
+
|
|
9
|
+
/**
 * Bucket a free-text scaling claim by the vocabulary it uses.
 *
 * The claim is normalized and lower-cased, then matched against two keyword
 * lists in priority order: pretraining terms win over feedback/policy terms.
 *
 * @param {*} claim - Claim text (any value; it is stringified and trimmed).
 * @returns {'pretraining_scaling'|'feedback_policy_scaling'|'general_scaling'}
 *   The first matching category, or `'general_scaling'` when nothing matches.
 */
function classifyScalingClaim(claim) {
  const lowered = normalizeText(claim).toLowerCase();

  const mentionsPretraining = /\b(pretrain|pretraining|parameters|training tokens|flops|cross entropy|test loss)\b/.test(lowered);
  if (mentionsPretraining) return 'pretraining_scaling';

  const mentionsFeedbackPolicy = /\b(rl|reinforcement|feedback|dpo|kto|reward|policy|thumbs[-\s]?(up|down)|gate|prevention rule)\b/.test(lowered);
  return mentionsFeedbackPolicy ? 'feedback_policy_scaling' : 'general_scaling';
}
|
|
19
|
+
|
|
20
|
+
/**
 * Check a scaling claim against the evidence supplied with it.
 *
 * The claim is classified with `classifyScalingClaim`, the evidence entries
 * are scanned for keyword families (held-out evaluation, production usage,
 * RL compute, sampling budget), and an issue code is recorded for every kind
 * of evidence the claim calls for but does not have.
 *
 * @param {object} [input] - Claim descriptor; `null`/`undefined` is treated as `{}`.
 * @param {*} [input.claim] - Claim text (stringified and trimmed).
 * @param {Array<*>} [input.evidence] - Evidence entries; falsy entries are
 *   dropped and the rest are stringified before matching. A non-array value
 *   is treated as no evidence.
 * @returns {{claimType: string, decision: ('allow'|'warn'), issues: string[], requiredEvidence: string[]}}
 *   `decision` is `'allow'` only when `issues` is empty.
 */
function evaluateScalingClaim(input = {}) {
  // The default parameter only covers `undefined`; guard against `null` too.
  const source = input || {};
  const claim = normalizeText(source.claim);
  const claimType = classifyScalingClaim(claim);
  const evidence = Array.isArray(source.evidence) ? source.evidence.filter(Boolean) : [];

  // True when at least one evidence entry matches the given (non-global) pattern.
  const hasEvidence = (pattern) => evidence.some((entry) => pattern.test(String(entry)));

  // `\beval` keeps "eval"/"evaluation" but no longer accepts "retrieval".
  const heldout = hasEvidence(/held[-\s]?out|validation|\beval|ablation|backtest/i);
  const production = hasEvidence(/production|real user|workflow run|decision journal|blocked action/i);
  const rlCompute = hasEvidence(/sampling compute|rollout|trajectory|policy update|reward model|rl compute/i);
  const sampling = hasEvidence(/pass@|best-of-n|majority vote|sample budget|sampling/i);

  const isFeedbackClaim = claimType === 'feedback_policy_scaling';
  const issues = [];

  if (!claim) issues.push('missing_claim');
  if (isFeedbackClaim && !heldout) {
    issues.push('missing_heldout_feedback_eval');
  }
  // `\brl\b` avoids firing on words that merely contain "rl" (clearly, early, world).
  if (isFeedbackClaim && /\brl\b|reinforcement|sampling/i.test(claim) && !rlCompute) {
    issues.push('missing_rl_compute_evidence');
  }
  if (isFeedbackClaim && /sampling|best-of|vote|pass@/i.test(claim) && !sampling) {
    issues.push('missing_sampling_budget_evidence');
  }
  if (claimType === 'pretraining_scaling' && evidence.length === 0) {
    issues.push('missing_model_scaling_evidence');
  }
  // Anchored so "improves"/"approved" are not mistaken for the absolute "proves";
  // prove/proves/proved/proven are still flagged.
  if (/guarantee|\balways\b|\bnever\b|100%|\bprove[sdn]?\b/i.test(claim) && !production) {
    issues.push('absolute_claim_without_production_evidence');
  }

  return {
    claimType,
    decision: issues.length === 0 ? 'allow' : 'warn',
    issues,
    requiredEvidence: isFeedbackClaim
      ? ['held-out eval', 'ablation or backtest', 'RL/sampling compute budget when claimed', 'decision-journal production sample']
      : ['source data', 'validation metric', 'scope limits'],
  };
}
|
|
56
|
+
|
|
57
|
+
// Public API: claim classification and evidence evaluation.
module.exports = { classifyScalingClaim, evaluateScalingClaim };
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
'use strict';
|
|
3
3
|
|
|
4
4
|
/**
|
|
5
|
-
* Security Scanner — OWASP-aware static analysis for PreToolUse
|
|
5
|
+
* Security Scanner — OWASP-aware static analysis for PreToolUse checks.
|
|
6
6
|
*
|
|
7
7
|
* Scans code being written/edited by AI agents for common vulnerability
|
|
8
8
|
* patterns (injection, XSS, path traversal, etc.) and suspicious dependency
|
|
@@ -349,39 +349,14 @@ Return JSON only, no markdown fences:
|
|
|
349
349
|
Focus on actionable, specific lessons. Ignore trivial interactions.`;
|
|
350
350
|
|
|
351
351
|
async function callAnthropicApi(conversationText, model) {
|
|
352
|
-
const
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
messages: [
|
|
360
|
-
{ role: 'user', content: `Analyze this conversation window and extract lessons:\n\n${conversationText}` },
|
|
361
|
-
],
|
|
352
|
+
const { callClaudeJson, MODELS } = require('./llm-client');
|
|
353
|
+
return callClaudeJson({
|
|
354
|
+
model: model || MODELS.SMART,
|
|
355
|
+
maxTokens: 2048,
|
|
356
|
+
systemPrompt: LLM_SYSTEM_PROMPT,
|
|
357
|
+
userPrompt: `Analyze this conversation window and extract lessons:\n\n${conversationText}`,
|
|
358
|
+
cache: true,
|
|
362
359
|
});
|
|
363
|
-
|
|
364
|
-
try {
|
|
365
|
-
const resp = await fetch('https://api.anthropic.com/v1/messages', {
|
|
366
|
-
method: 'POST',
|
|
367
|
-
headers: {
|
|
368
|
-
'Content-Type': 'application/json',
|
|
369
|
-
'x-api-key': apiKey,
|
|
370
|
-
'anthropic-version': '2023-06-01',
|
|
371
|
-
},
|
|
372
|
-
body,
|
|
373
|
-
});
|
|
374
|
-
|
|
375
|
-
if (!resp.ok) return null;
|
|
376
|
-
|
|
377
|
-
const data = await resp.json();
|
|
378
|
-
const text = (data.content && data.content[0] && data.content[0].text) || '';
|
|
379
|
-
// Strip markdown fences if present
|
|
380
|
-
const cleaned = text.replace(/^```(?:json)?\s*/m, '').replace(/```\s*$/m, '').trim();
|
|
381
|
-
return JSON.parse(cleaned);
|
|
382
|
-
} catch {
|
|
383
|
-
return null;
|
|
384
|
-
}
|
|
385
360
|
}
|
|
386
361
|
|
|
387
362
|
async function generateLlmLessons(conversationWindow, model) {
|