thumbgate 1.15.0 → 1.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. package/.claude-plugin/marketplace.json +6 -6
  2. package/.claude-plugin/plugin.json +3 -3
  3. package/.well-known/llms.txt +5 -5
  4. package/.well-known/mcp/server-card.json +1 -1
  5. package/README.md +59 -35
  6. package/adapters/chatgpt/openapi.yaml +118 -2
  7. package/adapters/claude/.mcp.json +2 -2
  8. package/adapters/mcp/server-stdio.js +210 -84
  9. package/adapters/opencode/opencode.json +1 -1
  10. package/bench/prompt-eval-suite.json +5 -1
  11. package/bin/cli.js +157 -8
  12. package/config/evals/agent-safety-eval.json +338 -22
  13. package/config/gates/routine.json +43 -0
  14. package/config/github-about.json +3 -3
  15. package/config/model-candidates.json +131 -0
  16. package/openapi/openapi.yaml +118 -2
  17. package/package.json +55 -48
  18. package/public/blog.html +7 -7
  19. package/public/codex-plugin.html +6 -6
  20. package/public/compare.html +29 -23
  21. package/public/dashboard.html +82 -10
  22. package/public/guide.html +28 -28
  23. package/public/index.html +216 -98
  24. package/public/learn.html +50 -22
  25. package/public/lessons.html +1 -1
  26. package/public/numbers.html +17 -17
  27. package/public/pro.html +82 -18
  28. package/scripts/agent-audit-trace.js +55 -0
  29. package/scripts/agent-memory-lifecycle.js +96 -0
  30. package/scripts/agent-readiness-plan.js +118 -0
  31. package/scripts/agentic-data-pipeline.js +21 -1
  32. package/scripts/agents-sdk-sandbox-plan.js +57 -0
  33. package/scripts/ai-org-governance.js +98 -0
  34. package/scripts/ai-search-distribution.js +43 -0
  35. package/scripts/artifact-agent-plan.js +81 -0
  36. package/scripts/billing.js +27 -8
  37. package/scripts/cli-schema.js +18 -2
  38. package/scripts/code-mode-mcp-plan.js +71 -0
  39. package/scripts/context-engine.js +1 -2
  40. package/scripts/context-manager.js +4 -1
  41. package/scripts/dashboard-render-spec.js +1 -1
  42. package/scripts/dashboard.js +275 -9
  43. package/scripts/decision-journal.js +13 -3
  44. package/scripts/document-workflow-governance.js +62 -0
  45. package/scripts/enterprise-agent-rollout.js +34 -0
  46. package/scripts/experience-replay-governance.js +69 -0
  47. package/scripts/export-hf-dataset.js +1 -1
  48. package/scripts/feedback-loop.js +92 -4
  49. package/scripts/feedback-to-rules.js +17 -23
  50. package/scripts/gates-engine.js +4 -6
  51. package/scripts/growth-campaigns.js +49 -0
  52. package/scripts/harness-selector.js +16 -4
  53. package/scripts/hybrid-supervisor-agent.js +64 -0
  54. package/scripts/inference-cache-policy.js +72 -0
  55. package/scripts/inference-economics.js +53 -0
  56. package/scripts/internal-agent-bootstrap.js +12 -2
  57. package/scripts/knowledge-layer-plan.js +108 -0
  58. package/scripts/lesson-inference.js +183 -44
  59. package/scripts/lesson-search.js +4 -1
  60. package/scripts/llm-client.js +157 -26
  61. package/scripts/mailer/resend-mailer.js +112 -1
  62. package/scripts/mcp-transport-strategy.js +66 -0
  63. package/scripts/memory-store-governance.js +60 -0
  64. package/scripts/meta-agent-loop.js +7 -13
  65. package/scripts/model-access-eligibility.js +38 -0
  66. package/scripts/model-migration-readiness.js +55 -0
  67. package/scripts/operational-integrity.js +96 -3
  68. package/scripts/otel-declarative-config.js +56 -0
  69. package/scripts/perplexity-client.js +1 -1
  70. package/scripts/post-training-governance.js +34 -0
  71. package/scripts/private-core-boundary.js +72 -0
  72. package/scripts/production-agent-readiness.js +40 -0
  73. package/scripts/prompt-eval.js +564 -32
  74. package/scripts/prompt-programs.js +93 -0
  75. package/scripts/provider-action-normalizer.js +585 -0
  76. package/scripts/scaling-law-claims.js +60 -0
  77. package/scripts/security-scanner.js +1 -1
  78. package/scripts/self-distill-agent.js +7 -32
  79. package/scripts/seo-gsd.js +232 -55
  80. package/scripts/skill-rag-router.js +53 -0
  81. package/scripts/spec-gate.js +1 -1
  82. package/scripts/student-consistent-training.js +73 -0
  83. package/scripts/synthetic-data-provenance.js +98 -0
  84. package/scripts/task-context-result.js +81 -0
  85. package/scripts/telemetry-analytics.js +149 -0
  86. package/scripts/thompson-sampling.js +2 -2
  87. package/scripts/token-savings.js +7 -6
  88. package/scripts/token-tco.js +46 -0
  89. package/scripts/tool-registry.js +63 -3
  90. package/scripts/verification-loop.js +10 -1
  91. package/scripts/verifier-scoring.js +71 -0
  92. package/scripts/workflow-sentinel.js +284 -28
  93. package/scripts/workspace-agent-routines.js +118 -0
  94. package/src/api/server.js +381 -120
  95. package/scripts/analytics-report.js +0 -328
  96. package/scripts/autonomous-workflow.js +0 -377
  97. package/scripts/billing-setup.js +0 -109
  98. package/scripts/creator-campaigns.js +0 -239
  99. package/scripts/cross-encoder-reranker.js +0 -235
  100. package/scripts/daemon-manager.js +0 -108
  101. package/scripts/decision-trace.js +0 -354
  102. package/scripts/delegation-runtime.js +0 -896
  103. package/scripts/dispatch-brief.js +0 -159
  104. package/scripts/distribution-surfaces.js +0 -110
  105. package/scripts/feedback-history-distiller.js +0 -382
  106. package/scripts/funnel-analytics.js +0 -35
  107. package/scripts/history-distiller.js +0 -200
  108. package/scripts/hosted-job-launcher.js +0 -256
  109. package/scripts/intent-router.js +0 -392
  110. package/scripts/lesson-reranker.js +0 -263
  111. package/scripts/lesson-retrieval.js +0 -148
  112. package/scripts/managed-lesson-agent.js +0 -183
  113. package/scripts/operational-dashboard.js +0 -103
  114. package/scripts/operational-summary.js +0 -129
  115. package/scripts/operator-artifacts.js +0 -608
  116. package/scripts/optimize-context.js +0 -17
  117. package/scripts/org-dashboard.js +0 -206
  118. package/scripts/partner-orchestration.js +0 -146
  119. package/scripts/predictive-insights.js +0 -356
  120. package/scripts/pulse.js +0 -80
  121. package/scripts/reflector-agent.js +0 -221
  122. package/scripts/sales-pipeline.js +0 -681
  123. package/scripts/session-episode-store.js +0 -329
  124. package/scripts/session-health-sensor.js +0 -242
  125. package/scripts/session-report.js +0 -120
  126. package/scripts/swarm-coordinator.js +0 -81
  127. package/scripts/tool-kpi-tracker.js +0 -12
  128. package/scripts/webhook-delivery.js +0 -62
  129. package/scripts/workflow-sprint-intake.js +0 -475
@@ -0,0 +1,60 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
/**
 * Coerce an arbitrary value into a trimmed string.
 *
 * @param {*} value - Any value; `null` and `undefined` yield the empty string.
 * @returns {string} `String(value)` with leading/trailing whitespace removed.
 */
function normalizeText(value) {
  const isMissing = value === null || value === undefined;
  return isMissing ? '' : String(value).trim();
}
8
+
9
/**
 * Bucket a scaling claim by the vocabulary it contains.
 *
 * The claim is normalized and lower-cased, then checked against an ordered
 * list of keyword patterns; the first pattern that matches decides the label.
 * Pretraining vocabulary is checked before feedback/policy vocabulary, so a
 * claim mentioning both is labelled `pretraining_scaling`.
 *
 * @param {*} claim - Claim text (any value; normalized via `normalizeText`).
 * @returns {string} `'pretraining_scaling'`, `'feedback_policy_scaling'`, or
 *   `'general_scaling'` when no keyword matches.
 */
function classifyScalingClaim(claim) {
  const lowered = normalizeText(claim).toLowerCase();

  // Order matters: earlier entries win when several patterns match.
  const rules = [
    ['pretraining_scaling', /\b(pretrain|pretraining|parameters|training tokens|flops|cross entropy|test loss)\b/],
    ['feedback_policy_scaling', /\b(rl|reinforcement|feedback|dpo|kto|reward|policy|thumbs[-\s]?(up|down)|gate|prevention rule)\b/],
  ];

  const matched = rules.find(([, pattern]) => pattern.test(lowered));
  return matched ? matched[0] : 'general_scaling';
}
19
+
20
// Claim-side keyword patterns. Short tokens are anchored on word boundaries so
// they do not fire inside unrelated words ("rl" in "world", "prove" in
// "improves", "vote" in "devoted"). RL-family acronyms stay covered explicitly.
const RL_CLAIM_PATTERN = /\brl(?:hf|aif|vr)?\b|reinforcement|sampling/i;
const SAMPLING_CLAIM_PATTERN = /sampling|best-of|\bvote[sd]?\b|pass@/i;
const ABSOLUTE_CLAIM_PATTERN = /\b(?:guarantee(?:s|d|ing)?|always|never|prove[sdn]?)\b|100%/i;

/**
 * Check whether a scaling claim is backed by the kinds of evidence its
 * wording calls for.
 *
 * The claim is classified with `classifyScalingClaim`, each evidence entry is
 * stringified and scanned for keywords, and an issue code is recorded for each
 * evidence category the claim needs but the evidence list lacks.
 *
 * @param {object} [input] - Evaluation request; `null`/`undefined` is treated
 *   as an empty request.
 * @param {*} [input.claim] - Claim text (normalized via `normalizeText`).
 * @param {Array<*>} [input.evidence] - Evidence descriptions; non-arrays are
 *   ignored and falsy entries are dropped.
 * @returns {{claimType: string, decision: string, issues: string[], requiredEvidence: string[]}}
 *   `decision` is `'allow'` when no issues were found, otherwise `'warn'`.
 */
function evaluateScalingClaim(input = {}) {
  // Default parameters only cover `undefined`; guard an explicit `null` too.
  const request = input || {};
  const claim = normalizeText(request.claim);
  const claimType = classifyScalingClaim(claim);
  const evidence = Array.isArray(request.evidence) ? request.evidence.filter(Boolean) : [];

  // True when at least one evidence entry mentions the given vocabulary.
  const hasEvidence = (pattern) => evidence.some((entry) => pattern.test(String(entry)));

  const heldout = hasEvidence(/held[-\s]?out|validation|eval|ablation|backtest/i);
  const production = hasEvidence(/production|real user|workflow run|decision journal|blocked action/i);
  const rlCompute = hasEvidence(/sampling compute|rollout|trajectory|policy update|reward model|rl compute/i);
  const sampling = hasEvidence(/pass@|best-of-n|majority vote|sample budget|sampling/i);

  const isFeedbackClaim = claimType === 'feedback_policy_scaling';
  const issues = [];

  if (!claim) issues.push('missing_claim');
  if (isFeedbackClaim && !heldout) {
    issues.push('missing_heldout_feedback_eval');
  }
  if (isFeedbackClaim && RL_CLAIM_PATTERN.test(claim) && !rlCompute) {
    issues.push('missing_rl_compute_evidence');
  }
  if (isFeedbackClaim && SAMPLING_CLAIM_PATTERN.test(claim) && !sampling) {
    issues.push('missing_sampling_budget_evidence');
  }
  if (claimType === 'pretraining_scaling' && evidence.length === 0) {
    issues.push('missing_model_scaling_evidence');
  }
  // Absolute wording is only accepted alongside production evidence.
  if (ABSOLUTE_CLAIM_PATTERN.test(claim) && !production) {
    issues.push('absolute_claim_without_production_evidence');
  }

  return {
    claimType,
    decision: issues.length === 0 ? 'allow' : 'warn',
    issues,
    requiredEvidence: isFeedbackClaim
      ? ['held-out eval', 'ablation or backtest', 'RL/sampling compute budget when claimed', 'decision-journal production sample']
      : ['source data', 'validation metric', 'scope limits'],
  };
}
56
+
57
+ module.exports = {
58
+ classifyScalingClaim,
59
+ evaluateScalingClaim,
60
+ };
@@ -2,7 +2,7 @@
2
2
  'use strict';
3
3
 
4
4
  /**
5
- * Security Scanner — OWASP-aware static analysis for PreToolUse gates.
5
+ * Security Scanner — OWASP-aware static analysis for PreToolUse checks.
6
6
  *
7
7
  * Scans code being written/edited by AI agents for common vulnerability
8
8
  * patterns (injection, XSS, path traversal, etc.) and suspicious dependency
@@ -349,39 +349,14 @@ Return JSON only, no markdown fences:
349
349
  Focus on actionable, specific lessons. Ignore trivial interactions.`;
350
350
 
351
351
  async function callAnthropicApi(conversationText, model) {
352
- const apiKey = process.env.ANTHROPIC_API_KEY;
353
- if (!apiKey) return null;
354
-
355
- const body = JSON.stringify({
356
- model: model || 'claude-sonnet-4-20250514',
357
- max_tokens: 2048,
358
- system: LLM_SYSTEM_PROMPT,
359
- messages: [
360
- { role: 'user', content: `Analyze this conversation window and extract lessons:\n\n${conversationText}` },
361
- ],
352
+ const { callClaudeJson, MODELS } = require('./llm-client');
353
+ return callClaudeJson({
354
+ model: model || MODELS.SMART,
355
+ maxTokens: 2048,
356
+ systemPrompt: LLM_SYSTEM_PROMPT,
357
+ userPrompt: `Analyze this conversation window and extract lessons:\n\n${conversationText}`,
358
+ cache: true,
362
359
  });
363
-
364
- try {
365
- const resp = await fetch('https://api.anthropic.com/v1/messages', {
366
- method: 'POST',
367
- headers: {
368
- 'Content-Type': 'application/json',
369
- 'x-api-key': apiKey,
370
- 'anthropic-version': '2023-06-01',
371
- },
372
- body,
373
- });
374
-
375
- if (!resp.ok) return null;
376
-
377
- const data = await resp.json();
378
- const text = (data.content && data.content[0] && data.content[0].text) || '';
379
- // Strip markdown fences if present
380
- const cleaned = text.replace(/^```(?:json)?\s*/m, '').replace(/```\s*$/m, '').trim();
381
- return JSON.parse(cleaned);
382
- } catch {
383
- return null;
384
- }
385
360
  }
386
361
 
387
362
  async function generateLlmLessons(conversationWindow, model) {