thumbgate 1.15.0 → 1.16.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. package/.claude-plugin/marketplace.json +6 -6
  2. package/.claude-plugin/plugin.json +3 -3
  3. package/.well-known/llms.txt +5 -5
  4. package/.well-known/mcp/server-card.json +1 -1
  5. package/README.md +59 -35
  6. package/adapters/chatgpt/openapi.yaml +118 -2
  7. package/adapters/claude/.mcp.json +2 -2
  8. package/adapters/mcp/server-stdio.js +210 -84
  9. package/adapters/opencode/opencode.json +1 -1
  10. package/bench/prompt-eval-suite.json +5 -1
  11. package/bin/cli.js +157 -8
  12. package/config/evals/agent-safety-eval.json +338 -22
  13. package/config/gates/routine.json +43 -0
  14. package/config/github-about.json +3 -3
  15. package/config/model-candidates.json +131 -0
  16. package/openapi/openapi.yaml +118 -2
  17. package/package.json +57 -49
  18. package/public/blog.html +7 -7
  19. package/public/codex-plugin.html +6 -6
  20. package/public/compare.html +29 -23
  21. package/public/dashboard.html +82 -10
  22. package/public/guide.html +28 -28
  23. package/public/index.html +216 -98
  24. package/public/learn.html +50 -22
  25. package/public/lessons.html +1 -1
  26. package/public/numbers.html +17 -17
  27. package/public/pro.html +82 -18
  28. package/scripts/agent-audit-trace.js +55 -0
  29. package/scripts/agent-memory-lifecycle.js +96 -0
  30. package/scripts/agent-readiness-plan.js +118 -0
  31. package/scripts/agentic-data-pipeline.js +21 -1
  32. package/scripts/agents-sdk-sandbox-plan.js +57 -0
  33. package/scripts/ai-org-governance.js +98 -0
  34. package/scripts/ai-search-distribution.js +43 -0
  35. package/scripts/artifact-agent-plan.js +81 -0
  36. package/scripts/billing.js +27 -8
  37. package/scripts/cli-schema.js +18 -2
  38. package/scripts/code-mode-mcp-plan.js +71 -0
  39. package/scripts/context-engine.js +1 -2
  40. package/scripts/context-manager.js +4 -1
  41. package/scripts/dashboard-render-spec.js +1 -1
  42. package/scripts/dashboard.js +275 -9
  43. package/scripts/decision-journal.js +13 -3
  44. package/scripts/document-workflow-governance.js +62 -0
  45. package/scripts/enterprise-agent-rollout.js +34 -0
  46. package/scripts/experience-replay-governance.js +69 -0
  47. package/scripts/export-hf-dataset.js +1 -1
  48. package/scripts/feedback-loop.js +92 -4
  49. package/scripts/feedback-to-rules.js +17 -23
  50. package/scripts/gates-engine.js +4 -6
  51. package/scripts/growth-campaigns.js +49 -0
  52. package/scripts/harness-selector.js +16 -4
  53. package/scripts/hybrid-supervisor-agent.js +64 -0
  54. package/scripts/inference-cache-policy.js +72 -0
  55. package/scripts/inference-economics.js +53 -0
  56. package/scripts/internal-agent-bootstrap.js +12 -2
  57. package/scripts/knowledge-layer-plan.js +108 -0
  58. package/scripts/lesson-inference.js +183 -44
  59. package/scripts/lesson-search.js +4 -1
  60. package/scripts/llm-client.js +157 -26
  61. package/scripts/mailer/resend-mailer.js +112 -1
  62. package/scripts/mcp-transport-strategy.js +66 -0
  63. package/scripts/memory-store-governance.js +60 -0
  64. package/scripts/meta-agent-loop.js +7 -13
  65. package/scripts/model-access-eligibility.js +38 -0
  66. package/scripts/model-migration-readiness.js +55 -0
  67. package/scripts/operational-integrity.js +96 -3
  68. package/scripts/otel-declarative-config.js +56 -0
  69. package/scripts/perplexity-client.js +1 -1
  70. package/scripts/post-training-governance.js +34 -0
  71. package/scripts/private-core-boundary.js +72 -0
  72. package/scripts/production-agent-readiness.js +40 -0
  73. package/scripts/prompt-eval.js +564 -32
  74. package/scripts/prompt-programs.js +93 -0
  75. package/scripts/provider-action-normalizer.js +585 -0
  76. package/scripts/scaling-law-claims.js +60 -0
  77. package/scripts/security-scanner.js +1 -1
  78. package/scripts/self-distill-agent.js +7 -32
  79. package/scripts/seo-gsd.js +232 -55
  80. package/scripts/skill-rag-router.js +53 -0
  81. package/scripts/spec-gate.js +1 -1
  82. package/scripts/student-consistent-training.js +73 -0
  83. package/scripts/synthetic-data-provenance.js +98 -0
  84. package/scripts/task-context-result.js +81 -0
  85. package/scripts/telemetry-analytics.js +149 -0
  86. package/scripts/thompson-sampling.js +2 -2
  87. package/scripts/token-savings.js +7 -6
  88. package/scripts/token-tco.js +46 -0
  89. package/scripts/tool-registry.js +63 -3
  90. package/scripts/verification-loop.js +10 -1
  91. package/scripts/verifier-scoring.js +71 -0
  92. package/scripts/workflow-sentinel.js +284 -28
  93. package/scripts/workspace-agent-routines.js +118 -0
  94. package/src/api/server.js +381 -120
  95. package/scripts/analytics-report.js +0 -328
  96. package/scripts/autonomous-workflow.js +0 -377
  97. package/scripts/billing-setup.js +0 -109
  98. package/scripts/creator-campaigns.js +0 -239
  99. package/scripts/cross-encoder-reranker.js +0 -235
  100. package/scripts/daemon-manager.js +0 -108
  101. package/scripts/decision-trace.js +0 -354
  102. package/scripts/delegation-runtime.js +0 -896
  103. package/scripts/dispatch-brief.js +0 -159
  104. package/scripts/distribution-surfaces.js +0 -110
  105. package/scripts/feedback-history-distiller.js +0 -382
  106. package/scripts/funnel-analytics.js +0 -35
  107. package/scripts/history-distiller.js +0 -200
  108. package/scripts/hosted-job-launcher.js +0 -256
  109. package/scripts/intent-router.js +0 -392
  110. package/scripts/lesson-reranker.js +0 -263
  111. package/scripts/lesson-retrieval.js +0 -148
  112. package/scripts/managed-lesson-agent.js +0 -183
  113. package/scripts/operational-dashboard.js +0 -103
  114. package/scripts/operational-summary.js +0 -129
  115. package/scripts/operator-artifacts.js +0 -608
  116. package/scripts/optimize-context.js +0 -17
  117. package/scripts/org-dashboard.js +0 -206
  118. package/scripts/partner-orchestration.js +0 -146
  119. package/scripts/predictive-insights.js +0 -356
  120. package/scripts/pulse.js +0 -80
  121. package/scripts/reflector-agent.js +0 -221
  122. package/scripts/sales-pipeline.js +0 -681
  123. package/scripts/session-episode-store.js +0 -329
  124. package/scripts/session-health-sensor.js +0 -242
  125. package/scripts/session-report.js +0 -120
  126. package/scripts/swarm-coordinator.js +0 -81
  127. package/scripts/tool-kpi-tracker.js +0 -12
  128. package/scripts/webhook-delivery.js +0 -62
  129. package/scripts/workflow-sprint-intake.js +0 -475
@@ -387,7 +387,7 @@ const TOOLS = [
387
387
  }),
388
388
  readOnlyTool({
389
389
  name: 'enforcement_matrix',
390
- description: 'Show the full Enforcement Matrix: feedback pipeline stats, active pre-action gates, and rejection ledger with revival conditions.',
390
+ description: 'Show the full Enforcement Matrix: feedback pipeline stats, active pre-action checks, and rejection ledger with revival conditions.',
391
391
  inputSchema: {
392
392
  type: 'object',
393
393
  properties: {},
@@ -773,9 +773,69 @@ const TOOLS = [
773
773
  description: 'Predict pre-action workflow risk, blast radius, and remediations before a tool call executes.',
774
774
  inputSchema: {
775
775
  type: 'object',
776
- required: ['toolName'],
777
776
  properties: {
778
- toolName: { type: 'string', description: 'Tool being assessed, such as Bash, Edit, or Write' },
777
+ toolName: { type: 'string', description: 'Tool being assessed, such as Bash, Edit, or Write. Optional when provider-native tool call payload is supplied.' },
778
+ provider: { type: 'string', description: 'Optional provider name, such as anthropic, openai, codex, cursor, gemini, or mcp' },
779
+ model: { type: 'string', description: 'Optional model name used for audit evidence and budget review' },
780
+ providerToolCall: {
781
+ type: 'object',
782
+ additionalProperties: true,
783
+ description: 'Provider-native tool call object, including Anthropic tool_use or OpenAI function/tool call shapes',
784
+ },
785
+ content: {
786
+ type: 'array',
787
+ items: { type: 'object', additionalProperties: true },
788
+ description: 'Provider-native message content blocks; Anthropic tool_use blocks are normalized automatically',
789
+ },
790
+ method: { type: 'string', description: 'Optional JSON-RPC/MCP method, such as tools/call' },
791
+ params: {
792
+ type: 'object',
793
+ additionalProperties: true,
794
+ description: 'Optional JSON-RPC/MCP params, including tools/call name and arguments, resources/read URI, or prompts/get template arguments',
795
+ },
796
+ usage: {
797
+ type: 'object',
798
+ additionalProperties: true,
799
+ description: 'Provider token/cost usage, such as input_tokens, output_tokens, or total_tokens',
800
+ },
801
+ tokenEstimate: { type: 'number', description: 'Estimated total tokens for this action when provider usage is unavailable' },
802
+ costUsd: { type: 'number', description: 'Estimated USD cost for this action when provider usage is unavailable' },
803
+ budget: {
804
+ type: 'object',
805
+ additionalProperties: true,
806
+ description: 'Optional per-action budget controls: maxTokensPerAction, remainingTokens, maxCostUsdPerAction, remainingCostUsd, maxParallelBranches',
807
+ },
808
+ workflowPattern: {
809
+ type: 'string',
810
+ enum: ['single_action', 'chaining', 'routing', 'parallelization', 'evaluator-optimizer', 'agent'],
811
+ description: 'Optional workflow architecture hint. Agents require inspection evidence; predefined workflows are easier to evaluate.',
812
+ },
813
+ workflow: {
814
+ type: 'object',
815
+ additionalProperties: true,
816
+ description: 'Optional workflow metadata: pattern, steps, routes, branches, tools, inspection, and verification evidence.',
817
+ },
818
+ goal: { type: 'string', description: 'Optional agent goal for open-ended tool planning.' },
819
+ tools: {
820
+ type: 'array',
821
+ items: { type: 'string' },
822
+ description: 'Optional abstract/combinable tool names available to an open-ended agent.',
823
+ },
824
+ branches: {
825
+ type: 'array',
826
+ items: { type: 'string' },
827
+ description: 'Optional parallel workflow branches for fan-out budget and review checks.',
828
+ },
829
+ steps: {
830
+ type: 'array',
831
+ items: { type: 'string' },
832
+ description: 'Optional predefined workflow steps for chaining/evaluator workflow audit evidence.',
833
+ },
834
+ routes: {
835
+ type: 'array',
836
+ items: { type: 'string' },
837
+ description: 'Optional routing workflow destinations or classes.',
838
+ },
779
839
  command: { type: 'string', description: 'Optional shell command when toolName is Bash' },
780
840
  filePath: { type: 'string', description: 'Optional primary file path for edit-like tools' },
781
841
  changedFiles: {
@@ -6,11 +6,20 @@ const {
6
6
  getFeedbackPaths,
7
7
  appendDiagnosticRecord,
8
8
  } = require('./feedback-loop');
9
+ const { loadOptionalModule } = require('./private-core-boundary');
9
10
  const {
10
11
  buildPartnerStrategy,
11
12
  computePartnerReward,
12
13
  resolveVerificationRetries,
13
- } = require('./partner-orchestration');
14
+ } = loadOptionalModule('./partner-orchestration', () => ({
15
+ buildPartnerStrategy: ({ partnerProfile } = {}) => ({
16
+ profile: partnerProfile || 'public-shell',
17
+ verificationMode: 'local-only',
18
+ recommendedChecks: [],
19
+ }),
20
+ computePartnerReward: () => 0,
21
+ resolveVerificationRetries: (requestedMaxRetries) => requestedMaxRetries,
22
+ }));
14
23
  const {
15
24
  diagnoseFailure,
16
25
  } = require('./failure-diagnostics');
@@ -0,0 +1,71 @@
1
+ 'use strict';
2
+
3
/**
 * Build the rubric object used to score verifier output.
 *
 * @param {object} [options] - Optional overrides.
 * @param {string[]} [options.criteria] - Criterion names; a non-array value
 *   falls back to the built-in four criteria.
 * @param {number} [options.granularity=100] - Upper bound of the raw score
 *   scale. Must be a finite number greater than zero, otherwise 100 is used.
 * @param {number} [options.repeats=3] - Expected verifications per criterion.
 *   Must be a finite number greater than zero, otherwise 3 is used.
 * @param {number} [options.passThreshold=0.82] - Minimum score for 'allow';
 *   any finite number is accepted as-is.
 * @returns {{rubricId: string, criteria: string[], granularity: number,
 *   repeats: number, scoring: string, passThreshold: number, caveats: string[]}}
 */
function buildVerifierScoringRubric(options = {}) {
  // Default parameters only cover `undefined`; an explicit null (or any
  // non-object) is treated as "no overrides" instead of throwing.
  const overrides = options !== null && typeof options === 'object' ? options : {};
  const isPositiveNumber = (value) => Number.isFinite(value) && value > 0;

  // Copy the caller's array so later mutation of the rubric cannot leak back.
  const criteria = Array.isArray(overrides.criteria)
    ? [...overrides.criteria]
    : [
      'evidence_cited',
      'scope_respected',
      'tests_or_proof_run',
      'claim_matches_artifacts',
    ];

  // Scores above 1 are divided by granularity when normalized, so a zero or
  // negative granularity would produce Infinity or negative results.
  const granularity = isPositiveNumber(overrides.granularity) ? overrides.granularity : 100;
  const repeats = isPositiveNumber(overrides.repeats) ? overrides.repeats : 3;
  const passThreshold = Number.isFinite(overrides.passThreshold) ? overrides.passThreshold : 0.82;

  return {
    rubricId: 'granular_llm_verifier',
    criteria,
    granularity,
    repeats,
    scoring: 'probability_weighted_average',
    passThreshold,
    caveats: [
      'calibrate against held-out human labels before production blocking',
      'fall back to coarse scores when model cannot expose score-token probabilities',
      'never use verifier score alone for destructive actions',
    ],
  };
}
27
+
28
/**
 * Average repeated verifier scores into a single score and an allow/warn decision.
 *
 * Each entry of `scores` is expected to carry a `repeats` array of numeric
 * scores. Values greater than 1 are treated as being on the rubric's raw scale
 * and divided by `rubric.granularity`; values of 1 or less are taken as
 * already normalized. Every normalized value is clamped to [0, 1] so a single
 * out-of-range score cannot outweigh failing ones.
 *
 * @param {object} [input]
 * @param {Array<{repeats: number[]}>} [input.scores=[]] - One entry per criterion.
 * @param {object} [input.rubric] - Rubric with `criteria`, `repeats`,
 *   `granularity` and `passThreshold`; defaults to buildVerifierScoringRubric().
 * @returns {{score: number, decision: 'allow'|'warn', issues: string[]}}
 *   `score` is rounded to 4 decimals. `decision` is 'allow' only when the
 *   unrounded score meets `passThreshold` and no issues were found.
 */
function computeVerifierScore({ scores = [], rubric = buildVerifierScoringRubric() } = {}) {
  // Parameter defaults only cover `undefined`; re-apply them for null/non-objects.
  const activeRubric = rubric !== null && typeof rubric === 'object' ? rubric : buildVerifierScoringRubric();
  const entries = Array.isArray(scores) ? scores : [];

  const expectedCriteria = Array.isArray(activeRubric.criteria) ? activeRubric.criteria.length : 0;
  const expectedRepeats = Number.isFinite(activeRubric.repeats) && activeRubric.repeats > 0
    ? activeRubric.repeats
    : 0;
  // A missing or non-positive granularity would turn the division below into
  // Infinity/NaN, so fall back to the default 100-point scale.
  const granularity = Number.isFinite(activeRubric.granularity) && activeRubric.granularity > 0
    ? activeRubric.granularity
    : 100;

  // Keep scores grouped per criterion so coverage can be checked per entry;
  // null or malformed entries simply contribute no scores.
  const perCriterion = entries.map((criterion) => {
    const repeats = criterion !== null && typeof criterion === 'object' ? criterion.repeats : undefined;
    return Array.isArray(repeats) ? repeats.filter((value) => Number.isFinite(value)) : [];
  });
  const flattened = perCriterion.flat();

  if (flattened.length === 0) {
    return {
      score: 0,
      decision: 'warn',
      issues: ['missing_verifier_scores'],
    };
  }

  const normalized = flattened.map((value) => {
    const unit = value > 1 ? value / granularity : value;
    return Math.min(1, Math.max(0, unit));
  });
  const score = normalized.reduce((sum, value) => sum + value, 0) / normalized.length;

  // Only entries with at least one usable score count as a scored criterion.
  const scored = perCriterion.filter((values) => values.length > 0);
  const issues = [];
  if (scored.length < expectedCriteria) issues.push('missing_criteria_scores');

  // Check the aggregate count and each scored criterion, so one over-sampled
  // criterion cannot hide an under-sampled one.
  const shortOnRepeats = flattened.length < expectedCriteria * expectedRepeats
    || scored.some((values) => values.length < expectedRepeats);
  if (shortOnRepeats) issues.push('missing_repeat_verifications');

  return {
    score: Number(score.toFixed(4)),
    decision: score >= activeRubric.passThreshold && issues.length === 0 ? 'allow' : 'warn',
    issues,
  };
}
53
+
54
/**
 * Check a verifier configuration for the minimum safeguards.
 *
 * @param {object} [setup]
 * @param {string[]} [setup.criteria] - Must be an array with at least 3 entries.
 * @param {number} [setup.repeats] - Must be a finite number of at least 2.
 * @param {*} [setup.heldoutCalibration] - Truthy when held-out calibration exists.
 * @param {*} [setup.destructiveAction] - Truthy when the action is destructive.
 * @param {*} [setup.humanReview] - Truthy when a human review is in place.
 * @returns {{decision: 'allow'|'warn', issues: string[]}} 'allow' only when
 *   no issues were found.
 */
function evaluateVerifierSetup(setup = {}) {
  // Default parameters only cover `undefined`; treat null/non-objects as empty.
  const config = setup !== null && typeof setup === 'object' ? setup : {};
  const issues = [];

  // Require a real array: a string also has `.length` and would otherwise be
  // counted by characters.
  const hasEnoughCriteria = Array.isArray(config.criteria) && config.criteria.length >= 3;
  if (!hasEnoughCriteria) issues.push('too_few_criteria');

  // Require a real number: a non-numeric string compares false against `< 2`
  // and would otherwise pass.
  const hasEnoughRepeats = Number.isFinite(config.repeats) && config.repeats >= 2;
  if (!hasEnoughRepeats) issues.push('repeat_verification_required');

  if (!config.heldoutCalibration) issues.push('heldout_calibration_required');
  if (config.destructiveAction && !config.humanReview) {
    issues.push('human_review_required_for_destructive_action');
  }

  return {
    decision: issues.length ? 'warn' : 'allow',
    issues,
  };
}
66
+
67
+ module.exports = {
68
+ buildVerifierScoringRubric,
69
+ computeVerifierScore,
70
+ evaluateVerifierSetup,
71
+ };