thumbgate 1.14.1 → 1.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/.claude-plugin/marketplace.json +6 -6
  2. package/.claude-plugin/plugin.json +3 -3
  3. package/.well-known/llms.txt +5 -5
  4. package/.well-known/mcp/server-card.json +1 -1
  5. package/README.md +60 -35
  6. package/adapters/chatgpt/openapi.yaml +118 -2
  7. package/adapters/claude/.mcp.json +2 -2
  8. package/adapters/mcp/server-stdio.js +217 -84
  9. package/adapters/opencode/opencode.json +1 -1
  10. package/bench/prompt-eval-suite.json +5 -1
  11. package/bin/cli.js +211 -8
  12. package/config/enforcement.json +59 -7
  13. package/config/evals/agent-safety-eval.json +338 -22
  14. package/config/gates/default.json +33 -0
  15. package/config/gates/routine.json +43 -0
  16. package/config/github-about.json +3 -3
  17. package/config/mcp-allowlists.json +4 -0
  18. package/config/merge-quality-checks.json +2 -1
  19. package/config/model-candidates.json +131 -0
  20. package/openapi/openapi.yaml +118 -2
  21. package/package.json +70 -51
  22. package/public/blog.html +7 -7
  23. package/public/codex-plugin.html +13 -7
  24. package/public/compare.html +29 -23
  25. package/public/dashboard.html +105 -12
  26. package/public/guide.html +28 -28
  27. package/public/index.html +233 -97
  28. package/public/learn.html +87 -20
  29. package/public/lessons.html +26 -2
  30. package/public/numbers.html +271 -0
  31. package/public/pro.html +89 -19
  32. package/scripts/agent-audit-trace.js +55 -0
  33. package/scripts/agent-memory-lifecycle.js +96 -0
  34. package/scripts/agent-readiness-plan.js +118 -0
  35. package/scripts/agentic-data-pipeline.js +21 -1
  36. package/scripts/agents-sdk-sandbox-plan.js +57 -0
  37. package/scripts/ai-org-governance.js +98 -0
  38. package/scripts/ai-search-distribution.js +43 -0
  39. package/scripts/artifact-agent-plan.js +81 -0
  40. package/scripts/billing.js +27 -8
  41. package/scripts/cli-feedback.js +2 -1
  42. package/scripts/cli-schema.js +60 -5
  43. package/scripts/code-mode-mcp-plan.js +71 -0
  44. package/scripts/commercial-offer.js +1 -1
  45. package/scripts/context-engine.js +1 -2
  46. package/scripts/context-manager.js +4 -1
  47. package/scripts/contextfs.js +214 -32
  48. package/scripts/dashboard-render-spec.js +1 -1
  49. package/scripts/dashboard.js +275 -9
  50. package/scripts/decision-journal.js +13 -3
  51. package/scripts/document-workflow-governance.js +62 -0
  52. package/scripts/enterprise-agent-rollout.js +34 -0
  53. package/scripts/experience-replay-governance.js +69 -0
  54. package/scripts/export-hf-dataset.js +1 -1
  55. package/scripts/feedback-loop.js +141 -9
  56. package/scripts/feedback-to-rules.js +17 -23
  57. package/scripts/gates-engine.js +4 -6
  58. package/scripts/growth-campaigns.js +49 -0
  59. package/scripts/harness-selector.js +145 -1
  60. package/scripts/hybrid-supervisor-agent.js +64 -0
  61. package/scripts/inference-cache-policy.js +72 -0
  62. package/scripts/inference-economics.js +53 -0
  63. package/scripts/internal-agent-bootstrap.js +12 -2
  64. package/scripts/knowledge-layer-plan.js +108 -0
  65. package/scripts/lesson-canonical.js +181 -0
  66. package/scripts/lesson-db.js +71 -10
  67. package/scripts/lesson-inference.js +183 -44
  68. package/scripts/lesson-search.js +4 -1
  69. package/scripts/lesson-synthesis.js +23 -2
  70. package/scripts/llm-client.js +157 -26
  71. package/scripts/mailer/resend-mailer.js +112 -1
  72. package/scripts/mcp-transport-strategy.js +66 -0
  73. package/scripts/memory-store-governance.js +60 -0
  74. package/scripts/meta-agent-loop.js +7 -13
  75. package/scripts/model-access-eligibility.js +38 -0
  76. package/scripts/model-migration-readiness.js +55 -0
  77. package/scripts/native-messaging-audit.js +514 -0
  78. package/scripts/operational-integrity.js +96 -3
  79. package/scripts/otel-declarative-config.js +56 -0
  80. package/scripts/perplexity-client.js +1 -1
  81. package/scripts/post-training-governance.js +34 -0
  82. package/scripts/pr-manager.js +47 -7
  83. package/scripts/private-core-boundary.js +72 -0
  84. package/scripts/production-agent-readiness.js +40 -0
  85. package/scripts/profile-router.js +16 -1
  86. package/scripts/prompt-eval.js +564 -32
  87. package/scripts/prompt-programs.js +93 -0
  88. package/scripts/provider-action-normalizer.js +585 -0
  89. package/scripts/rule-validator.js +285 -0
  90. package/scripts/scaling-law-claims.js +60 -0
  91. package/scripts/security-scanner.js +1 -1
  92. package/scripts/self-distill-agent.js +7 -32
  93. package/scripts/seo-gsd.js +400 -43
  94. package/scripts/skill-rag-router.js +53 -0
  95. package/scripts/spec-gate.js +1 -1
  96. package/scripts/student-consistent-training.js +73 -0
  97. package/scripts/synthetic-data-provenance.js +98 -0
  98. package/scripts/task-context-result.js +81 -0
  99. package/scripts/telemetry-analytics.js +149 -0
  100. package/scripts/thompson-sampling.js +2 -2
  101. package/scripts/token-savings.js +7 -6
  102. package/scripts/token-tco.js +46 -0
  103. package/scripts/tool-registry.js +75 -3
  104. package/scripts/verification-loop.js +10 -1
  105. package/scripts/verifier-scoring.js +71 -0
  106. package/scripts/workflow-sentinel.js +284 -28
  107. package/scripts/workspace-agent-routines.js +118 -0
  108. package/skills/thumbgate/SKILL.md +1 -1
  109. package/src/api/server.js +434 -120
  110. package/.claude-plugin/README.md +0 -170
  111. package/adapters/README.md +0 -12
  112. package/scripts/analytics-report.js +0 -328
  113. package/scripts/autonomous-workflow.js +0 -377
  114. package/scripts/billing-setup.js +0 -109
  115. package/scripts/creator-campaigns.js +0 -239
  116. package/scripts/cross-encoder-reranker.js +0 -235
  117. package/scripts/daemon-manager.js +0 -108
  118. package/scripts/decision-trace.js +0 -354
  119. package/scripts/delegation-runtime.js +0 -896
  120. package/scripts/dispatch-brief.js +0 -159
  121. package/scripts/distribution-surfaces.js +0 -110
  122. package/scripts/feedback-history-distiller.js +0 -382
  123. package/scripts/funnel-analytics.js +0 -35
  124. package/scripts/history-distiller.js +0 -200
  125. package/scripts/hosted-job-launcher.js +0 -256
  126. package/scripts/intent-router.js +0 -392
  127. package/scripts/lesson-reranker.js +0 -263
  128. package/scripts/lesson-retrieval.js +0 -148
  129. package/scripts/managed-lesson-agent.js +0 -183
  130. package/scripts/operational-dashboard.js +0 -103
  131. package/scripts/operational-summary.js +0 -129
  132. package/scripts/operator-artifacts.js +0 -608
  133. package/scripts/optimize-context.js +0 -17
  134. package/scripts/org-dashboard.js +0 -206
  135. package/scripts/partner-orchestration.js +0 -146
  136. package/scripts/predictive-insights.js +0 -356
  137. package/scripts/pulse.js +0 -80
  138. package/scripts/reflector-agent.js +0 -221
  139. package/scripts/sales-pipeline.js +0 -681
  140. package/scripts/session-episode-store.js +0 -329
  141. package/scripts/session-health-sensor.js +0 -242
  142. package/scripts/session-report.js +0 -120
  143. package/scripts/swarm-coordinator.js +0 -81
  144. package/scripts/tool-kpi-tracker.js +0 -12
  145. package/scripts/webhook-delivery.js +0 -62
  146. package/scripts/workflow-sprint-intake.js +0 -475
  147. package/skills/agent-memory/SKILL.md +0 -97
  148. package/skills/solve-architecture-autonomy/SKILL.md +0 -17
  149. package/skills/solve-architecture-autonomy/tool.js +0 -33
  150. package/skills/thumbgate-feedback/SKILL.md +0 -49
@@ -387,7 +387,7 @@ const TOOLS = [
387
387
  }),
388
388
  readOnlyTool({
389
389
  name: 'enforcement_matrix',
390
- description: 'Show the full Enforcement Matrix: feedback pipeline stats, active pre-action gates, and rejection ledger with revival conditions.',
390
+ description: 'Show the full Enforcement Matrix: feedback pipeline stats, active pre-action checks, and rejection ledger with revival conditions.',
391
391
  inputSchema: {
392
392
  type: 'object',
393
393
  properties: {},
@@ -773,9 +773,69 @@ const TOOLS = [
773
773
  description: 'Predict pre-action workflow risk, blast radius, and remediations before a tool call executes.',
774
774
  inputSchema: {
775
775
  type: 'object',
776
- required: ['toolName'],
777
776
  properties: {
778
- toolName: { type: 'string', description: 'Tool being assessed, such as Bash, Edit, or Write' },
777
+ toolName: { type: 'string', description: 'Tool being assessed, such as Bash, Edit, or Write. Optional when provider-native tool call payload is supplied.' },
778
+ provider: { type: 'string', description: 'Optional provider name, such as anthropic, openai, codex, cursor, gemini, or mcp' },
779
+ model: { type: 'string', description: 'Optional model name used for audit evidence and budget review' },
780
+ providerToolCall: {
781
+ type: 'object',
782
+ additionalProperties: true,
783
+ description: 'Provider-native tool call object, including Anthropic tool_use or OpenAI function/tool call shapes',
784
+ },
785
+ content: {
786
+ type: 'array',
787
+ items: { type: 'object', additionalProperties: true },
788
+ description: 'Provider-native message content blocks; Anthropic tool_use blocks are normalized automatically',
789
+ },
790
+ method: { type: 'string', description: 'Optional JSON-RPC/MCP method, such as tools/call' },
791
+ params: {
792
+ type: 'object',
793
+ additionalProperties: true,
794
+ description: 'Optional JSON-RPC/MCP params, including tools/call name and arguments, resources/read URI, or prompts/get template arguments',
795
+ },
796
+ usage: {
797
+ type: 'object',
798
+ additionalProperties: true,
799
+ description: 'Provider token/cost usage, such as input_tokens, output_tokens, or total_tokens',
800
+ },
801
+ tokenEstimate: { type: 'number', description: 'Estimated total tokens for this action when provider usage is unavailable' },
802
+ costUsd: { type: 'number', description: 'Estimated USD cost for this action when provider usage is unavailable' },
803
+ budget: {
804
+ type: 'object',
805
+ additionalProperties: true,
806
+ description: 'Optional per-action budget controls: maxTokensPerAction, remainingTokens, maxCostUsdPerAction, remainingCostUsd, maxParallelBranches',
807
+ },
808
+ workflowPattern: {
809
+ type: 'string',
810
+ enum: ['single_action', 'chaining', 'routing', 'parallelization', 'evaluator-optimizer', 'agent'],
811
+ description: 'Optional workflow architecture hint. Agents require inspection evidence; predefined workflows are easier to evaluate.',
812
+ },
813
+ workflow: {
814
+ type: 'object',
815
+ additionalProperties: true,
816
+ description: 'Optional workflow metadata: pattern, steps, routes, branches, tools, inspection, and verification evidence.',
817
+ },
818
+ goal: { type: 'string', description: 'Optional agent goal for open-ended tool planning.' },
819
+ tools: {
820
+ type: 'array',
821
+ items: { type: 'string' },
822
+ description: 'Optional abstract/combinable tool names available to an open-ended agent.',
823
+ },
824
+ branches: {
825
+ type: 'array',
826
+ items: { type: 'string' },
827
+ description: 'Optional parallel workflow branches for fan-out budget and review checks.',
828
+ },
829
+ steps: {
830
+ type: 'array',
831
+ items: { type: 'string' },
832
+ description: 'Optional predefined workflow steps for chaining/evaluator workflow audit evidence.',
833
+ },
834
+ routes: {
835
+ type: 'array',
836
+ items: { type: 'string' },
837
+ description: 'Optional routing workflow destinations or classes.',
838
+ },
779
839
  command: { type: 'string', description: 'Optional shell command when toolName is Bash' },
780
840
  filePath: { type: 'string', description: 'Optional primary file path for edit-like tools' },
781
841
  changedFiles: {
@@ -841,6 +901,18 @@ const TOOLS = [
841
901
  properties: {},
842
902
  },
843
903
  }),
904
+ readOnlyTool({
905
+ name: 'native_messaging_audit',
906
+ description: 'Audit local browser native messaging hosts and AI browser bridges. Flags missing host binaries, pre-authorized extension bridges, and manifests for browsers not detected locally.',
907
+ inputSchema: {
908
+ type: 'object',
909
+ properties: {
910
+ platform: { type: 'string', enum: ['darwin', 'linux', 'win32'], description: 'Optional platform override for manifest discovery.' },
911
+ homeDir: { type: 'string', description: 'Optional home-directory override for manifest discovery.' },
912
+ aiOnly: { type: 'boolean', description: 'When true, only AI/browser bridge manifests are returned.' },
913
+ },
914
+ },
915
+ }),
844
916
  readOnlyTool({
845
917
  name: 'commerce_recall',
846
918
  description: 'Recall past feedback filtered by commerce categories (product_recommendation, brand_compliance, sizing, pricing, regulatory). Returns quality scores alongside memories for agentic commerce agents.',
@@ -6,11 +6,20 @@ const {
6
6
  getFeedbackPaths,
7
7
  appendDiagnosticRecord,
8
8
  } = require('./feedback-loop');
9
+ const { loadOptionalModule } = require('./private-core-boundary');
9
10
  const {
10
11
  buildPartnerStrategy,
11
12
  computePartnerReward,
12
13
  resolveVerificationRetries,
13
- } = require('./partner-orchestration');
14
+ } = loadOptionalModule('./partner-orchestration', () => ({
15
+ buildPartnerStrategy: ({ partnerProfile } = {}) => ({
16
+ profile: partnerProfile || 'public-shell',
17
+ verificationMode: 'local-only',
18
+ recommendedChecks: [],
19
+ }),
20
+ computePartnerReward: () => 0,
21
+ resolveVerificationRetries: (requestedMaxRetries) => requestedMaxRetries,
22
+ }));
14
23
  const {
15
24
  diagnoseFailure,
16
25
  } = require('./failure-diagnostics');
@@ -0,0 +1,71 @@
1
+ 'use strict';
2
+
3
/**
 * Build the rubric that describes how LLM-verifier scores are collected and judged.
 *
 * Invalid overrides fall back to the defaults rather than propagating into
 * score computation (a zero granularity would otherwise become a divisor).
 *
 * @param {object} [options] - Optional overrides; `null` is treated like `{}`.
 * @param {string[]} [options.criteria] - Criterion names; must be an array to be used.
 * @param {number} [options.granularity=100] - Top of the raw score scale; must be finite and > 0.
 * @param {number} [options.repeats=3] - Verifications per criterion; must be finite and > 0.
 * @param {number} [options.passThreshold=0.82] - Minimum normalized score for a pass; must be finite.
 * @returns {{rubricId: string, criteria: string[], granularity: number, repeats: number,
 *   scoring: string, passThreshold: number, caveats: string[]}} The rubric object.
 */
function buildVerifierScoringRubric(options = {}) {
  const opts = options || {};
  const isPositiveNumber = (value) => Number.isFinite(value) && value > 0;

  // Copy the caller's list so later mutation on either side cannot alter the other.
  const criteria = Array.isArray(opts.criteria)
    ? [...opts.criteria]
    : [
      'evidence_cited',
      'scope_respected',
      'tests_or_proof_run',
      'claim_matches_artifacts',
    ];
  const granularity = isPositiveNumber(opts.granularity) ? opts.granularity : 100;
  const repeats = isPositiveNumber(opts.repeats) ? opts.repeats : 3;
  const passThreshold = Number.isFinite(opts.passThreshold) ? opts.passThreshold : 0.82;

  return {
    rubricId: 'granular_llm_verifier',
    criteria,
    granularity,
    repeats,
    scoring: 'probability_weighted_average',
    passThreshold,
    caveats: [
      'calibrate against held-out human labels before production blocking',
      'fall back to coarse scores when model cannot expose score-token probabilities',
      'never use verifier score alone for destructive actions',
    ],
  };
}
27
+
28
/**
 * Aggregate repeated per-criterion verifier scores into one normalized score
 * and an allow/warn decision.
 *
 * Raw values greater than 1 are treated as points on the rubric's
 * `0..granularity` scale and divided by it; values in `[0, 1]` are taken as
 * already normalized. Every value is clamped to `[0, 1]` so an out-of-range
 * score cannot offset failing ones.
 *
 * @param {object} [input]
 * @param {Array<{repeats: number[]}>} [input.scores] - One entry per criterion;
 *   malformed entries and non-finite values are ignored.
 * @param {object} [input.rubric] - Rubric with `criteria`, `granularity`,
 *   `repeats` and `passThreshold`; defaults to the standard rubric.
 * @returns {{score: number, decision: 'allow'|'warn', issues: string[]}}
 *   Mean normalized score rounded to 4 decimals, the decision, and why it warned.
 */
function computeVerifierScore({ scores = [], rubric = buildVerifierScoringRubric() } = {}) {
  const entries = Array.isArray(scores) ? scores : [];
  // Keep the finite scores grouped per criterion so coverage is checked per criterion.
  const perCriterion = entries.map((criterion) => (
    criterion && Array.isArray(criterion.repeats)
      ? criterion.repeats.filter((value) => Number.isFinite(value))
      : []
  ));
  const flattened = perCriterion.flat();

  if (flattened.length === 0) {
    return {
      score: 0,
      decision: 'warn',
      issues: ['missing_verifier_scores'],
    };
  }

  // A zero or invalid granularity would divide to Infinity/NaN and pass the threshold check.
  const granularity = Number.isFinite(rubric.granularity) && rubric.granularity > 0
    ? rubric.granularity
    : 100;
  const expectedCriteria = Array.isArray(rubric.criteria) ? rubric.criteria.length : 0;
  const expectedRepeats = Number.isFinite(rubric.repeats) && rubric.repeats > 0
    ? rubric.repeats
    : 0;

  const normalized = flattened.map((value) => {
    const scaled = value > 1 ? value / granularity : value;
    return Math.min(1, Math.max(0, scaled));
  });
  const score = normalized.reduce((sum, value) => sum + value, 0) / normalized.length;

  // Only criteria that actually carry a finite score count as scored.
  const scoredCriteria = perCriterion.filter((values) => values.length > 0);
  const issues = [];
  if (scoredCriteria.length < expectedCriteria) issues.push('missing_criteria_scores');
  if (
    flattened.length < expectedCriteria * expectedRepeats
    || scoredCriteria.some((values) => values.length < expectedRepeats)
  ) {
    issues.push('missing_repeat_verifications');
  }

  return {
    score: Number(score.toFixed(4)),
    decision: score >= rubric.passThreshold && issues.length === 0 ? 'allow' : 'warn',
    issues,
  };
}
53
+
54
/**
 * Check whether a verifier configuration has the safeguards needed before its
 * scores are relied upon.
 *
 * @param {object} [setup] - Verifier configuration; `null` is treated like `{}`.
 * @param {string[]} [setup.criteria] - Must be an array of at least 3 criteria.
 * @param {number} [setup.repeats] - Must be numeric and at least 2.
 * @param {boolean} [setup.heldoutCalibration] - Truthy when held-out calibration exists.
 * @param {boolean} [setup.destructiveAction] - Truthy when the guarded action is destructive.
 * @param {boolean} [setup.humanReview] - Truthy when a human reviews the action.
 * @returns {{decision: 'allow'|'warn', issues: string[]}} `'allow'` only when no issue was found.
 */
function evaluateVerifierSetup(setup = {}) {
  const config = setup || {};
  const issues = [];

  // Require a real array: a string also has a `length` and would pass a bare length check.
  if (!Array.isArray(config.criteria) || config.criteria.length < 3) {
    issues.push('too_few_criteria');
  }
  // Negated `>=` also rejects non-numeric values, for which `< 2` is false because of NaN.
  if (!(Number(config.repeats) >= 2)) issues.push('repeat_verification_required');
  if (!config.heldoutCalibration) issues.push('heldout_calibration_required');
  if (config.destructiveAction && !config.humanReview) {
    issues.push('human_review_required_for_destructive_action');
  }

  return {
    decision: issues.length ? 'warn' : 'allow',
    issues,
  };
}
66
+
67
+ module.exports = {
68
+ buildVerifierScoringRubric,
69
+ computeVerifierScore,
70
+ evaluateVerifierSetup,
71
+ };