thumbgate 1.14.1 → 1.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/.claude-plugin/marketplace.json +6 -6
  2. package/.claude-plugin/plugin.json +3 -3
  3. package/.well-known/llms.txt +5 -5
  4. package/.well-known/mcp/server-card.json +1 -1
  5. package/README.md +60 -35
  6. package/adapters/chatgpt/openapi.yaml +118 -2
  7. package/adapters/claude/.mcp.json +2 -2
  8. package/adapters/mcp/server-stdio.js +217 -84
  9. package/adapters/opencode/opencode.json +1 -1
  10. package/bench/prompt-eval-suite.json +5 -1
  11. package/bin/cli.js +211 -8
  12. package/config/enforcement.json +59 -7
  13. package/config/evals/agent-safety-eval.json +338 -22
  14. package/config/gates/default.json +33 -0
  15. package/config/gates/routine.json +43 -0
  16. package/config/github-about.json +3 -3
  17. package/config/mcp-allowlists.json +4 -0
  18. package/config/merge-quality-checks.json +2 -1
  19. package/config/model-candidates.json +131 -0
  20. package/openapi/openapi.yaml +118 -2
  21. package/package.json +70 -51
  22. package/public/blog.html +7 -7
  23. package/public/codex-plugin.html +13 -7
  24. package/public/compare.html +29 -23
  25. package/public/dashboard.html +105 -12
  26. package/public/guide.html +28 -28
  27. package/public/index.html +233 -97
  28. package/public/learn.html +87 -20
  29. package/public/lessons.html +26 -2
  30. package/public/numbers.html +271 -0
  31. package/public/pro.html +89 -19
  32. package/scripts/agent-audit-trace.js +55 -0
  33. package/scripts/agent-memory-lifecycle.js +96 -0
  34. package/scripts/agent-readiness-plan.js +118 -0
  35. package/scripts/agentic-data-pipeline.js +21 -1
  36. package/scripts/agents-sdk-sandbox-plan.js +57 -0
  37. package/scripts/ai-org-governance.js +98 -0
  38. package/scripts/ai-search-distribution.js +43 -0
  39. package/scripts/artifact-agent-plan.js +81 -0
  40. package/scripts/billing.js +27 -8
  41. package/scripts/cli-feedback.js +2 -1
  42. package/scripts/cli-schema.js +60 -5
  43. package/scripts/code-mode-mcp-plan.js +71 -0
  44. package/scripts/commercial-offer.js +1 -1
  45. package/scripts/context-engine.js +1 -2
  46. package/scripts/context-manager.js +4 -1
  47. package/scripts/contextfs.js +214 -32
  48. package/scripts/dashboard-render-spec.js +1 -1
  49. package/scripts/dashboard.js +275 -9
  50. package/scripts/decision-journal.js +13 -3
  51. package/scripts/document-workflow-governance.js +62 -0
  52. package/scripts/enterprise-agent-rollout.js +34 -0
  53. package/scripts/experience-replay-governance.js +69 -0
  54. package/scripts/export-hf-dataset.js +1 -1
  55. package/scripts/feedback-loop.js +141 -9
  56. package/scripts/feedback-to-rules.js +17 -23
  57. package/scripts/gates-engine.js +4 -6
  58. package/scripts/growth-campaigns.js +49 -0
  59. package/scripts/harness-selector.js +145 -1
  60. package/scripts/hybrid-supervisor-agent.js +64 -0
  61. package/scripts/inference-cache-policy.js +72 -0
  62. package/scripts/inference-economics.js +53 -0
  63. package/scripts/internal-agent-bootstrap.js +12 -2
  64. package/scripts/knowledge-layer-plan.js +108 -0
  65. package/scripts/lesson-canonical.js +181 -0
  66. package/scripts/lesson-db.js +71 -10
  67. package/scripts/lesson-inference.js +183 -44
  68. package/scripts/lesson-search.js +4 -1
  69. package/scripts/lesson-synthesis.js +23 -2
  70. package/scripts/llm-client.js +157 -26
  71. package/scripts/mailer/resend-mailer.js +112 -1
  72. package/scripts/mcp-transport-strategy.js +66 -0
  73. package/scripts/memory-store-governance.js +60 -0
  74. package/scripts/meta-agent-loop.js +7 -13
  75. package/scripts/model-access-eligibility.js +38 -0
  76. package/scripts/model-migration-readiness.js +55 -0
  77. package/scripts/native-messaging-audit.js +514 -0
  78. package/scripts/operational-integrity.js +96 -3
  79. package/scripts/otel-declarative-config.js +56 -0
  80. package/scripts/perplexity-client.js +1 -1
  81. package/scripts/post-training-governance.js +34 -0
  82. package/scripts/pr-manager.js +47 -7
  83. package/scripts/private-core-boundary.js +72 -0
  84. package/scripts/production-agent-readiness.js +40 -0
  85. package/scripts/profile-router.js +16 -1
  86. package/scripts/prompt-eval.js +564 -32
  87. package/scripts/prompt-programs.js +93 -0
  88. package/scripts/provider-action-normalizer.js +585 -0
  89. package/scripts/rule-validator.js +285 -0
  90. package/scripts/scaling-law-claims.js +60 -0
  91. package/scripts/security-scanner.js +1 -1
  92. package/scripts/self-distill-agent.js +7 -32
  93. package/scripts/seo-gsd.js +400 -43
  94. package/scripts/skill-rag-router.js +53 -0
  95. package/scripts/spec-gate.js +1 -1
  96. package/scripts/student-consistent-training.js +73 -0
  97. package/scripts/synthetic-data-provenance.js +98 -0
  98. package/scripts/task-context-result.js +81 -0
  99. package/scripts/telemetry-analytics.js +149 -0
  100. package/scripts/thompson-sampling.js +2 -2
  101. package/scripts/token-savings.js +7 -6
  102. package/scripts/token-tco.js +46 -0
  103. package/scripts/tool-registry.js +75 -3
  104. package/scripts/verification-loop.js +10 -1
  105. package/scripts/verifier-scoring.js +71 -0
  106. package/scripts/workflow-sentinel.js +284 -28
  107. package/scripts/workspace-agent-routines.js +118 -0
  108. package/skills/thumbgate/SKILL.md +1 -1
  109. package/src/api/server.js +434 -120
  110. package/.claude-plugin/README.md +0 -170
  111. package/adapters/README.md +0 -12
  112. package/scripts/analytics-report.js +0 -328
  113. package/scripts/autonomous-workflow.js +0 -377
  114. package/scripts/billing-setup.js +0 -109
  115. package/scripts/creator-campaigns.js +0 -239
  116. package/scripts/cross-encoder-reranker.js +0 -235
  117. package/scripts/daemon-manager.js +0 -108
  118. package/scripts/decision-trace.js +0 -354
  119. package/scripts/delegation-runtime.js +0 -896
  120. package/scripts/dispatch-brief.js +0 -159
  121. package/scripts/distribution-surfaces.js +0 -110
  122. package/scripts/feedback-history-distiller.js +0 -382
  123. package/scripts/funnel-analytics.js +0 -35
  124. package/scripts/history-distiller.js +0 -200
  125. package/scripts/hosted-job-launcher.js +0 -256
  126. package/scripts/intent-router.js +0 -392
  127. package/scripts/lesson-reranker.js +0 -263
  128. package/scripts/lesson-retrieval.js +0 -148
  129. package/scripts/managed-lesson-agent.js +0 -183
  130. package/scripts/operational-dashboard.js +0 -103
  131. package/scripts/operational-summary.js +0 -129
  132. package/scripts/operator-artifacts.js +0 -608
  133. package/scripts/optimize-context.js +0 -17
  134. package/scripts/org-dashboard.js +0 -206
  135. package/scripts/partner-orchestration.js +0 -146
  136. package/scripts/predictive-insights.js +0 -356
  137. package/scripts/pulse.js +0 -80
  138. package/scripts/reflector-agent.js +0 -221
  139. package/scripts/sales-pipeline.js +0 -681
  140. package/scripts/session-episode-store.js +0 -329
  141. package/scripts/session-health-sensor.js +0 -242
  142. package/scripts/session-report.js +0 -120
  143. package/scripts/swarm-coordinator.js +0 -81
  144. package/scripts/tool-kpi-tracker.js +0 -12
  145. package/scripts/webhook-delivery.js +0 -62
  146. package/scripts/workflow-sprint-intake.js +0 -475
  147. package/skills/agent-memory/SKILL.md +0 -97
  148. package/skills/solve-architecture-autonomy/SKILL.md +0 -17
  149. package/skills/solve-architecture-autonomy/tool.js +0 -33
  150. package/skills/thumbgate-feedback/SKILL.md +0 -49
@@ -0,0 +1,131 @@
1
+ {
2
+ "version": 1,
3
+ "description": "Managed model candidates for ThumbGate workload benchmarking. Catalog only: no provider-specific runtime dependency is assumed here.",
4
+ "workloads": {
5
+ "pretool-gating": {
6
+ "label": "PreTool gating",
7
+ "summary": "Fast, reliable gate judgments for tool-use and agentic coding decisions before commands run.",
8
+ "desiredStrengths": ["agentic-coding", "tool-use", "reliability"],
9
+ "targetContextWindow": 64000,
10
+ "benchmarkCommands": [
11
+ "npx thumbgate eval --from-feedback --json --min-score=0",
12
+ "node scripts/gate-eval.js run",
13
+ "npx thumbgate bench --json --min-score=90"
14
+ ],
15
+ "metrics": [
16
+ "passRate",
17
+ "falsePositiveRate",
18
+ "falseNegativeRate",
19
+ "medianLatencyMs",
20
+ "costPer1kActionsUsd"
21
+ ]
22
+ },
23
+ "long-trace-review": {
24
+ "label": "Long trace review",
25
+ "summary": "Review long agent traces, multi-step failures, and large-context coding sessions without dropping important detail.",
26
+ "desiredStrengths": ["long-horizon-coding", "multi-agent", "reliability"],
27
+ "targetContextWindow": 128000,
28
+ "benchmarkCommands": [
29
+ "npx thumbgate eval --from-feedback --json --min-score=0",
30
+ "node scripts/gate-eval.js run",
31
+ "npx thumbgate bench --json --min-score=90"
32
+ ],
33
+ "metrics": [
34
+ "passRate",
35
+ "longContextReliability",
36
+ "traceCompressionLoss",
37
+ "medianLatencyMs",
38
+ "costPerTraceUsd"
39
+ ]
40
+ },
41
+ "cheap-fast-path": {
42
+ "label": "Cheap fast path",
43
+ "summary": "Low-cost first-pass model for cheap approval triage before escalating ambiguous work.",
44
+ "desiredStrengths": ["agentic-coding", "tool-use"],
45
+ "targetContextWindow": 32000,
46
+ "benchmarkCommands": [
47
+ "npx thumbgate eval --from-feedback --json --min-score=0",
48
+ "node scripts/gate-eval.js run",
49
+ "npx thumbgate bench --json --min-score=90"
50
+ ],
51
+ "metrics": [
52
+ "passRate",
53
+ "medianLatencyMs",
54
+ "costPer1kActionsUsd",
55
+ "escalationRate"
56
+ ]
57
+ }
58
+ },
59
+ "candidates": [
60
+ {
61
+ "id": "anthropic/claude-haiku-4-5",
62
+ "vendor": "Anthropic",
63
+ "family": "claude",
64
+ "provider": "anthropic",
65
+ "model": "claude-haiku-4-5-20251001",
66
+ "contextWindow": 200000,
67
+ "costClass": "low",
68
+ "strengths": ["tool-use", "reliability", "fast-inference"],
69
+ "notes": "Fast control candidate for cheap approval triage."
70
+ },
71
+ {
72
+ "id": "anthropic/claude-sonnet-4-6",
73
+ "vendor": "Anthropic",
74
+ "family": "claude",
75
+ "provider": "anthropic",
76
+ "model": "claude-sonnet-4-6",
77
+ "contextWindow": 200000,
78
+ "costClass": "medium",
79
+ "strengths": ["agentic-coding", "tool-use", "reliability", "long-horizon-coding"],
80
+ "notes": "Current stronger managed control candidate."
81
+ },
82
+ {
83
+ "id": "tinker/kimi-k2.6-32k",
84
+ "vendor": "Thinking Machines",
85
+ "family": "kimi",
86
+ "provider": "openai-compatible",
87
+ "gateway": "tinker",
88
+ "model": "kimi-k2.6-32k",
89
+ "contextWindow": 32000,
90
+ "costClass": "medium",
91
+ "strengths": ["long-horizon-coding", "multi-agent", "reliability"],
92
+ "notes": "Tinker April 23, 2026 release. Good candidate when long-horizon coding matters more than ultra-low latency."
93
+ },
94
+ {
95
+ "id": "tinker/kimi-k2.6-128k",
96
+ "vendor": "Thinking Machines",
97
+ "family": "kimi",
98
+ "provider": "openai-compatible",
99
+ "gateway": "tinker",
100
+ "model": "kimi-k2.6-128k",
101
+ "contextWindow": 128000,
102
+ "costClass": "medium",
103
+ "strengths": ["long-horizon-coding", "multi-agent", "reliability", "long-context"],
104
+ "notes": "Highest-ROI Kimi candidate for long traces and multi-step review."
105
+ },
106
+ {
107
+ "id": "tinker/qwen3.6-35b-a3b",
108
+ "vendor": "Thinking Machines",
109
+ "family": "qwen",
110
+ "provider": "openai-compatible",
111
+ "gateway": "tinker",
112
+ "model": "qwen3.6-35b-a3b",
113
+ "contextWindow": 64000,
114
+ "costClass": "low",
115
+ "strengths": ["agentic-coding", "tool-use", "reliability", "fast-inference"],
116
+ "notes": "Best first Tinker candidate for ThumbGate pre-action gating and tool-risk classification."
117
+ },
118
+ {
119
+ "id": "tinker/qwen3.6-27b",
120
+ "vendor": "Thinking Machines",
121
+ "family": "qwen",
122
+ "provider": "openai-compatible",
123
+ "gateway": "tinker",
124
+ "model": "qwen3.6-27b",
125
+ "contextWindow": 64000,
126
+ "costClass": "low",
127
+ "strengths": ["agentic-coding", "tool-use", "fast-inference"],
128
+ "notes": "Cheapest Tinker candidate for the fast gate path; use when latency/cost matter most."
129
+ }
130
+ ]
131
+ }
@@ -751,6 +751,34 @@ paths:
751
751
  $ref: '#/components/schemas/FunnelAnalyticsResponse'
752
752
  '401':
753
753
  description: Unauthorized
754
+ /v1/analytics/losses:
755
+ get:
756
+ operationId: getLossAnalytics
757
+ parameters:
758
+ - in: query
759
+ name: window
760
+ schema:
761
+ type: string
762
+ enum: [today, 7d, 30d, lifetime]
763
+ - in: query
764
+ name: timezone
765
+ schema:
766
+ type: string
767
+ - in: query
768
+ name: now
769
+ schema:
770
+ type: string
771
+ format: date-time
772
+ responses:
773
+ '200':
774
+ description: Ranked buyer-loss and revenue-opportunity analysis for the active analytics window
775
+ content:
776
+ application/json:
777
+ schema:
778
+ type: object
779
+ additionalProperties: true
780
+ '401':
781
+ description: Unauthorized
754
782
  /v1/dashboard:
755
783
  get:
756
784
  operationId: getDashboard
@@ -848,10 +876,79 @@ paths:
848
876
  application/json:
849
877
  schema:
850
878
  type: object
851
- required: [toolName]
852
879
  properties:
853
880
  toolName:
854
881
  type: string
882
+ description: Tool name is optional when provider-native tool call payload is supplied.
883
+ provider:
884
+ type: string
885
+ model:
886
+ type: string
887
+ providerToolCall:
888
+ type: object
889
+ additionalProperties: true
890
+ toolCall:
891
+ type: object
892
+ additionalProperties: true
893
+ toolUse:
894
+ type: object
895
+ additionalProperties: true
896
+ content:
897
+ type: array
898
+ items:
899
+ type: object
900
+ additionalProperties: true
901
+ input:
902
+ type: object
903
+ additionalProperties: true
904
+ arguments:
905
+ type: object
906
+ additionalProperties: true
907
+ method:
908
+ type: string
909
+ params:
910
+ type: object
911
+ additionalProperties: true
912
+ mcp:
913
+ type: object
914
+ additionalProperties: true
915
+ mcpToolCall:
916
+ type: object
917
+ additionalProperties: true
918
+ usage:
919
+ type: object
920
+ additionalProperties: true
921
+ tokenEstimate:
922
+ type: number
923
+ costUsd:
924
+ type: number
925
+ budget:
926
+ type: object
927
+ additionalProperties: true
928
+ workflowPattern:
929
+ type: string
930
+ enum: [single_action, chaining, routing, parallelization, evaluator-optimizer, agent]
931
+ workflow:
932
+ type: object
933
+ additionalProperties: true
934
+ goal:
935
+ type: string
936
+ tools:
937
+ type: array
938
+ items:
939
+ type: string
940
+ branches:
941
+ type: array
942
+ items:
943
+ type: string
944
+ steps:
945
+ type: array
946
+ items:
947
+ type: string
948
+ routes:
949
+ type: array
950
+ items:
951
+ type: string
855
952
  command:
856
953
  type: string
857
954
  filePath:
@@ -868,6 +965,25 @@ paths:
868
965
  type: boolean
869
966
  requireVersionNotBehindBase:
870
967
  type: boolean
968
+ workflowDispatch:
969
+ type: object
970
+ description: Evidence required before running `gh workflow run` or another environment-specific workflow dispatch.
971
+ properties:
972
+ environment:
973
+ type: string
974
+ description: Requested environment such as dev, staging, beta, or release.
975
+ workflow:
976
+ type: string
977
+ description: Expected workflow file or workflow name.
978
+ ref:
979
+ type: string
980
+ description: Expected branch or ref passed to the workflow dispatch command.
981
+ sha:
982
+ type: string
983
+ description: Expected HEAD SHA to verify before and after dispatch.
984
+ job:
985
+ type: string
986
+ description: Expected job name to verify before reporting the workflow URL.
871
987
  responses:
872
988
  '200':
873
989
  description: Persisted workflow-sentinel recommendation with decision-control metadata and actionId
@@ -1121,7 +1237,7 @@ paths:
1121
1237
  description: Comma-separated tags that must all be present on a lesson.
1122
1238
  responses:
1123
1239
  '200':
1124
- description: Searchable promoted lessons with linked corrective actions, prevention rules, and auto-gates
1240
+ description: Searchable promoted lessons with linked corrective actions, prevention rules, and auto-promoted checks
1125
1241
  '401':
1126
1242
  description: Unauthorized
1127
1243
  /v1/search: