thumbgate 1.14.1 → 1.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +6 -6
- package/.claude-plugin/plugin.json +3 -3
- package/.well-known/llms.txt +5 -5
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +60 -35
- package/adapters/chatgpt/openapi.yaml +118 -2
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/mcp/server-stdio.js +217 -84
- package/adapters/opencode/opencode.json +1 -1
- package/bench/prompt-eval-suite.json +5 -1
- package/bin/cli.js +211 -8
- package/config/enforcement.json +59 -7
- package/config/evals/agent-safety-eval.json +338 -22
- package/config/gates/default.json +33 -0
- package/config/gates/routine.json +43 -0
- package/config/github-about.json +3 -3
- package/config/mcp-allowlists.json +4 -0
- package/config/merge-quality-checks.json +2 -1
- package/config/model-candidates.json +131 -0
- package/openapi/openapi.yaml +118 -2
- package/package.json +70 -51
- package/public/blog.html +7 -7
- package/public/codex-plugin.html +13 -7
- package/public/compare.html +29 -23
- package/public/dashboard.html +105 -12
- package/public/guide.html +28 -28
- package/public/index.html +233 -97
- package/public/learn.html +87 -20
- package/public/lessons.html +26 -2
- package/public/numbers.html +271 -0
- package/public/pro.html +89 -19
- package/scripts/agent-audit-trace.js +55 -0
- package/scripts/agent-memory-lifecycle.js +96 -0
- package/scripts/agent-readiness-plan.js +118 -0
- package/scripts/agentic-data-pipeline.js +21 -1
- package/scripts/agents-sdk-sandbox-plan.js +57 -0
- package/scripts/ai-org-governance.js +98 -0
- package/scripts/ai-search-distribution.js +43 -0
- package/scripts/artifact-agent-plan.js +81 -0
- package/scripts/billing.js +27 -8
- package/scripts/cli-feedback.js +2 -1
- package/scripts/cli-schema.js +60 -5
- package/scripts/code-mode-mcp-plan.js +71 -0
- package/scripts/commercial-offer.js +1 -1
- package/scripts/context-engine.js +1 -2
- package/scripts/context-manager.js +4 -1
- package/scripts/contextfs.js +214 -32
- package/scripts/dashboard-render-spec.js +1 -1
- package/scripts/dashboard.js +275 -9
- package/scripts/decision-journal.js +13 -3
- package/scripts/document-workflow-governance.js +62 -0
- package/scripts/enterprise-agent-rollout.js +34 -0
- package/scripts/experience-replay-governance.js +69 -0
- package/scripts/export-hf-dataset.js +1 -1
- package/scripts/feedback-loop.js +141 -9
- package/scripts/feedback-to-rules.js +17 -23
- package/scripts/gates-engine.js +4 -6
- package/scripts/growth-campaigns.js +49 -0
- package/scripts/harness-selector.js +145 -1
- package/scripts/hybrid-supervisor-agent.js +64 -0
- package/scripts/inference-cache-policy.js +72 -0
- package/scripts/inference-economics.js +53 -0
- package/scripts/internal-agent-bootstrap.js +12 -2
- package/scripts/knowledge-layer-plan.js +108 -0
- package/scripts/lesson-canonical.js +181 -0
- package/scripts/lesson-db.js +71 -10
- package/scripts/lesson-inference.js +183 -44
- package/scripts/lesson-search.js +4 -1
- package/scripts/lesson-synthesis.js +23 -2
- package/scripts/llm-client.js +157 -26
- package/scripts/mailer/resend-mailer.js +112 -1
- package/scripts/mcp-transport-strategy.js +66 -0
- package/scripts/memory-store-governance.js +60 -0
- package/scripts/meta-agent-loop.js +7 -13
- package/scripts/model-access-eligibility.js +38 -0
- package/scripts/model-migration-readiness.js +55 -0
- package/scripts/native-messaging-audit.js +514 -0
- package/scripts/operational-integrity.js +96 -3
- package/scripts/otel-declarative-config.js +56 -0
- package/scripts/perplexity-client.js +1 -1
- package/scripts/post-training-governance.js +34 -0
- package/scripts/pr-manager.js +47 -7
- package/scripts/private-core-boundary.js +72 -0
- package/scripts/production-agent-readiness.js +40 -0
- package/scripts/profile-router.js +16 -1
- package/scripts/prompt-eval.js +564 -32
- package/scripts/prompt-programs.js +93 -0
- package/scripts/provider-action-normalizer.js +585 -0
- package/scripts/rule-validator.js +285 -0
- package/scripts/scaling-law-claims.js +60 -0
- package/scripts/security-scanner.js +1 -1
- package/scripts/self-distill-agent.js +7 -32
- package/scripts/seo-gsd.js +400 -43
- package/scripts/skill-rag-router.js +53 -0
- package/scripts/spec-gate.js +1 -1
- package/scripts/student-consistent-training.js +73 -0
- package/scripts/synthetic-data-provenance.js +98 -0
- package/scripts/task-context-result.js +81 -0
- package/scripts/telemetry-analytics.js +149 -0
- package/scripts/thompson-sampling.js +2 -2
- package/scripts/token-savings.js +7 -6
- package/scripts/token-tco.js +46 -0
- package/scripts/tool-registry.js +75 -3
- package/scripts/verification-loop.js +10 -1
- package/scripts/verifier-scoring.js +71 -0
- package/scripts/workflow-sentinel.js +284 -28
- package/scripts/workspace-agent-routines.js +118 -0
- package/skills/thumbgate/SKILL.md +1 -1
- package/src/api/server.js +434 -120
- package/.claude-plugin/README.md +0 -170
- package/adapters/README.md +0 -12
- package/scripts/analytics-report.js +0 -328
- package/scripts/autonomous-workflow.js +0 -377
- package/scripts/billing-setup.js +0 -109
- package/scripts/creator-campaigns.js +0 -239
- package/scripts/cross-encoder-reranker.js +0 -235
- package/scripts/daemon-manager.js +0 -108
- package/scripts/decision-trace.js +0 -354
- package/scripts/delegation-runtime.js +0 -896
- package/scripts/dispatch-brief.js +0 -159
- package/scripts/distribution-surfaces.js +0 -110
- package/scripts/feedback-history-distiller.js +0 -382
- package/scripts/funnel-analytics.js +0 -35
- package/scripts/history-distiller.js +0 -200
- package/scripts/hosted-job-launcher.js +0 -256
- package/scripts/intent-router.js +0 -392
- package/scripts/lesson-reranker.js +0 -263
- package/scripts/lesson-retrieval.js +0 -148
- package/scripts/managed-lesson-agent.js +0 -183
- package/scripts/operational-dashboard.js +0 -103
- package/scripts/operational-summary.js +0 -129
- package/scripts/operator-artifacts.js +0 -608
- package/scripts/optimize-context.js +0 -17
- package/scripts/org-dashboard.js +0 -206
- package/scripts/partner-orchestration.js +0 -146
- package/scripts/predictive-insights.js +0 -356
- package/scripts/pulse.js +0 -80
- package/scripts/reflector-agent.js +0 -221
- package/scripts/sales-pipeline.js +0 -681
- package/scripts/session-episode-store.js +0 -329
- package/scripts/session-health-sensor.js +0 -242
- package/scripts/session-report.js +0 -120
- package/scripts/swarm-coordinator.js +0 -81
- package/scripts/tool-kpi-tracker.js +0 -12
- package/scripts/webhook-delivery.js +0 -62
- package/scripts/workflow-sprint-intake.js +0 -475
- package/skills/agent-memory/SKILL.md +0 -97
- package/skills/solve-architecture-autonomy/SKILL.md +0 -17
- package/skills/solve-architecture-autonomy/tool.js +0 -33
- package/skills/thumbgate-feedback/SKILL.md +0 -49
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": 1,
|
|
3
|
+
"description": "Managed model candidates for ThumbGate workload benchmarking. Catalog only: no provider-specific runtime dependency is assumed here.",
|
|
4
|
+
"workloads": {
|
|
5
|
+
"pretool-gating": {
|
|
6
|
+
"label": "PreTool gating",
|
|
7
|
+
"summary": "Fast, reliable gate judgments for tool-use and agentic coding decisions before commands run.",
|
|
8
|
+
"desiredStrengths": ["agentic-coding", "tool-use", "reliability"],
|
|
9
|
+
"targetContextWindow": 64000,
|
|
10
|
+
"benchmarkCommands": [
|
|
11
|
+
"npx thumbgate eval --from-feedback --json --min-score=0",
|
|
12
|
+
"node scripts/gate-eval.js run",
|
|
13
|
+
"npx thumbgate bench --json --min-score=90"
|
|
14
|
+
],
|
|
15
|
+
"metrics": [
|
|
16
|
+
"passRate",
|
|
17
|
+
"falsePositiveRate",
|
|
18
|
+
"falseNegativeRate",
|
|
19
|
+
"medianLatencyMs",
|
|
20
|
+
"costPer1kActionsUsd"
|
|
21
|
+
]
|
|
22
|
+
},
|
|
23
|
+
"long-trace-review": {
|
|
24
|
+
"label": "Long trace review",
|
|
25
|
+
"summary": "Review long agent traces, multi-step failures, and large-context coding sessions without dropping important detail.",
|
|
26
|
+
"desiredStrengths": ["long-horizon-coding", "multi-agent", "reliability"],
|
|
27
|
+
"targetContextWindow": 128000,
|
|
28
|
+
"benchmarkCommands": [
|
|
29
|
+
"npx thumbgate eval --from-feedback --json --min-score=0",
|
|
30
|
+
"node scripts/gate-eval.js run",
|
|
31
|
+
"npx thumbgate bench --json --min-score=90"
|
|
32
|
+
],
|
|
33
|
+
"metrics": [
|
|
34
|
+
"passRate",
|
|
35
|
+
"longContextReliability",
|
|
36
|
+
"traceCompressionLoss",
|
|
37
|
+
"medianLatencyMs",
|
|
38
|
+
"costPerTraceUsd"
|
|
39
|
+
]
|
|
40
|
+
},
|
|
41
|
+
"cheap-fast-path": {
|
|
42
|
+
"label": "Cheap fast path",
|
|
43
|
+
"summary": "Low-cost first-pass model for cheap approval triage before escalating ambiguous work.",
|
|
44
|
+
"desiredStrengths": ["agentic-coding", "tool-use"],
|
|
45
|
+
"targetContextWindow": 32000,
|
|
46
|
+
"benchmarkCommands": [
|
|
47
|
+
"npx thumbgate eval --from-feedback --json --min-score=0",
|
|
48
|
+
"node scripts/gate-eval.js run",
|
|
49
|
+
"npx thumbgate bench --json --min-score=90"
|
|
50
|
+
],
|
|
51
|
+
"metrics": [
|
|
52
|
+
"passRate",
|
|
53
|
+
"medianLatencyMs",
|
|
54
|
+
"costPer1kActionsUsd",
|
|
55
|
+
"escalationRate"
|
|
56
|
+
]
|
|
57
|
+
}
|
|
58
|
+
},
|
|
59
|
+
"candidates": [
|
|
60
|
+
{
|
|
61
|
+
"id": "anthropic/claude-haiku-4-5",
|
|
62
|
+
"vendor": "Anthropic",
|
|
63
|
+
"family": "claude",
|
|
64
|
+
"provider": "anthropic",
|
|
65
|
+
"model": "claude-haiku-4-5-20251001",
|
|
66
|
+
"contextWindow": 200000,
|
|
67
|
+
"costClass": "low",
|
|
68
|
+
"strengths": ["tool-use", "reliability", "fast-inference"],
|
|
69
|
+
"notes": "Fast control candidate for cheap approval triage."
|
|
70
|
+
},
|
|
71
|
+
{
|
|
72
|
+
"id": "anthropic/claude-sonnet-4-6",
|
|
73
|
+
"vendor": "Anthropic",
|
|
74
|
+
"family": "claude",
|
|
75
|
+
"provider": "anthropic",
|
|
76
|
+
"model": "claude-sonnet-4-6",
|
|
77
|
+
"contextWindow": 200000,
|
|
78
|
+
"costClass": "medium",
|
|
79
|
+
"strengths": ["agentic-coding", "tool-use", "reliability", "long-horizon-coding"],
|
|
80
|
+
"notes": "Current stronger managed control candidate."
|
|
81
|
+
},
|
|
82
|
+
{
|
|
83
|
+
"id": "tinker/kimi-k2.6-32k",
|
|
84
|
+
"vendor": "Thinking Machines",
|
|
85
|
+
"family": "kimi",
|
|
86
|
+
"provider": "openai-compatible",
|
|
87
|
+
"gateway": "tinker",
|
|
88
|
+
"model": "kimi-k2.6-32k",
|
|
89
|
+
"contextWindow": 32000,
|
|
90
|
+
"costClass": "medium",
|
|
91
|
+
"strengths": ["long-horizon-coding", "multi-agent", "reliability"],
|
|
92
|
+
"notes": "Tinker April 23, 2026 release. Good candidate when long-horizon coding matters more than ultra-low latency."
|
|
93
|
+
},
|
|
94
|
+
{
|
|
95
|
+
"id": "tinker/kimi-k2.6-128k",
|
|
96
|
+
"vendor": "Thinking Machines",
|
|
97
|
+
"family": "kimi",
|
|
98
|
+
"provider": "openai-compatible",
|
|
99
|
+
"gateway": "tinker",
|
|
100
|
+
"model": "kimi-k2.6-128k",
|
|
101
|
+
"contextWindow": 128000,
|
|
102
|
+
"costClass": "medium",
|
|
103
|
+
"strengths": ["long-horizon-coding", "multi-agent", "reliability", "long-context"],
|
|
104
|
+
"notes": "Highest-ROI Kimi candidate for long traces and multi-step review."
|
|
105
|
+
},
|
|
106
|
+
{
|
|
107
|
+
"id": "tinker/qwen3.6-35b-a3b",
|
|
108
|
+
"vendor": "Thinking Machines",
|
|
109
|
+
"family": "qwen",
|
|
110
|
+
"provider": "openai-compatible",
|
|
111
|
+
"gateway": "tinker",
|
|
112
|
+
"model": "qwen3.6-35b-a3b",
|
|
113
|
+
"contextWindow": 64000,
|
|
114
|
+
"costClass": "low",
|
|
115
|
+
"strengths": ["agentic-coding", "tool-use", "reliability", "fast-inference"],
|
|
116
|
+
"notes": "Best first Tinker candidate for ThumbGate pre-action gating and tool-risk classification."
|
|
117
|
+
},
|
|
118
|
+
{
|
|
119
|
+
"id": "tinker/qwen3.6-27b",
|
|
120
|
+
"vendor": "Thinking Machines",
|
|
121
|
+
"family": "qwen",
|
|
122
|
+
"provider": "openai-compatible",
|
|
123
|
+
"gateway": "tinker",
|
|
124
|
+
"model": "qwen3.6-27b",
|
|
125
|
+
"contextWindow": 64000,
|
|
126
|
+
"costClass": "low",
|
|
127
|
+
"strengths": ["agentic-coding", "tool-use", "fast-inference"],
|
|
128
|
+
"notes": "Cheapest Tinker candidate for the fast gate path; use when latency/cost matter most."
|
|
129
|
+
}
|
|
130
|
+
]
|
|
131
|
+
}
|
package/openapi/openapi.yaml
CHANGED
|
@@ -751,6 +751,34 @@ paths:
|
|
|
751
751
|
$ref: '#/components/schemas/FunnelAnalyticsResponse'
|
|
752
752
|
'401':
|
|
753
753
|
description: Unauthorized
|
|
754
|
+
/v1/analytics/losses:
|
|
755
|
+
get:
|
|
756
|
+
operationId: getLossAnalytics
|
|
757
|
+
parameters:
|
|
758
|
+
- in: query
|
|
759
|
+
name: window
|
|
760
|
+
schema:
|
|
761
|
+
type: string
|
|
762
|
+
enum: [today, 7d, 30d, lifetime]
|
|
763
|
+
- in: query
|
|
764
|
+
name: timezone
|
|
765
|
+
schema:
|
|
766
|
+
type: string
|
|
767
|
+
- in: query
|
|
768
|
+
name: now
|
|
769
|
+
schema:
|
|
770
|
+
type: string
|
|
771
|
+
format: date-time
|
|
772
|
+
responses:
|
|
773
|
+
'200':
|
|
774
|
+
description: Ranked buyer-loss and revenue-opportunity analysis for the active analytics window
|
|
775
|
+
content:
|
|
776
|
+
application/json:
|
|
777
|
+
schema:
|
|
778
|
+
type: object
|
|
779
|
+
additionalProperties: true
|
|
780
|
+
'401':
|
|
781
|
+
description: Unauthorized
|
|
754
782
|
/v1/dashboard:
|
|
755
783
|
get:
|
|
756
784
|
operationId: getDashboard
|
|
@@ -848,10 +876,79 @@ paths:
|
|
|
848
876
|
application/json:
|
|
849
877
|
schema:
|
|
850
878
|
type: object
|
|
851
|
-
required: [toolName]
|
|
852
879
|
properties:
|
|
853
880
|
toolName:
|
|
854
881
|
type: string
|
|
882
|
+
description: Tool name is optional when provider-native tool call payload is supplied.
|
|
883
|
+
provider:
|
|
884
|
+
type: string
|
|
885
|
+
model:
|
|
886
|
+
type: string
|
|
887
|
+
providerToolCall:
|
|
888
|
+
type: object
|
|
889
|
+
additionalProperties: true
|
|
890
|
+
toolCall:
|
|
891
|
+
type: object
|
|
892
|
+
additionalProperties: true
|
|
893
|
+
toolUse:
|
|
894
|
+
type: object
|
|
895
|
+
additionalProperties: true
|
|
896
|
+
content:
|
|
897
|
+
type: array
|
|
898
|
+
items:
|
|
899
|
+
type: object
|
|
900
|
+
additionalProperties: true
|
|
901
|
+
input:
|
|
902
|
+
type: object
|
|
903
|
+
additionalProperties: true
|
|
904
|
+
arguments:
|
|
905
|
+
type: object
|
|
906
|
+
additionalProperties: true
|
|
907
|
+
method:
|
|
908
|
+
type: string
|
|
909
|
+
params:
|
|
910
|
+
type: object
|
|
911
|
+
additionalProperties: true
|
|
912
|
+
mcp:
|
|
913
|
+
type: object
|
|
914
|
+
additionalProperties: true
|
|
915
|
+
mcpToolCall:
|
|
916
|
+
type: object
|
|
917
|
+
additionalProperties: true
|
|
918
|
+
usage:
|
|
919
|
+
type: object
|
|
920
|
+
additionalProperties: true
|
|
921
|
+
tokenEstimate:
|
|
922
|
+
type: number
|
|
923
|
+
costUsd:
|
|
924
|
+
type: number
|
|
925
|
+
budget:
|
|
926
|
+
type: object
|
|
927
|
+
additionalProperties: true
|
|
928
|
+
workflowPattern:
|
|
929
|
+
type: string
|
|
930
|
+
enum: [single_action, chaining, routing, parallelization, evaluator-optimizer, agent]
|
|
931
|
+
workflow:
|
|
932
|
+
type: object
|
|
933
|
+
additionalProperties: true
|
|
934
|
+
goal:
|
|
935
|
+
type: string
|
|
936
|
+
tools:
|
|
937
|
+
type: array
|
|
938
|
+
items:
|
|
939
|
+
type: string
|
|
940
|
+
branches:
|
|
941
|
+
type: array
|
|
942
|
+
items:
|
|
943
|
+
type: string
|
|
944
|
+
steps:
|
|
945
|
+
type: array
|
|
946
|
+
items:
|
|
947
|
+
type: string
|
|
948
|
+
routes:
|
|
949
|
+
type: array
|
|
950
|
+
items:
|
|
951
|
+
type: string
|
|
855
952
|
command:
|
|
856
953
|
type: string
|
|
857
954
|
filePath:
|
|
@@ -868,6 +965,25 @@ paths:
|
|
|
868
965
|
type: boolean
|
|
869
966
|
requireVersionNotBehindBase:
|
|
870
967
|
type: boolean
|
|
968
|
+
workflowDispatch:
|
|
969
|
+
type: object
|
|
970
|
+
description: Evidence required before running `gh workflow run` or another environment-specific workflow dispatch.
|
|
971
|
+
properties:
|
|
972
|
+
environment:
|
|
973
|
+
type: string
|
|
974
|
+
description: Requested environment such as dev, staging, beta, or release.
|
|
975
|
+
workflow:
|
|
976
|
+
type: string
|
|
977
|
+
description: Expected workflow file or workflow name.
|
|
978
|
+
ref:
|
|
979
|
+
type: string
|
|
980
|
+
description: Expected branch or ref passed to the workflow dispatch command.
|
|
981
|
+
sha:
|
|
982
|
+
type: string
|
|
983
|
+
description: Expected HEAD SHA to verify before and after dispatch.
|
|
984
|
+
job:
|
|
985
|
+
type: string
|
|
986
|
+
description: Expected job name to verify before reporting the workflow URL.
|
|
871
987
|
responses:
|
|
872
988
|
'200':
|
|
873
989
|
description: Persisted workflow-sentinel recommendation with decision-control metadata and actionId
|
|
@@ -1121,7 +1237,7 @@ paths:
|
|
|
1121
1237
|
description: Comma-separated tags that must all be present on a lesson.
|
|
1122
1238
|
responses:
|
|
1123
1239
|
'200':
|
|
1124
|
-
description: Searchable promoted lessons with linked corrective actions, prevention rules, and auto-
|
|
1240
|
+
description: Searchable promoted lessons with linked corrective actions, prevention rules, and auto-promoted checks
|
|
1125
1241
|
'401':
|
|
1126
1242
|
description: Unauthorized
|
|
1127
1243
|
/v1/search:
|