npm - thumbgate - Versions diffs - 1.14.1 → 1.16.0 - Mend

thumbgate 1.14.1 → 1.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (150)

package/.claude-plugin/marketplace.json +6 -6
package/.claude-plugin/plugin.json +3 -3
package/.well-known/llms.txt +5 -5
package/.well-known/mcp/server-card.json +1 -1
package/README.md +60 -35
package/adapters/chatgpt/openapi.yaml +118 -2
package/adapters/claude/.mcp.json +2 -2
package/adapters/mcp/server-stdio.js +217 -84
package/adapters/opencode/opencode.json +1 -1
package/bench/prompt-eval-suite.json +5 -1
package/bin/cli.js +211 -8
package/config/enforcement.json +59 -7
package/config/evals/agent-safety-eval.json +338 -22
package/config/gates/default.json +33 -0
package/config/gates/routine.json +43 -0
package/config/github-about.json +3 -3
package/config/mcp-allowlists.json +4 -0
package/config/merge-quality-checks.json +2 -1
package/config/model-candidates.json +131 -0
package/openapi/openapi.yaml +118 -2
package/package.json +70 -51
package/public/blog.html +7 -7
package/public/codex-plugin.html +13 -7
package/public/compare.html +29 -23
package/public/dashboard.html +105 -12
package/public/guide.html +28 -28
package/public/index.html +233 -97
package/public/learn.html +87 -20
package/public/lessons.html +26 -2
package/public/numbers.html +271 -0
package/public/pro.html +89 -19
package/scripts/agent-audit-trace.js +55 -0
package/scripts/agent-memory-lifecycle.js +96 -0
package/scripts/agent-readiness-plan.js +118 -0
package/scripts/agentic-data-pipeline.js +21 -1
package/scripts/agents-sdk-sandbox-plan.js +57 -0
package/scripts/ai-org-governance.js +98 -0
package/scripts/ai-search-distribution.js +43 -0
package/scripts/artifact-agent-plan.js +81 -0
package/scripts/billing.js +27 -8
package/scripts/cli-feedback.js +2 -1
package/scripts/cli-schema.js +60 -5
package/scripts/code-mode-mcp-plan.js +71 -0
package/scripts/commercial-offer.js +1 -1
package/scripts/context-engine.js +1 -2
package/scripts/context-manager.js +4 -1
package/scripts/contextfs.js +214 -32
package/scripts/dashboard-render-spec.js +1 -1
package/scripts/dashboard.js +275 -9
package/scripts/decision-journal.js +13 -3
package/scripts/document-workflow-governance.js +62 -0
package/scripts/enterprise-agent-rollout.js +34 -0
package/scripts/experience-replay-governance.js +69 -0
package/scripts/export-hf-dataset.js +1 -1
package/scripts/feedback-loop.js +141 -9
package/scripts/feedback-to-rules.js +17 -23
package/scripts/gates-engine.js +4 -6
package/scripts/growth-campaigns.js +49 -0
package/scripts/harness-selector.js +145 -1
package/scripts/hybrid-supervisor-agent.js +64 -0
package/scripts/inference-cache-policy.js +72 -0
package/scripts/inference-economics.js +53 -0
package/scripts/internal-agent-bootstrap.js +12 -2
package/scripts/knowledge-layer-plan.js +108 -0
package/scripts/lesson-canonical.js +181 -0
package/scripts/lesson-db.js +71 -10
package/scripts/lesson-inference.js +183 -44
package/scripts/lesson-search.js +4 -1
package/scripts/lesson-synthesis.js +23 -2
package/scripts/llm-client.js +157 -26
package/scripts/mailer/resend-mailer.js +112 -1
package/scripts/mcp-transport-strategy.js +66 -0
package/scripts/memory-store-governance.js +60 -0
package/scripts/meta-agent-loop.js +7 -13
package/scripts/model-access-eligibility.js +38 -0
package/scripts/model-migration-readiness.js +55 -0
package/scripts/native-messaging-audit.js +514 -0
package/scripts/operational-integrity.js +96 -3
package/scripts/otel-declarative-config.js +56 -0
package/scripts/perplexity-client.js +1 -1
package/scripts/post-training-governance.js +34 -0
package/scripts/pr-manager.js +47 -7
package/scripts/private-core-boundary.js +72 -0
package/scripts/production-agent-readiness.js +40 -0
package/scripts/profile-router.js +16 -1
package/scripts/prompt-eval.js +564 -32
package/scripts/prompt-programs.js +93 -0
package/scripts/provider-action-normalizer.js +585 -0
package/scripts/rule-validator.js +285 -0
package/scripts/scaling-law-claims.js +60 -0
package/scripts/security-scanner.js +1 -1
package/scripts/self-distill-agent.js +7 -32
package/scripts/seo-gsd.js +400 -43
package/scripts/skill-rag-router.js +53 -0
package/scripts/spec-gate.js +1 -1
package/scripts/student-consistent-training.js +73 -0
package/scripts/synthetic-data-provenance.js +98 -0
package/scripts/task-context-result.js +81 -0
package/scripts/telemetry-analytics.js +149 -0
package/scripts/thompson-sampling.js +2 -2
package/scripts/token-savings.js +7 -6
package/scripts/token-tco.js +46 -0
package/scripts/tool-registry.js +75 -3
package/scripts/verification-loop.js +10 -1
package/scripts/verifier-scoring.js +71 -0
package/scripts/workflow-sentinel.js +284 -28
package/scripts/workspace-agent-routines.js +118 -0
package/skills/thumbgate/SKILL.md +1 -1
package/src/api/server.js +434 -120
package/.claude-plugin/README.md +0 -170
package/adapters/README.md +0 -12
package/scripts/analytics-report.js +0 -328
package/scripts/autonomous-workflow.js +0 -377
package/scripts/billing-setup.js +0 -109
package/scripts/creator-campaigns.js +0 -239
package/scripts/cross-encoder-reranker.js +0 -235
package/scripts/daemon-manager.js +0 -108
package/scripts/decision-trace.js +0 -354
package/scripts/delegation-runtime.js +0 -896
package/scripts/dispatch-brief.js +0 -159
package/scripts/distribution-surfaces.js +0 -110
package/scripts/feedback-history-distiller.js +0 -382
package/scripts/funnel-analytics.js +0 -35
package/scripts/history-distiller.js +0 -200
package/scripts/hosted-job-launcher.js +0 -256
package/scripts/intent-router.js +0 -392
package/scripts/lesson-reranker.js +0 -263
package/scripts/lesson-retrieval.js +0 -148
package/scripts/managed-lesson-agent.js +0 -183
package/scripts/operational-dashboard.js +0 -103
package/scripts/operational-summary.js +0 -129
package/scripts/operator-artifacts.js +0 -608
package/scripts/optimize-context.js +0 -17
package/scripts/org-dashboard.js +0 -206
package/scripts/partner-orchestration.js +0 -146
package/scripts/predictive-insights.js +0 -356
package/scripts/pulse.js +0 -80
package/scripts/reflector-agent.js +0 -221
package/scripts/sales-pipeline.js +0 -681
package/scripts/session-episode-store.js +0 -329
package/scripts/session-health-sensor.js +0 -242
package/scripts/session-report.js +0 -120
package/scripts/swarm-coordinator.js +0 -81
package/scripts/tool-kpi-tracker.js +0 -12
package/scripts/webhook-delivery.js +0 -62
package/scripts/workflow-sprint-intake.js +0 -475
package/skills/agent-memory/SKILL.md +0 -97
package/skills/solve-architecture-autonomy/SKILL.md +0 -17
package/skills/solve-architecture-autonomy/tool.js +0 -33
package/skills/thumbgate-feedback/SKILL.md +0 -49

package/config/model-candidates.json ADDED Viewed

@@ -0,0 +1,131 @@
+{
+  "version": 1,
+  "description": "Managed model candidates for ThumbGate workload benchmarking. Catalog only: no provider-specific runtime dependency is assumed here.",
+  "workloads": {
+    "pretool-gating": {
+      "label": "PreTool gating",
+      "summary": "Fast, reliable gate judgments for tool-use and agentic coding decisions before commands run.",
+      "desiredStrengths": ["agentic-coding", "tool-use", "reliability"],
+      "targetContextWindow": 64000,
+      "benchmarkCommands": [
+        "npx thumbgate eval --from-feedback --json --min-score=0",
+        "node scripts/gate-eval.js run",
+        "npx thumbgate bench --json --min-score=90"
+      ],
+      "metrics": [
+        "passRate",
+        "falsePositiveRate",
+        "falseNegativeRate",
+        "medianLatencyMs",
+        "costPer1kActionsUsd"
+      ]
+    },
+    "long-trace-review": {
+      "label": "Long trace review",
+      "summary": "Review long agent traces, multi-step failures, and large-context coding sessions without dropping important detail.",
+      "desiredStrengths": ["long-horizon-coding", "multi-agent", "reliability"],
+      "targetContextWindow": 128000,
+      "benchmarkCommands": [
+        "npx thumbgate eval --from-feedback --json --min-score=0",
+        "node scripts/gate-eval.js run",
+        "npx thumbgate bench --json --min-score=90"
+      ],
+      "metrics": [
+        "passRate",
+        "longContextReliability",
+        "traceCompressionLoss",
+        "medianLatencyMs",
+        "costPerTraceUsd"
+      ]
+    },
+    "cheap-fast-path": {
+      "label": "Cheap fast path",
+      "summary": "Low-cost first-pass model for cheap approval triage before escalating ambiguous work.",
+      "desiredStrengths": ["agentic-coding", "tool-use"],
+      "targetContextWindow": 32000,
+      "benchmarkCommands": [
+        "npx thumbgate eval --from-feedback --json --min-score=0",
+        "node scripts/gate-eval.js run",
+        "npx thumbgate bench --json --min-score=90"
+      ],
+      "metrics": [
+        "passRate",
+        "medianLatencyMs",
+        "costPer1kActionsUsd",
+        "escalationRate"
+      ]
+    }
+  },
+  "candidates": [
+    {
+      "id": "anthropic/claude-haiku-4-5",
+      "vendor": "Anthropic",
+      "family": "claude",
+      "provider": "anthropic",
+      "model": "claude-haiku-4-5-20251001",
+      "contextWindow": 200000,
+      "costClass": "low",
+      "strengths": ["tool-use", "reliability", "fast-inference"],
+      "notes": "Fast control candidate for cheap approval triage."
+    },
+    {
+      "id": "anthropic/claude-sonnet-4-6",
+      "vendor": "Anthropic",
+      "family": "claude",
+      "provider": "anthropic",
+      "model": "claude-sonnet-4-6",
+      "contextWindow": 200000,
+      "costClass": "medium",
+      "strengths": ["agentic-coding", "tool-use", "reliability", "long-horizon-coding"],
+      "notes": "Current stronger managed control candidate."
+    },
+    {
+      "id": "tinker/kimi-k2.6-32k",
+      "vendor": "Thinking Machines",
+      "family": "kimi",
+      "provider": "openai-compatible",
+      "gateway": "tinker",
+      "model": "kimi-k2.6-32k",
+      "contextWindow": 32000,
+      "costClass": "medium",
+      "strengths": ["long-horizon-coding", "multi-agent", "reliability"],
+      "notes": "Tinker April 23, 2026 release. Good candidate when long-horizon coding matters more than ultra-low latency."
+    },
+    {
+      "id": "tinker/kimi-k2.6-128k",
+      "vendor": "Thinking Machines",
+      "family": "kimi",
+      "provider": "openai-compatible",
+      "gateway": "tinker",
+      "model": "kimi-k2.6-128k",
+      "contextWindow": 128000,
+      "costClass": "medium",
+      "strengths": ["long-horizon-coding", "multi-agent", "reliability", "long-context"],
+      "notes": "Highest-ROI Kimi candidate for long traces and multi-step review."
+    },
+    {
+      "id": "tinker/qwen3.6-35b-a3b",
+      "vendor": "Thinking Machines",
+      "family": "qwen",
+      "provider": "openai-compatible",
+      "gateway": "tinker",
+      "model": "qwen3.6-35b-a3b",
+      "contextWindow": 64000,
+      "costClass": "low",
+      "strengths": ["agentic-coding", "tool-use", "reliability", "fast-inference"],
+      "notes": "Best first Tinker candidate for ThumbGate pre-action gating and tool-risk classification."
+    },
+    {
+      "id": "tinker/qwen3.6-27b",
+      "vendor": "Thinking Machines",
+      "family": "qwen",
+      "provider": "openai-compatible",
+      "gateway": "tinker",
+      "model": "qwen3.6-27b",
+      "contextWindow": 64000,
+      "costClass": "low",
+      "strengths": ["agentic-coding", "tool-use", "fast-inference"],
+      "notes": "Cheapest Tinker candidate for the fast gate path; use when latency/cost matter most."
+    }
+  ]
+}

package/openapi/openapi.yaml CHANGED Viewed

@@ -751,6 +751,34 @@ paths:
                 $ref: '#/components/schemas/FunnelAnalyticsResponse'
         '401':
           description: Unauthorized
+  /v1/analytics/losses:
+    get:
+      operationId: getLossAnalytics
+      parameters:
+        - in: query
+          name: window
+          schema:
+            type: string
+            enum: [today, 7d, 30d, lifetime]
+        - in: query
+          name: timezone
+          schema:
+            type: string
+        - in: query
+          name: now
+          schema:
+            type: string
+            format: date-time
+      responses:
+        '200':
+          description: Ranked buyer-loss and revenue-opportunity analysis for the active analytics window
+          content:
+            application/json:
+              schema:
+                type: object
+                additionalProperties: true
+        '401':
+          description: Unauthorized
   /v1/dashboard:
     get:
       operationId: getDashboard
@@ -848,10 +876,79 @@ paths:
           application/json:
             schema:
               type: object
-              required: [toolName]
               properties:
                 toolName:
                   type: string
+                  description: Tool name is optional when provider-native tool call payload is supplied.
+                provider:
+                  type: string
+                model:
+                  type: string
+                providerToolCall:
+                  type: object
+                  additionalProperties: true
+                toolCall:
+                  type: object
+                  additionalProperties: true
+                toolUse:
+                  type: object
+                  additionalProperties: true
+                content:
+                  type: array
+                  items:
+                    type: object
+                    additionalProperties: true
+                input:
+                  type: object
+                  additionalProperties: true
+                arguments:
+                  type: object
+                  additionalProperties: true
+                method:
+                  type: string
+                params:
+                  type: object
+                  additionalProperties: true
+                mcp:
+                  type: object
+                  additionalProperties: true
+                mcpToolCall:
+                  type: object
+                  additionalProperties: true
+                usage:
+                  type: object
+                  additionalProperties: true
+                tokenEstimate:
+                  type: number
+                costUsd:
+                  type: number
+                budget:
+                  type: object
+                  additionalProperties: true
+                workflowPattern:
+                  type: string
+                  enum: [single_action, chaining, routing, parallelization, evaluator-optimizer, agent]
+                workflow:
+                  type: object
+                  additionalProperties: true
+                goal:
+                  type: string
+                tools:
+                  type: array
+                  items:
+                    type: string
+                branches:
+                  type: array
+                  items:
+                    type: string
+                steps:
+                  type: array
+                  items:
+                    type: string
+                routes:
+                  type: array
+                  items:
+                    type: string
                 command:
                   type: string
                 filePath:
@@ -868,6 +965,25 @@ paths:
                   type: boolean
                 requireVersionNotBehindBase:
                   type: boolean
+                workflowDispatch:
+                  type: object
+                  description: Evidence required before running `gh workflow run` or another environment-specific workflow dispatch.
+                  properties:
+                    environment:
+                      type: string
+                      description: Requested environment such as dev, staging, beta, or release.
+                    workflow:
+                      type: string
+                      description: Expected workflow file or workflow name.
+                    ref:
+                      type: string
+                      description: Expected branch or ref passed to the workflow dispatch command.
+                    sha:
+                      type: string
+                      description: Expected HEAD SHA to verify before and after dispatch.
+                    job:
+                      type: string
+                      description: Expected job name to verify before reporting the workflow URL.
       responses:
         '200':
           description: Persisted workflow-sentinel recommendation with decision-control metadata and actionId
@@ -1121,7 +1237,7 @@ paths:
           description: Comma-separated tags that must all be present on a lesson.
       responses:
         '200':
-          description: Searchable promoted lessons with linked corrective actions, prevention rules, and auto-gates
+          description: Searchable promoted lessons with linked corrective actions, prevention rules, and auto-promoted checks
         '401':
           description: Unauthorized
   /v1/search: