thumbgate 1.14.1 → 1.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +6 -6
- package/.claude-plugin/plugin.json +3 -3
- package/.well-known/llms.txt +5 -5
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +60 -35
- package/adapters/chatgpt/openapi.yaml +118 -2
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/mcp/server-stdio.js +217 -84
- package/adapters/opencode/opencode.json +1 -1
- package/bench/prompt-eval-suite.json +5 -1
- package/bin/cli.js +211 -8
- package/config/enforcement.json +59 -7
- package/config/evals/agent-safety-eval.json +338 -22
- package/config/gates/default.json +33 -0
- package/config/gates/routine.json +43 -0
- package/config/github-about.json +3 -3
- package/config/mcp-allowlists.json +4 -0
- package/config/merge-quality-checks.json +2 -1
- package/config/model-candidates.json +131 -0
- package/openapi/openapi.yaml +118 -2
- package/package.json +70 -51
- package/public/blog.html +7 -7
- package/public/codex-plugin.html +13 -7
- package/public/compare.html +29 -23
- package/public/dashboard.html +105 -12
- package/public/guide.html +28 -28
- package/public/index.html +233 -97
- package/public/learn.html +87 -20
- package/public/lessons.html +26 -2
- package/public/numbers.html +271 -0
- package/public/pro.html +89 -19
- package/scripts/agent-audit-trace.js +55 -0
- package/scripts/agent-memory-lifecycle.js +96 -0
- package/scripts/agent-readiness-plan.js +118 -0
- package/scripts/agentic-data-pipeline.js +21 -1
- package/scripts/agents-sdk-sandbox-plan.js +57 -0
- package/scripts/ai-org-governance.js +98 -0
- package/scripts/ai-search-distribution.js +43 -0
- package/scripts/artifact-agent-plan.js +81 -0
- package/scripts/billing.js +27 -8
- package/scripts/cli-feedback.js +2 -1
- package/scripts/cli-schema.js +60 -5
- package/scripts/code-mode-mcp-plan.js +71 -0
- package/scripts/commercial-offer.js +1 -1
- package/scripts/context-engine.js +1 -2
- package/scripts/context-manager.js +4 -1
- package/scripts/contextfs.js +214 -32
- package/scripts/dashboard-render-spec.js +1 -1
- package/scripts/dashboard.js +275 -9
- package/scripts/decision-journal.js +13 -3
- package/scripts/document-workflow-governance.js +62 -0
- package/scripts/enterprise-agent-rollout.js +34 -0
- package/scripts/experience-replay-governance.js +69 -0
- package/scripts/export-hf-dataset.js +1 -1
- package/scripts/feedback-loop.js +141 -9
- package/scripts/feedback-to-rules.js +17 -23
- package/scripts/gates-engine.js +4 -6
- package/scripts/growth-campaigns.js +49 -0
- package/scripts/harness-selector.js +145 -1
- package/scripts/hybrid-supervisor-agent.js +64 -0
- package/scripts/inference-cache-policy.js +72 -0
- package/scripts/inference-economics.js +53 -0
- package/scripts/internal-agent-bootstrap.js +12 -2
- package/scripts/knowledge-layer-plan.js +108 -0
- package/scripts/lesson-canonical.js +181 -0
- package/scripts/lesson-db.js +71 -10
- package/scripts/lesson-inference.js +183 -44
- package/scripts/lesson-search.js +4 -1
- package/scripts/lesson-synthesis.js +23 -2
- package/scripts/llm-client.js +157 -26
- package/scripts/mailer/resend-mailer.js +112 -1
- package/scripts/mcp-transport-strategy.js +66 -0
- package/scripts/memory-store-governance.js +60 -0
- package/scripts/meta-agent-loop.js +7 -13
- package/scripts/model-access-eligibility.js +38 -0
- package/scripts/model-migration-readiness.js +55 -0
- package/scripts/native-messaging-audit.js +514 -0
- package/scripts/operational-integrity.js +96 -3
- package/scripts/otel-declarative-config.js +56 -0
- package/scripts/perplexity-client.js +1 -1
- package/scripts/post-training-governance.js +34 -0
- package/scripts/pr-manager.js +47 -7
- package/scripts/private-core-boundary.js +72 -0
- package/scripts/production-agent-readiness.js +40 -0
- package/scripts/profile-router.js +16 -1
- package/scripts/prompt-eval.js +564 -32
- package/scripts/prompt-programs.js +93 -0
- package/scripts/provider-action-normalizer.js +585 -0
- package/scripts/rule-validator.js +285 -0
- package/scripts/scaling-law-claims.js +60 -0
- package/scripts/security-scanner.js +1 -1
- package/scripts/self-distill-agent.js +7 -32
- package/scripts/seo-gsd.js +400 -43
- package/scripts/skill-rag-router.js +53 -0
- package/scripts/spec-gate.js +1 -1
- package/scripts/student-consistent-training.js +73 -0
- package/scripts/synthetic-data-provenance.js +98 -0
- package/scripts/task-context-result.js +81 -0
- package/scripts/telemetry-analytics.js +149 -0
- package/scripts/thompson-sampling.js +2 -2
- package/scripts/token-savings.js +7 -6
- package/scripts/token-tco.js +46 -0
- package/scripts/tool-registry.js +75 -3
- package/scripts/verification-loop.js +10 -1
- package/scripts/verifier-scoring.js +71 -0
- package/scripts/workflow-sentinel.js +284 -28
- package/scripts/workspace-agent-routines.js +118 -0
- package/skills/thumbgate/SKILL.md +1 -1
- package/src/api/server.js +434 -120
- package/.claude-plugin/README.md +0 -170
- package/adapters/README.md +0 -12
- package/scripts/analytics-report.js +0 -328
- package/scripts/autonomous-workflow.js +0 -377
- package/scripts/billing-setup.js +0 -109
- package/scripts/creator-campaigns.js +0 -239
- package/scripts/cross-encoder-reranker.js +0 -235
- package/scripts/daemon-manager.js +0 -108
- package/scripts/decision-trace.js +0 -354
- package/scripts/delegation-runtime.js +0 -896
- package/scripts/dispatch-brief.js +0 -159
- package/scripts/distribution-surfaces.js +0 -110
- package/scripts/feedback-history-distiller.js +0 -382
- package/scripts/funnel-analytics.js +0 -35
- package/scripts/history-distiller.js +0 -200
- package/scripts/hosted-job-launcher.js +0 -256
- package/scripts/intent-router.js +0 -392
- package/scripts/lesson-reranker.js +0 -263
- package/scripts/lesson-retrieval.js +0 -148
- package/scripts/managed-lesson-agent.js +0 -183
- package/scripts/operational-dashboard.js +0 -103
- package/scripts/operational-summary.js +0 -129
- package/scripts/operator-artifacts.js +0 -608
- package/scripts/optimize-context.js +0 -17
- package/scripts/org-dashboard.js +0 -206
- package/scripts/partner-orchestration.js +0 -146
- package/scripts/predictive-insights.js +0 -356
- package/scripts/pulse.js +0 -80
- package/scripts/reflector-agent.js +0 -221
- package/scripts/sales-pipeline.js +0 -681
- package/scripts/session-episode-store.js +0 -329
- package/scripts/session-health-sensor.js +0 -242
- package/scripts/session-report.js +0 -120
- package/scripts/swarm-coordinator.js +0 -81
- package/scripts/tool-kpi-tracker.js +0 -12
- package/scripts/webhook-delivery.js +0 -62
- package/scripts/workflow-sprint-intake.js +0 -475
- package/skills/agent-memory/SKILL.md +0 -97
- package/skills/solve-architecture-autonomy/SKILL.md +0 -17
- package/skills/solve-architecture-autonomy/tool.js +0 -33
- package/skills/thumbgate-feedback/SKILL.md +0 -49
package/scripts/tool-registry.js
CHANGED
|
@@ -387,7 +387,7 @@ const TOOLS = [
|
|
|
387
387
|
}),
|
|
388
388
|
readOnlyTool({
|
|
389
389
|
name: 'enforcement_matrix',
|
|
390
|
-
description: 'Show the full Enforcement Matrix: feedback pipeline stats, active pre-action
|
|
390
|
+
description: 'Show the full Enforcement Matrix: feedback pipeline stats, active pre-action checks, and rejection ledger with revival conditions.',
|
|
391
391
|
inputSchema: {
|
|
392
392
|
type: 'object',
|
|
393
393
|
properties: {},
|
|
@@ -773,9 +773,69 @@ const TOOLS = [
|
|
|
773
773
|
description: 'Predict pre-action workflow risk, blast radius, and remediations before a tool call executes.',
|
|
774
774
|
inputSchema: {
|
|
775
775
|
type: 'object',
|
|
776
|
-
required: ['toolName'],
|
|
777
776
|
properties: {
|
|
778
|
-
toolName: { type: 'string', description: 'Tool being assessed, such as Bash, Edit, or Write' },
|
|
777
|
+
toolName: { type: 'string', description: 'Tool being assessed, such as Bash, Edit, or Write. Optional when provider-native tool call payload is supplied.' },
|
|
778
|
+
provider: { type: 'string', description: 'Optional provider name, such as anthropic, openai, codex, cursor, gemini, or mcp' },
|
|
779
|
+
model: { type: 'string', description: 'Optional model name used for audit evidence and budget review' },
|
|
780
|
+
providerToolCall: {
|
|
781
|
+
type: 'object',
|
|
782
|
+
additionalProperties: true,
|
|
783
|
+
description: 'Provider-native tool call object, including Anthropic tool_use or OpenAI function/tool call shapes',
|
|
784
|
+
},
|
|
785
|
+
content: {
|
|
786
|
+
type: 'array',
|
|
787
|
+
items: { type: 'object', additionalProperties: true },
|
|
788
|
+
description: 'Provider-native message content blocks; Anthropic tool_use blocks are normalized automatically',
|
|
789
|
+
},
|
|
790
|
+
method: { type: 'string', description: 'Optional JSON-RPC/MCP method, such as tools/call' },
|
|
791
|
+
params: {
|
|
792
|
+
type: 'object',
|
|
793
|
+
additionalProperties: true,
|
|
794
|
+
description: 'Optional JSON-RPC/MCP params, including tools/call name and arguments, resources/read URI, or prompts/get template arguments',
|
|
795
|
+
},
|
|
796
|
+
usage: {
|
|
797
|
+
type: 'object',
|
|
798
|
+
additionalProperties: true,
|
|
799
|
+
description: 'Provider token/cost usage, such as input_tokens, output_tokens, or total_tokens',
|
|
800
|
+
},
|
|
801
|
+
tokenEstimate: { type: 'number', description: 'Estimated total tokens for this action when provider usage is unavailable' },
|
|
802
|
+
costUsd: { type: 'number', description: 'Estimated USD cost for this action when provider usage is unavailable' },
|
|
803
|
+
budget: {
|
|
804
|
+
type: 'object',
|
|
805
|
+
additionalProperties: true,
|
|
806
|
+
description: 'Optional per-action budget controls: maxTokensPerAction, remainingTokens, maxCostUsdPerAction, remainingCostUsd, maxParallelBranches',
|
|
807
|
+
},
|
|
808
|
+
workflowPattern: {
|
|
809
|
+
type: 'string',
|
|
810
|
+
enum: ['single_action', 'chaining', 'routing', 'parallelization', 'evaluator-optimizer', 'agent'],
|
|
811
|
+
description: 'Optional workflow architecture hint. Agents require inspection evidence; predefined workflows are easier to evaluate.',
|
|
812
|
+
},
|
|
813
|
+
workflow: {
|
|
814
|
+
type: 'object',
|
|
815
|
+
additionalProperties: true,
|
|
816
|
+
description: 'Optional workflow metadata: pattern, steps, routes, branches, tools, inspection, and verification evidence.',
|
|
817
|
+
},
|
|
818
|
+
goal: { type: 'string', description: 'Optional agent goal for open-ended tool planning.' },
|
|
819
|
+
tools: {
|
|
820
|
+
type: 'array',
|
|
821
|
+
items: { type: 'string' },
|
|
822
|
+
description: 'Optional abstract/combinable tool names available to an open-ended agent.',
|
|
823
|
+
},
|
|
824
|
+
branches: {
|
|
825
|
+
type: 'array',
|
|
826
|
+
items: { type: 'string' },
|
|
827
|
+
description: 'Optional parallel workflow branches for fan-out budget and review checks.',
|
|
828
|
+
},
|
|
829
|
+
steps: {
|
|
830
|
+
type: 'array',
|
|
831
|
+
items: { type: 'string' },
|
|
832
|
+
description: 'Optional predefined workflow steps for chaining/evaluator workflow audit evidence.',
|
|
833
|
+
},
|
|
834
|
+
routes: {
|
|
835
|
+
type: 'array',
|
|
836
|
+
items: { type: 'string' },
|
|
837
|
+
description: 'Optional routing workflow destinations or classes.',
|
|
838
|
+
},
|
|
779
839
|
command: { type: 'string', description: 'Optional shell command when toolName is Bash' },
|
|
780
840
|
filePath: { type: 'string', description: 'Optional primary file path for edit-like tools' },
|
|
781
841
|
changedFiles: {
|
|
@@ -841,6 +901,18 @@ const TOOLS = [
|
|
|
841
901
|
properties: {},
|
|
842
902
|
},
|
|
843
903
|
}),
|
|
904
|
+
readOnlyTool({
|
|
905
|
+
name: 'native_messaging_audit',
|
|
906
|
+
description: 'Audit local browser native messaging hosts and AI browser bridges. Flags missing host binaries, pre-authorized extension bridges, and manifests for browsers not detected locally.',
|
|
907
|
+
inputSchema: {
|
|
908
|
+
type: 'object',
|
|
909
|
+
properties: {
|
|
910
|
+
platform: { type: 'string', enum: ['darwin', 'linux', 'win32'], description: 'Optional platform override for manifest discovery.' },
|
|
911
|
+
homeDir: { type: 'string', description: 'Optional home-directory override for manifest discovery.' },
|
|
912
|
+
aiOnly: { type: 'boolean', description: 'When true, only AI/browser bridge manifests are returned.' },
|
|
913
|
+
},
|
|
914
|
+
},
|
|
915
|
+
}),
|
|
844
916
|
readOnlyTool({
|
|
845
917
|
name: 'commerce_recall',
|
|
846
918
|
description: 'Recall past feedback filtered by commerce categories (product_recommendation, brand_compliance, sizing, pricing, regulatory). Returns quality scores alongside memories for agentic commerce agents.',
|
|
@@ -6,11 +6,20 @@ const {
|
|
|
6
6
|
getFeedbackPaths,
|
|
7
7
|
appendDiagnosticRecord,
|
|
8
8
|
} = require('./feedback-loop');
|
|
9
|
+
const { loadOptionalModule } = require('./private-core-boundary');
|
|
9
10
|
const {
|
|
10
11
|
buildPartnerStrategy,
|
|
11
12
|
computePartnerReward,
|
|
12
13
|
resolveVerificationRetries,
|
|
13
|
-
} =
|
|
14
|
+
} = loadOptionalModule('./partner-orchestration', () => ({
|
|
15
|
+
buildPartnerStrategy: ({ partnerProfile } = {}) => ({
|
|
16
|
+
profile: partnerProfile || 'public-shell',
|
|
17
|
+
verificationMode: 'local-only',
|
|
18
|
+
recommendedChecks: [],
|
|
19
|
+
}),
|
|
20
|
+
computePartnerReward: () => 0,
|
|
21
|
+
resolveVerificationRetries: (requestedMaxRetries) => requestedMaxRetries,
|
|
22
|
+
}));
|
|
14
23
|
const {
|
|
15
24
|
diagnoseFailure,
|
|
16
25
|
} = require('./failure-diagnostics');
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
 * Build the rubric used to judge granular LLM-verifier scores.
 *
 * Invalid overrides fall back to the defaults instead of being passed through,
 * because a zero/negative granularity or repeat count would break the score
 * normalisation and repeat-coverage checks performed by consumers of the rubric.
 *
 * @param {object} [options] - Optional overrides; `null` or a non-object is treated as `{}`.
 * @param {string[]} [options.criteria] - Criterion names; must be an array, otherwise the four defaults are used.
 * @param {number} [options.granularity=100] - Upper bound of the raw score scale; must be finite and > 0.
 * @param {number} [options.repeats=3] - Verifications expected per criterion; must be finite and > 0.
 * @param {number} [options.passThreshold=0.82] - Minimum averaged score for an `allow` decision; must be finite.
 * @returns {{rubricId: string, criteria: string[], granularity: number, repeats: number,
 *   scoring: string, passThreshold: number, caveats: string[]}} The rubric description.
 */
function buildVerifierScoringRubric(options = {}) {
  // A default parameter only covers `undefined`; guard explicit null / primitives too.
  const overrides = options !== null && typeof options === 'object' ? options : {};
  const isPositiveFinite = (value) => Number.isFinite(value) && value > 0;

  // Copy the caller's array so later mutation on either side cannot change the other.
  const criteria = Array.isArray(overrides.criteria)
    ? [...overrides.criteria]
    : [
      'evidence_cited',
      'scope_respected',
      'tests_or_proof_run',
      'claim_matches_artifacts',
    ];
  const granularity = isPositiveFinite(overrides.granularity) ? overrides.granularity : 100;
  const repeats = isPositiveFinite(overrides.repeats) ? overrides.repeats : 3;

  return {
    rubricId: 'granular_llm_verifier',
    criteria,
    granularity,
    repeats,
    scoring: 'probability_weighted_average',
    passThreshold: Number.isFinite(overrides.passThreshold) ? overrides.passThreshold : 0.82,
    caveats: [
      'calibrate against held-out human labels before production blocking',
      'fall back to coarse scores when model cannot expose score-token probabilities',
      'never use verifier score alone for destructive actions',
    ],
  };
}
|
|
27
|
+
|
|
28
|
+
/**
 * Aggregate repeated per-criterion verifier scores into one decision.
 *
 * Each entry of `scores` is expected to carry a `repeats` array of numbers.
 * Values greater than 1 are treated as raw scores on the rubric's
 * `granularity` scale and divided by it; values at or below 1 are used as-is.
 * Note that a raw score of exactly 1 on a coarse scale is therefore
 * indistinguishable from a unit score of 1.0.
 *
 * @param {object} [input]
 * @param {Array<{repeats?: number[]}>} [input.scores=[]] - One entry per criterion;
 *   malformed entries (null, non-object, missing `repeats`) count as having no values.
 * @param {object} [input.rubric] - Rubric with `criteria`, `granularity`, `repeats`
 *   and `passThreshold`; defaults to `buildVerifierScoringRubric()`.
 * @returns {{score: number, decision: 'allow'|'warn', issues: string[]}}
 *   `allow` only when the mean reaches `passThreshold` and no issue was recorded.
 */
function computeVerifierScore({ scores = [], rubric = buildVerifierScoringRubric() } = {}) {
  const entries = Array.isArray(scores) ? scores : [];

  // Keep values grouped per criterion so coverage can be checked per criterion
  // and not only in aggregate.
  const valuesPerCriterion = entries.map((criterion) => (
    criterion && Array.isArray(criterion.repeats)
      ? criterion.repeats.filter((value) => Number.isFinite(value))
      : []
  ));
  const flattened = valuesPerCriterion.flat();

  if (flattened.length === 0) {
    return {
      score: 0,
      decision: 'warn',
      issues: ['missing_verifier_scores'],
    };
  }

  // Normalise to [0, 1]. Anything that lands outside that range (negative input,
  // raw value above the granularity, or a zero/missing granularity) is clamped
  // and reported, so invalid data can never inflate the mean into an 'allow'.
  let hasOutOfRange = false;
  const normalized = [];
  for (const value of flattened) {
    const unit = value > 1 ? value / rubric.granularity : value;
    if (unit >= 0 && unit <= 1) {
      normalized.push(unit);
    } else {
      hasOutOfRange = true;
      normalized.push(Number.isNaN(unit) ? 0 : Math.min(1, Math.max(0, unit)));
    }
  }

  const score = normalized.reduce((sum, value) => sum + value, 0) / normalized.length;

  const issues = [];
  if (entries.length < rubric.criteria.length) issues.push('missing_criteria_scores');

  // The aggregate count alone lets one over-sampled criterion hide another with
  // too few verifications, so each criterion is also checked on its own.
  const aggregateShort = flattened.length < rubric.criteria.length * rubric.repeats;
  const someCriterionShort = valuesPerCriterion.some((values) => values.length < rubric.repeats);
  if (aggregateShort || someCriterionShort) issues.push('missing_repeat_verifications');

  if (hasOutOfRange) issues.push('out_of_range_verifier_scores');

  return {
    score: Number(score.toFixed(4)),
    decision: score >= rubric.passThreshold && issues.length === 0 ? 'allow' : 'warn',
    issues,
  };
}
|
|
53
|
+
|
|
54
|
+
/**
 * Check whether a verifier configuration is sound enough to rely on.
 *
 * @param {object} [setup] - Verifier configuration; `null` or a non-object is treated as `{}`.
 * @param {string[]} [setup.criteria] - Must be an array with at least three entries.
 * @param {number} [setup.repeats] - Must be a number (or numeric string) of at least 2.
 * @param {boolean} [setup.heldoutCalibration] - Truthy when held-out calibration was performed.
 * @param {boolean} [setup.destructiveAction] - Truthy when the gated action is destructive.
 * @param {boolean} [setup.humanReview] - Truthy when a human reviews the action.
 * @returns {{decision: 'allow'|'warn', issues: string[]}} `allow` only when no issue was found.
 */
function evaluateVerifierSetup(setup = {}) {
  // A default parameter only covers `undefined`; guard explicit null / primitives too.
  const config = setup !== null && typeof setup === 'object' ? setup : {};
  const issues = [];

  // Require a real array: a string such as 'abcd' also has a length >= 3.
  if (!Array.isArray(config.criteria) || config.criteria.length < 3) {
    issues.push('too_few_criteria');
  }

  // Coerce explicitly so a non-numeric value is rejected; a bare `x < 2`
  // comparison is false for NaN and would let it through.
  const repeats = Number(config.repeats);
  if (!Number.isFinite(repeats) || repeats < 2) {
    issues.push('repeat_verification_required');
  }

  if (!config.heldoutCalibration) issues.push('heldout_calibration_required');
  if (config.destructiveAction && !config.humanReview) {
    issues.push('human_review_required_for_destructive_action');
  }

  return {
    decision: issues.length ? 'warn' : 'allow',
    issues,
  };
}
|
|
66
|
+
|
|
67
|
+
// Public API: rubric construction, score aggregation, and verifier-setup evaluation.
module.exports = {
|
|
68
|
+
buildVerifierScoringRubric,
|
|
69
|
+
computeVerifierScore,
|
|
70
|
+
evaluateVerifierSetup,
|
|
71
|
+
};
|