thumbgate 1.26.7 → 1.27.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/.well-known/agentic-verify.txt +1 -0
- package/.well-known/llms.txt +2 -0
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +20 -9
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/gcp/dfcx-webhook-gate.js +295 -0
- package/adapters/mcp/server-stdio.js +28 -1
- package/adapters/opencode/opencode.json +1 -1
- package/bench/thumbgate-bench.json +2 -2
- package/bin/cli.js +147 -10
- package/bin/dashboard-cli.js +7 -0
- package/config/gate-classifier-routing.json +98 -0
- package/config/gate-templates.json +60 -0
- package/config/mcp-allowlists.json +8 -7
- package/config/model-candidates.json +71 -6
- package/package.json +26 -10
- package/public/chatgpt-app.html +330 -0
- package/public/codex-plugin.html +66 -14
- package/public/dashboard.html +203 -17
- package/public/index.html +79 -4
- package/public/learn.html +70 -0
- package/public/lessons.html +129 -6
- package/public/numbers.html +2 -2
- package/public/pricing.html +20 -2
- package/scripts/agent-operations-planner.js +621 -0
- package/scripts/agent-reward-model.js +53 -1
- package/scripts/ai-component-inventory.js +367 -0
- package/scripts/classifier-routing.js +130 -0
- package/scripts/cli-schema.js +26 -0
- package/scripts/dashboard-chat.js +64 -17
- package/scripts/feedback-sanitizer.js +105 -0
- package/scripts/gates-engine.js +258 -61
- package/scripts/hybrid-feedback-context.js +141 -7
- package/scripts/memory-scope-readiness.js +159 -0
- package/scripts/parallel-workflow-orchestrator.js +293 -0
- package/scripts/plausible-domain-config.js +86 -0
- package/scripts/plausible-server-events.js +4 -2
- package/scripts/proxy-pointer-rag-guardrails.js +42 -1
- package/scripts/qa-scenario-planner.js +136 -0
- package/scripts/repeat-metric.js +28 -12
- package/scripts/secret-fixture-tokens.js +61 -0
- package/scripts/secret-scanner.js +44 -5
- package/scripts/security-scanner.js +80 -0
- package/scripts/seo-gsd.js +53 -0
- package/scripts/thumbgate-bench.js +16 -1
- package/scripts/tool-registry.js +37 -0
- package/scripts/workflow-sentinel.js +189 -4
- package/src/api/server.js +276 -10
|
@@ -331,6 +331,55 @@ function buildRewardReport(episodes = [], options = {}) {
|
|
|
331
331
|
deep: 'destructive, public, or production-adjacent work',
|
|
332
332
|
xhigh: 'payments, secrets, deploy-prod, data-loss, force-push-main',
|
|
333
333
|
},
|
|
334
|
+
continualAdapterPlan: buildContinualAdapterTrainingPlan(episodes, options),
|
|
335
|
+
};
|
|
336
|
+
}
|
|
337
|
+
|
|
338
|
+
/**
 * Draft a continual-learning plan describing which LoRA adapters could be
 * trained from the recorded episodes.
 *
 * Preference pairs come from `buildPreferencePairs` and adapter candidates
 * from `rankGateCandidatesByReward`; this function only shapes their output
 * into a plan object and decides whether there is enough data to train.
 *
 * @param {Array} [episodes=[]] Episodes forwarded to the pair builder and ranker.
 * @param {object} [options={}] Forwarded to both helpers. Recognised here:
 *   `maxAdapters` (default 4, floor 1), `minExamples` (default 2, floor 1),
 *   `maxPairs` (default 24).
 * @returns {object} Plan with `status` ('ready' or 'needs_more_feedback'),
 *   `adapters`, `dpoPairsAvailable`, scheduling guidance and `nextActions`.
 */
function buildContinualAdapterTrainingPlan(episodes = [], options = {}) {
  const adapterLimit = Math.max(1, Number(options.maxAdapters || 4));
  const exampleFloor = Math.max(1, Number(options.minExamples || 2));

  const pairOptions = { ...options, maxPairs: Number(options.maxPairs || 24) };
  const preferencePairs = buildPreferencePairs(episodes, pairOptions);

  const rankingOptions = {
    ...options,
    minOccurrences: exampleFloor,
    maxGateCandidates: adapterLimit,
  };
  const ranked = rankGateCandidatesByReward(episodes, rankingOptions).slice(0, adapterLimit);

  const adapters = [];
  ranked.forEach((candidate, position) => {
    // Candidates without a gate id get a positional name; ids are capped at 96 chars.
    const suffix = candidate.gateId || `adapter_${position + 1}`;
    adapters.push({
      id: `lora_${suffix}`.slice(0, 96),
      source: candidate.key,
      trainingMode: 'concurrent-lora',
      examples: candidate.occurrences,
      priorityScore: candidate.priorityScore,
      targetBehavior: candidate.recommendation,
      retentionChecks: [
        'baseline gate pass-rate must not regress',
        'previous adapter reward must stay within tolerance',
        'new adapter must improve blocked-repeat or verification reward on held-out episodes',
      ],
    });
  });

  // Training is only worthwhile with enough pairs AND at least one adapter candidate.
  const enoughData = preferencePairs.length >= exampleFloor && adapters.length > 0;

  let scheduling = 'train serially until at least two adapter candidates exist';
  if (adapters.length > 1) {
    scheduling = 'batch adapters concurrently when they share the same frozen base';
  }

  let nextActions;
  if (enoughData) {
    nextActions = [
      'Export reward-ranked DPO pairs.',
      'Train candidate LoRA adapters concurrently on the same frozen base.',
      'Evaluate each adapter against held-out negative episodes and prior positive episodes.',
      'Promote only the adapter that improves reward without retention regression.',
    ];
  } else {
    nextActions = [
      'Capture more thumbs-up/down outcomes before training.',
      'Promote deterministic gates first when examples are sparse.',
      'Use local semantic recall until enough reward-ranked pairs exist for adapter training.',
    ];
  }

  return {
    status: enoughData ? 'ready' : 'needs_more_feedback',
    trainingStack: 'multi-lora-continual-learning',
    baseModelPolicy: 'freeze base model; train small adapters from reward-ranked DPO pairs',
    scheduling,
    adapters,
    dpoPairsAvailable: preferencePairs.length,
    retentionGate: 'ship no adapter unless reward, regression, and prior-domain retention checks pass',
    nextActions,
  };
}
|
|
336
385
|
|
|
@@ -419,8 +468,10 @@ if (isCliInvocation()) {
|
|
|
419
468
|
console.log(JSON.stringify(buildPreferencePairs(episodes), null, 2));
|
|
420
469
|
} else if (args.command === 'gates') {
|
|
421
470
|
console.log(JSON.stringify(rankGateCandidatesByReward(episodes), null, 2));
|
|
471
|
+
} else if (args.command === 'adapters') {
|
|
472
|
+
console.log(JSON.stringify(buildContinualAdapterTrainingPlan(episodes), null, 2));
|
|
422
473
|
} else {
|
|
423
|
-
console.error(`Unknown command: ${args.command}. Use: report, pairs, gates`);
|
|
474
|
+
console.error(`Unknown command: ${args.command}. Use: report, pairs, gates, adapters`);
|
|
424
475
|
process.exit(1);
|
|
425
476
|
}
|
|
426
477
|
}
|
|
@@ -428,6 +479,7 @@ if (isCliInvocation()) {
|
|
|
428
479
|
module.exports = {
|
|
429
480
|
HIGH_RISK_TAGS,
|
|
430
481
|
allocateTestTimeCompute,
|
|
482
|
+
buildContinualAdapterTrainingPlan,
|
|
431
483
|
buildPreferencePairFromEpisodes,
|
|
432
484
|
buildPreferencePairs,
|
|
433
485
|
buildRewardReport,
|
|
@@ -0,0 +1,367 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
const fs = require('fs');
|
|
5
|
+
const path = require('path');
|
|
6
|
+
const crypto = require('crypto');
|
|
7
|
+
|
|
8
|
+
const DEFAULT_MAX_FILES = 2500;
|
|
9
|
+
const DEFAULT_MAX_BYTES = 1024 * 1024;
|
|
10
|
+
|
|
11
|
+
const IGNORE_DIRS = new Set([
|
|
12
|
+
'.git',
|
|
13
|
+
'.hg',
|
|
14
|
+
'.svn',
|
|
15
|
+
'.thumbgate',
|
|
16
|
+
'.venv',
|
|
17
|
+
'venv',
|
|
18
|
+
'__pycache__',
|
|
19
|
+
'node_modules',
|
|
20
|
+
'dist',
|
|
21
|
+
'build',
|
|
22
|
+
'coverage',
|
|
23
|
+
'.next',
|
|
24
|
+
'.turbo',
|
|
25
|
+
'.cache',
|
|
26
|
+
]);
|
|
27
|
+
|
|
28
|
+
const TEXT_EXTENSIONS = new Set([
|
|
29
|
+
'.js',
|
|
30
|
+
'.jsx',
|
|
31
|
+
'.ts',
|
|
32
|
+
'.tsx',
|
|
33
|
+
'.mjs',
|
|
34
|
+
'.cjs',
|
|
35
|
+
'.py',
|
|
36
|
+
'.ipynb',
|
|
37
|
+
'.go',
|
|
38
|
+
'.java',
|
|
39
|
+
'.rb',
|
|
40
|
+
'.rs',
|
|
41
|
+
'.php',
|
|
42
|
+
'.cs',
|
|
43
|
+
'.swift',
|
|
44
|
+
'.kt',
|
|
45
|
+
'.scala',
|
|
46
|
+
'.sh',
|
|
47
|
+
'.yaml',
|
|
48
|
+
'.yml',
|
|
49
|
+
'.toml',
|
|
50
|
+
'.json',
|
|
51
|
+
'.ini',
|
|
52
|
+
]);
|
|
53
|
+
|
|
54
|
+
const MODEL_EXTENSIONS = new Map([
|
|
55
|
+
['.gguf', { name: 'GGUF model artifact', category: 'model_artifact', ecosystem: 'local-model' }],
|
|
56
|
+
['.safetensors', { name: 'SafeTensors model artifact', category: 'model_artifact', ecosystem: 'local-model' }],
|
|
57
|
+
['.onnx', { name: 'ONNX model artifact', category: 'model_artifact', ecosystem: 'onnx' }],
|
|
58
|
+
['.pt', { name: 'PyTorch model artifact', category: 'model_artifact', ecosystem: 'pytorch' }],
|
|
59
|
+
['.pth', { name: 'PyTorch model artifact', category: 'model_artifact', ecosystem: 'pytorch' }],
|
|
60
|
+
['.tflite', { name: 'TensorFlow Lite model artifact', category: 'model_artifact', ecosystem: 'tensorflow' }],
|
|
61
|
+
]);
|
|
62
|
+
|
|
63
|
+
const SOURCE_PATTERNS = [
|
|
64
|
+
component('openai', 'OpenAI SDK/API', 'provider_sdk', 'openai', /\b(from\s+openai\s+import|import\s+openai\b|require\(['"]openai['"]\)|from\s+['"]openai['"]|@openai\/agents|new\s+OpenAI\s*\()/i),
|
|
65
|
+
component('anthropic', 'Anthropic Claude SDK/API', 'provider_sdk', 'anthropic', /\b(@anthropic-ai\/sdk|from\s+anthropic\s+import|import\s+anthropic\b|require\(['"]@anthropic-ai\/sdk['"]\)|new\s+Anthropic\s*\()/i),
|
|
66
|
+
component('google-gemini', 'Google Gemini SDK/API', 'provider_sdk', 'google', /\b(@google\/generative-ai|google-genai|from\s+google\s+import\s+genai|GoogleGenerativeAI|GenerativeModel)/i),
|
|
67
|
+
component('vertex-ai', 'Google Vertex AI', 'ai_platform', 'google-cloud', /\b(vertexai|aiplatform|@google-cloud\/vertexai|PredictionServiceClient|projects\.locations\.publishers\.models)/i),
|
|
68
|
+
component('dialogflow-cx', 'Google Dialogflow CX', 'conversation_ai', 'google-cloud', /\b(dialogflowcx|dialogflow-cx|@google-cloud\/dialogflow-cx|SessionsClient|DetectIntentRequest)/i),
|
|
69
|
+
component('langchain', 'LangChain', 'agent_framework', 'langchain', /\b(@langchain\/|langchain\b|from\s+langchain(_community|_core)?\b)/i),
|
|
70
|
+
component('llamaindex', 'LlamaIndex', 'agent_framework', 'llamaindex', /\b(llama_index|llamaindex|from\s+llama_index\b)/i),
|
|
71
|
+
component('semantic-kernel', 'Semantic Kernel', 'agent_framework', 'microsoft', /\b(semantic-kernel|semantic_kernel|Microsoft\.SemanticKernel)/i),
|
|
72
|
+
component('crewai', 'CrewAI', 'agent_framework', 'crewai', /\b(crewai|from\s+crewai\b)/i),
|
|
73
|
+
component('autogen', 'AutoGen', 'agent_framework', 'microsoft', /\b(autogen|pyautogen|@microsoft\/autogen)/i),
|
|
74
|
+
component('transformers', 'Hugging Face Transformers', 'ml_framework', 'huggingface', /\b(transformers|AutoModel|AutoTokenizer|pipeline\s*\()/i),
|
|
75
|
+
component('sentence-transformers', 'Sentence Transformers', 'embedding_model', 'huggingface', /\b(sentence_transformers|SentenceTransformer)/i),
|
|
76
|
+
component('pytorch', 'PyTorch', 'ml_framework', 'pytorch', /\b(import\s+torch\b|from\s+torch\b|torch\.)/i),
|
|
77
|
+
component('tensorflow', 'TensorFlow/Keras', 'ml_framework', 'tensorflow', /\b(import\s+tensorflow\b|from\s+tensorflow\b|import\s+keras\b|from\s+keras\b|tf\.keras)/i),
|
|
78
|
+
component('scikit-learn', 'scikit-learn', 'ml_framework', 'scikit-learn', /\b(sklearn|scikit-learn|from\s+sklearn\b)/i),
|
|
79
|
+
component('onnxruntime', 'ONNX Runtime', 'ml_runtime', 'onnx', /\b(onnxruntime|InferenceSession)/i),
|
|
80
|
+
component('pinecone', 'Pinecone vector database', 'vector_database', 'pinecone', /\b(pinecone|@pinecone-database\/pinecone)/i),
|
|
81
|
+
component('weaviate', 'Weaviate vector database', 'vector_database', 'weaviate', /\b(weaviate|weaviate-client)/i),
|
|
82
|
+
component('qdrant', 'Qdrant vector database', 'vector_database', 'qdrant', /\b(qdrant|@qdrant\/js-client-rest|qdrant-client)/i),
|
|
83
|
+
component('chroma', 'Chroma vector database', 'vector_database', 'chroma', /\b(chromadb|chroma-client|ChromaClient)/i),
|
|
84
|
+
component('lancedb', 'LanceDB vector database', 'vector_database', 'lancedb', /\b(lancedb|@lancedb\/lancedb)/i),
|
|
85
|
+
component('faiss', 'FAISS vector index', 'vector_database', 'faiss', /\b(faiss|faiss-cpu|faiss-gpu)/i),
|
|
86
|
+
component('pgvector', 'Postgres pgvector', 'vector_database', 'postgres', /\b(pgvector|vector\(\d+\)|CREATE\s+EXTENSION\s+vector)/i),
|
|
87
|
+
];
|
|
88
|
+
|
|
89
|
+
const MANIFEST_FILES = new Set([
|
|
90
|
+
'package.json',
|
|
91
|
+
'requirements.txt',
|
|
92
|
+
'pyproject.toml',
|
|
93
|
+
'poetry.lock',
|
|
94
|
+
'Pipfile',
|
|
95
|
+
'Gemfile',
|
|
96
|
+
'go.mod',
|
|
97
|
+
'Cargo.toml',
|
|
98
|
+
]);
|
|
99
|
+
|
|
100
|
+
/**
 * Bundle the fields of one detectable AI component into a descriptor object.
 *
 * @param {string} id Stable identifier used as the inventory key.
 * @param {string} name Human-readable component name.
 * @param {string} category Component category label.
 * @param {string} ecosystem Vendor or ecosystem label.
 * @param {RegExp} pattern Expression tested against individual lines.
 * @returns {{id: string, name: string, category: string, ecosystem: string, pattern: RegExp}}
 */
function component(id, name, category, ecosystem, pattern) {
  const descriptor = { id, name, category, ecosystem, pattern };
  return descriptor;
}
|
|
103
|
+
|
|
104
|
+
/**
 * Express `filePath` relative to `rootDir` using forward slashes, whatever
 * separator the host platform uses.
 *
 * @param {string} rootDir Directory the result is relative to.
 * @param {string} filePath Path to convert.
 * @returns {string} Relative path with '/' separators.
 */
function relativePath(rootDir, filePath) {
  const nativeRelative = path.relative(rootDir, filePath);
  const segments = nativeRelative.split(path.sep);
  return segments.join('/');
}
|
|
107
|
+
|
|
108
|
+
/**
 * Tell whether a directory name is listed in IGNORE_DIRS.
 *
 * @param {string} name Directory base name (not a full path).
 * @returns {boolean} True when the name is in the ignore set.
 */
function shouldIgnoreDir(name) {
  const skipped = IGNORE_DIRS.has(name);
  return skipped;
}
|
|
111
|
+
|
|
112
|
+
/**
 * Decide whether a file should be read as text: either its base name is a
 * known manifest file or its lower-cased extension is in TEXT_EXTENSIONS.
 *
 * @param {string} filePath Path of the file to classify.
 * @returns {boolean} True when the file qualifies for line scanning.
 */
function isTextLike(filePath) {
  const fileName = path.basename(filePath);
  const extension = path.extname(filePath).toLowerCase();
  return MANIFEST_FILES.has(fileName) || TEXT_EXTENSIONS.has(extension);
}
|
|
117
|
+
|
|
118
|
+
/**
 * Collect file paths under `rootDir` breadth-first, skipping ignored
 * directories, until `maxFiles` paths have been gathered.
 *
 * Only entries reported as regular files are collected and only entries
 * reported as directories are descended into. Directories that cannot be
 * read are skipped so one unreadable folder does not abort the whole scan.
 *
 * @param {string} rootDir Directory to start from.
 * @param {object} [options={}]
 * @param {number|string} [options.maxFiles] Cap on collected files. Missing,
 *   zero, negative or non-numeric values fall back to DEFAULT_MAX_FILES
 *   instead of silently producing an empty result.
 * @returns {string[]} Paths of the files found, at most `maxFiles` long.
 */
function walkFiles(rootDir, options = {}) {
  const requested = Number(options.maxFiles);
  // NaN > 0 is false, so garbage input lands on the default; Infinity stays usable.
  const maxFiles = requested > 0 ? requested : DEFAULT_MAX_FILES;
  const files = [];
  const queue = [rootDir];

  // A cursor replaces queue.shift() so dequeuing never re-indexes the array.
  for (let cursor = 0; cursor < queue.length && files.length < maxFiles; cursor += 1) {
    const dir = queue[cursor];
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch (_) {
      // Best effort: unreadable or vanished directories are skipped.
      continue;
    }

    for (const entry of entries) {
      const fullPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        if (!shouldIgnoreDir(entry.name)) queue.push(fullPath);
      } else if (entry.isFile()) {
        files.push(fullPath);
        if (files.length >= maxFiles) break;
      }
    }
  }

  return files;
}
|
|
147
|
+
|
|
148
|
+
/**
 * Read a file as UTF-8 and split it into lines on LF or CRLF.
 *
 * @param {string} filePath File to read.
 * @param {number} [maxBytes=DEFAULT_MAX_BYTES] Files larger than this are not read.
 * @returns {string[]|null} The lines, or null when the path cannot be
 *   stat'ed or read, is not a regular file, or exceeds `maxBytes`.
 */
function readLines(filePath, maxBytes = DEFAULT_MAX_BYTES) {
  try {
    const info = fs.statSync(filePath);
    if (!info.isFile()) return null;
    if (info.size > maxBytes) return null;

    const text = fs.readFileSync(filePath, 'utf8');
    return text.split(/\r?\n/);
  } catch (_) {
    // Any stat/read failure is reported to the caller as "no lines".
    return null;
  }
}
|
|
163
|
+
|
|
164
|
+
/**
 * Record one piece of evidence for a component in the inventory map.
 *
 * The component entry is created on first sight. Evidence is appended only
 * while the entry holds fewer than `maxEvidencePerComponent` items and no
 * stored item has the same file, line and kind.
 *
 * @param {Map<string, object>} map Inventory keyed by component id (mutated).
 * @param {object} componentDef Descriptor providing id, name, category, ecosystem.
 * @param {object} evidence Evidence record with at least file, line and kind.
 * @param {number} maxEvidencePerComponent Upper bound on stored evidence items.
 * @returns {void}
 */
function addEvidence(map, componentDef, evidence, maxEvidencePerComponent) {
  const { id, name, category, ecosystem } = componentDef;

  let record = map.get(id);
  if (!record) {
    record = { id, name, category, ecosystem, evidence: [] };
  }

  const hasRoom = record.evidence.length < maxEvidencePerComponent;
  if (hasRoom) {
    let alreadyStored = false;
    for (const known of record.evidence) {
      if (known.file === evidence.file && known.line === evidence.line && known.kind === evidence.kind) {
        alreadyStored = true;
        break;
      }
    }
    if (!alreadyStored) record.evidence.push(evidence);
  }

  // Always (re)store so a component is registered even when evidence is capped.
  map.set(id, record);
}
|
|
181
|
+
|
|
182
|
+
/**
 * Scan a source file line by line and record evidence (kind 'source') for
 * every SOURCE_PATTERNS entry whose pattern matches a line.
 *
 * @param {string} rootDir Scan root, used to report root-relative paths.
 * @param {string} filePath File to scan.
 * @param {Map<string, object>} map Inventory map passed on to addEvidence.
 * @param {object} options Uses `maxBytes`, `includeSnippets` and
 *   `maxEvidencePerComponent`.
 * @returns {void} Returns early when the file yields no lines.
 */
function scanSourceFile(rootDir, filePath, map, options) {
  const lines = readLines(filePath, options.maxBytes);
  if (!lines) return;

  const file = relativePath(rootDir, filePath);
  const omitSnippet = options.includeSnippets === false;

  for (let index = 0; index < lines.length; index += 1) {
    const text = lines[index];
    const matches = SOURCE_PATTERNS.filter((def) => def.pattern.test(text));
    for (const def of matches) {
      const evidence = {
        kind: 'source',
        file,
        line: index + 1, // report 1-based line numbers
        snippet: omitSnippet ? undefined : text.trim().slice(0, 220),
      };
      addEvidence(map, def, evidence, options.maxEvidencePerComponent);
    }
  }
}
|
|
199
|
+
|
|
200
|
+
/**
 * Scan a dependency manifest line by line and record evidence (kind
 * 'manifest') for every SOURCE_PATTERNS entry whose pattern matches a line.
 *
 * @param {string} rootDir Scan root, used to report root-relative paths.
 * @param {string} filePath Manifest file to scan.
 * @param {Map<string, object>} map Inventory map passed on to addEvidence.
 * @param {object} options Uses `maxBytes`, `includeSnippets` and
 *   `maxEvidencePerComponent`.
 * @returns {void} Returns early when the file yields no lines.
 */
function scanManifestFile(rootDir, filePath, map, options) {
  const manifestLines = readLines(filePath, options.maxBytes);
  if (!manifestLines) return;

  const manifestPath = relativePath(rootDir, filePath);
  const toSnippet = (text) => (options.includeSnippets === false ? undefined : text.trim().slice(0, 220));

  for (const [position, text] of manifestLines.entries()) {
    for (const def of SOURCE_PATTERNS) {
      if (def.pattern.test(text)) {
        addEvidence(map, def, {
          kind: 'manifest',
          file: manifestPath,
          line: position + 1, // report 1-based line numbers
          snippet: toSnippet(text),
        }, options.maxEvidencePerComponent);
      }
    }
  }
}
|
|
217
|
+
|
|
218
|
+
/**
 * Record a model-artifact component when the file's lower-cased extension
 * is registered in MODEL_EXTENSIONS.
 *
 * The component id is `<ecosystem>-<extension>-artifact`. The file size is
 * attached as `bytes` when the file can be stat'ed and left undefined
 * otherwise.
 *
 * @param {string} rootDir Scan root, used to report root-relative paths.
 * @param {string} filePath Candidate artifact file.
 * @param {Map<string, object>} map Inventory map passed on to addEvidence.
 * @param {object} options Uses `maxEvidencePerComponent`.
 * @returns {void}
 */
function scanModelArtifact(rootDir, filePath, map, options) {
  const extension = path.extname(filePath).toLowerCase();
  const artifactDef = MODEL_EXTENSIONS.get(extension);
  if (!artifactDef) return;

  let bytes;
  try {
    bytes = fs.statSync(filePath).size;
  } catch (_) {
    // Size is optional metadata; the artifact is still reported without it.
    bytes = undefined;
  }

  const componentDef = {
    id: `${artifactDef.ecosystem}-${extension.slice(1)}-artifact`,
    ...artifactDef,
  };
  const evidence = {
    kind: 'artifact',
    file: relativePath(rootDir, filePath),
    line: null,
    bytes,
  };
  addEvidence(map, componentDef, evidence, options.maxEvidencePerComponent);
}
|
|
240
|
+
|
|
241
|
+
/**
 * Count components per category and per ecosystem.
 *
 * @param {Iterable<{category: string, ecosystem: string}>} components
 * @returns {{byCategory: Object<string, number>, byEcosystem: Object<string, number>}}
 */
function summarizeComponents(components) {
  const bump = (counts, key) => {
    counts[key] = (counts[key] || 0) + 1;
  };

  const byCategory = {};
  const byEcosystem = {};
  for (const { category, ecosystem } of components) {
    bump(byCategory, category);
    bump(byEcosystem, ecosystem);
  }
  return { byCategory, byEcosystem };
}
|
|
250
|
+
|
|
251
|
+
/**
 * Walk a project tree and build an inventory of detected AI/ML components.
 *
 * Every walked file is checked as a model artifact. Files named like a
 * manifest are then scanned as manifests; other text-like files are scanned
 * as source.
 *
 * @param {object} [options={}]
 * @param {string} [options.rootDir] Root to scan; defaults to the current working directory.
 * @param {number} [options.maxFiles] Cap on walked files (default DEFAULT_MAX_FILES).
 * @param {number} [options.maxBytes] Per-file size cap (default DEFAULT_MAX_BYTES).
 * @param {boolean} [options.includeSnippets] Pass false to omit line snippets.
 * @param {number} [options.maxEvidencePerComponent] Evidence cap per component (default 10).
 * @returns {object} Inventory with schemaVersion, generatedAt, rootDir,
 *   filesScanned (number of files walked), componentCount, summary and
 *   components sorted by id.
 */
function scanAiComponents(options = {}) {
  const rootDir = path.resolve(options.rootDir || process.cwd());
  const scanOptions = {
    maxFiles: options.maxFiles || DEFAULT_MAX_FILES,
    maxBytes: options.maxBytes || DEFAULT_MAX_BYTES,
    includeSnippets: options.includeSnippets !== false,
    maxEvidencePerComponent: Number(options.maxEvidencePerComponent || 10),
  };

  const files = walkFiles(rootDir, scanOptions);
  const found = new Map();

  files.forEach((filePath) => {
    scanModelArtifact(rootDir, filePath, found, scanOptions);

    const isManifest = MANIFEST_FILES.has(path.basename(filePath));
    if (isManifest) {
      scanManifestFile(rootDir, filePath, found, scanOptions);
    } else if (isTextLike(filePath)) {
      scanSourceFile(rootDir, filePath, found, scanOptions);
    }
  });

  // Sort by id so the report order does not depend on traversal order.
  const components = [...found.values()].sort((left, right) => left.id.localeCompare(right.id));
  const summary = summarizeComponents(components);

  return {
    schemaVersion: 'thumbgate.ai-inventory.v1',
    generatedAt: new Date().toISOString(),
    rootDir,
    filesScanned: files.length,
    componentCount: components.length,
    summary,
    components,
  };
}
|
|
286
|
+
|
|
287
|
+
/**
 * Convert an inventory from scanAiComponents into a CycloneDX 1.5 style
 * ML-BOM document.
 *
 * The serial number is derived from a SHA-256 hash of the root directory and
 * each component's id plus evidence file list, so the same inventory content
 * always yields the same `urn:uuid:` value.
 *
 * @param {object} inventory Inventory with rootDir, generatedAt, filesScanned,
 *   componentCount and components (each with id, name, category, ecosystem, evidence).
 * @param {object} [options={}]
 * @param {string} [options.version] Tool version to report; defaults to 'local'.
 * @returns {object} BOM object with bomFormat, specVersion, serialNumber,
 *   version, metadata and components.
 */
function buildCycloneDxMlBom(inventory, options = {}) {
  const fingerprint = JSON.stringify({
    rootDir: inventory.rootDir,
    components: inventory.components.map((item) => [item.id, item.evidence.map((e) => e.file)]),
  });
  const serialHash = crypto
    .createHash('sha256')
    .update(fingerprint)
    .digest('hex')
    .slice(0, 32);

  // Lay the 32 hex characters out in the 8-4-4-4-12 UUID grouping.
  const serialNumber = `urn:uuid:${[
    serialHash.slice(0, 8),
    serialHash.slice(8, 12),
    serialHash.slice(12, 16),
    serialHash.slice(16, 20),
    serialHash.slice(20, 32),
  ].join('-')}`;

  const components = inventory.components.map((item) => {
    const reference = `thumbgate:${item.id}`;
    const evidenceIndex = item.evidence.map((e) => ({ file: e.file, line: e.line, kind: e.kind }));
    return {
      type: item.category === 'model_artifact' ? 'machine-learning-model' : 'library',
      name: item.name,
      group: item.ecosystem,
      // CycloneDX JSON names the reference field "bom-ref"; emit it so standard
      // tooling can resolve component references.
      'bom-ref': reference,
      // Kept for consumers written against the earlier camelCase key.
      // NOTE(review): strict CycloneDX schema validation may reject this extra key — confirm before removing.
      bomRef: reference,
      properties: [
        { name: 'thumbgate:category', value: item.category },
        { name: 'thumbgate:evidenceCount', value: String(item.evidence.length) },
        { name: 'thumbgate:evidence', value: JSON.stringify(evidenceIndex) },
      ],
    };
  });

  return {
    bomFormat: 'CycloneDX',
    specVersion: '1.5',
    serialNumber,
    version: 1,
    metadata: {
      timestamp: inventory.generatedAt,
      tools: [
        {
          vendor: 'ThumbGate',
          name: 'AI Component Inventory',
          version: options.version || 'local',
        },
      ],
      properties: [
        { name: 'thumbgate:rootDir', value: inventory.rootDir },
        { name: 'thumbgate:filesScanned', value: String(inventory.filesScanned) },
        { name: 'thumbgate:componentCount', value: String(inventory.componentCount) },
      ],
    },
    components,
  };
}
|
|
330
|
+
|
|
331
|
+
/**
 * Render an inventory as a plain-text report: a header, per-category counts
 * sorted by category name, and up to three evidence locations per component.
 *
 * @param {object} inventory Inventory with rootDir, filesScanned,
 *   componentCount, summary.byCategory and components.
 * @returns {string} Newline-joined report text.
 */
function formatInventoryText(inventory) {
  const header = [
    'ThumbGate AI Component Inventory',
    ` Root : ${inventory.rootDir}`,
    ` Files scanned : ${inventory.filesScanned}`,
    ` AI components : ${inventory.componentCount}`,
    '',
  ];

  const sortedCategories = Object.entries(inventory.summary.byCategory)
    .sort((left, right) => left[0].localeCompare(right[0]));
  const categorySection = ['By category:'];
  if (sortedCategories.length === 0) categorySection.push(' none detected');
  sortedCategories.forEach(([category, count]) => {
    categorySection.push(` ${category}: ${count}`);
  });
  categorySection.push('');

  const evidenceSection = ['Evidence:'];
  if (inventory.components.length === 0) evidenceSection.push(' none detected');
  inventory.components.forEach((item) => {
    evidenceSection.push(` - ${item.name} (${item.category}, ${item.ecosystem})`);
    // Only the first three evidence entries are listed per component.
    item.evidence.slice(0, 3).forEach((evidence) => {
      // Evidence without a line number (e.g. artifacts) shows the file only.
      const location = evidence.line ? `${evidence.file}:${evidence.line}` : evidence.file;
      evidenceSection.push(` ${location} [${evidence.kind}]`);
    });
  });

  return [...header, ...categorySection, ...evidenceSection].join('\n');
}
|
|
354
|
+
|
|
355
|
+
/**
 * Write `data` to `filePath`, creating missing parent directories first.
 *
 * @param {string} filePath Destination file.
 * @param {string|Buffer} data Content handed to fs.writeFileSync.
 * @returns {void}
 */
function writeOutput(filePath, data) {
  const parentDir = path.dirname(filePath);
  fs.mkdirSync(parentDir, { recursive: true });
  fs.writeFileSync(filePath, data);
}
|
|
359
|
+
|
|
360
|
+
module.exports = {
|
|
361
|
+
SOURCE_PATTERNS,
|
|
362
|
+
MODEL_EXTENSIONS,
|
|
363
|
+
scanAiComponents,
|
|
364
|
+
buildCycloneDxMlBom,
|
|
365
|
+
formatInventoryText,
|
|
366
|
+
writeOutput,
|
|
367
|
+
};
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
const fs = require('fs');
|
|
5
|
+
const path = require('path');
|
|
6
|
+
|
|
7
|
+
const DEFAULT_CONFIG_PATH = path.join(__dirname, '..', 'config', 'gate-classifier-routing.json');
|
|
8
|
+
|
|
9
|
+
/**
 * Coerce a value to a number and clamp it into the range [0, 1].
 *
 * @param {*} value Anything `Number()` can coerce.
 * @param {number} [fallback=0] Returned as-is when the coerced value is not finite.
 * @returns {number} The clamped number, or `fallback`.
 */
function clamp01(value, fallback = 0) {
  const numeric = Number(value);
  const usable = Number.isFinite(numeric);
  return usable ? Math.max(0, Math.min(1, numeric)) : fallback;
}
|
|
14
|
+
|
|
15
|
+
/**
 * Map a risk label to a numeric rank from 1 (lowest) to 4 (highest).
 *
 * Labels are compared case-insensitively. A missing label counts as
 * 'medium'; any label not listed below ranks 1.
 *
 * @param {string} [risk] Risk label such as 'critical', 'high', 'warn'.
 * @returns {number} 4, 3, 2 or 1.
 */
function riskRank(risk) {
  const label = String(risk || 'medium').toLowerCase();
  const ranks = new Map([
    ['critical', 4],
    ['block', 4],
    ['regulated', 4],
    ['high', 3],
    ['dangerous', 3],
    ['medium', 2],
    ['warn', 2],
  ]);
  return ranks.has(label) ? ranks.get(label) : 1;
}
|
|
22
|
+
|
|
23
|
+
/**
 * Read and parse the classifier routing configuration file.
 *
 * @param {string} [configPath=DEFAULT_CONFIG_PATH] JSON file to load.
 * @returns {*} The parsed JSON value.
 * @throws When the file cannot be read or does not contain valid JSON.
 */
function loadClassifierRoutingConfig(configPath = DEFAULT_CONFIG_PATH) {
  const rawJson = fs.readFileSync(configPath, 'utf8');
  return JSON.parse(rawJson);
}
|
|
26
|
+
|
|
27
|
+
/**
 * Build the routing decision object for a chosen lane.
 *
 * @param {object} config Routing config; `config.lanes[laneName]` supplies
 *   requiresEvidence, cloudAllowed and maxLatencyMs when present.
 * @param {string} laneName Name of the selected lane.
 * @param {string} reason Explanation of why the lane was selected.
 * @param {object} input Classifier input; `risk` and `ambiguity` are echoed
 *   back in normalised form.
 * @param {object} [overrides={}] `requiresEvidence` here can force the flag on.
 * @returns {{lane: string, reason: string, requiresEvidence: boolean,
 *   cloudAllowed: boolean, maxLatencyMs: *, risk: string, ambiguity: number}}
 */
function lane(config, laneName, reason, input, overrides = {}) {
  let laneConfig = {};
  if (config.lanes && config.lanes[laneName]) {
    laneConfig = config.lanes[laneName];
  }

  // Evidence is required when either the lane config or the caller asks for it.
  const needsEvidence = laneConfig.requiresEvidence || overrides.requiresEvidence;
  const riskLabel = String(input.risk || 'medium').toLowerCase();

  return {
    lane: laneName,
    reason,
    requiresEvidence: Boolean(needsEvidence),
    cloudAllowed: Boolean(laneConfig.cloudAllowed),
    maxLatencyMs: laneConfig.maxLatencyMs,
    risk: riskLabel,
    ambiguity: clamp01(input.ambiguity),
  };
}
|
|
39
|
+
|
|
40
|
+
/**
 * Pick the cheapest adequate classification lane for a gate decision.
 *
 * Rules are evaluated in order and the first match wins:
 *   1. hard rule / exact policy match          -> 'deterministic'
 *   2. semantic cache hit / equivalent repeat  -> 'semantic_cache'
 *   3. rubric or evidence failure              -> 'rubric_gate'
 *   4. structured dataset missing provenance   -> 'rubric_gate'
 *   5. private + cloud not allowed + high risk + ambiguous -> 'human_review'
 *   6. enough labels, low ambiguity, and a large batch or tight latency budget -> 'local_classical'
 *   7. high risk + very ambiguous -> 'llm_judge' when cloud and latency budget allow, else 'human_review'
 *   8. sparse labels or fuzzy intent           -> 'local_semantic'
 *   9. otherwise `config.defaultLane` or 'local_classical'
 *
 * Thresholds come from `config.thresholds`. A missing, zero or non-numeric
 * threshold uses the built-in default; non-numeric numeric inputs count as
 * 0. Without this, a NaN would make every comparison false and silently
 * switch off the rule that depends on it.
 *
 * @param {object} [input={}] Signals describing the action being classified.
 * @param {object} [config=loadClassifierRoutingConfig()] Routing configuration.
 * @returns {object} The decision produced by `lane(...)`.
 */
function routeClassifier(input = {}, config = loadClassifierRoutingConfig()) {
  const thresholds = config.thresholds || {};

  // Resolve a threshold once; unset/zero keep the historical default, NaN no longer leaks through.
  const threshold = (name, fallback) => {
    const resolved = Number(thresholds[name] || fallback);
    return Number.isNaN(resolved) ? fallback : resolved;
  };
  // Numeric inputs that cannot be parsed are treated as "not provided".
  const numericInput = (value) => {
    const parsed = Number(value || 0);
    return Number.isNaN(parsed) ? 0 : parsed;
  };

  const largeBatchRows = threshold('largeBatchRows', 50);
  const mediumAmbiguity = threshold('mediumAmbiguity', 0.35);
  const highRiskAmbiguity = threshold('highRiskAmbiguity', 0.65);
  const classicalMinExamples = threshold('classicalMinExamples', 40);
  const lowLatencyBudgetMs = threshold('lowLatencyBudgetMs', 300);
  const llmMinLatencyBudgetMs = threshold('llmMinLatencyBudgetMs', 2000);

  const labelCount = numericInput(input.labelCount || input.examples);
  const latencyBudgetMs = numericInput(input.latencyBudgetMs);
  const ambiguity = clamp01(input.ambiguity);
  const risk = riskRank(input.risk);
  const largeBatch = numericInput(input.batchRows) >= largeBatchRows;
  const privacySensitive = Boolean(input.privacySensitive || input.containsSecrets || input.customerData);
  const allowCloud = Boolean(input.allowCloud);

  if (input.hasHardRule || input.exactPolicyMatch) {
    return lane(config, 'deterministic', 'exact hard rule or policy match; do not spend model tokens', input);
  }

  if (input.semanticCacheHit || input.equivalentRepeat) {
    return lane(config, 'semantic_cache', 'semantically equivalent repeat; reuse the proven prior decision without a model call', input);
  }

  if (input.rubricFailed || input.missingEvidence || input.completionClaimWithoutProof) {
    return lane(config, 'rubric_gate', 'rubric or completion evidence failed; block done claims until proof exists', input);
  }

  if (input.structuredDataset && (input.missingProvenance || input.missingSources)) {
    return lane(config, 'rubric_gate', 'structured data claim is missing source provenance', input);
  }

  if (privacySensitive && !allowCloud && risk >= 3 && ambiguity >= mediumAmbiguity) {
    return lane(config, 'human_review', 'private high-risk ambiguous action; keep data local and require approval', input);
  }

  // An unspecified latency budget is 0 and therefore counts as a tight budget here.
  const cheapPathWanted = largeBatch || latencyBudgetMs <= lowLatencyBudgetMs;
  if (labelCount >= classicalMinExamples && cheapPathWanted && ambiguity < mediumAmbiguity) {
    return lane(config, 'local_classical', 'enough examples and low ambiguity; use cheap local classification', input);
  }

  if (risk >= 3 && ambiguity >= highRiskAmbiguity) {
    if (allowCloud && latencyBudgetMs >= llmMinLatencyBudgetMs) {
      return lane(config, 'llm_judge', 'high-risk semantic ambiguity; use a budget-capped LLM judge with evidence', input);
    }
    return lane(config, 'human_review', 'high-risk ambiguity without approved cloud/budget route', input);
  }

  if (labelCount < classicalMinExamples || ambiguity >= mediumAmbiguity) {
    return lane(config, 'local_semantic', 'sparse labels or fuzzy intent; use local semantic recall before any LLM', input);
  }

  return lane(config, config.defaultLane || 'local_classical', 'default local route for routine gate classification', input);
}
|
|
87
|
+
|
|
88
|
+
/**
 * Route an input and attach the chosen lane's description and use cases.
 *
 * @param {object} [input={}] Signals passed to routeClassifier.
 * @param {object} [config=loadClassifierRoutingConfig()] Routing configuration.
 * @returns {object} The routing decision plus `description` ('' when the
 *   lane has none) and `useFor` ([] when the lane has none).
 */
function explainClassifierRoute(input = {}, config = loadClassifierRoutingConfig()) {
  const decision = routeClassifier(input, config);

  const configuredLane = config.lanes ? config.lanes[decision.lane] : undefined;
  const laneConfig = configuredLane || {};

  const explanation = {
    description: laneConfig.description || '',
    useFor: laneConfig.useFor || [],
  };
  return Object.assign({}, decision, explanation);
}
|
|
97
|
+
|
|
98
|
+
/**
 * Translate command-line arguments into a classifier input object.
 *
 * Boolean flags set their field to true, `--risk=<label>` is kept as a
 * string, and `--ambiguity=`, `--labels=`, `--latency-ms=` and
 * `--batch-rows=` are converted with `Number()`. Unrecognised arguments are
 * ignored.
 *
 * Besides the original flags, four more are accepted so the CLI can set
 * every boolean signal the router reads: `--exact-policy-match`,
 * `--contains-secrets`, `--completion-claim-without-proof` and
 * `--missing-sources`.
 *
 * @param {string[]} argv Arguments, typically `process.argv.slice(2)`.
 * @returns {object} Input object for the classifier router.
 */
function parseArgs(argv) {
  const booleanFlags = new Map([
    ['--hard-rule', 'hasHardRule'],
    ['--exact-policy-match', 'exactPolicyMatch'],
    ['--privacy-sensitive', 'privacySensitive'],
    ['--contains-secrets', 'containsSecrets'],
    ['--allow-cloud', 'allowCloud'],
    ['--customer-data', 'customerData'],
    ['--semantic-cache-hit', 'semanticCacheHit'],
    ['--equivalent-repeat', 'equivalentRepeat'],
    ['--rubric-failed', 'rubricFailed'],
    ['--missing-evidence', 'missingEvidence'],
    ['--completion-claim-without-proof', 'completionClaimWithoutProof'],
    ['--structured-dataset', 'structuredDataset'],
    ['--missing-provenance', 'missingProvenance'],
    ['--missing-sources', 'missingSources'],
  ]);
  const numericPrefixes = [
    ['--ambiguity=', 'ambiguity'],
    ['--labels=', 'labelCount'],
    ['--latency-ms=', 'latencyBudgetMs'],
    ['--batch-rows=', 'batchRows'],
  ];
  const riskPrefix = '--risk=';

  const input = {};
  for (const arg of argv) {
    if (booleanFlags.has(arg)) {
      input[booleanFlags.get(arg)] = true;
      continue;
    }
    if (arg.startsWith(riskPrefix)) {
      input.risk = arg.slice(riskPrefix.length);
      continue;
    }
    const numeric = numericPrefixes.find(([prefix]) => arg.startsWith(prefix));
    if (numeric) {
      const [prefix, field] = numeric;
      input[field] = Number(arg.slice(prefix.length));
    }
  }
  return input;
}
|
|
119
|
+
|
|
120
|
+
module.exports = {
|
|
121
|
+
DEFAULT_CONFIG_PATH,
|
|
122
|
+
explainClassifierRoute,
|
|
123
|
+
loadClassifierRoutingConfig,
|
|
124
|
+
routeClassifier,
|
|
125
|
+
};
|
|
126
|
+
|
|
127
|
+
if (require.main === module) {
|
|
128
|
+
const decision = explainClassifierRoute(parseArgs(process.argv.slice(2)));
|
|
129
|
+
process.stdout.write(`${JSON.stringify(decision, null, 2)}\n`);
|
|
130
|
+
}
|
package/scripts/cli-schema.js
CHANGED
|
@@ -351,6 +351,19 @@ const CLI_COMMANDS = [
|
|
|
351
351
|
{ name: 'high-risk-workflows', type: 'string', description: 'Comma-separated workflows touching money, prod, secrets, data, or publishing' },
|
|
352
352
|
],
|
|
353
353
|
}),
|
|
354
|
+
discoveryCommand({
|
|
355
|
+
name: 'ai-inventory',
|
|
356
|
+
aliases: ['ai-component-inventory', 'ml-bom', 'mlbom'],
|
|
357
|
+
description: 'Scan AI/ML components and export enterprise ML-BOM evidence',
|
|
358
|
+
mcpTool: 'ai_component_inventory',
|
|
359
|
+
flags: [
|
|
360
|
+
jsonFlag(),
|
|
361
|
+
{ name: 'root', type: 'string', description: 'Project root to scan' },
|
|
362
|
+
{ name: 'format', type: 'string', description: 'summary, json, or cyclonedx' },
|
|
363
|
+
{ name: 'output', type: 'string', description: 'Write evidence to this path' },
|
|
364
|
+
{ name: 'max-files', type: 'number', description: 'Maximum files to scan' },
|
|
365
|
+
],
|
|
366
|
+
}),
|
|
354
367
|
discoveryCommand({
|
|
355
368
|
name: 'long-running-agent-context-guardrails',
|
|
356
369
|
aliases: ['agent-context-guardrails', 'slack-context-guardrails'],
|
|
@@ -614,6 +627,19 @@ const CLI_COMMANDS = [
|
|
|
614
627
|
{ name: 'json', type: 'boolean', description: 'Output the structured model as JSON' },
|
|
615
628
|
],
|
|
616
629
|
},
|
|
630
|
+
{
|
|
631
|
+
name: 'workflow',
|
|
632
|
+
aliases: ['swarm'],
|
|
633
|
+
description: 'Execute a dynamic parallel workflow for security audit, benchmarking, or exploration',
|
|
634
|
+
group: 'ops',
|
|
635
|
+
mcpTool: 'parallel_workflow',
|
|
636
|
+
flags: [
|
|
637
|
+
{ name: 'objective', type: 'string', required: true, description: 'The objective to plan and execute (e.g. security audit, performance benchmark)' },
|
|
638
|
+
{ name: 'concurrency', type: 'number', description: 'Maximum parallel subtasks (default 3)' },
|
|
639
|
+
{ name: 'timeoutMs', type: 'number', description: 'Timeout in milliseconds (default 60000)' },
|
|
640
|
+
{ name: 'json', type: 'boolean', description: 'Output results as JSON' },
|
|
641
|
+
],
|
|
642
|
+
},
|
|
617
643
|
];
|
|
618
644
|
|
|
619
645
|
/**
|