thumbgate 1.14.1 → 1.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/.well-known/mcp/server-card.json +1 -1
- package/README.md +2 -1
- package/adapters/claude/.mcp.json +2 -2
- package/adapters/mcp/server-stdio.js +8 -1
- package/adapters/opencode/opencode.json +1 -1
- package/bin/cli.js +54 -0
- package/config/enforcement.json +59 -7
- package/config/gates/default.json +33 -0
- package/config/mcp-allowlists.json +4 -0
- package/config/merge-quality-checks.json +2 -1
- package/package.json +17 -5
- package/public/codex-plugin.html +7 -1
- package/public/dashboard.html +23 -2
- package/public/index.html +20 -2
- package/public/learn.html +39 -0
- package/public/lessons.html +25 -1
- package/public/numbers.html +271 -0
- package/public/pro.html +7 -1
- package/scripts/cli-feedback.js +2 -1
- package/scripts/cli-schema.js +43 -4
- package/scripts/commercial-offer.js +1 -1
- package/scripts/contextfs.js +214 -32
- package/scripts/feedback-loop.js +49 -5
- package/scripts/harness-selector.js +132 -0
- package/scripts/lesson-canonical.js +181 -0
- package/scripts/lesson-db.js +71 -10
- package/scripts/lesson-synthesis.js +23 -2
- package/scripts/native-messaging-audit.js +514 -0
- package/scripts/pr-manager.js +47 -7
- package/scripts/profile-router.js +16 -1
- package/scripts/rule-validator.js +285 -0
- package/scripts/seo-gsd.js +182 -2
- package/scripts/tool-registry.js +12 -0
- package/skills/thumbgate/SKILL.md +1 -1
- package/src/api/server.js +53 -0
- package/.claude-plugin/README.md +0 -170
- package/adapters/README.md +0 -12
- package/skills/agent-memory/SKILL.md +0 -97
- package/skills/solve-architecture-autonomy/SKILL.md +0 -17
- package/skills/solve-architecture-autonomy/tool.js +0 -33
- package/skills/thumbgate-feedback/SKILL.md +0 -49
package/scripts/contextfs.js
CHANGED
|
@@ -605,6 +605,137 @@ function selectFlatContextItems(candidates, maxItems, maxChars) {
|
|
|
605
605
|
};
|
|
606
606
|
}
|
|
607
607
|
|
|
608
|
+
/* ── Summarize-then-expand selection ───────────────────────────────
|
|
609
|
+
*
|
|
610
|
+
* Two-pass retrieval that front-loads recall, then spends remaining char
|
|
611
|
+
* budget on depth for the highest-scoring candidates.
|
|
612
|
+
*
|
|
613
|
+
* Pass 1 — breadth. Walk the ranked candidate list and add each as a
|
|
614
|
+
* compact "summary tier" item: title + one-line hint drawn from the
|
|
615
|
+
* structured fields (whatToChange / whatWentWrong / first content line).
|
|
616
|
+
* A summary is small and bounded (SUMMARY_HINT_MAX chars), so many fit in
|
|
617
|
+
* a fraction of the budget. Stops when maxItems or a summary-reservation
|
|
618
|
+
* budget cap (SUMMARY_RESERVE_FRACTION of maxChars) is hit — this protects
|
|
619
|
+
* enough headroom for Pass 2 to actually do something.
|
|
620
|
+
*
|
|
621
|
+
* Pass 2 — depth. Walk the selected list top-down and try to upgrade each
|
|
622
|
+
* summary to the full structured context. The upgrade cost is the delta
|
|
623
|
+
* between full doc chars and the summary we already accounted for; if it
|
|
624
|
+
* fits under the *overall* maxChars, swap the summary for the full item
|
|
625
|
+
* and tag it tier='expanded'. Stop when the budget is exhausted.
|
|
626
|
+
*
|
|
627
|
+
* Rationale: the flat selector overcommits chars on the first few full-size
|
|
628
|
+
* hits and silently drops the tail. Summarize-then-expand means a consumer
|
|
629
|
+
* always knows which docs matched (full roster of titles), and the model
|
|
630
|
+
* sees full context for the top answers.
|
|
631
|
+
*
|
|
632
|
+
* The option is wired into constructContextPack via `strategy` or the
|
|
633
|
+
* explicit `summarizeThenExpand` flag. Default behavior is unchanged so
|
|
634
|
+
* existing callers / tests don't shift.
|
|
635
|
+
*/
|
|
636
|
+
|
|
637
|
+
const SUMMARY_HINT_MAX = 160;
|
|
638
|
+
const SUMMARY_RESERVE_FRACTION = 0.35;
|
|
639
|
+
|
|
640
|
+
/**
 * Build the compact "summary tier" context for a document.
 *
 * The hint is taken from the first truthy value among the structured
 * context's whatToChange, whatWentWrong and reasoning fields; when all three
 * are falsy it falls back to the first non-blank (trimmed) line of
 * `doc.content`, or the empty string. The hint is then cut to
 * SUMMARY_HINT_MAX characters.
 *
 * @param {object} doc - Document handed to buildStructuredContext; `content`
 *   is only read when no structured field supplies a hint.
 * @returns {object} Context object whose `rawContent` is the hint and whose
 *   reasoning / whatWentWrong / whatToChange / rubricFailure are all null.
 */
function buildSummaryContext(doc) {
  const structured = buildStructuredContext(doc);

  let hint = structured.whatToChange
    || structured.whatWentWrong
    || structured.reasoning;

  if (!hint) {
    // No structured field available: scan the body for its first non-blank line.
    hint = '';
    for (const rawLine of (doc.content || '').split('\n')) {
      const trimmed = rawLine.trim();
      if (trimmed) {
        hint = trimmed;
        break;
      }
    }
  }

  const rawContent = hint.slice(0, SUMMARY_HINT_MAX);

  return {
    rawContent,
    reasoning: null,
    whatWentWrong: null,
    whatToChange: null,
    rubricFailure: null,
  };
}
|
|
660
|
+
|
|
661
|
+
/**
 * Measure how many characters a document costs at the summary tier.
 *
 * The cost is the length of the title (empty string when falsy), a newline,
 * and the summary hint produced by buildSummaryContext.
 *
 * @param {object} doc - Document with an optional `title`.
 * @returns {number} Length of "<title>\n<hint>".
 */
function measureSummaryChars(doc) {
  const { rawContent } = buildSummaryContext(doc);
  const rendered = [doc.title || '', rawContent].join('\n');
  return rendered.length;
}
|
|
665
|
+
|
|
666
|
+
/**
 * Two-pass selector: pack compact summaries first, then upgrade the
 * earliest-listed ones to full structured context while the budget allows.
 *
 * Pass 1 walks `candidates` in the given order. It stops once `maxItems`
 * entries are selected; a candidate whose summary would push the running
 * total past the summary budget is counted in `skippedByMaxChars` and
 * skipped, and the walk continues with the next candidate.
 *
 * Pass 2 walks the selected entries in the same order. An entry is expanded
 * when its full size exceeds its summary size and the difference still fits
 * under `maxChars`; entries that do not fit are left as summaries and later
 * entries are still tried.
 *
 * @param {Iterable<{doc: object, score: *}>} candidates - Ranked candidates.
 * @param {number} maxItems - Upper bound on selected items.
 * @param {number} maxChars - Overall character budget.
 * @returns {{items: object[], usedChars: number, skippedByMaxChars: number, retrieval: object}}
 */
function selectSummarizeThenExpand(candidates, maxItems, maxChars) {
  // Summary budget: a fixed share of maxChars, floored at one character more
  // than the measured size of a blank document's summary.
  const reservedShare = Math.floor(maxChars * SUMMARY_RESERVE_FRACTION);
  const blankSummaryFloor = measureSummaryChars({ title: '', content: '' }) + 1;
  const summaryBudget = Math.max(reservedShare, blankSummaryFloor);

  // Each record pairs the public item with the builder-only state needed by
  // the expansion pass, so nothing has to be stripped from the items later.
  const records = [];
  let usedChars = 0;
  let skippedByMaxChars = 0;

  // Pass 1 — breadth.
  for (const candidate of candidates) {
    if (records.length >= maxItems) break;

    const summaryLen = measureSummaryChars(candidate.doc);
    const overSummaryBudget = usedChars + summaryLen > summaryBudget;
    if (overSummaryBudget) {
      skippedByMaxChars += 1;
      continue;
    }

    const publicItem = {
      id: candidate.doc.id,
      namespace: candidate.doc.namespace,
      title: candidate.doc.title,
      structuredContext: buildSummaryContext(candidate.doc),
      tags: candidate.doc.tags || [],
      score: candidate.score,
      tier: 'summary',
    };
    records.push({ publicItem, sourceDoc: candidate.doc, summaryLen });
    usedChars += summaryLen;
  }

  // Pass 2 — depth. Only the size difference is charged, because the summary
  // cost is already included in usedChars.
  let expandedCount = 0;
  for (const record of records) {
    const upgradeCost = measureDocumentChars(record.sourceDoc) - record.summaryLen;
    if (upgradeCost <= 0 || usedChars + upgradeCost > maxChars) continue;

    record.publicItem.structuredContext = buildStructuredContext(record.sourceDoc);
    record.publicItem.tier = 'expanded';
    usedChars += upgradeCost;
    expandedCount += 1;
  }

  const items = records.map((record) => record.publicItem);
  const retrieval = {
    strategy: 'summarize-then-expand',
    themeCount: 0,
    semanticCount: 0,
    selectedThemes: [],
    selectedSemanticGroups: [],
    representativeCount: items.length,
    expandedEpisodes: expandedCount,
    summaryCount: items.length - expandedCount,
    summaryBudget,
    queryCoverage: null,
    initialCoverage: null,
    coverageTarget: null,
  };

  return { items, usedChars, skippedByMaxChars, retrieval };
}
|
|
738
|
+
|
|
608
739
|
/* ── Memex-style Indexed Memory ────────────────────────────────── */
|
|
609
740
|
|
|
610
741
|
const MEMEX_INDEX_FILE = 'memex-index.jsonl';
|
|
@@ -750,17 +881,38 @@ function constructMemexPack({ query = '', maxItems = 8, maxChars = 6000, namespa
|
|
|
750
881
|
return pack;
|
|
751
882
|
}
|
|
752
883
|
|
|
753
|
-
function constructContextPack({
|
|
884
|
+
function constructContextPack({
|
|
885
|
+
query = '',
|
|
886
|
+
maxItems = 8,
|
|
887
|
+
maxChars = 6000,
|
|
888
|
+
namespaces = [],
|
|
889
|
+
strategy = null,
|
|
890
|
+
summarizeThenExpand = false,
|
|
891
|
+
} = {}) {
|
|
754
892
|
const normalizedNamespaces = normalizeNamespaces(namespaces);
|
|
755
893
|
const tokens = tokenizeQuery(query);
|
|
756
894
|
const sourceHash = getSourceHash(normalizedNamespaces);
|
|
757
895
|
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
896
|
+
// Resolve the effective strategy. Explicit `strategy` wins; otherwise
|
|
897
|
+
// `summarizeThenExpand: true` flips the flag. Default remains auto
|
|
898
|
+
// (flat | hierarchical) so callers that don't opt in keep their cached
|
|
899
|
+
// packs addressable.
|
|
900
|
+
const effectiveStrategy = strategy
|
|
901
|
+
|| (summarizeThenExpand ? 'summarize-then-expand' : null);
|
|
902
|
+
|
|
903
|
+
// Skip the semantic cache for summarize-then-expand packs. The cache key
|
|
904
|
+
// is (namespaces, maxItems, maxChars) — it doesn't include the strategy,
|
|
905
|
+
// so a cached flat pack would be served to an STE caller (and vice versa)
|
|
906
|
+
// with the wrong shape. Cheaper to recompute than to extend the cache key
|
|
907
|
+
// and invalidate every entry on disk.
|
|
908
|
+
const cacheHit = effectiveStrategy === 'summarize-then-expand'
|
|
909
|
+
? null
|
|
910
|
+
: findSemanticCacheHit({
|
|
911
|
+
query,
|
|
912
|
+
namespaces: normalizedNamespaces,
|
|
913
|
+
maxItems,
|
|
914
|
+
maxChars,
|
|
915
|
+
});
|
|
764
916
|
|
|
765
917
|
if (cacheHit) {
|
|
766
918
|
const packId = `pack_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
|
|
@@ -796,25 +948,51 @@ function constructContextPack({ query = '', maxItems = 8, maxChars = 6000, names
|
|
|
796
948
|
.sort((a, b) => b.score - a.score);
|
|
797
949
|
|
|
798
950
|
const hierarchicalRetrievalEnabled = shouldUseHierarchicalRetrieval(normalizedNamespaces);
|
|
799
|
-
|
|
800
|
-
|
|
951
|
+
let selection;
|
|
952
|
+
if (effectiveStrategy === 'summarize-then-expand') {
|
|
953
|
+
// Explicit opt-in: bypass the hierarchical path entirely. The
|
|
954
|
+
// summarize-then-expand selector assumes a flat ranked list where each
|
|
955
|
+
// item is a single episode, and mixing it with theme-based hierarchical
|
|
956
|
+
// retrieval would double-compress the top-of-list.
|
|
957
|
+
selection = selectSummarizeThenExpand(candidates, maxItems, maxChars);
|
|
958
|
+
} else if (hierarchicalRetrievalEnabled) {
|
|
959
|
+
selection = retrieveHierarchicalDocuments({
|
|
801
960
|
documents: candidates.map((candidate) => candidate.doc),
|
|
802
961
|
query,
|
|
803
962
|
maxItems,
|
|
804
963
|
maxChars,
|
|
805
964
|
scorer: scoreDocument,
|
|
806
965
|
measureDocument: measureDocumentChars,
|
|
807
|
-
})
|
|
808
|
-
|
|
966
|
+
});
|
|
967
|
+
} else {
|
|
968
|
+
selection = selectFlatContextItems(candidates, maxItems, maxChars);
|
|
969
|
+
}
|
|
809
970
|
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
971
|
+
// The flat + hierarchical paths emit raw docs; summarize-then-expand emits
|
|
972
|
+
// fully-shaped items that already carry structuredContext and a `tier`
|
|
973
|
+
// marker. Detect the shape so we don't double-canonicalize STE items
|
|
974
|
+
// (which would re-expand every summary into full content).
|
|
975
|
+
const selected = selection.items.map((item) => {
|
|
976
|
+
if (item && item.structuredContext) {
|
|
977
|
+
return {
|
|
978
|
+
id: item.id,
|
|
979
|
+
namespace: item.namespace,
|
|
980
|
+
title: item.title,
|
|
981
|
+
structuredContext: item.structuredContext,
|
|
982
|
+
tags: item.tags || [],
|
|
983
|
+
score: typeof item.score === 'number' ? item.score : scoreDocument(item, tokens),
|
|
984
|
+
...(item.tier ? { tier: item.tier } : {}),
|
|
985
|
+
};
|
|
986
|
+
}
|
|
987
|
+
return {
|
|
988
|
+
id: item.id,
|
|
989
|
+
namespace: item.namespace,
|
|
990
|
+
title: item.title,
|
|
991
|
+
structuredContext: buildStructuredContext(item),
|
|
992
|
+
tags: item.tags || [],
|
|
993
|
+
score: scoreDocument(item, tokens),
|
|
994
|
+
};
|
|
995
|
+
});
|
|
818
996
|
const usedChars = selection.usedChars;
|
|
819
997
|
const skippedByMaxChars = selection.skippedByMaxChars;
|
|
820
998
|
|
|
@@ -848,19 +1026,23 @@ function constructContextPack({ query = '', maxItems = 8, maxChars = 6000, names
|
|
|
848
1026
|
};
|
|
849
1027
|
|
|
850
1028
|
appendJsonl(contextFsPath(NAMESPACES.provenance, 'packs.jsonl'), pack);
|
|
851
|
-
|
|
852
|
-
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
859
|
-
|
|
860
|
-
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
1029
|
+
// Symmetric with the cache read: don't persist STE packs into the shared
|
|
1030
|
+
// semantic cache because the cache key is strategy-agnostic.
|
|
1031
|
+
if (effectiveStrategy !== 'summarize-then-expand') {
|
|
1032
|
+
appendSemanticCacheEntry({
|
|
1033
|
+
id: `cache_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
|
|
1034
|
+
timestamp: nowIso(),
|
|
1035
|
+
key: buildSemanticCacheKey({
|
|
1036
|
+
namespaces: normalizedNamespaces,
|
|
1037
|
+
maxItems,
|
|
1038
|
+
maxChars,
|
|
1039
|
+
}),
|
|
1040
|
+
query,
|
|
1041
|
+
tokens,
|
|
1042
|
+
sourceHash,
|
|
1043
|
+
pack,
|
|
1044
|
+
});
|
|
1045
|
+
}
|
|
864
1046
|
recordProvenance({
|
|
865
1047
|
type: 'context_pack_constructed',
|
|
866
1048
|
packId,
|
package/scripts/feedback-loop.js
CHANGED
|
@@ -1167,6 +1167,15 @@ function captureFeedback(params) {
|
|
|
1167
1167
|
timestamp: now,
|
|
1168
1168
|
};
|
|
1169
1169
|
|
|
1170
|
+
// Stamp a cross-session canonical hash on every memory record so future
|
|
1171
|
+
// captures can short-circuit dedup without re-canonicalizing legacy entries.
|
|
1172
|
+
// See scripts/lesson-canonical.js for the normalization contract.
|
|
1173
|
+
try {
|
|
1174
|
+
const { canonicalHash } = require('./lesson-canonical');
|
|
1175
|
+
const hash = canonicalHash(memoryRecord);
|
|
1176
|
+
if (hash) memoryRecord.canonicalHash = hash;
|
|
1177
|
+
} catch (_canonErr) { /* canonical hashing is non-blocking */ }
|
|
1178
|
+
|
|
1170
1179
|
// Bayesian Belief Update (Project Bayes)
|
|
1171
1180
|
try {
|
|
1172
1181
|
const { updateBelief, shouldPrune } = require('./belief-update');
|
|
@@ -1210,14 +1219,49 @@ function captureFeedback(params) {
|
|
|
1210
1219
|
const merged = mergeIntoExisting(MEMORY_LOG_PATH, similar.match, memoryRecord, feedbackEvent);
|
|
1211
1220
|
synthesisResult = { action: 'merged', existingId: similar.match.id, similarity: similar.similarity, occurrences: merged.occurrences };
|
|
1212
1221
|
|
|
1213
|
-
// Auto-promote if threshold reached
|
|
1222
|
+
// Auto-promote if threshold reached, but only after the rule
|
|
1223
|
+
// validator (scripts/rule-validator.js) confirms the proposed trigger
|
|
1224
|
+
// matches the seed lesson and has acceptable precision on recent
|
|
1225
|
+
// overlapping-tag events. This plugs the Autogenesis "validate
|
|
1226
|
+
// before integrate" phase that was missing from the original
|
|
1227
|
+
// promotion path — previously every threshold-crossing lesson
|
|
1228
|
+
// shipped a rule regardless of whether it would over-block positives.
|
|
1214
1229
|
if (shouldAutoPromote(merged)) {
|
|
1215
1230
|
const rule = synthesizePreventionRule(merged);
|
|
1216
|
-
|
|
1231
|
+
let validation = null;
|
|
1232
|
+
try {
|
|
1233
|
+
const { validateProposedRule } = require('./rule-validator');
|
|
1234
|
+
// Sample the last 50 memory events across both signals. Using
|
|
1235
|
+
// memory-log rather than feedback-log because memory records
|
|
1236
|
+
// carry the richer title/content fields the validator scores
|
|
1237
|
+
// against, and findSimilarLesson already reads this file.
|
|
1238
|
+
const recentEvents = readJSONL(MEMORY_LOG_PATH).slice(-50);
|
|
1239
|
+
validation = validateProposedRule(rule, {
|
|
1240
|
+
seedLesson: merged,
|
|
1241
|
+
recentEvents,
|
|
1242
|
+
});
|
|
1243
|
+
rule.validation = validation;
|
|
1244
|
+
} catch (_valErr) {
|
|
1245
|
+
// Validator failure must not block the existing pipeline; fall
|
|
1246
|
+
// back to the legacy "promote unconditionally" behavior.
|
|
1247
|
+
validation = { shouldPromote: true, reason: 'validator_error', error: _valErr.message };
|
|
1248
|
+
rule.validation = validation;
|
|
1249
|
+
}
|
|
1250
|
+
|
|
1217
1251
|
synthesisResult.preventionRule = rule;
|
|
1218
|
-
|
|
1219
|
-
|
|
1220
|
-
|
|
1252
|
+
synthesisResult.validation = validation;
|
|
1253
|
+
if (validation.shouldPromote) {
|
|
1254
|
+
synthesisResult.autoPromoted = true;
|
|
1255
|
+
// Store the synthesized rule
|
|
1256
|
+
const rulesPath = path.join(path.dirname(MEMORY_LOG_PATH), 'synthesized-rules.jsonl');
|
|
1257
|
+
appendJSONLLocal(rulesPath, rule);
|
|
1258
|
+
} else {
|
|
1259
|
+
// Park rejected rules in a side log so operators can audit them.
|
|
1260
|
+
synthesisResult.autoPromoted = false;
|
|
1261
|
+
synthesisResult.rejectionReason = validation.reason;
|
|
1262
|
+
const rejectedPath = path.join(path.dirname(MEMORY_LOG_PATH), 'rejected-rules.jsonl');
|
|
1263
|
+
appendJSONLLocal(rejectedPath, rule);
|
|
1264
|
+
}
|
|
1221
1265
|
}
|
|
1222
1266
|
} else {
|
|
1223
1267
|
// No similar lesson — check exact duplicate, then store
|
|
@@ -16,8 +16,10 @@
|
|
|
16
16
|
*/
|
|
17
17
|
|
|
18
18
|
const path = require('path');
|
|
19
|
+
const fs = require('fs');
|
|
19
20
|
|
|
20
21
|
const HARNESS_DIR = path.join(__dirname, '..', 'config', 'gates');
|
|
22
|
+
const ROOT_DIR = path.join(__dirname, '..');
|
|
21
23
|
|
|
22
24
|
const HARNESSES = Object.freeze({
|
|
23
25
|
deploy: path.join(HARNESS_DIR, 'deploy.json'),
|
|
@@ -113,6 +115,132 @@ function getHarnessPath(name) {
|
|
|
113
115
|
return HARNESSES[name] ?? null;
|
|
114
116
|
}
|
|
115
117
|
|
|
118
|
+
/**
 * Roughly estimate how many tokens a piece of text occupies.
 *
 * The estimate is the UTF-8 byte length of the text divided by
 * `charsPerToken`, rounded up. Falsy text counts as empty. A divisor that is
 * zero or not numeric falls back to 4, and anything below 1 is raised to 1.
 *
 * @param {*} text - Value to measure; stringified when truthy.
 * @param {number} [charsPerToken=4] - Bytes assumed per token.
 * @returns {number} Estimated token count.
 */
function estimateTokenCount(text, charsPerToken = 4) {
  const byteLength = Buffer.byteLength(String(text || ''), 'utf8');
  const requestedDivisor = Number(charsPerToken) || 4;
  const safeDivisor = requestedDivisor < 1 ? 1 : requestedDivisor;
  return Math.ceil(byteLength / safeDivisor);
}
|
|
123
|
+
|
|
124
|
+
/**
 * Read a file as UTF-8 text on a best-effort basis.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {string} The file contents, or '' when the read throws for any
 *   reason (missing file, directory, permissions, ...).
 */
function readIfExists(filePath) {
  let contents = '';
  try {
    contents = fs.readFileSync(filePath, { encoding: 'utf8' });
  } catch {
    // Best effort: any read failure is reported as empty text.
    contents = '';
  }
  return contents;
}
|
|
131
|
+
|
|
132
|
+
/**
 * Read and parse a JSON file on a best-effort basis.
 *
 * @param {string} filePath - Path of the JSON file.
 * @returns {*} The parsed value, or null when reading or parsing throws.
 */
function readJsonIfExists(filePath) {
  let parsed = null;
  try {
    const rawText = fs.readFileSync(filePath, 'utf8');
    parsed = JSON.parse(rawText);
  } catch {
    // Missing, unreadable and malformed files all collapse to null.
    parsed = null;
  }
  return parsed;
}
|
|
139
|
+
|
|
140
|
+
/**
 * Gather the raw measurements that the harness audit scores.
 *
 * Reads AGENTS.md, CLAUDE.md and GEMINI.md from `rootDir`, plus the MCP tool
 * index at `.well-known/mcp/tools.json`. All reads are best-effort: a file
 * that cannot be read contributes empty text, and a tool index that cannot be
 * read or parsed contributes zero tools.
 *
 * @param {string} [rootDir=ROOT_DIR] - Directory to inspect.
 * @returns {{globalDocs: Array<{name: string, chars: number, estimatedTokens: number, exists: boolean}>, mcpToolCount: number, progressiveToolIndexPresent: boolean, specializedHarnesses: *}}
 *   `specializedHarnesses` is whatever listHarnesses() returns.
 */
function collectDefaultHarnessAuditInputs(rootDir = ROOT_DIR) {
  const globalDocNames = ['AGENTS.md', 'CLAUDE.md', 'GEMINI.md'];
  const globalDocs = globalDocNames.map((name) => {
    const content = readIfExists(path.join(rootDir, name));
    return {
      name,
      // Despite the field name this is the UTF-8 byte length of the file.
      chars: Buffer.byteLength(content, 'utf8'),
      estimatedTokens: estimateTokenCount(content),
      // An empty file reads as '' and is therefore reported as not existing.
      exists: content.length > 0,
    };
  });

  const toolIndex = readJsonIfExists(path.join(rootDir, '.well-known', 'mcp', 'tools.json'));
  const tools = Array.isArray(toolIndex && toolIndex.tools) ? toolIndex.tools : [];

  // Guard against null entries in a malformed index: the rest of this
  // collector is best-effort, so one bad entry must not throw.
  const hasSchemaUrl = (tool) => tool != null && typeof tool.schemaUrl === 'string';

  return {
    globalDocs,
    mcpToolCount: tools.length,
    progressiveToolIndexPresent: tools.some(hasSchemaUrl),
    specializedHarnesses: listHarnesses(),
  };
}
|
|
161
|
+
|
|
162
|
+
/**
 * Score how lean the agent harness is and explain the result.
 *
 * Starts at 100 and subtracts penalties:
 *   - up to 35 when the global docs exceed the token budget
 *     (ceil((tokens / budget - 1) * 22), capped at 35);
 *   - 25 when more than 12 MCP tools exist without a progressive tool index;
 *   - 18 when fewer than three specialized harnesses are available;
 *   - 4 per doc flagged `exists === false`, capped at 12.
 * The score is clamped to 0..100 and mapped to a status:
 * >= 85 'compounding', >= 65 'watch', otherwise 'bloated'.
 *
 * @param {object} [inputs] - Measurements: `globalDocs` (array of
 *   `{ name, chars, estimatedTokens, exists }`), `mcpToolCount`,
 *   `progressiveToolIndexPresent`, `specializedHarnesses` (array of names).
 *   Missing or malformed fields fall back to empty/zero values.
 * @param {object} [options] - `docTokenBudget` overrides the default budget
 *   of 9000; values that are not a positive number are ignored.
 * @returns {{name: string, status: string, score: number, roiPriority: string, totals: object, signals: object, observations: string[], recommendations: string[]}}
 */
function scoreHarnessAudit(inputs = {}, options = {}) {
  const DEFAULT_DOC_TOKEN_BUDGET = 9000;

  // Default parameters only cover `undefined`; accept an explicit null too.
  const auditInputs = inputs ?? {};
  const auditOptions = options ?? {};

  // Non-numeric values count as 0 so one bad entry cannot turn a total into NaN.
  const numberOrZero = (value) => {
    const parsed = Number(value);
    return Number.isNaN(parsed) ? 0 : parsed;
  };

  // Drop null / non-object entries instead of throwing on property access.
  const globalDocs = Array.isArray(auditInputs.globalDocs)
    ? auditInputs.globalDocs.filter((doc) => doc !== null && typeof doc === 'object')
    : [];
  const totalDocTokens = globalDocs.reduce((sum, doc) => sum + numberOrZero(doc.estimatedTokens), 0);
  const totalDocChars = globalDocs.reduce((sum, doc) => sum + numberOrZero(doc.chars), 0);

  // A NaN budget would silently disable the over-budget check and a negative
  // one would turn the penalty into a bonus, so only positive budgets are used.
  const requestedBudget = Number(auditOptions.docTokenBudget);
  const docTokenBudget = requestedBudget > 0 ? requestedBudget : DEFAULT_DOC_TOKEN_BUDGET;
  const docsOverBudget = totalDocTokens > docTokenBudget;

  const mcpToolCount = numberOrZero(auditInputs.mcpToolCount);
  const progressiveToolIndexPresent = Boolean(auditInputs.progressiveToolIndexPresent);
  const specializedHarnesses = Array.isArray(auditInputs.specializedHarnesses)
    ? auditInputs.specializedHarnesses
    : [];
  const hasSpecializedHarnesses = specializedHarnesses.length >= 3;
  const missingDocs = globalDocs.filter((doc) => doc.exists === false).map((doc) => doc.name);
  const observations = [];
  const recommendations = [];

  let score = 100;
  if (docsOverBudget) {
    const overageRatio = totalDocTokens / docTokenBudget;
    score -= Math.min(35, Math.ceil((overageRatio - 1) * 22));
    observations.push(`Global agent docs use about ${totalDocTokens} tokens against a ${docTokenBudget} token harness budget.`);
    recommendations.push('Move verbose runbooks into skills, guides, or tool help, then leave AGENTS.md/CLAUDE.md as short discovery pointers.');
  } else {
    observations.push(`Global agent docs stay within the ${docTokenBudget} token harness budget.`);
  }

  if (!progressiveToolIndexPresent && mcpToolCount > 12) {
    score -= 25;
    observations.push(`${mcpToolCount} MCP tools appear preload-only, which can push agents toward instruction bloat.`);
    recommendations.push('Expose a lightweight MCP tool index with per-tool schema URLs so agents fetch schemas only when needed.');
  } else if (progressiveToolIndexPresent) {
    observations.push('Progressive MCP tool discovery is available through schema URLs.');
  }

  if (!hasSpecializedHarnesses) {
    score -= 18;
    observations.push('Fewer than three specialized gate harnesses are available for risky workflows.');
    recommendations.push('Add workflow-specific harnesses for deploy, code-edit, and database-write actions so default gates stay lean.');
  } else {
    observations.push(`Specialized harnesses are available: ${specializedHarnesses.join(', ')}.`);
  }

  if (missingDocs.length > 0) {
    score -= Math.min(12, missingDocs.length * 4);
    recommendations.push(`Restore missing global discovery docs or remove stale references: ${missingDocs.join(', ')}.`);
  }

  // Always close with the workflow tip; its wording depends on whether any
  // corrective recommendation was raised above.
  if (recommendations.length === 0) {
    recommendations.push('Keep using Research -> Plan -> Implement prompts and delegate only subtasks whose summaries are enough for the main context.');
  } else {
    recommendations.push('Use Research -> Plan -> Implement prompts so implementation starts after the harness has isolated only the needed context.');
  }

  const normalizedScore = Math.max(0, Math.min(100, score));
  let status = 'bloated';
  if (normalizedScore >= 85) {
    status = 'compounding';
  } else if (normalizedScore >= 65) {
    status = 'watch';
  }

  return {
    name: 'thumbgate-harness-optimization-audit',
    status,
    score: normalizedScore,
    roiPriority: normalizedScore < 85 ? 'conversion' : 'retention',
    totals: {
      globalDocChars: totalDocChars,
      globalDocEstimatedTokens: totalDocTokens,
      mcpToolCount,
      specializedHarnessCount: specializedHarnesses.length,
    },
    signals: {
      docsOverBudget,
      progressiveToolIndexPresent,
      hasSpecializedHarnesses,
      missingDocs,
    },
    observations,
    recommendations,
  };
}
|
|
237
|
+
|
|
238
|
+
/**
 * Produce the harness optimization audit in one call.
 *
 * Uses `options.inputs` when it is truthy; otherwise collects inputs from
 * `options.rootDir` (falling back to ROOT_DIR). The whole `options` object is
 * forwarded to scoreHarnessAudit.
 *
 * @param {object} [options] - `rootDir`, `inputs`, plus any scoring options.
 * @returns {object} The result of scoreHarnessAudit.
 */
function buildHarnessOptimizationAudit(options = {}) {
  let auditInputs = options.inputs;
  if (!auditInputs) {
    const auditRoot = options.rootDir || ROOT_DIR;
    auditInputs = collectDefaultHarnessAuditInputs(auditRoot);
  }
  return scoreHarnessAudit(auditInputs, options);
}
|
|
243
|
+
|
|
116
244
|
// ---------------------------------------------------------------------------
|
|
117
245
|
// Internal helpers
|
|
118
246
|
// ---------------------------------------------------------------------------
|
|
@@ -140,6 +268,10 @@ module.exports = {
|
|
|
140
268
|
selectHarnessName,
|
|
141
269
|
listHarnesses,
|
|
142
270
|
getHarnessPath,
|
|
271
|
+
estimateTokenCount,
|
|
272
|
+
collectDefaultHarnessAuditInputs,
|
|
273
|
+
scoreHarnessAudit,
|
|
274
|
+
buildHarnessOptimizationAudit,
|
|
143
275
|
extractCommandText,
|
|
144
276
|
HARNESSES,
|
|
145
277
|
DEPLOY_PATTERNS,
|