thumbgate 1.14.1 → 1.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (150)
  1. package/.claude-plugin/marketplace.json +6 -6
  2. package/.claude-plugin/plugin.json +3 -3
  3. package/.well-known/llms.txt +5 -5
  4. package/.well-known/mcp/server-card.json +1 -1
  5. package/README.md +60 -35
  6. package/adapters/chatgpt/openapi.yaml +118 -2
  7. package/adapters/claude/.mcp.json +2 -2
  8. package/adapters/mcp/server-stdio.js +217 -84
  9. package/adapters/opencode/opencode.json +1 -1
  10. package/bench/prompt-eval-suite.json +5 -1
  11. package/bin/cli.js +211 -8
  12. package/config/enforcement.json +59 -7
  13. package/config/evals/agent-safety-eval.json +338 -22
  14. package/config/gates/default.json +33 -0
  15. package/config/gates/routine.json +43 -0
  16. package/config/github-about.json +3 -3
  17. package/config/mcp-allowlists.json +4 -0
  18. package/config/merge-quality-checks.json +2 -1
  19. package/config/model-candidates.json +131 -0
  20. package/openapi/openapi.yaml +118 -2
  21. package/package.json +70 -51
  22. package/public/blog.html +7 -7
  23. package/public/codex-plugin.html +13 -7
  24. package/public/compare.html +29 -23
  25. package/public/dashboard.html +105 -12
  26. package/public/guide.html +28 -28
  27. package/public/index.html +233 -97
  28. package/public/learn.html +87 -20
  29. package/public/lessons.html +26 -2
  30. package/public/numbers.html +271 -0
  31. package/public/pro.html +89 -19
  32. package/scripts/agent-audit-trace.js +55 -0
  33. package/scripts/agent-memory-lifecycle.js +96 -0
  34. package/scripts/agent-readiness-plan.js +118 -0
  35. package/scripts/agentic-data-pipeline.js +21 -1
  36. package/scripts/agents-sdk-sandbox-plan.js +57 -0
  37. package/scripts/ai-org-governance.js +98 -0
  38. package/scripts/ai-search-distribution.js +43 -0
  39. package/scripts/artifact-agent-plan.js +81 -0
  40. package/scripts/billing.js +27 -8
  41. package/scripts/cli-feedback.js +2 -1
  42. package/scripts/cli-schema.js +60 -5
  43. package/scripts/code-mode-mcp-plan.js +71 -0
  44. package/scripts/commercial-offer.js +1 -1
  45. package/scripts/context-engine.js +1 -2
  46. package/scripts/context-manager.js +4 -1
  47. package/scripts/contextfs.js +214 -32
  48. package/scripts/dashboard-render-spec.js +1 -1
  49. package/scripts/dashboard.js +275 -9
  50. package/scripts/decision-journal.js +13 -3
  51. package/scripts/document-workflow-governance.js +62 -0
  52. package/scripts/enterprise-agent-rollout.js +34 -0
  53. package/scripts/experience-replay-governance.js +69 -0
  54. package/scripts/export-hf-dataset.js +1 -1
  55. package/scripts/feedback-loop.js +141 -9
  56. package/scripts/feedback-to-rules.js +17 -23
  57. package/scripts/gates-engine.js +4 -6
  58. package/scripts/growth-campaigns.js +49 -0
  59. package/scripts/harness-selector.js +145 -1
  60. package/scripts/hybrid-supervisor-agent.js +64 -0
  61. package/scripts/inference-cache-policy.js +72 -0
  62. package/scripts/inference-economics.js +53 -0
  63. package/scripts/internal-agent-bootstrap.js +12 -2
  64. package/scripts/knowledge-layer-plan.js +108 -0
  65. package/scripts/lesson-canonical.js +181 -0
  66. package/scripts/lesson-db.js +71 -10
  67. package/scripts/lesson-inference.js +183 -44
  68. package/scripts/lesson-search.js +4 -1
  69. package/scripts/lesson-synthesis.js +23 -2
  70. package/scripts/llm-client.js +157 -26
  71. package/scripts/mailer/resend-mailer.js +112 -1
  72. package/scripts/mcp-transport-strategy.js +66 -0
  73. package/scripts/memory-store-governance.js +60 -0
  74. package/scripts/meta-agent-loop.js +7 -13
  75. package/scripts/model-access-eligibility.js +38 -0
  76. package/scripts/model-migration-readiness.js +55 -0
  77. package/scripts/native-messaging-audit.js +514 -0
  78. package/scripts/operational-integrity.js +96 -3
  79. package/scripts/otel-declarative-config.js +56 -0
  80. package/scripts/perplexity-client.js +1 -1
  81. package/scripts/post-training-governance.js +34 -0
  82. package/scripts/pr-manager.js +47 -7
  83. package/scripts/private-core-boundary.js +72 -0
  84. package/scripts/production-agent-readiness.js +40 -0
  85. package/scripts/profile-router.js +16 -1
  86. package/scripts/prompt-eval.js +564 -32
  87. package/scripts/prompt-programs.js +93 -0
  88. package/scripts/provider-action-normalizer.js +585 -0
  89. package/scripts/rule-validator.js +285 -0
  90. package/scripts/scaling-law-claims.js +60 -0
  91. package/scripts/security-scanner.js +1 -1
  92. package/scripts/self-distill-agent.js +7 -32
  93. package/scripts/seo-gsd.js +400 -43
  94. package/scripts/skill-rag-router.js +53 -0
  95. package/scripts/spec-gate.js +1 -1
  96. package/scripts/student-consistent-training.js +73 -0
  97. package/scripts/synthetic-data-provenance.js +98 -0
  98. package/scripts/task-context-result.js +81 -0
  99. package/scripts/telemetry-analytics.js +149 -0
  100. package/scripts/thompson-sampling.js +2 -2
  101. package/scripts/token-savings.js +7 -6
  102. package/scripts/token-tco.js +46 -0
  103. package/scripts/tool-registry.js +75 -3
  104. package/scripts/verification-loop.js +10 -1
  105. package/scripts/verifier-scoring.js +71 -0
  106. package/scripts/workflow-sentinel.js +284 -28
  107. package/scripts/workspace-agent-routines.js +118 -0
  108. package/skills/thumbgate/SKILL.md +1 -1
  109. package/src/api/server.js +434 -120
  110. package/.claude-plugin/README.md +0 -170
  111. package/adapters/README.md +0 -12
  112. package/scripts/analytics-report.js +0 -328
  113. package/scripts/autonomous-workflow.js +0 -377
  114. package/scripts/billing-setup.js +0 -109
  115. package/scripts/creator-campaigns.js +0 -239
  116. package/scripts/cross-encoder-reranker.js +0 -235
  117. package/scripts/daemon-manager.js +0 -108
  118. package/scripts/decision-trace.js +0 -354
  119. package/scripts/delegation-runtime.js +0 -896
  120. package/scripts/dispatch-brief.js +0 -159
  121. package/scripts/distribution-surfaces.js +0 -110
  122. package/scripts/feedback-history-distiller.js +0 -382
  123. package/scripts/funnel-analytics.js +0 -35
  124. package/scripts/history-distiller.js +0 -200
  125. package/scripts/hosted-job-launcher.js +0 -256
  126. package/scripts/intent-router.js +0 -392
  127. package/scripts/lesson-reranker.js +0 -263
  128. package/scripts/lesson-retrieval.js +0 -148
  129. package/scripts/managed-lesson-agent.js +0 -183
  130. package/scripts/operational-dashboard.js +0 -103
  131. package/scripts/operational-summary.js +0 -129
  132. package/scripts/operator-artifacts.js +0 -608
  133. package/scripts/optimize-context.js +0 -17
  134. package/scripts/org-dashboard.js +0 -206
  135. package/scripts/partner-orchestration.js +0 -146
  136. package/scripts/predictive-insights.js +0 -356
  137. package/scripts/pulse.js +0 -80
  138. package/scripts/reflector-agent.js +0 -221
  139. package/scripts/sales-pipeline.js +0 -681
  140. package/scripts/session-episode-store.js +0 -329
  141. package/scripts/session-health-sensor.js +0 -242
  142. package/scripts/session-report.js +0 -120
  143. package/scripts/swarm-coordinator.js +0 -81
  144. package/scripts/tool-kpi-tracker.js +0 -12
  145. package/scripts/webhook-delivery.js +0 -62
  146. package/scripts/workflow-sprint-intake.js +0 -475
  147. package/skills/agent-memory/SKILL.md +0 -97
  148. package/skills/solve-architecture-autonomy/SKILL.md +0 -17
  149. package/skills/solve-architecture-autonomy/tool.js +0 -33
  150. package/skills/thumbgate-feedback/SKILL.md +0 -49
@@ -0,0 +1,285 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * scripts/rule-validator.js
5
+ *
6
+ * Pre-promotion validation harness for synthesized prevention rules.
7
+ *
8
+ * Why this exists:
9
+ * Before this module, `synthesizePreventionRule` (lesson-synthesis.js) auto-
10
+ * promoted any lesson that hit the occurrence threshold straight into
11
+ * `synthesized-rules.jsonl` — no check that the proposed rule actually
12
+ * matches the mistake pattern it was synthesized from, and no check that
13
+ * it doesn't also fire on recent positive-signal events from overlapping
14
+ * tags. That's the exact failure mode Autogenesis
15
+ * (https://arxiv.org/abs/2604.15034) calls out: candidate improvements
16
+ * must be validated through testing before integration, otherwise static
17
+ * agents accumulate self-contradicting rules that degrade precision.
18
+ *
19
+ * We already had 3 of the 4 Autogenesis phases:
20
+ * - capability-gap identification (negative feedback events),
21
+ * - candidate generation (synthesizePreventionRule),
22
+ * - integration (append to synthesized-rules.jsonl).
23
+ * The missing phase was validation. This module fills it.
24
+ *
25
+ * Validation contract:
26
+ * A proposed rule is promotable iff:
27
+ * 1. It matches the seed lesson that triggered promotion (otherwise the
28
+ * rule is tautologically broken — it wouldn't catch the mistake it
29
+ * was built for).
30
+ * 2. Its precision on a recent-events sample clears a threshold
31
+ * (default 0.8) — of the events the rule fires on, most must carry
32
+ * the negative signal. A rule that blocks positive outcomes too is
33
+ * a regression, not a prevention.
34
+ *
35
+ * Recall is reported for operator visibility but does not gate
36
+ * promotion — an overly specific rule is less harmful than an overly
37
+ * broad one.
38
+ *
39
+ * Design notes:
40
+ * - Pure functions, no IO. Caller supplies the event samples so tests
41
+ * stay hermetic and the validator can run inside captureFeedback
42
+ * without reaching for the filesystem.
43
+ * - Token matching is deliberately simple (lowercase, punctuation strip,
44
+ * length-2+ tokens, all-tokens-present) so the behavior is debuggable
45
+ * from the console. We are not competing with NLP — we are gating a
46
+ * one-line trigger string against a handful of sibling events.
47
+ */
48
+
49
// Stop words removed by tokenize(). The list is kept small on purpose: it
// should only discard filler that would otherwise crowd out the tokens that
// make a trigger distinctive. When a trigger made of nothing but stop words
// still matches a positive event, that is a genuine false positive and must
// remain visible.
const STOP = new Set([].concat(
  // articles, prepositions, conjunctions
  ['a', 'an', 'the', 'to', 'of', 'in', 'on', 'at', 'for', 'and', 'or'],
  // auxiliary verbs
  ['is', 'are', 'was', 'were', 'be', 'do', 'does', 'did'],
  // demonstratives
  ['this', 'that', 'these', 'those'],
  // pronouns
  ['it', 'its', 'i', 'you', 'we', 'they']
));
58
+
59
// Modality and negation words that rule triggers tend to inherit from lesson
// titles such as "MISTAKE: never force-push". They are ordinary English, so
// tokenize() keeps them; they are removed only from a rule's trigger before
// matching, so that the rule can still fire on an event that describes the
// mistake without repeating the modality. In event text they stay meaningful.
const TRIGGER_MODALITY = new Set([
  'never',
  'always',
  'ever',
  'must',
  'not',
  'no',
]);
66
+
67
/**
 * Minimal suffix stripper, so that a morphological variant in a report
 * ("force-pushed" tokenizes to "pushed") still lines up with a trigger token
 * such as "push". This is not a real stemmer; it only removes a handful of
 * common English endings.
 *
 * Tokens of three characters or fewer are returned untouched. Otherwise the
 * first applicable rule wins: "-ing" (token longer than 5), "-ed" or "-es"
 * (token longer than 4), then a single trailing "s" that is not part of "ss".
 *
 * @param {string} token - A single lowercase token.
 * @returns {string} The token with at most one suffix removed.
 */
function stem(token) {
  const size = token.length;
  if (size <= 3) return token;

  // [suffix, token must be longer than this for the suffix to be stripped]
  const suffixRules = [
    ['ing', 5],
    ['ed', 4],
    ['es', 4],
  ];
  for (const [suffix, longerThan] of suffixRules) {
    if (token.endsWith(suffix) && size > longerThan) {
      return token.slice(0, -suffix.length);
    }
  }

  // Plain plural "s"; "ss" endings ("class", "pass") are left alone.
  const hasPluralS = token.endsWith('s') && !token.endsWith('ss');
  return hasPluralS ? token.slice(0, -1) : token;
}
84
+
85
/**
 * Turn free text into a list of normalized tokens.
 *
 * The text is lowercased, every character other than a-z, 0-9 and whitespace
 * becomes a space, and the result is split on whitespace. Tokens shorter than
 * two characters and tokens in STOP are dropped; the survivors are passed
 * through stem().
 *
 * @param {*} text - Anything; null and undefined produce an empty list, other
 *   values are converted with String().
 * @returns {string[]} Stemmed tokens in their original order.
 */
function tokenize(text) {
  if (text === undefined || text === null) return [];

  const words = String(text)
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, ' ')
    .split(/\s+/);

  const kept = [];
  for (const word of words) {
    // Stop words are removed before stemming, so STOP holds unstemmed forms.
    if (word.length > 1 && !STOP.has(word)) {
      kept.push(stem(word));
    }
  }
  return kept;
}
94
+
95
/**
 * Build the searchable text of an event by joining its free-text fields with
 * single spaces, in a fixed order: title, content, whatToChange,
 * whatWentWrong, whatWorked, context. Falsy fields are left out.
 *
 * @param {*} event - Event record; anything that is not an object yields ''.
 * @returns {string} The combined text, or '' when nothing is available.
 */
function eventText(event) {
  if (!event || typeof event !== 'object') return '';

  const fieldOrder = [
    'title',
    'content',
    'whatToChange',
    'whatWentWrong',
    'whatWorked',
    'context',
  ];

  const pieces = [];
  for (const field of fieldOrder) {
    const value = event[field];
    if (value) pieces.push(value);
  }
  return pieces.join(' ');
}
106
+
107
/**
 * Read an event's `signal` field and normalize it.
 *
 * The value is lowercased; 'up' and 'positive' become 'positive', 'down' and
 * 'negative' become 'negative', and any other value is returned lowercased.
 *
 * @param {*} event - Event record.
 * @returns {string|null} The normalized signal, or null when the event is not
 *   an object or its signal is missing/falsy.
 */
function eventSignal(event) {
  if (!event || typeof event !== 'object') return null;

  const { signal } = event;
  if (!signal) return null;

  const normalized = String(signal).toLowerCase();
  switch (normalized) {
    case 'up':
    case 'positive':
      return 'positive';
    case 'down':
    case 'negative':
      return 'negative';
    default:
      return normalized;
  }
}
116
+
117
/**
 * Does `rule` fire on `event`?
 *
 * A rule fires when every content token of `rule.rule.trigger.condition`
 * appears in the event's combined text in the same relative order
 * (subsequence match). Modality/negation words (TRIGGER_MODALITY) are removed
 * from the trigger first, so the rule still fires on events that describe the
 * mistake without echoing "never"/"always".
 *
 * A trigger with no content tokens never fires: that is a degenerate rule and
 * the validator should reject it rather than let it match everything.
 *
 * Order matters because it is the cheapest way to tell "force-push to main
 * caused incident" (the trigger narrates the action) from "main branch
 * healthy, no force push" (same tokens, different narrative).
 *
 * @param {object} rule - Rule record; a missing trigger condition yields false.
 * @param {object} event - Event record, read through eventText().
 * @returns {boolean} True when the trigger's content tokens form an ordered
 *   subsequence of the event's tokens.
 */
function ruleMatches(rule, event) {
  const trigger = rule && rule.rule && rule.rule.trigger && rule.rule.trigger.condition;

  // tokenize() stems its output, so a modality word can come back in a form
  // that is not in TRIGGER_MODALITY ("always" becomes "alway"). Run the
  // modality words through the same pipeline and filter on both forms.
  const modalityStems = new Set(tokenize(Array.from(TRIGGER_MODALITY).join(' ')));
  const tokens = tokenize(trigger).filter(
    (t) => !TRIGGER_MODALITY.has(t) && !modalityStems.has(t)
  );
  if (tokens.length === 0) return false;

  const haystack = tokenize(eventText(event));
  let hi = 0;
  for (const t of tokens) {
    // Advance to the next occurrence of t at or after the current position.
    while (hi < haystack.length && haystack[hi] !== t) hi += 1;
    if (hi >= haystack.length) return false;
    hi += 1;
  }
  return true;
}
145
+
146
/**
 * Count true-positive / false-positive / false-negative / true-negative
 * firings of `rule` on a sample of events.
 *
 * Tags scope the sample: a tagged event is skipped when it shares no tag with
 * `scopeTags` (if any were given) or with the rule's own tags (if it has any).
 * Untagged events are always kept. Events whose signal is neither 'positive'
 * nor 'negative' are evaluated but not counted.
 *
 * An empty or missing `scopeTags` means "no scope restriction", matching how
 * an empty `rule.tags` is treated. Non-array tag values and null/undefined
 * entries in `events` are tolerated rather than throwing.
 *
 * @param {object} rule - Rule record; `rule.tags` is optional.
 * @param {Array} events - Sample events; a non-array is treated as empty.
 * @param {object} [options]
 * @param {string[]|null} [options.scopeTags=null] - Tags that restrict the sample.
 * @returns {{tp: number, fp: number, fn: number, tn: number,
 *   precision: (number|null), recall: (number|null)}} Counts plus precision
 *   (null when the rule never fired) and recall (null when the sample had no
 *   negative events).
 */
function scoreOnSample(rule, events, { scopeTags = null } = {}) {
  const lower = (t) => String(t).toLowerCase();
  // Normalizes any tag input into a lowercase Set; non-arrays become empty.
  const toTagSet = (tags) => new Set((Array.isArray(tags) ? tags : []).filter(Boolean).map(lower));

  const ruleTags = toTagSet(rule && rule.tags);
  const scope = toTagSet(scopeTags);

  let tp = 0;
  let fp = 0;
  let fn = 0;
  let tn = 0;

  for (const event of Array.isArray(events) ? events : []) {
    const tags = event && Array.isArray(event.tags) ? event.tags.map(lower) : [];

    // Out-of-scope events are ignored: they have nothing to say about this
    // rule's precision. An empty tag set imposes no restriction.
    if (scope.size > 0 && tags.length > 0 && !tags.some((t) => scope.has(t))) continue;
    if (ruleTags.size > 0 && tags.length > 0 && !tags.some((t) => ruleTags.has(t))) continue;

    const fires = ruleMatches(rule, event);
    const signal = eventSignal(event);

    if (signal === 'negative' && fires) tp += 1;
    else if (signal === 'positive' && fires) fp += 1;
    else if (signal === 'negative' && !fires) fn += 1;
    else if (signal === 'positive' && !fires) tn += 1;
  }

  const firings = tp + fp;
  const negatives = tp + fn;
  return {
    tp,
    fp,
    fn,
    tn,
    precision: firings > 0 ? tp / firings : null,
    recall: negatives > 0 ? tp / negatives : null,
  };
}
192
+
193
// Default thresholds for validateProposedRule(); both can be overridden per call.
const DEFAULT_PRECISION_FLOOR = 0.8;
const DEFAULT_MIN_SAMPLE = 3;

/**
 * Top-level validator for a proposed rule. Produces a report whose
 * `shouldPromote` flag says whether the rule may be promoted and whose
 * `reason` says why, so a rejection can always be audited.
 *
 * Decision order:
 *   1. No rule, or no `rule.rule`: rejected, 'invalid_rule_shape'.
 *   2. The rule does not fire on `seedLesson` (or none was given): rejected,
 *      'rule_does_not_match_seed_lesson'.
 *   3. Fewer than `minSample` counted events in scope: promoted,
 *      'insufficient_sample' (permissive while feedback volume is small).
 *   4. The rule never fired on the sample: promoted, 'no_firings_in_sample'.
 *   5. Precision below `precisionFloor`: rejected, 'precision_below_floor'.
 *   6. Otherwise: promoted, 'validated'.
 *
 * Recall is reported but never gates promotion.
 *
 * @param {object} rule - Proposed rule record.
 * @param {object} [options]
 * @param {object} [options.seedLesson] - Lesson the rule was synthesized from.
 * @param {Array} [options.recentEvents=[]] - Sample passed to scoreOnSample().
 * @param {number} [options.precisionFloor=DEFAULT_PRECISION_FLOOR]
 * @param {number} [options.minSample=DEFAULT_MIN_SAMPLE]
 * @returns {object} Report with shouldPromote, reason, matchesSeed, precision,
 *   recall, sampleSize, tp, fp, fn and tn.
 */
function validateProposedRule(rule, {
  seedLesson,
  recentEvents = [],
  precisionFloor = DEFAULT_PRECISION_FLOOR,
  minSample = DEFAULT_MIN_SAMPLE,
} = {}) {
  const report = {
    shouldPromote: false,
    reason: null,
    matchesSeed: false,
    precision: null,
    recall: null,
    sampleSize: 0,
    tp: 0,
    fp: 0,
    fn: 0,
    tn: 0,
  };

  // Records the verdict on the report and hands the report back.
  const conclude = (shouldPromote, reason) => {
    report.shouldPromote = shouldPromote;
    report.reason = reason;
    return report;
  };

  if (!rule || !rule.rule) {
    return conclude(false, 'invalid_rule_shape');
  }

  // Invariant 1: the rule has to fire on the lesson it was built from. When it
  // does not, the trigger lost its discriminative tokens and no sample result
  // can rescue it.
  if (seedLesson) {
    report.matchesSeed = ruleMatches(rule, seedLesson);
  }
  if (!report.matchesSeed) {
    return conclude(false, 'rule_does_not_match_seed_lesson');
  }

  // Invariant 2: precision on recent events from the rule's own tag cluster
  // (the rule's tags are passed as the scorer's scope).
  const score = scoreOnSample(rule, recentEvents, { scopeTags: rule.tags });
  Object.assign(report, score);
  report.sampleSize = score.tp + score.fp + score.fn + score.tn;

  if (report.sampleSize < minSample) {
    // Too little data to prove harm: promote, but leave a marker for audit.
    return conclude(true, 'insufficient_sample');
  }

  if (report.precision === null) {
    // No firings at all in scope. The seed invariant held, so the topic is
    // simply quiet in the sample.
    return conclude(true, 'no_firings_in_sample');
  }

  if (report.precision < precisionFloor) {
    return conclude(false, 'precision_below_floor');
  }

  return conclude(true, 'validated');
}
275
+
276
+ module.exports = {
277
+ tokenize,
278
+ eventText,
279
+ eventSignal,
280
+ ruleMatches,
281
+ scoreOnSample,
282
+ validateProposedRule,
283
+ DEFAULT_PRECISION_FLOOR,
284
+ DEFAULT_MIN_SAMPLE,
285
+ };
@@ -0,0 +1,60 @@
1
+ #!/usr/bin/env node
2
+ 'use strict';
3
+
4
/**
 * Coerce a value to a trimmed string.
 *
 * @param {*} value - Any value.
 * @returns {string} '' for null or undefined, otherwise String(value) with
 *   surrounding whitespace removed.
 */
function normalizeText(value) {
  const missing = value === null || value === undefined;
  return missing ? '' : String(value).trim();
}
8
+
9
/**
 * Sort a scaling claim into one of three categories by keyword.
 *
 * The claim is normalized and lowercased, then tested against the keyword
 * patterns in order; the first pattern that matches decides the category.
 * Pretraining vocabulary is checked before feedback/policy vocabulary.
 *
 * @param {*} claim - Claim text (any value; normalized with normalizeText()).
 * @returns {string} 'pretraining_scaling', 'feedback_policy_scaling', or
 *   'general_scaling' when no keyword matches.
 */
function classifyScalingClaim(claim) {
  const lowered = normalizeText(claim).toLowerCase();

  const categories = [
    ['pretraining_scaling', /\b(pretrain|pretraining|parameters|training tokens|flops|cross entropy|test loss)\b/],
    ['feedback_policy_scaling', /\b(rl|reinforcement|feedback|dpo|kto|reward|policy|thumbs[-\s]?(up|down)|gate|prevention rule)\b/],
  ];

  for (const [label, pattern] of categories) {
    if (pattern.test(lowered)) return label;
  }
  return 'general_scaling';
}
19
+
20
/**
 * Check whether a scaling claim is backed by the kinds of evidence its type
 * calls for.
 *
 * The claim is classified with classifyScalingClaim(), the evidence entries
 * are scanned for keywords (held-out evaluation, production data, RL compute,
 * sampling budget), and an issue code is recorded for every expected kind of
 * evidence that is missing. Absolute wording ("guarantee", "always", "never",
 * "100%", "prove(s/d/n)") additionally requires production evidence.
 *
 * Short keywords in the claim are matched as whole words, so that "rl" is not
 * found inside "clearly", "prove" inside "improves", or "vote" inside
 * "devote".
 *
 * @param {object} [input]
 * @param {*} [input.claim] - Claim text.
 * @param {Array} [input.evidence] - Evidence descriptions; falsy entries are
 *   ignored and a non-array counts as no evidence.
 * @returns {{claimType: string, decision: string, issues: string[],
 *   requiredEvidence: string[]}} `decision` is 'allow' when no issue was
 *   found, otherwise 'warn'.
 */
function evaluateScalingClaim(input = {}) {
  // A null input is treated like an empty one instead of throwing.
  const source = input || {};
  const claim = normalizeText(source.claim);
  const claimType = classifyScalingClaim(claim);
  const evidence = Array.isArray(source.evidence) ? source.evidence.filter(Boolean) : [];
  const heldout = evidence.some((entry) => /held[-\s]?out|validation|eval|ablation|backtest/i.test(String(entry)));
  const production = evidence.some((entry) => /production|real user|workflow run|decision journal|blocked action/i.test(String(entry)));
  const rlCompute = evidence.some((entry) => /sampling compute|rollout|trajectory|policy update|reward model|rl compute/i.test(String(entry)));
  const sampling = evidence.some((entry) => /pass@|best-of-n|majority vote|sample budget|sampling/i.test(String(entry)));
  const issues = [];

  if (!claim) issues.push('missing_claim');
  if (claimType === 'feedback_policy_scaling' && !heldout) {
    issues.push('missing_heldout_feedback_eval');
  }
  // \brl\b: a bare "rl" would also match words such as "clearly" or "world".
  if (claimType === 'feedback_policy_scaling' && /\brl\b|reinforcement|sampling/i.test(claim) && !rlCompute) {
    issues.push('missing_rl_compute_evidence');
  }
  // \bvote: a bare "vote" would also match "devote".
  if (claimType === 'feedback_policy_scaling' && /sampling|best-of|\bvote|pass@/i.test(claim) && !sampling) {
    issues.push('missing_sampling_budget_evidence');
  }
  if (claimType === 'pretraining_scaling' && evidence.length === 0) {
    issues.push('missing_model_scaling_evidence');
  }
  // \bprove[sdn]?\b: a bare "proves?" would also match "improves" and "approve".
  if (/guarantee|always|never|100%|\bprove[sdn]?\b/i.test(claim) && !production) {
    issues.push('absolute_claim_without_production_evidence');
  }

  return {
    claimType,
    decision: issues.length === 0 ? 'allow' : 'warn',
    issues,
    requiredEvidence: claimType === 'feedback_policy_scaling'
      ? ['held-out eval', 'ablation or backtest', 'RL/sampling compute budget when claimed', 'decision-journal production sample']
      : ['source data', 'validation metric', 'scope limits'],
  };
}
56
+
57
+ module.exports = {
58
+ classifyScalingClaim,
59
+ evaluateScalingClaim,
60
+ };
@@ -2,7 +2,7 @@
2
2
  'use strict';
3
3
 
4
4
  /**
5
- * Security Scanner — OWASP-aware static analysis for PreToolUse gates.
5
+ * Security Scanner — OWASP-aware static analysis for PreToolUse checks.
6
6
  *
7
7
  * Scans code being written/edited by AI agents for common vulnerability
8
8
  * patterns (injection, XSS, path traversal, etc.) and suspicious dependency
@@ -349,39 +349,14 @@ Return JSON only, no markdown fences:
349
349
  Focus on actionable, specific lessons. Ignore trivial interactions.`;
350
350
 
351
351
  async function callAnthropicApi(conversationText, model) {
352
- const apiKey = process.env.ANTHROPIC_API_KEY;
353
- if (!apiKey) return null;
354
-
355
- const body = JSON.stringify({
356
- model: model || 'claude-sonnet-4-20250514',
357
- max_tokens: 2048,
358
- system: LLM_SYSTEM_PROMPT,
359
- messages: [
360
- { role: 'user', content: `Analyze this conversation window and extract lessons:\n\n${conversationText}` },
361
- ],
352
+ const { callClaudeJson, MODELS } = require('./llm-client');
353
+ return callClaudeJson({
354
+ model: model || MODELS.SMART,
355
+ maxTokens: 2048,
356
+ systemPrompt: LLM_SYSTEM_PROMPT,
357
+ userPrompt: `Analyze this conversation window and extract lessons:\n\n${conversationText}`,
358
+ cache: true,
362
359
  });
363
-
364
- try {
365
- const resp = await fetch('https://api.anthropic.com/v1/messages', {
366
- method: 'POST',
367
- headers: {
368
- 'Content-Type': 'application/json',
369
- 'x-api-key': apiKey,
370
- 'anthropic-version': '2023-06-01',
371
- },
372
- body,
373
- });
374
-
375
- if (!resp.ok) return null;
376
-
377
- const data = await resp.json();
378
- const text = (data.content && data.content[0] && data.content[0].text) || '';
379
- // Strip markdown fences if present
380
- const cleaned = text.replace(/^```(?:json)?\s*/m, '').replace(/```\s*$/m, '').trim();
381
- return JSON.parse(cleaned);
382
- } catch {
383
- return null;
384
- }
385
360
  }
386
361
 
387
362
  async function generateLlmLessons(conversationWindow, model) {