mark-improving-agent 2.2.4 → 2.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/VERSION CHANGED
@@ -1 +1 @@
1
- 2.2.4
1
+ 2.2.6
@@ -2,3 +2,4 @@ export * from './multi-agent.js';
2
2
  export * from './agentic-loop.js';
3
3
  export * from './multi-agent-system.js';
4
4
  export { createMCPProtocol } from './mcp-protocol.js';
5
+ export { createPeerReviewSystem } from './peer-review.js';
@@ -0,0 +1,265 @@
1
+ /**
2
+ * Multi-Agent Peer Review System
3
+ *
4
+ * Enables cross-model peer review of agent decisions and outputs.
5
+ * Based on agentic-fleet-hub's peer review architecture.
6
+ *
7
+ * Key features:
8
+ * - Multiple agents review each other's work
9
+ * - Consensus-based approval
10
+ * - Dissent tracking for quality improvement
11
+ * - Reputation-weighted voting
12
+ *
13
+ * @module core/collaboration
14
+ * @fileoverview Cross-model peer review for agent outputs
15
+ */
16
+ import { randomUUID } from 'crypto';
17
+ import { createLogger } from '../../utils/logger.js';
18
+ const logger = createLogger('PeerReview');
19
/**
 * Built-in reviewer roster used to seed every peer review system.
 *
 * Each tuple is [id, name, model, specialties, reputation]; the activity
 * counters (reviewsCompleted, approvalRate) always start at zero.
 */
const DEFAULT_REVIEWERS = [
    ['reviewer-logic', 'Logic Reviewer', 'claude-opus', ['reasoning', 'logic', 'consistency'], 0.95],
    ['reviewer-safety', 'Safety Reviewer', 'claude-opus', ['safety', 'ethics', 'harm prevention'], 0.98],
    ['reviewer-quality', 'Quality Reviewer', 'claude-sonnet', ['code quality', 'documentation', 'best practices'], 0.92],
    ['reviewer-creativity', 'Creativity Reviewer', 'claude-haiku', ['innovation', 'alternatives', 'creative solutions'], 0.88],
].map(([id, name, model, specialties, reputation]) => ({
    id,
    name,
    model,
    specialties,
    reputation,
    reviewsCompleted: 0,
    approvalRate: 0,
}));
60
/**
 * Create an in-memory peer review system.
 *
 * Items are submitted, automatically assigned to reviewers, and finalized
 * once enough votes are in and a reputation-weighted consensus exists.
 *
 * @param {object} [options]
 * @param {number} [options.consensusThreshold=0.7] Weighted approval share needed to approve.
 * @param {number} [options.requiredReviewers=3] Votes required before consensus is evaluated.
 * @param {boolean} [options.enableArbitration=true] Turn a majority rejection into a
 *   revision request when at least one reviewer asked for a revision.
 * @returns {object} API: submitItem, registerReviewer, getReviewersForType,
 *   assignReviewers, submitVote, checkConsensus, getItemStatus, getPendingReviews, getStats.
 */
export function createPeerReviewSystem(options) {
    const consensusThreshold = options?.consensusThreshold ?? 0.7;
    const requiredReviewers = options?.requiredReviewers ?? 3;
    const enableArbitration = options?.enableArbitration ?? true;
    // State
    const items = new Map();
    // Copy the defaults: reviewer stats are mutated per vote and must not leak
    // into the module-level list shared by every system instance.
    const reviewers = new Map(DEFAULT_REVIEWERS.map(r => [r.id, { ...r, specialties: [...r.specialties] }]));
    const sessions = new Map();
    const reviewHistory = [];
    // Stats
    let totalReviewTime = 0;
    let consensusCount = 0;
    let revisionCount = 0;

    /**
     * Register a new item for review and immediately assign reviewers to it.
     * Per-item options override the system-wide threshold / reviewer count.
     * @returns {object} The stored item (status is 'in_review' after assignment).
     */
    function submitItem(submitterId, content, type, context, options) {
        const item = {
            id: randomUUID(),
            submitterId,
            content,
            type,
            context: context ?? {},
            status: 'pending',
            votes: [],
            createdAt: Date.now(),
            consensusThreshold: options?.consensusThreshold ?? consensusThreshold,
            requiredReviewers: options?.requiredReviewers ?? requiredReviewers,
        };
        items.set(item.id, item);
        logger.info(`Review item submitted: ${item.id} by ${submitterId}`);
        // Auto-assign reviewers
        assignReviewers(item.id);
        return item;
    }

    /**
     * Add (or replace) a reviewer. Activity counters always start at zero.
     */
    function registerReviewer(reviewer) {
        reviewers.set(reviewer.id, {
            ...reviewer,
            // A missing specialties list would crash specialty matching later.
            specialties: Array.isArray(reviewer.specialties) ? [...reviewer.specialties] : [],
            reviewsCompleted: 0,
            approvalRate: 0,
        });
        logger.info(`Reviewer registered: ${reviewer.name}`);
    }

    /** True when a specialty string is relevant for the given content type. */
    function specialtyMatches(specialty, contentType) {
        switch (contentType) {
            case 'code':
                return specialty.includes('code');
            case 'decision':
                return specialty.includes('reasoning') || specialty.includes('logic');
            case 'response':
                return specialty.includes('ethics') || specialty.includes('safety');
            default:
                return true;
        }
    }

    /**
     * Reviewers with at least one specialty relevant to the content type.
     * @returns {object[]} At most 4 reviewers, in registration order.
     */
    function getReviewersForType(contentType) {
        const relevantReviewers = Array.from(reviewers.values())
            .filter(r => r.specialties.some(s => specialtyMatches(s, contentType)));
        // Return up to 4 reviewers
        return relevantReviewers.slice(0, 4);
    }

    /**
     * Open a review session for a pending item.
     *
     * Specialty matches are preferred; when there are fewer of them than the
     * item requires, the list is topped up with other registered reviewers.
     * The result never contains duplicates, never includes the submitter and
     * never exceeds item.requiredReviewers.
     *
     * @returns {object|null} The item, or null if it is unknown or not pending.
     */
    function assignReviewers(itemId) {
        const item = items.get(itemId);
        if (!item || item.status !== 'pending')
            return null;
        const availableReviewers = getReviewersForType(item.type)
            .filter(r => r.id !== item.submitterId)
            .slice(0, item.requiredReviewers);
        if (availableReviewers.length < item.requiredReviewers) {
            // Not enough specialists: fill the remaining seats with any other reviewer.
            const taken = new Set(availableReviewers.map(r => r.id));
            for (const candidate of reviewers.values()) {
                if (availableReviewers.length >= item.requiredReviewers)
                    break;
                if (candidate.id === item.submitterId || taken.has(candidate.id))
                    continue;
                taken.add(candidate.id);
                availableReviewers.push(candidate);
            }
        }
        const session = {
            itemId,
            phase: 'review',
            assignedReviewers: availableReviewers.map(r => r.id),
            completedReviews: 0,
            consensusReached: false,
        };
        sessions.set(itemId, session);
        item.status = 'in_review';
        logger.info(`Assigned ${availableReviewers.length} reviewers to ${itemId}`);
        return item;
    }

    /**
     * Record a reviewer's vote and finalize the item if consensus is reached.
     *
     * @param {string} decision 'approve', 'reject' or 'revision'.
     * @param {number} [confidence] Clamped to [0, 1]; a missing or non-numeric
     *   value is treated as 1 so it cannot turn the weighted tally into NaN.
     * @returns {object|null} The stored vote, or null when the vote is refused
     *   (unknown item/reviewer, reviewer not assigned, duplicate vote, or the
     *   item is no longer in review).
     */
    function submitVote(itemId, reviewerId, decision, feedback, confidence) {
        const item = items.get(itemId);
        const session = sessions.get(itemId);
        const reviewer = reviewers.get(reviewerId);
        if (!item || !session || !reviewer) {
            logger.warn(`Invalid vote submission: item=${itemId}, reviewer=${reviewerId}`);
            return null;
        }
        // Finalized items must not collect more votes (would double-count stats)
        if (item.status !== 'in_review') {
            logger.warn(`Item ${itemId} is not in review (status=${item.status})`);
            return null;
        }
        // Check if reviewer is assigned
        if (!session.assignedReviewers.includes(reviewerId)) {
            logger.warn(`Reviewer ${reviewerId} not assigned to ${itemId}`);
            return null;
        }
        // Check if reviewer already voted
        if (item.votes.some(v => v.reviewerId === reviewerId)) {
            logger.warn(`Reviewer ${reviewerId} already voted on ${itemId}`);
            return null;
        }
        const safeConfidence = Number.isFinite(confidence)
            ? Math.max(0, Math.min(1, confidence))
            : 1;
        const vote = {
            reviewerId,
            role: session.completedReviews === 0 ? 'primary' : 'secondary',
            decision,
            confidence: safeConfidence,
            feedback,
            timestamp: Date.now(),
        };
        item.votes.push(vote);
        session.completedReviews++;
        reviewHistory.push(vote);
        // Update reviewer stats (approvalRate is the running share of 'approve' votes)
        reviewer.reviewsCompleted++;
        const approvedSoFar = reviewer.approvalRate * (reviewer.reviewsCompleted - 1)
            + (decision === 'approve' ? 1 : 0);
        reviewer.approvalRate = approvedSoFar / reviewer.reviewsCompleted;
        logger.info(`Vote submitted: ${reviewerId} -> ${decision} on ${itemId}`);
        // Check consensus
        const consensus = checkConsensus(itemId);
        if (consensus.reached && consensus.decision) {
            item.status = consensus.decision;
            item.completedAt = Date.now();
            totalReviewTime += item.completedAt - item.createdAt;
            session.consensusReached = true;
            session.decision = consensus.decision;
            if (consensus.decision === 'revision_requested') {
                revisionCount++;
            }
            consensusCount++;
        }
        return vote;
    }

    /**
     * Evaluate the votes collected so far for an item.
     *
     * Nothing is decided until item.requiredReviewers votes exist. After that:
     *  - approved when the reputation x confidence weighted approval share
     *    reaches the item's consensus threshold;
     *  - rejected on a strict majority of 'reject' votes (downgraded to
     *    revision_requested when arbitration is on and someone asked for one);
     *  - revision_requested on a strict majority of 'revision' votes.
     *
     * @returns {{reached: boolean, decision?: string}}
     */
    function checkConsensus(itemId) {
        const item = items.get(itemId);
        if (!item)
            return { reached: false };
        const votes = item.votes;
        // Need minimum votes
        if (votes.length < item.requiredReviewers) {
            return { reached: false };
        }
        // Count decisions
        const rejections = votes.filter(v => v.decision === 'reject').length;
        const revisions = votes.filter(v => v.decision === 'revision').length;
        // Reputation-weighted voting
        let weightedApprovals = 0;
        let totalWeight = 0;
        for (const vote of votes) {
            const reviewer = reviewers.get(vote.reviewerId);
            if (reviewer) {
                const weight = reviewer.reputation * vote.confidence;
                totalWeight += weight;
                if (vote.decision === 'approve') {
                    weightedApprovals += weight;
                }
            }
        }
        const weightedApprovalRate = totalWeight > 0 ? weightedApprovals / totalWeight : 0;
        if (weightedApprovalRate >= item.consensusThreshold) {
            return { reached: true, decision: 'approved' };
        }
        if (rejections > item.requiredReviewers / 2) {
            // Check for revision option before outright rejection
            if (enableArbitration && revisions > 0) {
                return { reached: true, decision: 'revision_requested' };
            }
            return { reached: true, decision: 'rejected' };
        }
        if (revisions > item.requiredReviewers / 2) {
            // A clear majority asking for changes would otherwise leave the item stuck
            return { reached: true, decision: 'revision_requested' };
        }
        // No consensus yet
        return { reached: false };
    }

    /** Look up an item by id (undefined when unknown). */
    function getItemStatus(itemId) {
        return items.get(itemId);
    }

    /** Items still in review that are assigned to the reviewer and not yet voted on by them. */
    function getPendingReviews(reviewerId) {
        return Array.from(items.values()).filter(item => {
            const session = sessions.get(item.id);
            return (item.status === 'in_review' &&
                session?.assignedReviewers.includes(reviewerId) &&
                !item.votes.some(v => v.reviewerId === reviewerId));
        });
    }

    /**
     * Aggregate statistics over finalized items. Every rate is 0 when nothing
     * has been finalized; avgReviewTime is in milliseconds.
     */
    function getStats() {
        const completedItems = Array.from(items.values()).filter(i => i.completedAt);
        const completed = completedItems.length;
        const ratio = (count) => (completed > 0 ? count / completed : 0);
        return {
            itemsReviewed: completed,
            approvalRate: ratio(completedItems.filter(i => i.status === 'approved').length),
            avgReviewTime: ratio(totalReviewTime),
            consensusRate: ratio(consensusCount),
            revisionRate: ratio(revisionCount),
            dissentCount: reviewHistory.filter(v => v.decision !== 'approve').length,
        };
    }

    return {
        submitItem,
        registerReviewer,
        getReviewersForType,
        assignReviewers,
        submitVote,
        checkConsensus,
        getItemStatus,
        getPendingReviews,
        getStats,
    };
}
@@ -0,0 +1,596 @@
1
+ /**
2
+ * Expert Mental Models Integration
3
+ *
4
+ * Loads expert mental models into HeartFlow to improve decisions,
5
+ * reasoning, and judgment without role-playing personas.
6
+ *
7
+ * Based on the expert-skills pattern: mental models provide
8
+ * structured thinking frameworks that enhance AI reasoning.
9
+ *
10
+ * Key features:
11
+ * - Mental model registry with predefined expert thinking patterns
12
+ * - Model application to any context or decision
13
+ * - Model blending for multi-perspective analysis
14
+ * - Learning from model application outcomes
15
+ * - Model performance tracking
16
+ *
17
+ * @module core/expert-models
18
+ * @fileoverview Expert mental model integration for enhanced reasoning
19
+ */
20
+ import { randomUUID } from 'crypto';
21
+ import { createLogger } from '../../utils/logger.js';
22
+ const logger = createLogger('[ExpertModels]');
23
/**
 * Baseline engine configuration; caller-supplied config keys override these.
 */
const DEFAULT_CONFIG = {
    // Feature switches
    autoSuggest: true,
    minConfidence: 0.6,
    trackPerformance: true,
    allowBlending: true,
    // Weights applied, in order, to the models of a blend created without explicit weights
    defaultBlendWeights: [0.5, 0.3, 0.2],
};
30
+ // ============================================================
31
+ // Predefined Mental Models
32
+ // ============================================================
33
/**
 * Mental models loaded into every engine at construction time.
 *
 * Each entry carries: name, description, source (attribution), principles,
 * applicationSteps, 适用场景 (applicable scenarios, read by relevance scoring),
 * strengths, weaknesses, examples, complexity (1-5) and category.
 */
const PREDEFINED_MODELS = [
    {
        name: 'First Principles Thinking',
        description: 'Break down problems to their fundamental components and rebuild from there',
        source: 'Aristotle / Elon Musk',
        principles: [
            'Identify the current assumption',
            'Break it down to fundamental truths',
            'Create new solutions from scratch',
            'Test and iterate',
        ],
        applicationSteps: [
            'State the problem as commonly understood',
            'Ask "Is this really true?" for each component',
            'Break down to facts that cannot be reduced further',
            'Build new reasoning from these foundations',
            'Test the new solution',
        ],
        适用场景: ['Innovation', 'Problem-solving', 'Challenging status quo'],
        strengths: ['Eliminates assumptions', 'Enables breakthrough thinking', 'Reduces bias'],
        weaknesses: ['Time-consuming', 'May miss practical constraints', 'Requires deep expertise'],
        examples: ['SpaceX rocket costs', 'Tesla battery technology', 'Business model innovation'],
        complexity: 4,
        category: 'critical-thinking',
    },
    {
        name: 'Inversion',
        description: 'Think about the problem backwards to identify what to avoid',
        source: 'Charlie Munger',
        principles: [
            'Identify what you want to achieve',
            'Instead of asking how to succeed, ask how to fail',
            'Avoid those failure paths',
            'Focus on the inverse of problems',
        ],
        applicationSteps: [
            'Clearly state the goal',
            'List ways the goal could fail',
            'Identify the root causes of potential failure',
            'Create actions to prevent those failures',
            'Prioritize avoiding bad outcomes',
        ],
        适用场景: ['Risk management', 'Strategic planning', 'Decision validation'],
        strengths: ['Reveals hidden risks', 'Prevents hubris', 'Highlights what to avoid'],
        weaknesses: ['Can be overly pessimistic', 'May miss opportunities', 'Focuses on negatives'],
        examples: ['Avoiding stupid decisions', 'Risk assessment', 'Portfolio management'],
        complexity: 3,
        category: 'decision-making',
    },
    {
        name: 'Second Order Thinking',
        description: 'Consider the consequences of the consequences',
        source: 'Howard Marks / Ray Dalio',
        principles: [
            'First order: immediate result',
            'Second order: what follows from the first result',
            'Third order: long-term effects',
            'Most people stop at first order',
        ],
        applicationSteps: [
            'Identify the obvious first-order consequence',
            'Ask "And then?" for each subsequent effect',
            'Map out 2-3 levels of consequences',
            'Evaluate the full chain of events',
            'Make decision based on second/third order effects',
        ],
        适用场景: ['Impact analysis', 'Long-term planning', 'Policy decisions'],
        strengths: ['Prevents short-term thinking', 'Reveals hidden consequences', 'Improves foresight'],
        weaknesses: ['Can lead to analysis paralysis', 'Hard to predict accurately', 'Uncertainty increases'],
        examples: ['Economic policy', 'Technology adoption', 'Environmental decisions'],
        complexity: 4,
        category: 'systems-thinking',
    },
    {
        name: 'Circle of Competence',
        description: 'Know your boundaries and stay within them',
        source: 'Warren Buffett',
        principles: [
            'Recognize what you truly understand',
            'Identify areas where you have expertise',
            'Be honest about ignorance',
            'Stay within known boundaries',
            'Expand boundaries deliberately',
        ],
        applicationSteps: [
            'List areas of genuine expertise',
            'Identify knowledge gaps honestly',
            'When facing decisions, check if in circle',
            'If outside, seek expert input or abstain',
            'Gradually expand circle through learning',
        ],
        适用场景: ['Investment decisions', 'Expert consultation', 'Self-awareness'],
        strengths: ['Prevents overconfidence', 'Encourages humility', 'Focuses resources'],
        weaknesses: ['Can be limiting', 'Circle boundaries unclear', 'May miss opportunities'],
        examples: ['Buffett investment strategy', 'Professional specialization', 'Team building'],
        complexity: 2,
        category: 'self-awareness',
    },
    {
        name: 'Probabilistic Thinking',
        description: 'Think in probabilities rather than certainties',
        source: 'Nassim Taleb / Nate Silver',
        principles: [
            'Assign probabilities to outcomes',
            'Update beliefs with new evidence (Bayesian)',
            'Consider base rates',
            'Think in expected value',
            'Embrace uncertainty',
        ],
        applicationSteps: [
            'List possible outcomes',
            'Estimate probability of each',
            'Calculate expected value (P × outcome)',
            'Update with new information',
            'Choose highest expected value option',
        ],
        适用场景: ['Risk assessment', 'Forecasting', 'Decision under uncertainty'],
        strengths: ['Accounts for uncertainty', 'Prevents binary thinking', 'Quantifies risk'],
        weaknesses: ['Requires accurate probability estimates', 'Can be gamed', 'Base rate neglect'],
        examples: ['Weather forecasting', 'Sports betting', 'Project estimation'],
        complexity: 4,
        category: 'analytical',
    },
    {
        name: 'Regret Minimization',
        description: 'Minimize future regret rather than optimize present comfort',
        source: 'Jeff Bezos',
        principles: [
            'Project yourself to end of life',
            'Ask what you would regret not doing',
            'Prioritize bold moves for low regret',
            'Accept that regret is inevitable',
            'Take the path with least potential regret',
        ],
        applicationSteps: [
            'Imagine yourself at 80 years old',
            'Look back at this decision point',
            'Ask: "Will I regret not trying?"',
            'Weight long-term regret over short-term comfort',
            'Take the bolder action if regret is likely',
        ],
        适用场景: ['Career decisions', 'Major life choices', 'Entrepreneurship'],
        strengths: ['Encourages boldness', 'Long-term perspective', 'Overcomes fear'],
        weaknesses: ['Can justify recklessness', 'Ignores practical constraints', 'Emotional basis'],
        examples: ['Bezos Amazon decision', 'Career pivots', 'Startup founding'],
        complexity: 2,
        category: 'decision-making',
    },
    {
        name: "Occam's Razor",
        description: 'The simplest explanation is usually correct',
        source: 'William of Ockham',
        principles: [
            'Prefer simplicity over complexity',
            'When multiple explanations exist, choose simplest',
            'Entities should not be multiplied unnecessarily',
            'Test simple explanations first',
            'Complexity requires justification',
        ],
        applicationSteps: [
            'Gather all possible explanations',
            'Evaluate complexity of each',
            'Remove unnecessary assumptions',
            'Select the simplest that fits facts',
            'Only add complexity if evidence demands it',
        ],
        适用场景: ['Diagnosis', 'Problem diagnosis', 'Theory building'],
        strengths: ['Parsimonious', 'Practical', 'Avoids overfitting'],
        weaknesses: ['Truth may be complex', 'Can oversimplify', 'Bias toward familiar'],
        examples: ['Medical diagnosis', 'Troubleshooting', 'Scientific hypothesis'],
        complexity: 2,
        category: 'analytical',
    },
    {
        name: "Hanlon's Razor",
        description: 'Never attribute to malice that which is adequately explained by incompetence',
        source: 'Robert Hanlon',
        principles: [
            'Assume incompetence before malice',
            'Look for simple explanations first',
            'Consider communication failures',
            'Avoid unnecessary negative interpretations',
            'Give benefit of the doubt',
        ],
        applicationSteps: [
            'Observe an action that seems harmful',
            'Ask: is there a simple explanation?',
            'Consider information gaps or confusion',
            'Only assume malice if no other explanation fits',
            'Address the incompetence, not the person',
        ],
        适用场景: ['Conflict resolution', 'Team dynamics', 'Communication'],
        strengths: ['Preserves relationships', 'Avoids unnecessary conflict', 'Practical'],
        weaknesses: ['May miss actual bad actors', 'Can excuse abuse', 'Naive in some contexts'],
        examples: ['Office politics', 'Customer complaints', 'Cross-cultural communication'],
        complexity: 2,
        category: 'ethical',
    },
    {
        name: 'Map and Territory',
        description: 'Distinguish between your model of reality and reality itself',
        // "The map is not the territory" was coined by Korzybski, not Taleb.
        source: 'Alfred Korzybski',
        principles: [
            'Your mental map is not the territory',
            'Models are simplifications of reality',
            'Reality is always more complex',
            'Update maps when territory disagrees',
            'Be humble about model accuracy',
        ],
        applicationSteps: [
            'Identify your current model/belief',
            'Acknowledge it is a simplification',
            'Compare to actual outcomes/evidence',
            'Note where model diverges from reality',
            'Update model to better fit territory',
        ],
        适用场景: ['Belief revision', 'Forecast evaluation', 'Self-awareness'],
        strengths: ['Promotes epistemic humility', 'Encourages testing', 'Reduces overconfidence'],
        weaknesses: ['Can lead to perpetual doubt', 'Hard to know true territory', 'May paralyze'],
        examples: ['Economic forecasts', 'Expert predictions', 'Personal beliefs'],
        complexity: 3,
        category: 'critical-thinking',
    },
    {
        name: 'Thought Experiment',
        description: 'Use imaginative scenarios to test ideas and assumptions',
        source: 'Einstein / Philosophy tradition',
        principles: [
            'Construct hypothetical scenarios',
            'Follow logic through to implications',
            'Isolate key variables',
            'Use imagination to explore possibilities',
            'Derive insights from deduction',
        ],
        applicationSteps: [
            'Identify the principle or rule to test',
            'Construct a hypothetical scenario',
            'Apply the principle to this scenario',
            'Follow implications logically',
            'Extract insights about the principle',
        ],
        适用场景: ['Philosophy', 'Physics', 'Ethical reasoning', 'Strategy'],
        strengths: ['No real-world consequences', 'Tests logic', 'Reveals assumptions'],
        weaknesses: ['May not transfer to reality', 'Imagination limits', 'Logical errors possible'],
        examples: ["Einstein's elevator", 'Trolley problem', 'Veil of ignorance'],
        complexity: 3,
        category: 'creative',
    },
    {
        name: 'STS (Systems Thinking Synthesis)',
        description: 'See the whole system, not just parts - feedback loops, emergence, delays',
        source: 'Peter Senge / Donella Meadows',
        principles: [
            'See interconnections, not just components',
            'Identify feedback loops (reinforcing/balancing)',
            'Notice delays between action and effect',
            'Look for emergent properties',
            'Find leverage points in the system',
        ],
        applicationSteps: [
            'Map the system components',
            'Identify relationships and connections',
            'Find feedback loops',
            'Notice delays and accumulations',
            'Identify high-leverage intervention points',
        ],
        适用场景: ['Organizational change', 'Policy design', 'Complex problem diagnosis'],
        strengths: ['Holistic view', 'Reveals hidden dynamics', 'Identifies leverage'],
        weaknesses: ['Complex', 'Hard to model accurately', 'Delays hard to predict'],
        examples: ['Climate policy', 'Company culture', 'Market dynamics'],
        complexity: 5,
        category: 'systems-thinking',
    },
];
307
+ /**
308
+ * Create an Expert Models Engine
309
+ */
310
+ export function createExpertModelsEngine(config = {}) {
311
+ const cfg = { ...DEFAULT_CONFIG, ...config };
312
+ // Model registry
313
+ const models = new Map();
314
+ // Application history
315
+ const applicationHistory = new Map();
316
+ // Model blends
317
+ const blends = new Map();
318
+ // Initialize with predefined models
319
+ for (const modelData of PREDEFINED_MODELS) {
320
+ const model = {
321
+ ...modelData,
322
+ id: randomUUID(),
323
+ usageCount: 0,
324
+ successRate: 0,
325
+ lastUsed: 0,
326
+ };
327
+ models.set(model.id, model);
328
+ }
329
+ logger.info(`Expert Models initialized with ${models.size} predefined models`);
330
+ // ========================================
331
+ // Utility functions
332
+ // ========================================
333
+ function scoreModelRelevance(model, context, purpose) {
334
+ const contextLower = (context + ' ' + (purpose || '')).toLowerCase();
335
+ // Score based on category keywords
336
+ const categoryKeywords = {
337
+ 'decision-making': ['decide', 'choice', 'option', 'select', 'pick', 'choose', 'risk', 'opportunity'],
338
+ 'critical-thinking': ['analyze', 'evaluate', 'assess', 'examine', 'critique', 'reason', 'think'],
339
+ 'problem-solving': ['problem', 'issue', 'fix', 'solve', 'resolve', 'troubleshoot', 'debug'],
340
+ 'strategic': ['strategy', 'long-term', 'planning', 'goal', 'future', 'vision', 'competitive'],
341
+ 'systems-thinking': ['system', 'feedback', 'loop', 'emergent', 'interconnected', 'holistic'],
342
+ 'creative': ['creative', 'innovate', 'new', 'idea', 'imagine', 'brainstorm', 'design'],
343
+ 'analytical': ['data', 'analyze', 'measure', 'quantify', 'statistic', 'probability', 'evidence'],
344
+ 'ethical': ['moral', 'ethical', 'right', 'wrong', 'fair', 'justice', 'value'],
345
+ 'self-awareness': ['aware', 'bias', 'blind', 'strength', 'weakness', 'competence', 'knowledge'],
346
+ };
347
+ const keywords = categoryKeywords[model.category] || [];
348
+ let score = 0;
349
+ for (const keyword of keywords) {
350
+ if (contextLower.includes(keyword)) {
351
+ score += 1;
352
+ }
353
+ }
354
+ // Boost for matching 应用场景
355
+ for (const scenario of model.适用场景) {
356
+ if (contextLower.includes(scenario.toLowerCase())) {
357
+ score += 2;
358
+ }
359
+ }
360
+ // Normalize by model complexity (simpler models score slightly higher for general use)
361
+ const complexityBonus = (6 - model.complexity) * 0.1;
362
+ return Math.min(1, (score / Math.max(1, keywords.length)) + complexityBonus);
363
+ }
364
+ function generateReasoning(model, context, input) {
365
+ const steps = model.applicationSteps.map((step, i) => `${i + 1}. ${step}`).join('\n');
366
+ return `Applying "${model.name}" mental model:
367
+
368
+ **Context**: ${context}
369
+
370
+ **Input**: ${input}
371
+
372
+ **Model Source**: ${model.source}
373
+
374
+ **Principles**:
375
+ ${model.principles.map(p => `- ${p}`).join('\n')}
376
+
377
+ **Application Steps**:
378
+ ${steps}
379
+
380
+ **Model Strengths**: ${model.strengths.join(', ')}
381
+ **Model Limitations**: ${model.weaknesses.join(', ')}
382
+ `;
383
+ }
384
+ function extractConclusion(reasoning, model) {
385
+ // Extract a conclusion based on model type
386
+ const modelConclusions = {
387
+ 'First Principles Thinking': 'Rebuild from fundamental truths, eliminating assumptions',
388
+ 'Inversion': 'Avoid failure paths to achieve success',
389
+ 'Second Order Thinking': 'Consider second and third order consequences',
390
+ 'Circle of Competence': 'Stay within known boundaries or expand deliberately',
391
+ 'Probabilistic Thinking': 'Calculate expected value and update with evidence',
392
+ 'Regret Minimization': 'Choose the path that minimizes long-term regret',
393
+ "Occam's Razor": 'Select the simplest explanation that fits',
394
+ "Hanlon's Razor": 'Assume incompetence before malice',
395
+ 'Map and Territory': 'Update mental models to match reality',
396
+ 'Thought Experiment': 'Use hypothetical scenarios to test principles',
397
+ 'STS (Systems Thinking Synthesis)': 'Find leverage points in the system',
398
+ };
399
+ return modelConclusions[model.name] || `Apply ${model.name} principles`;
400
+ }
401
+ // ========================================
402
+ // Engine implementation
403
+ // ========================================
404
+ return {
405
+ // Registry
406
+ registerModel(modelData) {
407
+ const model = {
408
+ ...modelData,
409
+ id: randomUUID(),
410
+ usageCount: 0,
411
+ successRate: 0,
412
+ lastUsed: Date.now(),
413
+ };
414
+ models.set(model.id, model);
415
+ logger.info(`Registered new model: ${model.name} (${model.id})`);
416
+ return model;
417
+ },
418
+ getModel(id) {
419
+ return models.get(id);
420
+ },
421
+ getModelsByCategory(category) {
422
+ return Array.from(models.values()).filter(m => m.category === category);
423
+ },
424
+ listModels() {
425
+ return Array.from(models.values());
426
+ },
427
+ removeModel(id) {
428
+ const deleted = models.delete(id);
429
+ if (deleted) {
430
+ logger.info(`Removed model: ${id}`);
431
+ }
432
+ return deleted;
433
+ },
434
+ // Application
435
+ async applyModel(modelId, context, input) {
436
+ const model = models.get(modelId);
437
+ if (!model) {
438
+ throw new Error(`Model not found: ${modelId}`);
439
+ }
440
+ const startTime = Date.now();
441
+ const reasoning = generateReasoning(model, context, input);
442
+ const conclusion = extractConclusion(reasoning, model);
443
+ const application = {
444
+ id: randomUUID(),
445
+ modelId,
446
+ context,
447
+ input,
448
+ reasoning,
449
+ timestamp: startTime,
450
+ duration: Date.now() - startTime,
451
+ };
452
+ applicationHistory.set(application.id, { application, modelId });
453
+ // Update model usage
454
+ model.usageCount++;
455
+ model.lastUsed = Date.now();
456
+ logger.info(`Applied model "${model.name}" to context "${context}"`);
457
+ return {
458
+ modelId,
459
+ modelName: model.name,
460
+ reasoning,
461
+ conclusion,
462
+ confidence: 0.7 + (model.complexity * 0.05), // Higher complexity = higher confidence
463
+ principlesApplied: model.principles.slice(0, 2),
464
+ alternativePerspectives: [],
465
+ potentialBiases: model.weaknesses.slice(0, 2),
466
+ quality: 0.75, // Placeholder until outcome recorded
467
+ };
468
+ },
469
+ suggestModels(context, purpose) {
470
+ const scored = Array.from(models.values())
471
+ .map(model => ({
472
+ model,
473
+ score: scoreModelRelevance(model, context, purpose),
474
+ }))
475
+ .filter(s => s.score > 0.1)
476
+ .sort((a, b) => b.score - a.score);
477
+ return scored.slice(0, 3).map(s => s.model);
478
+ },
479
+ getApplicableModels(scenario) {
480
+ return this.suggestModels(scenario);
481
+ },
482
+ // Blending
483
+ createBlend(name, modelIds, weights) {
484
+ if (modelIds.length < 2) {
485
+ throw new Error('Blend requires at least 2 models');
486
+ }
487
+ const blendWeights = weights || cfg.defaultBlendWeights.slice(0, modelIds.length);
488
+ while (blendWeights.length < modelIds.length) {
489
+ blendWeights.push(0);
490
+ }
491
+ // Normalize weights
492
+ const total = blendWeights.reduce((a, b) => a + b, 0);
493
+ const normalized = blendWeights.map(w => w / total);
494
+ const blend = {
495
+ id: randomUUID(),
496
+ name,
497
+ modelIds,
498
+ weights: normalized,
499
+ description: `Blend of ${modelIds.length} mental models`,
500
+ purpose: 'Multi-perspective analysis',
501
+ };
502
+ blends.set(blend.id, blend);
503
+ logger.info(`Created blend: ${name} (${blend.id})`);
504
+ return blend;
505
+ },
506
+ async applyBlend(blendId, context, input) {
507
+ const blend = blends.get(blendId);
508
+ if (!blend) {
509
+ throw new Error(`Blend not found: ${blendId}`);
510
+ }
511
+ const perspectives = [];
512
+ const conflicts = [];
513
+ for (let i = 0; i < blend.modelIds.length; i++) {
514
+ const result = await this.applyModel(blend.modelIds[i], context, input);
515
+ perspectives.push(result);
516
+ }
517
+ // Check agreement between perspectives
518
+ const conclusions = perspectives.map(p => p.conclusion);
519
+ const uniqueConclusions = new Set(conclusions);
520
+ const agreementLevel = 1 - (uniqueConclusions.size - 1) / conclusions.length;
521
+ if (agreementLevel < 0.5) {
522
+ conflicts.push(`Only ${Math.round(agreementLevel * 100)}% agreement between models`);
523
+ }
524
+ // Synthesize conclusion
525
+ const synthesizedConclusion = `Multi-model analysis (${perspectives.length} perspectives): ${perspectives.map(p => p.modelName).join(', ')}. Agreement: ${Math.round(agreementLevel * 100)}%.`;
526
+ return {
527
+ blendId: blend.id,
528
+ blendName: blend.name,
529
+ perspectives,
530
+ synthesizedConclusion,
531
+ agreementLevel,
532
+ conflicts,
533
+ confidence: agreementLevel * 0.8 + 0.2,
534
+ quality: 0.75,
535
+ };
536
+ },
537
+ getBlends() {
538
+ return Array.from(blends.values());
539
+ },
540
+ removeBlend(id) {
541
+ return blends.delete(id);
542
+ },
543
+ // Performance tracking
544
+ recordOutcome(applicationId, outcome, quality) {
545
+ const record = applicationHistory.get(applicationId);
546
+ if (!record) {
547
+ logger.warn(`Application not found: ${applicationId}`);
548
+ return;
549
+ }
550
+ record.application.outcome = outcome;
551
+ record.application.quality = quality;
552
+ const model = models.get(record.modelId);
553
+ if (model && cfg.trackPerformance) {
554
+ // Update running success rate
555
+ const totalQuality = model.successRate * model.usageCount + quality;
556
+ model.successRate = totalQuality / model.usageCount;
557
+ }
558
+ logger.info(`Recorded outcome for ${applicationId}: quality=${quality}`);
559
+ },
560
+ getModelStats(modelId) {
561
+ const model = models.get(modelId);
562
+ if (!model) {
563
+ return { usageCount: 0, successRate: 0, avgQuality: 0 };
564
+ }
565
+ return {
566
+ usageCount: model.usageCount,
567
+ successRate: model.successRate,
568
+ avgQuality: model.successRate, // Same as successRate
569
+ };
570
+ },
571
+ getTopModels(limit = 5) {
572
+ return Array.from(models.values())
573
+ .filter(m => m.usageCount > 0)
574
+ .sort((a, b) => b.successRate - a.successRate)
575
+ .slice(0, limit);
576
+ },
577
+ // Analysis
578
+ async analyzeDecision(context, input) {
579
+ const suggestedModels = this.suggestModels(context);
580
+ const modelsToApply = suggestedModels.slice(0, 3);
581
+ const results = [];
582
+ for (const model of modelsToApply) {
583
+ const result = await this.applyModel(model.id, context, input);
584
+ results.push(result);
585
+ }
586
+ // Generate recommendation
587
+ const avgConfidence = results.reduce((sum, r) => sum + r.confidence, 0) / results.length;
588
+ const topResult = results.reduce((best, r) => r.confidence > best.confidence ? r : best, results[0]);
589
+ return {
590
+ models: results,
591
+ recommendation: `Apply "${topResult.modelName}" for highest confidence. Consider blending ${results.length} models for multi-perspective analysis.`,
592
+ confidence: avgConfidence,
593
+ };
594
+ },
595
+ };
596
+ }
@@ -0,0 +1,177 @@
1
+ /**
2
+ * Hybrid Memory Search
3
+ *
4
+ * Combines vector similarity search with BM25 keyword matching and knowledge graph traversal
5
+ * Based on Dakera AI's hybrid search architecture (87.8% LoCoMo accuracy)
6
+ *
7
+ * @module core/memory
8
+ * @fileoverview Hybrid search combining vector + BM25 + knowledge graph for superior recall
9
+ */
10
+ import { createLogger } from '../../utils/logger.js';
11
+ const logger = createLogger('HybridSearch');
12
/**
 * Baseline hybrid search settings; callers may override any subset of them.
 */
export const DEFAULT_HYBRID_CONFIG = {
    // Weights applied to the vector, BM25 and knowledge-graph component
    // scores when they are summed into the combined score.
    vectorWeight: 0.5,
    bm25Weight: 0.3,
    kgWeight: 0.2,
    // BM25 parameters consumed by bm25Score: k1 scales the term-frequency
    // component and b controls how strongly document length is normalized.
    bm25: {
        k1: 1.5,
        b: 0.75,
    },
    // Results whose combined score falls below this value are dropped.
    minScore: 0.1,
    // Upper bound on the number of results returned by a search.
    maxResults: 20,
};
23
/**
 * Tokenizer for BM25.
 *
 * Lower-cases the text, replaces every character that is not a letter,
 * combining mark, digit, underscore or whitespace with a space, splits on
 * whitespace and keeps only tokens longer than two characters.
 *
 * Unicode property escapes are used instead of `\w`, which only matches
 * ASCII and would split words such as "café" into fragments. ASCII input is
 * tokenized exactly as before.
 *
 * @param {string|null|undefined} text - Text to tokenize; null/undefined is treated as empty.
 * @returns {string[]} Tokens in order of appearance (duplicates preserved).
 */
function tokenize(text) {
    return String(text ?? '')
        .toLowerCase()
        .replace(/[^\p{L}\p{M}\p{N}_\s]/gu, ' ')
        .split(/\s+/)
        .filter(token => token.length > 2);
}
33
/**
 * Build a BM25 index over a list of memory entries.
 *
 * Each entry's `content` is tokenized; the index records the token count per
 * entry id, how many entries contain each term, a term -> (entry id -> count)
 * posting map, the entry count and the mean token count per entry.
 *
 * @param {Array<{id: *, content: string}>} entries - Entries to index.
 * @returns {{docLengths: Map, termDocFreq: Map, avgDocLength: number, totalDocs: number, invertedIndex: Map}}
 *   The populated index; `avgDocLength` is 0 for an empty entry list.
 */
export function createBM25Index(entries) {
    const docLengths = new Map();
    const termDocFreq = new Map();
    const invertedIndex = new Map();
    let tokenTotal = 0;
    for (const { id, content } of entries) {
        const terms = tokenize(content);
        docLengths.set(id, terms.length);
        tokenTotal += terms.length;
        // Per-document term counts, keyed in order of first appearance.
        const counts = new Map();
        for (const term of terms) {
            counts.set(term, (counts.get(term) ?? 0) + 1);
        }
        // One posting write and one document-frequency bump per unique term.
        for (const [term, count] of counts) {
            let posting = invertedIndex.get(term);
            if (posting === undefined) {
                posting = new Map();
                invertedIndex.set(term, posting);
            }
            posting.set(id, count);
            termDocFreq.set(term, (termDocFreq.get(term) ?? 0) + 1);
        }
    }
    return {
        docLengths,
        termDocFreq,
        // Math.max guards the division when there are no entries.
        avgDocLength: tokenTotal / Math.max(entries.length, 1),
        totalDocs: entries.length,
        invertedIndex,
    };
}
68
/**
 * Compute the BM25 score of one document for a tokenized query.
 *
 * Query tokens that do not occur in the document, or that have no recorded
 * document frequency, contribute nothing. Repeated query tokens are scored
 * once per occurrence.
 *
 * @param {{docLengths: Map, termDocFreq: Map, avgDocLength: number, totalDocs: number, invertedIndex: Map}} index - BM25 index.
 * @param {*} docId - Id of the document to score.
 * @param {Iterable<string>} queryTokens - Tokenized query.
 * @param {{k1: number, b: number}} config - BM25 parameters.
 * @returns {number} Sum of the per-token contributions; 0 when nothing matches.
 */
export function bm25Score(index, docId, queryTokens, config) {
    const docLength = index.docLengths.get(docId) ?? 0;
    let total = 0;
    for (const term of queryTokens) {
        const tf = index.invertedIndex.get(term)?.get(docId) ?? 0;
        const df = index.termDocFreq.get(term) ?? 0;
        if (tf === 0 || df === 0) {
            continue;
        }
        const { k1, b } = config;
        // The "+ 1" inside the logarithm keeps the IDF non-negative.
        const idf = Math.log((index.totalDocs - df + 0.5) / (df + 0.5) + 1);
        // Length normalization relative to the average document length.
        const lengthNorm = 1 - b + b * (docLength / index.avgDocLength);
        total += idf * ((tf * (k1 + 1)) / (tf + k1 * lengthNorm));
    }
    return total;
}
87
/**
 * Normalize BM25 scores to the 0-1 range.
 *
 * Every score is divided by the largest score in the map, with the divisor
 * floored at 1 so that scores which are all below 1 are left unscaled.
 *
 * The maximum is found with a loop rather than `Math.max(...values)`:
 * spreading passes every score as a separate call argument, which throws a
 * RangeError once the map is large enough.
 *
 * @param {Map<*, number>} scores - Raw BM25 score per document id.
 * @returns {Map<*, number>} New map with the same keys, in the same order.
 */
export function normalizeBM25Scores(scores) {
    let maxScore = 1;
    for (const score of scores.values()) {
        if (score > maxScore) {
            maxScore = score;
        }
    }
    const normalized = new Map();
    for (const [docId, score] of scores) {
        normalized.set(docId, score / maxScore);
    }
    return normalized;
}
98
/**
 * Create a hybrid search engine that blends vector, BM25 and knowledge-graph
 * scores into one ranked result list.
 *
 * @param {Array<{id: *, content: string}>} [initialEntries=[]] - Entries used to build the initial BM25 index.
 * @param {object} [config={}] - Overrides for DEFAULT_HYBRID_CONFIG; `bm25` is merged key by key.
 * @returns {{search: Function, rebuildIndex: Function, getStats: Function}} The engine API.
 */
export function createHybridSearchEngine(initialEntries = [], config = {}) {
    const fullConfig = {
        ...DEFAULT_HYBRID_CONFIG,
        ...config,
        bm25: { ...DEFAULT_HYBRID_CONFIG.bm25, ...config.bm25 },
    };
    let bm25Index = createBM25Index(initialEntries);

    /**
     * Replace the BM25 index with one built from `entries`.
     *
     * @param {Array<{id: *, content: string}>} entries - Entries to index.
     * @returns {void}
     */
    function rebuildIndex(entries) {
        logger.info(`Rebuilding BM25 index with ${entries.length} documents`);
        bm25Index = createBM25Index(entries);
    }

    /**
     * Rank `entries` for `query`.
     *
     * BM25 scores come from the current index (built from the initial entries
     * or the last `rebuildIndex` call), not from `entries` itself.
     *
     * @param {string} query - Free-text query.
     * @param {Array<{id: *, content: string}>} entries - Candidate entries.
     * @param {{config?: object, vectorScores?: Map, kgConnections?: Map}} [options]
     *   Per-search config overrides, vector score per entry id, and a list of
     *   connections per entry id.
     * @returns {Array<object>} Results sorted by combined score, highest first.
     */
    function search(query, entries, options) {
        const overrides = options?.config;
        const searchConfig = {
            ...fullConfig,
            ...overrides,
            // Merge bm25 key by key so a partial override such as { k1: 2 }
            // keeps the engine's `b` instead of leaving it undefined, which
            // would turn every BM25 score into NaN.
            bm25: { ...fullConfig.bm25, ...overrides?.bm25 },
        };
        // Tokenize query for BM25
        const queryTokens = tokenize(query);
        // Calculate BM25 scores for all entries; only positive scores are kept
        const rawKeywordScores = new Map();
        for (const entry of entries) {
            const score = bm25Score(bm25Index, entry.id, queryTokens, searchConfig.bm25);
            if (score > 0) {
                rawKeywordScores.set(entry.id, score);
            }
        }
        const normalizedBM25 = normalizeBM25Scores(rawKeywordScores);
        // Calculate KG scores based on connection count
        const kgScores = new Map();
        const kgConnections = options?.kgConnections;
        if (kgConnections) {
            // Loop instead of Math.max(...list): spreading a large list as
            // call arguments can throw a RangeError.
            let maxConnections = 1;
            for (const linked of kgConnections.values()) {
                if (linked.length > maxConnections) {
                    maxConnections = linked.length;
                }
            }
            for (const entry of entries) {
                const connections = kgConnections.get(entry.id)?.length ?? 0;
                kgScores.set(entry.id, connections / maxConnections);
            }
        }
        // Combine scores; keyed by id so a repeated id yields a single result
        const combinedResults = new Map();
        for (const entry of entries) {
            const vectorScore = options?.vectorScores?.get(entry.id) ?? 0;
            // Named keywordScore so it does not shadow the bm25Score function.
            const keywordScore = normalizedBM25.get(entry.id) ?? 0;
            const kgScore = kgScores.get(entry.id) ?? 0;
            // Weighted combination
            const combinedScore = (vectorScore * searchConfig.vectorWeight) +
                (keywordScore * searchConfig.bm25Weight) +
                (kgScore * searchConfig.kgWeight);
            if (combinedScore >= searchConfig.minScore) {
                combinedResults.set(entry.id, {
                    entry,
                    score: combinedScore,
                    combinedScore,
                    vectorScore,
                    bm25Score: keywordScore,
                    kgScore,
                    reason: 'semantic',
                });
            }
        }
        // Sort by combined score
        const results = Array.from(combinedResults.values())
            .sort((a, b) => b.combinedScore - a.combinedScore)
            .slice(0, searchConfig.maxResults);
        logger.debug(`Hybrid search for "${query}": ${results.length} results`);
        return results;
    }

    /**
     * Summarize the current BM25 index.
     *
     * @returns {{indexedDocs: number, avgDocLength: number}} Document count and rounded mean document length.
     */
    function getStats() {
        return {
            indexedDocs: bm25Index.totalDocs,
            avgDocLength: Math.round(bm25Index.avgDocLength),
        };
    }

    return {
        search,
        rebuildIndex,
        getStats,
    };
}
@@ -8,3 +8,4 @@ export * from './spaced-repetition.js';
8
8
  export * from './hopfield-network.js';
9
9
  export * from './adaptive-rag.js';
10
10
  export { createContextFragmentationEngine } from './context-fragmentation.js';
11
+ export { createHybridSearchEngine, createBM25Index, bm25Score, normalizeBM25Scores, DEFAULT_HYBRID_CONFIG } from './hybrid-search.js';
package/dist/index.js CHANGED
@@ -9,3 +9,4 @@ export { createIdentityContinuityVerifier } from './core/identity/identity-conti
9
9
  export { createMCPProtocol } from './core/collaboration/mcp-protocol.js';
10
10
  export { createTruthTeller, formatTruthStatement } from './core/truth-teller.js';
11
11
  export { createActiveInferenceEngine, formatFreeEnergyMetrics, formatBeliefState } from './core/cognition/active-inference.js';
12
+ export { createExpertModelsEngine } from './core/expert-models/index.js';
package/dist/version.js CHANGED
@@ -1 +1 @@
1
- export const VERSION = '2.2.4';
1
+ export const VERSION = '2.2.6';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mark-improving-agent",
3
- "version": "2.2.4",
3
+ "version": "2.2.6",
4
4
  "description": "Self-evolving AI agent with permanent memory, identity continuity, and self-evolution — for AI agents that need to remember, learn, and evolve across sessions",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",