@hailer/mcp 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/commands/tool-builder.md +37 -0
- package/.claude/commands/ws-pull.md +44 -0
- package/.claude/settings.json +8 -0
- package/.claude/settings.local.json +49 -0
- package/.claude/skills/activity-api/SKILL.md +96 -0
- package/.claude/skills/activity-api/references/activity-endpoints.md +845 -0
- package/.claude/skills/add-app-member-skill/SKILL.md +977 -0
- package/.claude/skills/agent-building/SKILL.md +243 -0
- package/.claude/skills/agent-building/references/architecture-patterns.md +446 -0
- package/.claude/skills/agent-building/references/code-examples.md +587 -0
- package/.claude/skills/agent-building/references/implementation-guide.md +619 -0
- package/.claude/skills/app-api/SKILL.md +219 -0
- package/.claude/skills/app-api/references/app-endpoints.md +759 -0
- package/.claude/skills/building-hailer-apps-skill/SKILL.md +548 -0
- package/.claude/skills/create-app-skill/SKILL.md +1101 -0
- package/.claude/skills/create-insight-skill/SKILL.md +1317 -0
- package/.claude/skills/get-insight-data-skill/SKILL.md +1053 -0
- package/.claude/skills/hailer-api/SKILL.md +283 -0
- package/.claude/skills/hailer-api/references/activities.md +620 -0
- package/.claude/skills/hailer-api/references/authentication.md +216 -0
- package/.claude/skills/hailer-api/references/datasets.md +437 -0
- package/.claude/skills/hailer-api/references/files.md +301 -0
- package/.claude/skills/hailer-api/references/insights.md +469 -0
- package/.claude/skills/hailer-api/references/workflows.md +720 -0
- package/.claude/skills/hailer-api/references/workspaces-users.md +445 -0
- package/.claude/skills/insight-api/SKILL.md +185 -0
- package/.claude/skills/insight-api/references/insight-endpoints.md +514 -0
- package/.claude/skills/install-workflow-skill/SKILL.md +1056 -0
- package/.claude/skills/list-apps-skill/SKILL.md +1010 -0
- package/.claude/skills/list-workflows-minimal-skill/SKILL.md +992 -0
- package/.claude/skills/local-first-skill/SKILL.md +570 -0
- package/.claude/skills/mcp-tools/SKILL.md +419 -0
- package/.claude/skills/mcp-tools/references/api-endpoints.md +499 -0
- package/.claude/skills/mcp-tools/references/data-structures.md +554 -0
- package/.claude/skills/mcp-tools/references/implementation-patterns.md +717 -0
- package/.claude/skills/preview-insight-skill/SKILL.md +1290 -0
- package/.claude/skills/publish-hailer-app-skill/SKILL.md +453 -0
- package/.claude/skills/remove-app-member-skill/SKILL.md +671 -0
- package/.claude/skills/remove-app-skill/SKILL.md +985 -0
- package/.claude/skills/remove-insight-skill/SKILL.md +1011 -0
- package/.claude/skills/remove-workflow-skill/SKILL.md +920 -0
- package/.claude/skills/scaffold-hailer-app-skill/SKILL.md +1034 -0
- package/.claude/skills/skill-testing/README.md +137 -0
- package/.claude/skills/skill-testing/SKILL.md +348 -0
- package/.claude/skills/skill-testing/references/test-patterns.md +705 -0
- package/.claude/skills/skill-testing/references/testing-guide.md +603 -0
- package/.claude/skills/skill-testing/references/validation-checklist.md +537 -0
- package/.claude/skills/tool-builder/SKILL.md +328 -0
- package/.claude/skills/update-app-skill/SKILL.md +970 -0
- package/.claude/skills/update-workflow-field-skill/SKILL.md +1098 -0
- package/.env.example +81 -0
- package/.mcp.json +13 -0
- package/README.md +297 -0
- package/dist/app.d.ts +4 -0
- package/dist/app.js +74 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.js +5 -0
- package/dist/client/adaptive-documentation-bot.d.ts +108 -0
- package/dist/client/adaptive-documentation-bot.js +475 -0
- package/dist/client/adaptive-documentation-types.d.ts +66 -0
- package/dist/client/adaptive-documentation-types.js +9 -0
- package/dist/client/agent-activity-bot.d.ts +51 -0
- package/dist/client/agent-activity-bot.js +166 -0
- package/dist/client/agent-tracker.d.ts +499 -0
- package/dist/client/agent-tracker.js +659 -0
- package/dist/client/description-updater.d.ts +56 -0
- package/dist/client/description-updater.js +259 -0
- package/dist/client/log-parser.d.ts +72 -0
- package/dist/client/log-parser.js +387 -0
- package/dist/client/mcp-client.d.ts +50 -0
- package/dist/client/mcp-client.js +532 -0
- package/dist/client/message-processor.d.ts +35 -0
- package/dist/client/message-processor.js +352 -0
- package/dist/client/multi-bot-manager.d.ts +24 -0
- package/dist/client/multi-bot-manager.js +74 -0
- package/dist/client/providers/anthropic-provider.d.ts +19 -0
- package/dist/client/providers/anthropic-provider.js +631 -0
- package/dist/client/providers/llm-provider.d.ts +47 -0
- package/dist/client/providers/llm-provider.js +367 -0
- package/dist/client/providers/openai-provider.d.ts +23 -0
- package/dist/client/providers/openai-provider.js +621 -0
- package/dist/client/simple-llm-caller.d.ts +19 -0
- package/dist/client/simple-llm-caller.js +100 -0
- package/dist/client/skill-generator.d.ts +81 -0
- package/dist/client/skill-generator.js +386 -0
- package/dist/client/test-adaptive-bot.d.ts +9 -0
- package/dist/client/test-adaptive-bot.js +82 -0
- package/dist/client/token-pricing.d.ts +38 -0
- package/dist/client/token-pricing.js +127 -0
- package/dist/client/token-tracker.d.ts +232 -0
- package/dist/client/token-tracker.js +457 -0
- package/dist/client/token-usage-bot.d.ts +53 -0
- package/dist/client/token-usage-bot.js +153 -0
- package/dist/client/tool-executor.d.ts +69 -0
- package/dist/client/tool-executor.js +159 -0
- package/dist/client/tool-schema-loader.d.ts +60 -0
- package/dist/client/tool-schema-loader.js +178 -0
- package/dist/client/types.d.ts +69 -0
- package/dist/client/types.js +7 -0
- package/dist/config.d.ts +162 -0
- package/dist/config.js +296 -0
- package/dist/core.d.ts +26 -0
- package/dist/core.js +147 -0
- package/dist/lib/context-manager.d.ts +111 -0
- package/dist/lib/context-manager.js +431 -0
- package/dist/lib/logger.d.ts +74 -0
- package/dist/lib/logger.js +277 -0
- package/dist/lib/materialize.d.ts +3 -0
- package/dist/lib/materialize.js +101 -0
- package/dist/lib/normalizedName.d.ts +7 -0
- package/dist/lib/normalizedName.js +48 -0
- package/dist/lib/prompt-length-manager.d.ts +81 -0
- package/dist/lib/prompt-length-manager.js +457 -0
- package/dist/lib/terminal-prompt.d.ts +9 -0
- package/dist/lib/terminal-prompt.js +108 -0
- package/dist/mcp/UserContextCache.d.ts +56 -0
- package/dist/mcp/UserContextCache.js +163 -0
- package/dist/mcp/auth.d.ts +2 -0
- package/dist/mcp/auth.js +29 -0
- package/dist/mcp/hailer-clients.d.ts +42 -0
- package/dist/mcp/hailer-clients.js +246 -0
- package/dist/mcp/signal-handler.d.ts +45 -0
- package/dist/mcp/signal-handler.js +317 -0
- package/dist/mcp/tool-registry.d.ts +100 -0
- package/dist/mcp/tool-registry.js +306 -0
- package/dist/mcp/tools/activity.d.ts +15 -0
- package/dist/mcp/tools/activity.js +955 -0
- package/dist/mcp/tools/app.d.ts +20 -0
- package/dist/mcp/tools/app.js +1488 -0
- package/dist/mcp/tools/discussion.d.ts +19 -0
- package/dist/mcp/tools/discussion.js +950 -0
- package/dist/mcp/tools/file.d.ts +15 -0
- package/dist/mcp/tools/file.js +119 -0
- package/dist/mcp/tools/insight.d.ts +17 -0
- package/dist/mcp/tools/insight.js +806 -0
- package/dist/mcp/tools/skill.d.ts +10 -0
- package/dist/mcp/tools/skill.js +279 -0
- package/dist/mcp/tools/user.d.ts +10 -0
- package/dist/mcp/tools/user.js +108 -0
- package/dist/mcp/tools/workflow-template.d.ts +19 -0
- package/dist/mcp/tools/workflow-template.js +822 -0
- package/dist/mcp/tools/workflow.d.ts +18 -0
- package/dist/mcp/tools/workflow.js +1362 -0
- package/dist/mcp/utils/api-errors.d.ts +45 -0
- package/dist/mcp/utils/api-errors.js +160 -0
- package/dist/mcp/utils/data-transformers.d.ts +102 -0
- package/dist/mcp/utils/data-transformers.js +194 -0
- package/dist/mcp/utils/file-upload.d.ts +33 -0
- package/dist/mcp/utils/file-upload.js +148 -0
- package/dist/mcp/utils/hailer-api-client.d.ts +120 -0
- package/dist/mcp/utils/hailer-api-client.js +323 -0
- package/dist/mcp/utils/index.d.ts +13 -0
- package/dist/mcp/utils/index.js +39 -0
- package/dist/mcp/utils/logger.d.ts +42 -0
- package/dist/mcp/utils/logger.js +103 -0
- package/dist/mcp/utils/types.d.ts +286 -0
- package/dist/mcp/utils/types.js +7 -0
- package/dist/mcp/workspace-cache.d.ts +42 -0
- package/dist/mcp/workspace-cache.js +97 -0
- package/dist/mcp-server.d.ts +42 -0
- package/dist/mcp-server.js +280 -0
- package/package.json +56 -0
- package/tsconfig.json +23 -0
|
@@ -0,0 +1,603 @@
|
|
|
1
|
+
# Comprehensive Testing Guide
|
|
2
|
+
|
|
3
|
+
Step-by-step guide for testing skills at every level: unit, integration, end-to-end, performance, and manual quality review.
|
|
4
|
+
|
|
5
|
+
## Testing Workflow
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
┌──────────────────────────────────────────────────────┐
|
|
9
|
+
│ 1. Create Skill │
|
|
10
|
+
│ - Write SKILL.md │
|
|
11
|
+
│ - Add references │
|
|
12
|
+
│ - Define keywords │
|
|
13
|
+
└──────────────────────────────────────────────────────┘
|
|
14
|
+
↓
|
|
15
|
+
┌──────────────────────────────────────────────────────┐
|
|
16
|
+
│ 2. Unit Tests (Isolated) │
|
|
17
|
+
│ - Test file structure │
|
|
18
|
+
│ - Test content validity │
|
|
19
|
+
│ - Test loading mechanism │
|
|
20
|
+
└──────────────────────────────────────────────────────┘
|
|
21
|
+
↓
|
|
22
|
+
┌──────────────────────────────────────────────────────┐
|
|
23
|
+
│ 3. Integration Tests (Skill System) │
|
|
24
|
+
│ - Test with SkillLoader │
|
|
25
|
+
│ - Test with SkillManager │
|
|
26
|
+
│ - Test keyword matching │
|
|
27
|
+
│ - Test caching behavior │
|
|
28
|
+
└──────────────────────────────────────────────────────┘
|
|
29
|
+
↓
|
|
30
|
+
┌──────────────────────────────────────────────────────┐
|
|
31
|
+
│ 4. E2E Tests (Full Agent) │
|
|
32
|
+
│ - Test with real messages │
|
|
33
|
+
│ - Test with LLM providers │
|
|
34
|
+
│ - Test prompt enhancement │
|
|
35
|
+
│ - Validate agent responses │
|
|
36
|
+
└──────────────────────────────────────────────────────┘
|
|
37
|
+
↓
|
|
38
|
+
┌──────────────────────────────────────────────────────┐
|
|
39
|
+
│ 5. Performance Tests │
|
|
40
|
+
│ - Measure load times │
|
|
41
|
+
│ - Measure cache efficiency │
|
|
42
|
+
│ - Test under load │
|
|
43
|
+
└──────────────────────────────────────────────────────┘
|
|
44
|
+
↓
|
|
45
|
+
┌──────────────────────────────────────────────────────┐
|
|
46
|
+
│ 6. Quality Review │
|
|
47
|
+
│ - Manual content review │
|
|
48
|
+
│ - Test with real users │
|
|
49
|
+
│ - Gather feedback │
|
|
50
|
+
└──────────────────────────────────────────────────────┘
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
## Level 1: Unit Tests
|
|
54
|
+
|
|
55
|
+
### Testing File Structure
|
|
56
|
+
|
|
57
|
+
```typescript
|
|
58
|
+
import * as fs from 'fs/promises';
|
|
59
|
+
import * as path from 'path';
|
|
60
|
+
|
|
61
|
+
describe('Skill Structure', () => {
|
|
62
|
+
const skillPath = path.join(__dirname, '../../.claude/skills/my-skill');
|
|
63
|
+
|
|
64
|
+
it('should have SKILL.md in root', async () => {
|
|
65
|
+
const skillMd = path.join(skillPath, 'SKILL.md');
|
|
66
|
+
const exists = await fs.stat(skillMd)
|
|
67
|
+
.then(() => true)
|
|
68
|
+
.catch(() => false);
|
|
69
|
+
expect(exists).toBe(true);
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
it('should have references directory (optional)', async () => {
|
|
73
|
+
const refsPath = path.join(skillPath, 'references');
|
|
74
|
+
const exists = await fs.stat(refsPath)
|
|
75
|
+
.then(() => true)
|
|
76
|
+
.catch(() => false);
|
|
77
|
+
// Optional, so just log if missing
|
|
78
|
+
if (!exists) {
|
|
79
|
+
console.log('Note: No references directory found');
|
|
80
|
+
}
|
|
81
|
+
});
|
|
82
|
+
|
|
83
|
+
it('should contain valid markdown', async () => {
|
|
84
|
+
const skillMd = path.join(skillPath, 'SKILL.md');
|
|
85
|
+
const content = await fs.readFile(skillMd, 'utf-8');
|
|
86
|
+
|
|
87
|
+
// Basic markdown validation
|
|
88
|
+
expect(content.length).toBeGreaterThan(100);
|
|
89
|
+
expect(content).toMatch(/^#\s+/m); // Has at least one heading
|
|
90
|
+
});
|
|
91
|
+
});
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
### Testing Content Quality
|
|
95
|
+
|
|
96
|
+
```typescript
|
|
97
|
+
describe('Skill Content Quality', () => {
|
|
98
|
+
let skillContent: string;
|
|
99
|
+
|
|
100
|
+
beforeAll(async () => {
|
|
101
|
+
const skillMd = path.join(__dirname, '../../.claude/skills/my-skill/SKILL.md');
|
|
102
|
+
skillContent = await fs.readFile(skillMd, 'utf-8');
|
|
103
|
+
});
|
|
104
|
+
|
|
105
|
+
it('should have clear title', () => {
|
|
106
|
+
const titleMatch = skillContent.match(/^#\s+(.+)$/m);
|
|
107
|
+
expect(titleMatch).toBeTruthy();
|
|
108
|
+
expect(titleMatch![1].length).toBeGreaterThan(3);
|
|
109
|
+
});
|
|
110
|
+
|
|
111
|
+
it('should have overview section', () => {
|
|
112
|
+
expect(skillContent.toLowerCase()).toMatch(/overview|introduction|about/);
|
|
113
|
+
});
|
|
114
|
+
|
|
115
|
+
it('should contain examples', () => {
|
|
116
|
+
expect(skillContent).toMatch(/```/); // Has code blocks
|
|
117
|
+
});
|
|
118
|
+
|
|
119
|
+
it('should not be too short', () => {
|
|
120
|
+
expect(skillContent.length).toBeGreaterThan(500);
|
|
121
|
+
});
|
|
122
|
+
|
|
123
|
+
it('should not be excessively long', () => {
|
|
124
|
+
expect(skillContent.length).toBeLessThan(100000); // 100KB
|
|
125
|
+
});
|
|
126
|
+
|
|
127
|
+
it('should have actionable content', () => {
|
|
128
|
+
const actionWords = ['how to', 'step', 'example', 'guide', 'use'];
|
|
129
|
+
const hasActionWords = actionWords.some(word =>
|
|
130
|
+
skillContent.toLowerCase().includes(word)
|
|
131
|
+
);
|
|
132
|
+
expect(hasActionWords).toBe(true);
|
|
133
|
+
});
|
|
134
|
+
});
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Testing Reference Files
|
|
138
|
+
|
|
139
|
+
```typescript
|
|
140
|
+
describe('Skill References', () => {
|
|
141
|
+
const refsPath = path.join(__dirname, '../../.claude/skills/my-skill/references');
|
|
142
|
+
|
|
143
|
+
it('should load all reference files', async () => {
|
|
144
|
+
const files = await fs.readdir(refsPath).catch(() => []);
|
|
145
|
+
const mdFiles = files.filter(f => f.endsWith('.md'));
|
|
146
|
+
|
|
147
|
+
for (const file of mdFiles) {
|
|
148
|
+
const content = await fs.readFile(path.join(refsPath, file), 'utf-8');
|
|
149
|
+
expect(content.length).toBeGreaterThan(0);
|
|
150
|
+
console.log(` ✅ Loaded ${file} (${content.length} chars)`);
|
|
151
|
+
}
|
|
152
|
+
});
|
|
153
|
+
});
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
## Level 2: Integration Tests
|
|
157
|
+
|
|
158
|
+
### Testing with SkillLoader
|
|
159
|
+
|
|
160
|
+
```typescript
|
|
161
|
+
import { SkillLoader } from '../src/client/skill-loader';
|
|
162
|
+
import * as path from 'path';
|
|
163
|
+
|
|
164
|
+
describe('SkillLoader Integration', () => {
|
|
165
|
+
let loader: SkillLoader;
|
|
166
|
+
|
|
167
|
+
beforeAll(() => {
|
|
168
|
+
const skillsPath = path.join(__dirname, '../.claude/skills');
|
|
169
|
+
loader = new SkillLoader(skillsPath);
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
afterEach(() => {
|
|
173
|
+
loader.clearCache();
|
|
174
|
+
});
|
|
175
|
+
|
|
176
|
+
it('should load skill successfully', async () => {
|
|
177
|
+
const skill = await loader.load('my-skill');
|
|
178
|
+
|
|
179
|
+
expect(skill.name).toBe('my-skill');
|
|
180
|
+
expect(skill.content).toBeTruthy();
|
|
181
|
+
expect(skill.description).toBeTruthy();
|
|
182
|
+
expect(skill.loadedAt).toBeGreaterThan(0);
|
|
183
|
+
});
|
|
184
|
+
|
|
185
|
+
it('should include references in content', async () => {
|
|
186
|
+
const skill = await loader.load('my-skill');
|
|
187
|
+
|
|
188
|
+
// References are appended to main content
|
|
189
|
+
// If references exist, content should be larger
|
|
190
|
+
expect(skill.content.length).toBeGreaterThan(500);
|
|
191
|
+
});
|
|
192
|
+
|
|
193
|
+
it('should cache skills after first load', async () => {
|
|
194
|
+
// First load
|
|
195
|
+
const start1 = performance.now();
|
|
196
|
+
await loader.load('my-skill');
|
|
197
|
+
const firstLoadTime = performance.now() - start1;
|
|
198
|
+
|
|
199
|
+
// Second load (cached)
|
|
200
|
+
const start2 = performance.now();
|
|
201
|
+
await loader.load('my-skill');
|
|
202
|
+
const cachedLoadTime = performance.now() - start2;
|
|
203
|
+
|
|
204
|
+
console.log(`First load: ${firstLoadTime.toFixed(2)}ms`);
|
|
205
|
+
console.log(`Cached load: ${cachedLoadTime.toFixed(2)}ms`);
|
|
206
|
+
|
|
207
|
+
expect(cachedLoadTime).toBeLessThan(firstLoadTime / 5);
|
|
208
|
+
});
|
|
209
|
+
|
|
210
|
+
it('should handle missing skill gracefully', async () => {
|
|
211
|
+
await expect(
|
|
212
|
+
loader.load('non-existent-skill')
|
|
213
|
+
).rejects.toThrow(/Failed to load skill/);
|
|
214
|
+
});
|
|
215
|
+
|
|
216
|
+
it('should clear cache correctly', async () => {
|
|
217
|
+
await loader.load('my-skill');
|
|
218
|
+
loader.clearCache('my-skill');
|
|
219
|
+
|
|
220
|
+
// Next load should be from disk again
|
|
221
|
+
const start = performance.now();
|
|
222
|
+
await loader.load('my-skill');
|
|
223
|
+
const loadTime = performance.now() - start;
|
|
224
|
+
|
|
225
|
+
// After clearCache() the load hits the disk again, so it should take measurable time (> 1ms)
|
|
226
|
+
expect(loadTime).toBeGreaterThan(1);
|
|
227
|
+
});
|
|
228
|
+
});
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
### Testing with SkillManager
|
|
232
|
+
|
|
233
|
+
```typescript
|
|
234
|
+
import { SkillManager } from '../src/client/skill-manager';
|
|
235
|
+
import { SkillLoader } from '../src/client/skill-loader';
|
|
236
|
+
|
|
237
|
+
describe('SkillManager Integration', () => {
|
|
238
|
+
let manager: SkillManager;
|
|
239
|
+
let loader: SkillLoader;
|
|
240
|
+
|
|
241
|
+
beforeAll(() => {
|
|
242
|
+
const skillsPath = path.join(__dirname, '../.claude/skills');
|
|
243
|
+
loader = new SkillLoader(skillsPath);
|
|
244
|
+
manager = new SkillManager(loader);
|
|
245
|
+
});
|
|
246
|
+
|
|
247
|
+
describe('Keyword Matching', () => {
|
|
248
|
+
it('should match skill by primary keywords', async () => {
|
|
249
|
+
const testCases = [
|
|
250
|
+
{
|
|
251
|
+
message: 'How do I create a workflow?',
|
|
252
|
+
expectedSkill: 'mcp-tools',
|
|
253
|
+
minConfidence: 0.5
|
|
254
|
+
},
|
|
255
|
+
{
|
|
256
|
+
message: 'List all activities in the workspace',
|
|
257
|
+
expectedSkill: 'hailer-api',
|
|
258
|
+
minConfidence: 0.5
|
|
259
|
+
},
|
|
260
|
+
{
|
|
261
|
+
message: 'Build a new agent system',
|
|
262
|
+
expectedSkill: 'agent-building',
|
|
263
|
+
minConfidence: 0.5
|
|
264
|
+
},
|
|
265
|
+
];
|
|
266
|
+
|
|
267
|
+
for (const { message, expectedSkill, minConfidence } of testCases) {
|
|
268
|
+
const guidance = await manager.analyzeRequest(message);
|
|
269
|
+
|
|
270
|
+
expect(guidance.skills).toContain(expectedSkill);
|
|
271
|
+
expect(guidance.confidence).toBeGreaterThanOrEqual(minConfidence);
|
|
272
|
+
|
|
273
|
+
console.log(` ✅ "${message}" → ${expectedSkill} (${(guidance.confidence * 100).toFixed(0)}%)`);
|
|
274
|
+
}
|
|
275
|
+
});
|
|
276
|
+
|
|
277
|
+
it('should handle messages with no skill match', async () => {
|
|
278
|
+
const guidance = await manager.analyzeRequest('Hello world xyz123');
|
|
279
|
+
|
|
280
|
+
expect(guidance.skills).toHaveLength(0);
|
|
281
|
+
expect(guidance.confidence).toBeLessThan(0.5);
|
|
282
|
+
expect(guidance.guidance).toContain('General assistance');
|
|
283
|
+
});
|
|
284
|
+
|
|
285
|
+
it('should return skill content', async () => {
|
|
286
|
+
const guidance = await manager.analyzeRequest('create workflow');
|
|
287
|
+
|
|
288
|
+
expect(guidance.skillContent).toBeTruthy();
|
|
289
|
+
expect(guidance.skillContent!.length).toBeGreaterThan(1000);
|
|
290
|
+
});
|
|
291
|
+
|
|
292
|
+
it('should recommend relevant tools', async () => {
|
|
293
|
+
const guidance = await manager.analyzeRequest('workflow schema');
|
|
294
|
+
|
|
295
|
+
expect(guidance.recommendedTools).toBeTruthy();
|
|
296
|
+
expect(guidance.recommendedTools.length).toBeGreaterThan(0);
|
|
297
|
+
});
|
|
298
|
+
});
|
|
299
|
+
|
|
300
|
+
describe('Multiple Keyword Matches', () => {
|
|
301
|
+
it('should prioritize highest scoring skill', async () => {
|
|
302
|
+
// Message with multiple potential matches
|
|
303
|
+
const guidance = await manager.analyzeRequest(
|
|
304
|
+
'workflow activity field schema'
|
|
305
|
+
);
|
|
306
|
+
|
|
307
|
+
// Should narrow to the best match(es): at least one skill, at most two
|
|
308
|
+
expect(guidance.skills.length).toBeGreaterThan(0);
|
|
309
|
+
expect(guidance.skills.length).toBeLessThanOrEqual(2);
|
|
310
|
+
});
|
|
311
|
+
});
|
|
312
|
+
|
|
313
|
+
describe('Edge Cases', () => {
|
|
314
|
+
it('should handle empty message', async () => {
|
|
315
|
+
const guidance = await manager.analyzeRequest('');
|
|
316
|
+
expect(guidance.skills).toHaveLength(0);
|
|
317
|
+
});
|
|
318
|
+
|
|
319
|
+
it('should handle very long message', async () => {
|
|
320
|
+
const longMessage = 'workflow '.repeat(100);
|
|
321
|
+
const guidance = await manager.analyzeRequest(longMessage);
|
|
322
|
+
expect(guidance.skills.length).toBeGreaterThan(0);
|
|
323
|
+
});
|
|
324
|
+
|
|
325
|
+
it('should handle special characters', async () => {
|
|
326
|
+
const guidance = await manager.analyzeRequest('workflow !@#$%^&*()');
|
|
327
|
+
expect(guidance.skills.length).toBeGreaterThan(0);
|
|
328
|
+
});
|
|
329
|
+
});
|
|
330
|
+
});
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
## Level 3: End-to-End Tests
|
|
334
|
+
|
|
335
|
+
### Testing with Mock LLM Provider
|
|
336
|
+
|
|
337
|
+
```typescript
|
|
338
|
+
import { McpClient } from '../src/client/mcp-client';
|
|
339
|
+
import { ChatMessage } from '../src/client/types';
|
|
340
|
+
|
|
341
|
+
describe('E2E: Skill System with Agent', () => {
|
|
342
|
+
let mockClient: McpClient;
|
|
343
|
+
|
|
344
|
+
beforeAll(() => {
|
|
345
|
+
// Setup mock MCP client with test config
|
|
346
|
+
mockClient = new McpClient({
|
|
347
|
+
enabled: true,
|
|
348
|
+
mcpServerUrl: 'http://localhost:3030/api/mcp',
|
|
349
|
+
mcpServerApiKey: 'test-key',
|
|
350
|
+
providers: [], // No actual providers needed for this test
|
|
351
|
+
mcpAgentIds: ['test-agent-id'],
|
|
352
|
+
botConfigs: [],
|
|
353
|
+
enableDirectMessages: true
|
|
354
|
+
});
|
|
355
|
+
});
|
|
356
|
+
|
|
357
|
+
it('should enhance prompt with skill content', async () => {
|
|
358
|
+
// This test only verifies that the client can be constructed; it doesn't call an LLM
|
|
359
|
+
// You'd need to mock the provider or use a test double
|
|
360
|
+
|
|
361
|
+
const message: ChatMessage = {
|
|
362
|
+
id: 'test-msg-1',
|
|
363
|
+
content: 'How do I create a workflow schema?',
|
|
364
|
+
timestamp: Date.now(),
|
|
365
|
+
discussionId: 'test-discussion',
|
|
366
|
+
userId: 'test-user',
|
|
367
|
+
userName: 'Test User',
|
|
368
|
+
workspaceId: 'test-workspace',
|
|
369
|
+
mentionedOrDirectMessagedBotId: 'test-agent-id'
|
|
370
|
+
};
|
|
371
|
+
|
|
372
|
+
// In a real test, you'd mock the provider and verify
|
|
373
|
+
// that it receives the skill content in the system prompt
|
|
374
|
+
// For now, just verify the system initializes
|
|
375
|
+
expect(mockClient).toBeTruthy();
|
|
376
|
+
});
|
|
377
|
+
});
|
|
378
|
+
```
|
|
379
|
+
|
|
380
|
+
### Testing Skill Impact on Responses
|
|
381
|
+
|
|
382
|
+
```typescript
|
|
383
|
+
describe('E2E: Skill Impact', () => {
|
|
384
|
+
// These tests require a real or mocked LLM provider
|
|
385
|
+
// They verify that skills actually improve agent responses
|
|
386
|
+
|
|
387
|
+
it('should provide better responses with skill loaded', async () => {
|
|
388
|
+
// Compare response quality with and without skill
|
|
389
|
+
// This is typically done manually or with LLM-as-judge
|
|
390
|
+
|
|
391
|
+
const messageWithSkill = 'explain workflow schemas';
|
|
392
|
+
// Response should include detailed workflow schema info
|
|
393
|
+
|
|
394
|
+
const messageWithoutSkill = 'explain xyz123abc';
|
|
395
|
+
// Response should be generic
|
|
396
|
+
|
|
397
|
+
// Manual verification or LLM-as-judge scoring
|
|
398
|
+
});
|
|
399
|
+
});
|
|
400
|
+
```
|
|
401
|
+
|
|
402
|
+
## Level 4: Performance Tests
|
|
403
|
+
|
|
404
|
+
### Load Time Benchmarks
|
|
405
|
+
|
|
406
|
+
```typescript
|
|
407
|
+
describe('Performance: Skill Loading', () => {
|
|
408
|
+
let loader: SkillLoader;
|
|
409
|
+
|
|
410
|
+
beforeAll(() => {
|
|
411
|
+
const skillsPath = path.join(__dirname, '../.claude/skills');
|
|
412
|
+
loader = new SkillLoader(skillsPath);
|
|
413
|
+
});
|
|
414
|
+
|
|
415
|
+
it('should load within performance budget', async () => {
|
|
416
|
+
const iterations = 10;
|
|
417
|
+
const times: number[] = [];
|
|
418
|
+
|
|
419
|
+
for (let i = 0; i < iterations; i++) {
|
|
420
|
+
loader.clearCache();
|
|
421
|
+
const start = performance.now();
|
|
422
|
+
await loader.load('my-skill');
|
|
423
|
+
times.push(performance.now() - start);
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
const avgTime = times.reduce((a, b) => a + b) / times.length;
|
|
427
|
+
const maxTime = Math.max(...times);
|
|
428
|
+
|
|
429
|
+
console.log(`Average load time: ${avgTime.toFixed(2)}ms`);
|
|
430
|
+
console.log(`Max load time: ${maxTime.toFixed(2)}ms`);
|
|
431
|
+
|
|
432
|
+
expect(avgTime).toBeLessThan(100); // Should average < 100ms
|
|
433
|
+
expect(maxTime).toBeLessThan(150); // Should never exceed 150ms
|
|
434
|
+
});
|
|
435
|
+
|
|
436
|
+
it('should cache effectively', async () => {
|
|
437
|
+
await loader.load('my-skill');
|
|
438
|
+
|
|
439
|
+
const iterations = 100;
|
|
440
|
+
const start = performance.now();
|
|
441
|
+
|
|
442
|
+
for (let i = 0; i < iterations; i++) {
|
|
443
|
+
await loader.load('my-skill');
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
const totalTime = performance.now() - start;
|
|
447
|
+
const avgTime = totalTime / iterations;
|
|
448
|
+
|
|
449
|
+
console.log(`Cached load average: ${avgTime.toFixed(2)}ms`);
|
|
450
|
+
|
|
451
|
+
expect(avgTime).toBeLessThan(10); // Cached loads should be < 10ms
|
|
452
|
+
});
|
|
453
|
+
});
|
|
454
|
+
```
|
|
455
|
+
|
|
456
|
+
### Memory Usage
|
|
457
|
+
|
|
458
|
+
```typescript
|
|
459
|
+
describe('Performance: Memory Usage', () => {
|
|
460
|
+
it('should not leak memory', async () => {
|
|
461
|
+
const loader = new SkillLoader(path.join(__dirname, '../.claude/skills'));
|
|
462
|
+
|
|
463
|
+
const initialMemory = process.memoryUsage().heapUsed;
|
|
464
|
+
|
|
465
|
+
// Load and clear many times
|
|
466
|
+
for (let i = 0; i < 100; i++) {
|
|
467
|
+
await loader.load('my-skill');
|
|
468
|
+
loader.clearCache();
|
|
469
|
+
}
|
|
470
|
+
|
|
471
|
+
// Force GC if available (global.gc is only defined when Node is started with --expose-gc)
|
|
472
|
+
if (global.gc) {
|
|
473
|
+
global.gc();
|
|
474
|
+
}
|
|
475
|
+
|
|
476
|
+
const finalMemory = process.memoryUsage().heapUsed;
|
|
477
|
+
const growth = finalMemory - initialMemory;
|
|
478
|
+
const growthMB = growth / 1024 / 1024;
|
|
479
|
+
|
|
480
|
+
console.log(`Memory growth: ${growthMB.toFixed(2)} MB`);
|
|
481
|
+
|
|
482
|
+
// Should not grow significantly
|
|
483
|
+
expect(growthMB).toBeLessThan(10);
|
|
484
|
+
});
|
|
485
|
+
});
|
|
486
|
+
```
|
|
487
|
+
|
|
488
|
+
## Level 5: Quality Review
|
|
489
|
+
|
|
490
|
+
### Manual Testing Checklist
|
|
491
|
+
|
|
492
|
+
```markdown
|
|
493
|
+
## Manual Testing for {Skill Name}
|
|
494
|
+
|
|
495
|
+
### Content Quality
|
|
496
|
+
- [ ] Read through entire skill content
|
|
497
|
+
- [ ] Verify all examples are accurate
|
|
498
|
+
- [ ] Check for typos and grammar
|
|
499
|
+
- [ ] Ensure logical flow and structure
|
|
500
|
+
- [ ] Verify technical accuracy
|
|
501
|
+
|
|
502
|
+
### Usability
|
|
503
|
+
- [ ] Test with 5 different user queries
|
|
504
|
+
- [ ] Verify skill triggers appropriately
|
|
505
|
+
- [ ] Check that content is helpful to LLM
|
|
506
|
+
- [ ] Ensure examples are clear
|
|
507
|
+
- [ ] Validate recommended tools
|
|
508
|
+
|
|
509
|
+
### Integration
|
|
510
|
+
- [ ] Test in development environment
|
|
511
|
+
- [ ] Verify works with OpenAI provider
|
|
512
|
+
- [ ] Verify works with Anthropic provider
|
|
513
|
+
- [ ] Check log messages are clear
|
|
514
|
+
- [ ] Ensure no errors in production
|
|
515
|
+
|
|
516
|
+
### Performance
|
|
517
|
+
- [ ] Measure first load time
|
|
518
|
+
- [ ] Measure cached load time
|
|
519
|
+
- [ ] Check skill size is reasonable
|
|
520
|
+
- [ ] Verify caching works
|
|
521
|
+
- [ ] No performance regressions
|
|
522
|
+
```
|
|
523
|
+
|
|
524
|
+
### User Acceptance Testing
|
|
525
|
+
|
|
526
|
+
```typescript
|
|
527
|
+
// Script to run UAT with real users
|
|
528
|
+
async function runUserAcceptanceTest() {
|
|
529
|
+
console.log('🧑🤝🧑 User Acceptance Testing\n');
|
|
530
|
+
|
|
531
|
+
const testQueries = [
|
|
532
|
+
'How do I work with workflow schemas?',
|
|
533
|
+
'Show me examples of using the API',
|
|
534
|
+
'Help me build an agent',
|
|
535
|
+
// Add more based on skill purpose
|
|
536
|
+
];
|
|
537
|
+
|
|
538
|
+
console.log('Test these queries with the agent:');
|
|
539
|
+
testQueries.forEach((q, i) => {
|
|
540
|
+
console.log(`${i + 1}. ${q}`);
|
|
541
|
+
});
|
|
542
|
+
|
|
543
|
+
console.log('\nFor each query, verify:');
|
|
544
|
+
console.log(' - Skill loads correctly');
|
|
545
|
+
console.log(' - Response is helpful');
|
|
546
|
+
console.log(' - Information is accurate');
|
|
547
|
+
console.log(' - Examples are clear');
|
|
548
|
+
console.log(' - User is satisfied\n');
|
|
549
|
+
}
|
|
550
|
+
```
|
|
551
|
+
|
|
552
|
+
## Continuous Improvement
|
|
553
|
+
|
|
554
|
+
### Monitoring in Production
|
|
555
|
+
|
|
556
|
+
```typescript
|
|
557
|
+
// Add to skill-loader.ts for production monitoring
|
|
558
|
+
private logSkillMetrics(skillName: string, loadTime: number, fromCache: boolean) {
|
|
559
|
+
this.logger.info('Skill loaded', {
|
|
560
|
+
skillName,
|
|
561
|
+
loadTime,
|
|
562
|
+
fromCache,
|
|
563
|
+
contentSize: this.cache.get(skillName)?.content.length || 0
|
|
564
|
+
});
|
|
565
|
+
|
|
566
|
+
// Send to monitoring service
|
|
567
|
+
if (process.env.NODE_ENV === 'production') {
|
|
568
|
+
metrics.timing('skill.load.time', loadTime, { skill: skillName, cached: fromCache });
|
|
569
|
+
}
|
|
570
|
+
}
|
|
571
|
+
```
|
|
572
|
+
|
|
573
|
+
### Feedback Collection
|
|
574
|
+
|
|
575
|
+
```typescript
|
|
576
|
+
// Collect feedback on skill effectiveness
|
|
577
|
+
interface SkillFeedback {
|
|
578
|
+
skillName: string;
|
|
579
|
+
userQuery: string;
|
|
580
|
+
wasHelpful: boolean;
|
|
581
|
+
comments?: string;
|
|
582
|
+
timestamp: number;
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
function logSkillFeedback(feedback: SkillFeedback) {
|
|
586
|
+
// Store feedback for analysis
|
|
587
|
+
console.log('Skill Feedback:', feedback);
|
|
588
|
+
// Send to analytics service
|
|
589
|
+
}
|
|
590
|
+
```
|
|
591
|
+
|
|
592
|
+
## Summary
|
|
593
|
+
|
|
594
|
+
Follow this testing workflow for every skill:
|
|
595
|
+
|
|
596
|
+
1. **Unit Tests**: Structure and content validation
|
|
597
|
+
2. **Integration Tests**: Works with skill system components
|
|
598
|
+
3. **E2E Tests**: Works in full agent context
|
|
599
|
+
4. **Performance Tests**: Meets load time and memory budgets
|
|
600
|
+
5. **Manual Review**: Content quality and usability
|
|
601
|
+
6. **UAT**: Real users validate effectiveness
|
|
602
|
+
7. **Production Monitoring**: Track performance and issues
|
|
603
|
+
8. **Continuous Improvement**: Iterate based on feedback
|