@hailer/mcp 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (163) hide show
  1. package/.claude/commands/tool-builder.md +37 -0
  2. package/.claude/commands/ws-pull.md +44 -0
  3. package/.claude/settings.json +8 -0
  4. package/.claude/settings.local.json +49 -0
  5. package/.claude/skills/activity-api/SKILL.md +96 -0
  6. package/.claude/skills/activity-api/references/activity-endpoints.md +845 -0
  7. package/.claude/skills/add-app-member-skill/SKILL.md +977 -0
  8. package/.claude/skills/agent-building/SKILL.md +243 -0
  9. package/.claude/skills/agent-building/references/architecture-patterns.md +446 -0
  10. package/.claude/skills/agent-building/references/code-examples.md +587 -0
  11. package/.claude/skills/agent-building/references/implementation-guide.md +619 -0
  12. package/.claude/skills/app-api/SKILL.md +219 -0
  13. package/.claude/skills/app-api/references/app-endpoints.md +759 -0
  14. package/.claude/skills/building-hailer-apps-skill/SKILL.md +548 -0
  15. package/.claude/skills/create-app-skill/SKILL.md +1101 -0
  16. package/.claude/skills/create-insight-skill/SKILL.md +1317 -0
  17. package/.claude/skills/get-insight-data-skill/SKILL.md +1053 -0
  18. package/.claude/skills/hailer-api/SKILL.md +283 -0
  19. package/.claude/skills/hailer-api/references/activities.md +620 -0
  20. package/.claude/skills/hailer-api/references/authentication.md +216 -0
  21. package/.claude/skills/hailer-api/references/datasets.md +437 -0
  22. package/.claude/skills/hailer-api/references/files.md +301 -0
  23. package/.claude/skills/hailer-api/references/insights.md +469 -0
  24. package/.claude/skills/hailer-api/references/workflows.md +720 -0
  25. package/.claude/skills/hailer-api/references/workspaces-users.md +445 -0
  26. package/.claude/skills/insight-api/SKILL.md +185 -0
  27. package/.claude/skills/insight-api/references/insight-endpoints.md +514 -0
  28. package/.claude/skills/install-workflow-skill/SKILL.md +1056 -0
  29. package/.claude/skills/list-apps-skill/SKILL.md +1010 -0
  30. package/.claude/skills/list-workflows-minimal-skill/SKILL.md +992 -0
  31. package/.claude/skills/local-first-skill/SKILL.md +570 -0
  32. package/.claude/skills/mcp-tools/SKILL.md +419 -0
  33. package/.claude/skills/mcp-tools/references/api-endpoints.md +499 -0
  34. package/.claude/skills/mcp-tools/references/data-structures.md +554 -0
  35. package/.claude/skills/mcp-tools/references/implementation-patterns.md +717 -0
  36. package/.claude/skills/preview-insight-skill/SKILL.md +1290 -0
  37. package/.claude/skills/publish-hailer-app-skill/SKILL.md +453 -0
  38. package/.claude/skills/remove-app-member-skill/SKILL.md +671 -0
  39. package/.claude/skills/remove-app-skill/SKILL.md +985 -0
  40. package/.claude/skills/remove-insight-skill/SKILL.md +1011 -0
  41. package/.claude/skills/remove-workflow-skill/SKILL.md +920 -0
  42. package/.claude/skills/scaffold-hailer-app-skill/SKILL.md +1034 -0
  43. package/.claude/skills/skill-testing/README.md +137 -0
  44. package/.claude/skills/skill-testing/SKILL.md +348 -0
  45. package/.claude/skills/skill-testing/references/test-patterns.md +705 -0
  46. package/.claude/skills/skill-testing/references/testing-guide.md +603 -0
  47. package/.claude/skills/skill-testing/references/validation-checklist.md +537 -0
  48. package/.claude/skills/tool-builder/SKILL.md +328 -0
  49. package/.claude/skills/update-app-skill/SKILL.md +970 -0
  50. package/.claude/skills/update-workflow-field-skill/SKILL.md +1098 -0
  51. package/.env.example +81 -0
  52. package/.mcp.json +13 -0
  53. package/README.md +297 -0
  54. package/dist/app.d.ts +4 -0
  55. package/dist/app.js +74 -0
  56. package/dist/cli.d.ts +3 -0
  57. package/dist/cli.js +5 -0
  58. package/dist/client/adaptive-documentation-bot.d.ts +108 -0
  59. package/dist/client/adaptive-documentation-bot.js +475 -0
  60. package/dist/client/adaptive-documentation-types.d.ts +66 -0
  61. package/dist/client/adaptive-documentation-types.js +9 -0
  62. package/dist/client/agent-activity-bot.d.ts +51 -0
  63. package/dist/client/agent-activity-bot.js +166 -0
  64. package/dist/client/agent-tracker.d.ts +499 -0
  65. package/dist/client/agent-tracker.js +659 -0
  66. package/dist/client/description-updater.d.ts +56 -0
  67. package/dist/client/description-updater.js +259 -0
  68. package/dist/client/log-parser.d.ts +72 -0
  69. package/dist/client/log-parser.js +387 -0
  70. package/dist/client/mcp-client.d.ts +50 -0
  71. package/dist/client/mcp-client.js +532 -0
  72. package/dist/client/message-processor.d.ts +35 -0
  73. package/dist/client/message-processor.js +352 -0
  74. package/dist/client/multi-bot-manager.d.ts +24 -0
  75. package/dist/client/multi-bot-manager.js +74 -0
  76. package/dist/client/providers/anthropic-provider.d.ts +19 -0
  77. package/dist/client/providers/anthropic-provider.js +631 -0
  78. package/dist/client/providers/llm-provider.d.ts +47 -0
  79. package/dist/client/providers/llm-provider.js +367 -0
  80. package/dist/client/providers/openai-provider.d.ts +23 -0
  81. package/dist/client/providers/openai-provider.js +621 -0
  82. package/dist/client/simple-llm-caller.d.ts +19 -0
  83. package/dist/client/simple-llm-caller.js +100 -0
  84. package/dist/client/skill-generator.d.ts +81 -0
  85. package/dist/client/skill-generator.js +386 -0
  86. package/dist/client/test-adaptive-bot.d.ts +9 -0
  87. package/dist/client/test-adaptive-bot.js +82 -0
  88. package/dist/client/token-pricing.d.ts +38 -0
  89. package/dist/client/token-pricing.js +127 -0
  90. package/dist/client/token-tracker.d.ts +232 -0
  91. package/dist/client/token-tracker.js +457 -0
  92. package/dist/client/token-usage-bot.d.ts +53 -0
  93. package/dist/client/token-usage-bot.js +153 -0
  94. package/dist/client/tool-executor.d.ts +69 -0
  95. package/dist/client/tool-executor.js +159 -0
  96. package/dist/client/tool-schema-loader.d.ts +60 -0
  97. package/dist/client/tool-schema-loader.js +178 -0
  98. package/dist/client/types.d.ts +69 -0
  99. package/dist/client/types.js +7 -0
  100. package/dist/config.d.ts +162 -0
  101. package/dist/config.js +296 -0
  102. package/dist/core.d.ts +26 -0
  103. package/dist/core.js +147 -0
  104. package/dist/lib/context-manager.d.ts +111 -0
  105. package/dist/lib/context-manager.js +431 -0
  106. package/dist/lib/logger.d.ts +74 -0
  107. package/dist/lib/logger.js +277 -0
  108. package/dist/lib/materialize.d.ts +3 -0
  109. package/dist/lib/materialize.js +101 -0
  110. package/dist/lib/normalizedName.d.ts +7 -0
  111. package/dist/lib/normalizedName.js +48 -0
  112. package/dist/lib/prompt-length-manager.d.ts +81 -0
  113. package/dist/lib/prompt-length-manager.js +457 -0
  114. package/dist/lib/terminal-prompt.d.ts +9 -0
  115. package/dist/lib/terminal-prompt.js +108 -0
  116. package/dist/mcp/UserContextCache.d.ts +56 -0
  117. package/dist/mcp/UserContextCache.js +163 -0
  118. package/dist/mcp/auth.d.ts +2 -0
  119. package/dist/mcp/auth.js +29 -0
  120. package/dist/mcp/hailer-clients.d.ts +42 -0
  121. package/dist/mcp/hailer-clients.js +246 -0
  122. package/dist/mcp/signal-handler.d.ts +45 -0
  123. package/dist/mcp/signal-handler.js +317 -0
  124. package/dist/mcp/tool-registry.d.ts +100 -0
  125. package/dist/mcp/tool-registry.js +306 -0
  126. package/dist/mcp/tools/activity.d.ts +15 -0
  127. package/dist/mcp/tools/activity.js +955 -0
  128. package/dist/mcp/tools/app.d.ts +20 -0
  129. package/dist/mcp/tools/app.js +1488 -0
  130. package/dist/mcp/tools/discussion.d.ts +19 -0
  131. package/dist/mcp/tools/discussion.js +950 -0
  132. package/dist/mcp/tools/file.d.ts +15 -0
  133. package/dist/mcp/tools/file.js +119 -0
  134. package/dist/mcp/tools/insight.d.ts +17 -0
  135. package/dist/mcp/tools/insight.js +806 -0
  136. package/dist/mcp/tools/skill.d.ts +10 -0
  137. package/dist/mcp/tools/skill.js +279 -0
  138. package/dist/mcp/tools/user.d.ts +10 -0
  139. package/dist/mcp/tools/user.js +108 -0
  140. package/dist/mcp/tools/workflow-template.d.ts +19 -0
  141. package/dist/mcp/tools/workflow-template.js +822 -0
  142. package/dist/mcp/tools/workflow.d.ts +18 -0
  143. package/dist/mcp/tools/workflow.js +1362 -0
  144. package/dist/mcp/utils/api-errors.d.ts +45 -0
  145. package/dist/mcp/utils/api-errors.js +160 -0
  146. package/dist/mcp/utils/data-transformers.d.ts +102 -0
  147. package/dist/mcp/utils/data-transformers.js +194 -0
  148. package/dist/mcp/utils/file-upload.d.ts +33 -0
  149. package/dist/mcp/utils/file-upload.js +148 -0
  150. package/dist/mcp/utils/hailer-api-client.d.ts +120 -0
  151. package/dist/mcp/utils/hailer-api-client.js +323 -0
  152. package/dist/mcp/utils/index.d.ts +13 -0
  153. package/dist/mcp/utils/index.js +39 -0
  154. package/dist/mcp/utils/logger.d.ts +42 -0
  155. package/dist/mcp/utils/logger.js +103 -0
  156. package/dist/mcp/utils/types.d.ts +286 -0
  157. package/dist/mcp/utils/types.js +7 -0
  158. package/dist/mcp/workspace-cache.d.ts +42 -0
  159. package/dist/mcp/workspace-cache.js +97 -0
  160. package/dist/mcp-server.d.ts +42 -0
  161. package/dist/mcp-server.js +280 -0
  162. package/package.json +56 -0
  163. package/tsconfig.json +23 -0
@@ -0,0 +1,603 @@
1
+ # Comprehensive Testing Guide
2
+
3
+ Step-by-step guide for testing skills at all levels: unit, integration, end-to-end, performance, and quality review.
4
+
5
+ ## Testing Workflow
6
+
7
+ ```
8
+ ┌──────────────────────────────────────────────────────┐
9
+ │ 1. Create Skill │
10
+ │ - Write SKILL.md │
11
+ │ - Add references │
12
+ │ - Define keywords │
13
+ └──────────────────────────────────────────────────────┘
14
+
15
+ ┌──────────────────────────────────────────────────────┐
16
+ │ 2. Unit Tests (Isolated) │
17
+ │ - Test file structure │
18
+ │ - Test content validity │
19
+ │ - Test loading mechanism │
20
+ └──────────────────────────────────────────────────────┘
21
+
22
+ ┌──────────────────────────────────────────────────────┐
23
+ │ 3. Integration Tests (Skill System) │
24
+ │ - Test with SkillLoader │
25
+ │ - Test with SkillManager │
26
+ │ - Test keyword matching │
27
+ │ - Test caching behavior │
28
+ └──────────────────────────────────────────────────────┘
29
+
30
+ ┌──────────────────────────────────────────────────────┐
31
+ │ 4. E2E Tests (Full Agent) │
32
+ │ - Test with real messages │
33
+ │ - Test with LLM providers │
34
+ │ - Test prompt enhancement │
35
+ │ - Validate agent responses │
36
+ └──────────────────────────────────────────────────────┘
37
+
38
+ ┌──────────────────────────────────────────────────────┐
39
+ │ 5. Performance Tests │
40
+ │ - Measure load times │
41
+ │ - Measure cache efficiency │
42
+ │ - Test under load │
43
+ └──────────────────────────────────────────────────────┘
44
+
45
+ ┌──────────────────────────────────────────────────────┐
46
+ │ 6. Quality Review │
47
+ │ - Manual content review │
48
+ │ - Test with real users │
49
+ │ - Gather feedback │
50
+ └──────────────────────────────────────────────────────┘
51
+ ```
52
+
53
+ ## Level 1: Unit Tests
54
+
55
+ ### Testing File Structure
56
+
57
+ ```typescript
58
+ import * as fs from 'fs/promises';
59
+ import * as path from 'path';
60
+
61
+ describe('Skill Structure', () => {
62
+ const skillPath = path.join(__dirname, '../../.claude/skills/my-skill');
63
+
64
+ it('should have SKILL.md in root', async () => {
65
+ const skillMd = path.join(skillPath, 'SKILL.md');
66
+ const exists = await fs.stat(skillMd)
67
+ .then(() => true)
68
+ .catch(() => false);
69
+ expect(exists).toBe(true);
70
+ });
71
+
72
+ it('should have references directory (optional)', async () => {
73
+ const refsPath = path.join(skillPath, 'references');
74
+ const exists = await fs.stat(refsPath)
75
+ .then(() => true)
76
+ .catch(() => false);
77
+ // Optional, so just log if missing
78
+ if (!exists) {
79
+ console.log('Note: No references directory found');
80
+ }
81
+ });
82
+
83
+ it('should contain valid markdown', async () => {
84
+ const skillMd = path.join(skillPath, 'SKILL.md');
85
+ const content = await fs.readFile(skillMd, 'utf-8');
86
+
87
+ // Basic markdown validation
88
+ expect(content.length).toBeGreaterThan(100);
89
+ expect(content).toMatch(/^#\s+/m); // Has at least one heading
90
+ });
91
+ });
92
+ ```
93
+
94
+ ### Testing Content Quality
95
+
96
+ ```typescript
97
+ describe('Skill Content Quality', () => {
98
+ let skillContent: string;
99
+
100
+ beforeAll(async () => {
101
+ const skillMd = path.join(__dirname, '../../.claude/skills/my-skill/SKILL.md');
102
+ skillContent = await fs.readFile(skillMd, 'utf-8');
103
+ });
104
+
105
+ it('should have clear title', () => {
106
+ const titleMatch = skillContent.match(/^#\s+(.+)$/m);
107
+ expect(titleMatch).toBeTruthy();
108
+ expect(titleMatch![1].length).toBeGreaterThan(3);
109
+ });
110
+
111
+ it('should have overview section', () => {
112
+ expect(skillContent.toLowerCase()).toMatch(/overview|introduction|about/);
113
+ });
114
+
115
+ it('should contain examples', () => {
116
+ expect(skillContent).toMatch(/```/); // Has code blocks
117
+ });
118
+
119
+ it('should not be too short', () => {
120
+ expect(skillContent.length).toBeGreaterThan(500);
121
+ });
122
+
123
+ it('should not be excessively long', () => {
124
+ expect(skillContent.length).toBeLessThan(100000); // ~100K characters (string length, not bytes)
125
+ });
126
+
127
+ it('should have actionable content', () => {
128
+ const actionWords = ['how to', 'step', 'example', 'guide', 'use'];
129
+ const hasActionWords = actionWords.some(word =>
130
+ skillContent.toLowerCase().includes(word)
131
+ );
132
+ expect(hasActionWords).toBe(true);
133
+ });
134
+ });
135
+ ```
136
+
137
+ ### Testing Reference Files
138
+
139
+ ```typescript
140
+ describe('Skill References', () => {
141
+ const refsPath = path.join(__dirname, '../../.claude/skills/my-skill/references');
142
+
143
+ it('should load all reference files', async () => {
144
+ const files = await fs.readdir(refsPath).catch(() => []);
145
+ const mdFiles = files.filter(f => f.endsWith('.md'));
146
+
147
+ for (const file of mdFiles) {
148
+ const content = await fs.readFile(path.join(refsPath, file), 'utf-8');
149
+ expect(content.length).toBeGreaterThan(0);
150
+ console.log(` ✅ Loaded ${file} (${content.length} chars)`);
151
+ }
152
+ });
153
+ });
154
+ ```
155
+
156
+ ## Level 2: Integration Tests
157
+
158
+ ### Testing with SkillLoader
159
+
160
+ ```typescript
161
+ import { SkillLoader } from '../src/client/skill-loader';
162
+ import * as path from 'path';
163
+
164
+ describe('SkillLoader Integration', () => {
165
+ let loader: SkillLoader;
166
+
167
+ beforeAll(() => {
168
+ const skillsPath = path.join(__dirname, '../.claude/skills');
169
+ loader = new SkillLoader(skillsPath);
170
+ });
171
+
172
+ afterEach(() => {
173
+ loader.clearCache();
174
+ });
175
+
176
+ it('should load skill successfully', async () => {
177
+ const skill = await loader.load('my-skill');
178
+
179
+ expect(skill.name).toBe('my-skill');
180
+ expect(skill.content).toBeTruthy();
181
+ expect(skill.description).toBeTruthy();
182
+ expect(skill.loadedAt).toBeGreaterThan(0);
183
+ });
184
+
185
+ it('should include references in content', async () => {
186
+ const skill = await loader.load('my-skill');
187
+
188
+ // References are appended to main content
189
+ // If references exist, content should be larger
190
+ expect(skill.content.length).toBeGreaterThan(500);
191
+ });
192
+
193
+ it('should cache skills after first load', async () => {
194
+ // First load
195
+ const start1 = performance.now();
196
+ await loader.load('my-skill');
197
+ const firstLoadTime = performance.now() - start1;
198
+
199
+ // Second load (cached)
200
+ const start2 = performance.now();
201
+ await loader.load('my-skill');
202
+ const cachedLoadTime = performance.now() - start2;
203
+
204
+ console.log(`First load: ${firstLoadTime.toFixed(2)}ms`);
205
+ console.log(`Cached load: ${cachedLoadTime.toFixed(2)}ms`);
206
+
207
+ expect(cachedLoadTime).toBeLessThan(firstLoadTime / 5);
208
+ });
209
+
210
+ it('should handle missing skill gracefully', async () => {
211
+ await expect(
212
+ loader.load('non-existent-skill')
213
+ ).rejects.toThrow(/Failed to load skill/);
214
+ });
215
+
216
+ it('should clear cache correctly', async () => {
217
+ await loader.load('my-skill');
218
+ loader.clearCache('my-skill');
219
+
220
+ // Next load should be from disk again
221
+ const start = performance.now();
222
+ await loader.load('my-skill');
223
+ const loadTime = performance.now() - start;
224
+
225
+ // Should take longer than cached load
226
+ expect(loadTime).toBeGreaterThan(1);
227
+ });
228
+ });
229
+ ```
230
+
231
+ ### Testing with SkillManager
232
+
233
+ ```typescript
234
+ import { SkillManager } from '../src/client/skill-manager';
235
+ import { SkillLoader } from '../src/client/skill-loader';
236
+
237
+ describe('SkillManager Integration', () => {
238
+ let manager: SkillManager;
239
+ let loader: SkillLoader;
240
+
241
+ beforeAll(() => {
242
+ const skillsPath = path.join(__dirname, '../.claude/skills');
243
+ loader = new SkillLoader(skillsPath);
244
+ manager = new SkillManager(loader);
245
+ });
246
+
247
+ describe('Keyword Matching', () => {
248
+ it('should match skill by primary keywords', async () => {
249
+ const testCases = [
250
+ {
251
+ message: 'How do I create a workflow?',
252
+ expectedSkill: 'mcp-tools',
253
+ minConfidence: 0.5
254
+ },
255
+ {
256
+ message: 'List all activities in the workspace',
257
+ expectedSkill: 'hailer-api',
258
+ minConfidence: 0.5
259
+ },
260
+ {
261
+ message: 'Build a new agent system',
262
+ expectedSkill: 'agent-building',
263
+ minConfidence: 0.5
264
+ },
265
+ ];
266
+
267
+ for (const { message, expectedSkill, minConfidence } of testCases) {
268
+ const guidance = await manager.analyzeRequest(message);
269
+
270
+ expect(guidance.skills).toContain(expectedSkill);
271
+ expect(guidance.confidence).toBeGreaterThanOrEqual(minConfidence);
272
+
273
+ console.log(` ✅ "${message}" → ${expectedSkill} (${(guidance.confidence * 100).toFixed(0)}%)`);
274
+ }
275
+ });
276
+
277
+ it('should handle messages with no skill match', async () => {
278
+ const guidance = await manager.analyzeRequest('Hello world xyz123');
279
+
280
+ expect(guidance.skills).toHaveLength(0);
281
+ expect(guidance.confidence).toBeLessThan(0.5);
282
+ expect(guidance.guidance).toContain('General assistance');
283
+ });
284
+
285
+ it('should return skill content', async () => {
286
+ const guidance = await manager.analyzeRequest('create workflow');
287
+
288
+ expect(guidance.skillContent).toBeTruthy();
289
+ expect(guidance.skillContent!.length).toBeGreaterThan(1000);
290
+ });
291
+
292
+ it('should recommend relevant tools', async () => {
293
+ const guidance = await manager.analyzeRequest('workflow schema');
294
+
295
+ expect(guidance.recommendedTools).toBeTruthy();
296
+ expect(guidance.recommendedTools.length).toBeGreaterThan(0);
297
+ });
298
+ });
299
+
300
+ describe('Multiple Keyword Matches', () => {
301
+ it('should prioritize highest scoring skill', async () => {
302
+ // Message with multiple potential matches
303
+ const guidance = await manager.analyzeRequest(
304
+ 'workflow activity field schema'
305
+ );
306
+
307
+ // Should return at least one skill, and no more than two
308
+ expect(guidance.skills.length).toBeGreaterThan(0);
309
+ expect(guidance.skills.length).toBeLessThanOrEqual(2);
310
+ });
311
+ });
312
+
313
+ describe('Edge Cases', () => {
314
+ it('should handle empty message', async () => {
315
+ const guidance = await manager.analyzeRequest('');
316
+ expect(guidance.skills).toHaveLength(0);
317
+ });
318
+
319
+ it('should handle very long message', async () => {
320
+ const longMessage = 'workflow '.repeat(100);
321
+ const guidance = await manager.analyzeRequest(longMessage);
322
+ expect(guidance.skills.length).toBeGreaterThan(0);
323
+ });
324
+
325
+ it('should handle special characters', async () => {
326
+ const guidance = await manager.analyzeRequest('workflow !@#$%^&*()');
327
+ expect(guidance.skills.length).toBeGreaterThan(0);
328
+ });
329
+ });
330
+ });
331
+ ```
332
+
333
+ ## Level 3: End-to-End Tests
334
+
335
+ ### Testing with Mock LLM Provider
336
+
337
+ ```typescript
338
+ import { McpClient } from '../src/client/mcp-client';
339
+ import { ChatMessage } from '../src/client/types';
340
+
341
+ describe('E2E: Skill System with Agent', () => {
342
+ let mockClient: McpClient;
343
+
344
+ beforeAll(() => {
345
+ // Setup mock MCP client with test config
346
+ mockClient = new McpClient({
347
+ enabled: true,
348
+ mcpServerUrl: 'http://localhost:3030/api/mcp',
349
+ mcpServerApiKey: 'test-key',
350
+ providers: [], // No actual providers needed for this test
351
+ mcpAgentIds: ['test-agent-id'],
352
+ botConfigs: [],
353
+ enableDirectMessages: true
354
+ });
355
+ });
356
+
357
+ it('should enhance prompt with skill content', async () => {
358
+ // This test verifies the integration works but doesn't call LLM
359
+ // You'd need to mock the provider or use a test double
360
+
361
+ const message: ChatMessage = {
362
+ id: 'test-msg-1',
363
+ content: 'How do I create a workflow schema?',
364
+ timestamp: Date.now(),
365
+ discussionId: 'test-discussion',
366
+ userId: 'test-user',
367
+ userName: 'Test User',
368
+ workspaceId: 'test-workspace',
369
+ mentionedOrDirectMessagedBotId: 'test-agent-id'
370
+ };
371
+
372
+ // In a real test, you'd mock the provider and verify
373
+ // that it receives the skill content in the system prompt
374
+ // For now, just verify the system initializes
375
+ expect(mockClient).toBeTruthy();
376
+ });
377
+ });
378
+ ```
379
+
380
+ ### Testing Skill Impact on Responses
381
+
382
+ ```typescript
383
+ describe('E2E: Skill Impact', () => {
384
+ // These tests require a real or mocked LLM provider
385
+ // They verify that skills actually improve agent responses
386
+
387
+ it('should provide better responses with skill loaded', async () => {
388
+ // Compare response quality with and without skill
389
+ // This is typically done manually or with LLM-as-judge
390
+
391
+ const messageWithSkill = 'explain workflow schemas';
392
+ // Response should include detailed workflow schema info
393
+
394
+ const messageWithoutSkill = 'explain xyz123abc';
395
+ // Response should be generic
396
+
397
+ // Manual verification or LLM-as-judge scoring
398
+ });
399
+ });
400
+ ```
401
+
402
+ ## Level 4: Performance Tests
403
+
404
+ ### Load Time Benchmarks
405
+
406
+ ```typescript
407
+ describe('Performance: Skill Loading', () => {
408
+ let loader: SkillLoader;
409
+
410
+ beforeAll(() => {
411
+ const skillsPath = path.join(__dirname, '../.claude/skills');
412
+ loader = new SkillLoader(skillsPath);
413
+ });
414
+
415
+ it('should load within performance budget', async () => {
416
+ const iterations = 10;
417
+ const times: number[] = [];
418
+
419
+ for (let i = 0; i < iterations; i++) {
420
+ loader.clearCache();
421
+ const start = performance.now();
422
+ await loader.load('my-skill');
423
+ times.push(performance.now() - start);
424
+ }
425
+
426
+ const avgTime = times.reduce((a, b) => a + b) / times.length;
427
+ const maxTime = Math.max(...times);
428
+
429
+ console.log(`Average load time: ${avgTime.toFixed(2)}ms`);
430
+ console.log(`Max load time: ${maxTime.toFixed(2)}ms`);
431
+
432
+ expect(avgTime).toBeLessThan(100); // Should average < 100ms
433
+ expect(maxTime).toBeLessThan(150); // Should never exceed 150ms
434
+ });
435
+
436
+ it('should cache effectively', async () => {
437
+ await loader.load('my-skill');
438
+
439
+ const iterations = 100;
440
+ const start = performance.now();
441
+
442
+ for (let i = 0; i < iterations; i++) {
443
+ await loader.load('my-skill');
444
+ }
445
+
446
+ const totalTime = performance.now() - start;
447
+ const avgTime = totalTime / iterations;
448
+
449
+ console.log(`Cached load average: ${avgTime.toFixed(2)}ms`);
450
+
451
+ expect(avgTime).toBeLessThan(10); // Cached loads should be < 10ms
452
+ });
453
+ });
454
+ ```
455
+
456
+ ### Memory Usage
457
+
458
+ ```typescript
459
+ describe('Performance: Memory Usage', () => {
460
+ it('should not leak memory', async () => {
461
+ const loader = new SkillLoader(skillsPath);
462
+
463
+ const initialMemory = process.memoryUsage().heapUsed;
464
+
465
+ // Load and clear many times
466
+ for (let i = 0; i < 100; i++) {
467
+ await loader.load('my-skill');
468
+ loader.clearCache();
469
+ }
470
+
471
+ // Force GC if available
472
+ if (global.gc) {
473
+ global.gc();
474
+ }
475
+
476
+ const finalMemory = process.memoryUsage().heapUsed;
477
+ const growth = finalMemory - initialMemory;
478
+ const growthMB = growth / 1024 / 1024;
479
+
480
+ console.log(`Memory growth: ${growthMB.toFixed(2)} MB`);
481
+
482
+ // Should not grow significantly
483
+ expect(growthMB).toBeLessThan(10);
484
+ });
485
+ });
486
+ ```
487
+
488
+ ## Level 5: Quality Review
489
+
490
+ ### Manual Testing Checklist
491
+
492
+ ```markdown
493
+ ## Manual Testing for {Skill Name}
494
+
495
+ ### Content Quality
496
+ - [ ] Read through entire skill content
497
+ - [ ] Verify all examples are accurate
498
+ - [ ] Check for typos and grammar
499
+ - [ ] Ensure logical flow and structure
500
+ - [ ] Verify technical accuracy
501
+
502
+ ### Usability
503
+ - [ ] Test with 5 different user queries
504
+ - [ ] Verify skill triggers appropriately
505
+ - [ ] Check that content is helpful to LLM
506
+ - [ ] Ensure examples are clear
507
+ - [ ] Validate recommended tools
508
+
509
+ ### Integration
510
+ - [ ] Test in development environment
511
+ - [ ] Verify works with OpenAI provider
512
+ - [ ] Verify works with Anthropic provider
513
+ - [ ] Check log messages are clear
514
+ - [ ] Ensure no errors in production
515
+
516
+ ### Performance
517
+ - [ ] Measure first load time
518
+ - [ ] Measure cached load time
519
+ - [ ] Check skill size is reasonable
520
+ - [ ] Verify caching works
521
+ - [ ] No performance regressions
522
+ ```
523
+
524
+ ### User Acceptance Testing
525
+
526
+ ```typescript
527
+ // Script to run UAT with real users
528
+ async function runUserAcceptanceTest() {
529
+ console.log('🧑‍🤝‍🧑 User Acceptance Testing\n');
530
+
531
+ const testQueries = [
532
+ 'How do I work with workflow schemas?',
533
+ 'Show me examples of using the API',
534
+ 'Help me build an agent',
535
+ // Add more based on skill purpose
536
+ ];
537
+
538
+ console.log('Test these queries with the agent:');
539
+ testQueries.forEach((q, i) => {
540
+ console.log(`${i + 1}. ${q}`);
541
+ });
542
+
543
+ console.log('\nFor each query, verify:');
544
+ console.log(' - Skill loads correctly');
545
+ console.log(' - Response is helpful');
546
+ console.log(' - Information is accurate');
547
+ console.log(' - Examples are clear');
548
+ console.log(' - User is satisfied\n');
549
+ }
550
+ ```
551
+
552
+ ## Continuous Improvement
553
+
554
+ ### Monitoring in Production
555
+
556
+ ```typescript
557
+ // Add to skill-loader.ts for production monitoring
558
+ private logSkillMetrics(skillName: string, loadTime: number, fromCache: boolean) {
559
+ this.logger.info('Skill loaded', {
560
+ skillName,
561
+ loadTime,
562
+ fromCache,
563
+ contentSize: this.cache.get(skillName)?.content.length || 0
564
+ });
565
+
566
+ // Send to monitoring service
567
+ if (process.env.NODE_ENV === 'production') {
568
+ metrics.timing('skill.load.time', loadTime, { skill: skillName, cached: fromCache });
569
+ }
570
+ }
571
+ ```
572
+
573
+ ### Feedback Collection
574
+
575
+ ```typescript
576
+ // Collect feedback on skill effectiveness
577
+ interface SkillFeedback {
578
+ skillName: string;
579
+ userQuery: string;
580
+ wasHelpful: boolean;
581
+ comments?: string;
582
+ timestamp: number;
583
+ }
584
+
585
+ function logSkillFeedback(feedback: SkillFeedback) {
586
+ // Store feedback for analysis
587
+ console.log('Skill Feedback:', feedback);
588
+ // Send to analytics service
589
+ }
590
+ ```
591
+
592
+ ## Summary
593
+
594
+ Follow this testing workflow for every skill:
595
+
596
+ 1. **Unit Tests**: Structure and content validation
597
+ 2. **Integration Tests**: Works with skill system components
598
+ 3. **E2E Tests**: Works in full agent context
599
+ 4. **Performance Tests**: Meets load time and memory budgets
600
+ 5. **Manual Review**: Content quality and usability
601
+ 6. **UAT**: Real users validate effectiveness
602
+ 7. **Production Monitoring**: Track performance and issues
603
+ 8. **Continuous Improvement**: Iterate based on feedback