@bluecopa/harness 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. package/README.md +212 -117
  2. package/dist/arc/index.d.ts +796 -0
  3. package/dist/arc/index.js +2863 -0
  4. package/dist/arc/index.js.map +1 -0
  5. package/dist/observability/otel.d.ts +36 -0
  6. package/dist/observability/otel.js +73 -0
  7. package/dist/observability/otel.js.map +1 -0
  8. package/dist/shared-types-DRxnerLT.d.ts +138 -0
  9. package/dist/skills/index.d.ts +67 -0
  10. package/dist/skills/index.js +282 -0
  11. package/dist/skills/index.js.map +1 -0
  12. package/package.json +26 -2
  13. package/AGENTS.md +0 -18
  14. package/docs/guides/observability.md +0 -32
  15. package/docs/guides/providers.md +0 -51
  16. package/docs/guides/skills.md +0 -25
  17. package/docs/security/skill-sandbox-threat-model.md +0 -20
  18. package/src/agent/create-agent.ts +0 -884
  19. package/src/agent/create-tools.ts +0 -33
  20. package/src/agent/step-executor.ts +0 -15
  21. package/src/agent/types.ts +0 -57
  22. package/src/context/llm-compaction-strategy.ts +0 -37
  23. package/src/context/prepare-step.ts +0 -65
  24. package/src/context/token-tracker.ts +0 -26
  25. package/src/extracted/manifest.json +0 -10
  26. package/src/extracted/prompts/compaction.md +0 -5
  27. package/src/extracted/prompts/system.md +0 -5
  28. package/src/extracted/tools.json +0 -82
  29. package/src/hooks/hook-runner.ts +0 -22
  30. package/src/hooks/tool-wrappers.ts +0 -64
  31. package/src/interfaces/compaction-strategy.ts +0 -18
  32. package/src/interfaces/hooks.ts +0 -24
  33. package/src/interfaces/sandbox-provider.ts +0 -29
  34. package/src/interfaces/session-store.ts +0 -48
  35. package/src/interfaces/tool-provider.ts +0 -70
  36. package/src/loop/bridge.ts +0 -363
  37. package/src/loop/context-store.ts +0 -207
  38. package/src/loop/lcm-tool-loop.ts +0 -163
  39. package/src/loop/vercel-agent-loop.ts +0 -279
  40. package/src/observability/context.ts +0 -17
  41. package/src/observability/metrics.ts +0 -27
  42. package/src/observability/otel.ts +0 -105
  43. package/src/observability/tracing.ts +0 -13
  44. package/src/optimization/agent-evaluator.ts +0 -40
  45. package/src/optimization/config-serializer.ts +0 -16
  46. package/src/optimization/optimization-runner.ts +0 -39
  47. package/src/optimization/trace-collector.ts +0 -33
  48. package/src/permissions/permission-manager.ts +0 -34
  49. package/src/providers/composite-tool-provider.ts +0 -72
  50. package/src/providers/control-plane-e2b-executor.ts +0 -218
  51. package/src/providers/e2b-tool-provider.ts +0 -68
  52. package/src/providers/local-tool-provider.ts +0 -190
  53. package/src/providers/skill-sandbox-provider.ts +0 -46
  54. package/src/sessions/file-session-store.ts +0 -61
  55. package/src/sessions/in-memory-session-store.ts +0 -39
  56. package/src/sessions/session-manager.ts +0 -44
  57. package/src/skills/skill-loader.ts +0 -52
  58. package/src/skills/skill-manager.ts +0 -175
  59. package/src/skills/skill-router.ts +0 -99
  60. package/src/skills/skill-types.ts +0 -26
  61. package/src/subagents/subagent-manager.ts +0 -22
  62. package/src/subagents/task-tool.ts +0 -13
  63. package/tests/integration/agent-loop-basic.spec.ts +0 -56
  64. package/tests/integration/agent-skill-default-from-sandbox.spec.ts +0 -66
  65. package/tests/integration/concurrency-single-turn.spec.ts +0 -35
  66. package/tests/integration/otel-metrics-emission.spec.ts +0 -62
  67. package/tests/integration/otel-trace-propagation.spec.ts +0 -48
  68. package/tests/integration/parity-benchmark.spec.ts +0 -45
  69. package/tests/integration/provider-local-smoke.spec.ts +0 -63
  70. package/tests/integration/session-resume.spec.ts +0 -30
  71. package/tests/integration/skill-install-rollback.spec.ts +0 -64
  72. package/tests/integration/skill-sandbox-file-blob.spec.ts +0 -54
  73. package/tests/integration/skills-progressive-disclosure.spec.ts +0 -61
  74. package/tests/integration/streaming-compaction-boundary.spec.ts +0 -43
  75. package/tests/integration/structured-messages-agent.spec.ts +0 -265
  76. package/tests/integration/subagent-isolation.spec.ts +0 -24
  77. package/tests/security/skill-sandbox-isolation.spec.ts +0 -51
  78. package/tests/unit/create-tools-schema-parity.spec.ts +0 -22
  79. package/tests/unit/extracted-manifest.spec.ts +0 -41
  80. package/tests/unit/interfaces-contract.spec.ts +0 -101
  81. package/tests/unit/structured-messages.spec.ts +0 -176
  82. package/tests/unit/token-tracker.spec.ts +0 -22
  83. package/tsconfig.json +0 -14
  84. package/vitest.config.ts +0 -7
@@ -1,66 +0,0 @@
1
- import { mkdtemp, rm, writeFile } from 'node:fs/promises';
2
- import { tmpdir } from 'node:os';
3
- import { join } from 'node:path';
4
- import { afterEach, describe, expect, it } from 'vitest';
5
-
6
- import { createAgent } from '../../src/agent/create-agent';
7
- import type { SandboxFileBlob, SandboxProvider } from '../../src/interfaces/sandbox-provider';
8
- import { LocalToolProvider } from '../../src/providers/local-tool-provider';
9
-
10
- const tempDirs: string[] = [];
11
-
12
- afterEach(async () => {
13
- await Promise.all(tempDirs.map((dir) => rm(dir, { recursive: true, force: true })));
14
- tempDirs.length = 0;
15
- });
16
-
17
- class NoopSandbox implements SandboxProvider {
18
- async exec() {
19
- return { exitCode: 0, stdout: '', stderr: '' };
20
- }
21
-
22
- async readSandboxFile(): Promise<SandboxFileBlob> {
23
- return { data: new Uint8Array() };
24
- }
25
-
26
- async writeSandboxFile(): Promise<void> {}
27
- }
28
-
29
- describe('agent skill defaults from sandbox provider', () => {
30
- it('auto-enables SkillManager behavior when sandboxProvider is provided', async () => {
31
- const dir = await mkdtemp(join(tmpdir(), 'harness-agent-skill-default-'));
32
- tempDirs.push(dir);
33
-
34
- const provider = new LocalToolProvider(dir);
35
- const skillPath = join(dir, 'sample-skill.md');
36
- const indexPath = join(dir, 'skills-index.json');
37
-
38
- await writeFile(
39
- skillPath,
40
- ['---', 'name: docx-skill', 'description: creates docx files', '---', '', 'Use this skill for docx creation.'].join('\n'),
41
- 'utf8'
42
- );
43
- await writeFile(
44
- indexPath,
45
- JSON.stringify([{ name: 'docx-skill', description: 'creates docx files', path: skillPath }]),
46
- 'utf8'
47
- );
48
-
49
- const agent = createAgent({
50
- toolProvider: provider,
51
- sandboxProvider: new NoopSandbox(),
52
- skillIndexPath: indexPath,
53
- async nextAction(messages) {
54
- return {
55
- type: 'final',
56
- content: messages[0]?.content ?? ''
57
- };
58
- }
59
- });
60
-
61
- const result = await agent.run('please use docx-skill');
62
- expect(result.output).toContain('Skill selected: docx-skill');
63
- expect(result.output).toContain('Use this skill for docx creation.');
64
- });
65
- });
66
-
@@ -1,35 +0,0 @@
1
- import { describe, expect, it } from 'vitest';
2
-
3
- import { SingleFlightStepExecutor } from '../../src/agent/step-executor';
4
-
5
- describe('single-flight step executor', () => {
6
- it('serializes concurrent tasks in submission order', async () => {
7
- const executor = new SingleFlightStepExecutor();
8
- const calls: string[] = [];
9
-
10
- const a = executor.run(async () => {
11
- calls.push('a-start');
12
- await new Promise((resolve) => setTimeout(resolve, 10));
13
- calls.push('a-end');
14
- return 'a';
15
- });
16
-
17
- const b = executor.run(async () => {
18
- calls.push('b-start');
19
- await new Promise((resolve) => setTimeout(resolve, 1));
20
- calls.push('b-end');
21
- return 'b';
22
- });
23
-
24
- const c = executor.run(async () => {
25
- calls.push('c-start');
26
- calls.push('c-end');
27
- return 'c';
28
- });
29
-
30
- const result = await Promise.all([a, b, c]);
31
-
32
- expect(result).toEqual(['a', 'b', 'c']);
33
- expect(calls).toEqual(['a-start', 'a-end', 'b-start', 'b-end', 'c-start', 'c-end']);
34
- });
35
- });
@@ -1,62 +0,0 @@
1
- import { describe, expect, it } from 'vitest';
2
-
3
- import { createAgent } from '../../src/agent/create-agent';
4
- import { HookRunner } from '../../src/hooks/hook-runner';
5
- import { withHooks } from '../../src/hooks/tool-wrappers';
6
- import { HarnessTelemetry } from '../../src/observability/otel';
7
- import { PermissionManager } from '../../src/permissions/permission-manager';
8
- import { LocalToolProvider } from '../../src/providers/local-tool-provider';
9
-
10
- describe('otel metrics emission', () => {
11
- it('emits step and tool metrics when telemetry is enabled', async () => {
12
- const telemetry = new HarnessTelemetry(true);
13
- const provider = withHooks(
14
- new LocalToolProvider(),
15
- new HookRunner(),
16
- new PermissionManager('allow_all'),
17
- telemetry
18
- );
19
-
20
- let turn = 0;
21
- const agent = createAgent({
22
- toolProvider: provider,
23
- telemetry,
24
- async nextAction() {
25
- turn += 1;
26
- if (turn === 1) {
27
- return { type: 'tool', name: 'Bash', args: { command: 'node -e "console.log(7)"' } };
28
- }
29
- return { type: 'final', content: 'done' };
30
- }
31
- });
32
-
33
- await agent.run('metrics please');
34
-
35
- const metrics = telemetry.getMetrics();
36
- expect(metrics.some((m) => m.name === 'agent_steps_total')).toBe(true);
37
- expect(metrics.some((m) => m.name === 'tool_calls_total')).toBe(true);
38
- expect(metrics.some((m) => m.name === 'tool_call_duration_ms')).toBe(true);
39
- });
40
-
41
- it('emits nothing when telemetry is disabled', async () => {
42
- const telemetry = new HarnessTelemetry(false);
43
- const provider = withHooks(
44
- new LocalToolProvider(),
45
- new HookRunner(),
46
- new PermissionManager('allow_all'),
47
- telemetry
48
- );
49
-
50
- const agent = createAgent({
51
- toolProvider: provider,
52
- telemetry,
53
- async nextAction() {
54
- return { type: 'final', content: 'done' };
55
- }
56
- });
57
-
58
- await agent.run('disabled please');
59
- expect(telemetry.getSpans()).toEqual([]);
60
- expect(telemetry.getMetrics()).toEqual([]);
61
- });
62
- });
@@ -1,48 +0,0 @@
1
- import { describe, expect, it } from 'vitest';
2
-
3
- import { createAgent } from '../../src/agent/create-agent';
4
- import { HookRunner } from '../../src/hooks/hook-runner';
5
- import { withHooks } from '../../src/hooks/tool-wrappers';
6
- import { PermissionManager } from '../../src/permissions/permission-manager';
7
- import { LocalToolProvider } from '../../src/providers/local-tool-provider';
8
- import { HarnessTelemetry } from '../../src/observability/otel';
9
-
10
- describe('otel trace propagation', () => {
11
- it('emits parent-child spans for run, step, and tool calls', async () => {
12
- const telemetry = new HarnessTelemetry(true);
13
- const wrappedProvider = withHooks(
14
- new LocalToolProvider(),
15
- new HookRunner(),
16
- new PermissionManager('allow_all'),
17
- telemetry
18
- );
19
-
20
- let turn = 0;
21
- const agent = createAgent({
22
- toolProvider: wrappedProvider,
23
- telemetry,
24
- async nextAction() {
25
- turn += 1;
26
- if (turn === 1) {
27
- return { type: 'tool', name: 'Bash', args: { command: 'node -e "console.log(1)"' } };
28
- }
29
- return { type: 'final', content: 'done' };
30
- }
31
- });
32
-
33
- await agent.run('trace please');
34
-
35
- const spans = telemetry.getSpans();
36
- const run = spans.find((span) => span.name === 'agent.run');
37
- const step = spans.find((span) => span.name === 'agent.step');
38
- const tool = spans.find((span) => span.name === 'tool.call');
39
-
40
- expect(run).toBeDefined();
41
- expect(step).toBeDefined();
42
- expect(tool).toBeDefined();
43
- expect(step?.parentSpanId).toBe(run?.spanId);
44
- expect(tool?.parentSpanId).toBe(run?.spanId);
45
- expect(run?.traceId).toBe(step?.traceId);
46
- expect(step?.traceId).toBe(tool?.traceId);
47
- });
48
- });
@@ -1,45 +0,0 @@
1
- import { describe, expect, it } from 'vitest';
2
-
3
- import { createAgent } from '../../src/agent/create-agent';
4
- import { LocalToolProvider } from '../../src/providers/local-tool-provider';
5
- import { AgentEvaluator } from '../../src/optimization/agent-evaluator';
6
- import { OptimizationRunner } from '../../src/optimization/optimization-runner';
7
-
8
- describe('parity benchmark foundation', () => {
9
- it('evaluates a candidate and produces scored trace output', async () => {
10
- const agent = createAgent({
11
- toolProvider: new LocalToolProvider(),
12
- async nextAction() {
13
- return { type: 'final', content: '42' };
14
- }
15
- });
16
-
17
- const evaluator = new AgentEvaluator((prompt) => agent.run(prompt));
18
- const result = await evaluator.evaluate({ prompt: 'return 42', expectedContains: '42' });
19
-
20
- expect(result.score).toBe(1);
21
- expect(result.trace.events.length).toBe(2);
22
- expect(result.output).toBe('42');
23
- });
24
-
25
- it('selects the best candidate by average score', async () => {
26
- const runner = new OptimizationRunner();
27
-
28
- const result = await runner.run(
29
- [
30
- {
31
- id: 'good',
32
- run: async () => ({ output: '42', steps: 1, messages: [] })
33
- },
34
- {
35
- id: 'bad',
36
- run: async () => ({ output: '0', steps: 1, messages: [] })
37
- }
38
- ],
39
- [{ prompt: 'return 42', expectedContains: '42' }]
40
- );
41
-
42
- expect(result.bestCandidateId).toBe('good');
43
- expect(result.bestScore).toBe(1);
44
- });
45
- });
@@ -1,63 +0,0 @@
1
- import { mkdtemp, readFile as readFsFile, rm } from 'node:fs/promises';
2
- import { tmpdir } from 'node:os';
3
- import { join } from 'node:path';
4
- import { afterEach, describe, expect, it } from 'vitest';
5
-
6
- import { CompositeToolProvider } from '../../src/providers/composite-tool-provider';
7
- import { LocalToolProvider } from '../../src/providers/local-tool-provider';
8
-
9
- const tempDirs: string[] = [];
10
-
11
- afterEach(async () => {
12
- await Promise.all(
13
- tempDirs.map(async (dir) => {
14
- await rm(dir, { recursive: true, force: true });
15
- })
16
- );
17
- tempDirs.length = 0;
18
- });
19
-
20
- describe('local provider smoke', () => {
21
- it('supports write, read, edit, glob, grep, and bash', async () => {
22
- const dir = await mkdtemp(join(tmpdir(), 'harness-local-'));
23
- tempDirs.push(dir);
24
-
25
- const provider = new LocalToolProvider(dir);
26
-
27
- const writeResult = await provider.writeFile('src/app.ts', 'const x = 1;\n');
28
- expect(writeResult.success).toBe(true);
29
-
30
- const readResult = await provider.readFile('src/app.ts');
31
- expect(readResult.output).toContain('const x = 1;');
32
-
33
- const editResult = await provider.editFile('src/app.ts', 'const x = 1;', 'const x = 2;');
34
- expect(editResult.success).toBe(true);
35
-
36
- const globResult = await provider.glob('src/*.ts');
37
- expect(globResult.output).toContain('src/app.ts');
38
-
39
- const grepResult = await provider.grep('x = 2', 'src');
40
- expect(grepResult.output).toContain('src/app.ts:1:const x = 2;');
41
-
42
- const bashResult = await provider.bash('node -e "console.log(42)"');
43
- expect(bashResult.success).toBe(true);
44
- expect(bashResult.output).toContain('42');
45
-
46
- const fileContent = await readFsFile(join(dir, 'src/app.ts'), 'utf8');
47
- expect(fileContent).toContain('const x = 2;');
48
- });
49
-
50
- it('composite provider routes capabilities correctly', async () => {
51
- const dir = await mkdtemp(join(tmpdir(), 'harness-composite-'));
52
- tempDirs.push(dir);
53
-
54
- const local = new LocalToolProvider(dir);
55
- const composite = new CompositeToolProvider([local]);
56
-
57
- expect(composite.capabilities().bash).toBe(true);
58
-
59
- await composite.writeFile('file.txt', 'hello');
60
- const out = await composite.readFile('file.txt');
61
- expect(out.output).toBe('hello');
62
- });
63
- });
@@ -1,30 +0,0 @@
1
- import { mkdtemp, rm } from 'node:fs/promises';
2
- import { tmpdir } from 'node:os';
3
- import { join } from 'node:path';
4
- import { afterEach, describe, expect, it } from 'vitest';
5
-
6
- import { FileSessionStore } from '../../src/sessions/file-session-store';
7
- import { SessionManager } from '../../src/sessions/session-manager';
8
-
9
- const tempDirs: string[] = [];
10
-
11
- afterEach(async () => {
12
- await Promise.all(tempDirs.map((dir) => rm(dir, { recursive: true, force: true })));
13
- tempDirs.length = 0;
14
- });
15
-
16
- describe('session resume', () => {
17
- it('creates, checkpoints, and resumes a session', async () => {
18
- const dir = await mkdtemp(join(tmpdir(), 'harness-session-'));
19
- tempDirs.push(dir);
20
-
21
- const manager = new SessionManager(new FileSessionStore(dir));
22
- const session = await manager.create('build harness');
23
- await manager.appendMessage(session.id, { role: 'assistant', content: 'starting' });
24
-
25
- const resumed = await manager.resume(session.id);
26
- expect(resumed).not.toBeNull();
27
- expect(resumed?.messages.length).toBe(2);
28
- expect(resumed?.messages[1]?.content).toBe('starting');
29
- });
30
- });
@@ -1,64 +0,0 @@
1
- import { mkdtemp, rm, writeFile } from 'node:fs/promises';
2
- import { tmpdir } from 'node:os';
3
- import { join } from 'node:path';
4
- import { afterEach, describe, expect, it } from 'vitest';
5
-
6
- import type { SandboxProvider } from '../../src/interfaces/sandbox-provider';
7
- import { SkillManager } from '../../src/skills/skill-manager';
8
-
9
- const tempDirs: string[] = [];
10
-
11
- afterEach(async () => {
12
- await Promise.all(tempDirs.map((dir) => rm(dir, { recursive: true, force: true })));
13
- tempDirs.length = 0;
14
- });
15
-
16
- class FailingSandbox implements SandboxProvider {
17
- async exec() {
18
- return { exitCode: 1, stdout: '', stderr: 'install failed' };
19
- }
20
-
21
- async readSandboxFile() {
22
- return { data: new Uint8Array() };
23
- }
24
-
25
- async writeSandboxFile() {}
26
- }
27
-
28
- describe('skill install rollback/degraded state', () => {
29
- it('marks skill as degraded on dependency install failure', async () => {
30
- const dir = await mkdtemp(join(tmpdir(), 'harness-skill-install-'));
31
- tempDirs.push(dir);
32
-
33
- const skillPath = join(dir, 'skill.md');
34
- const indexPath = join(dir, 'skills.json');
35
-
36
- await writeFile(
37
- skillPath,
38
- [
39
- '---',
40
- 'name: failing',
41
- 'description: failing skill',
42
- 'python_deps: totally-broken-package',
43
- '---',
44
- '',
45
- 'instructions'
46
- ].join('\n'),
47
- 'utf8'
48
- );
49
- await writeFile(
50
- indexPath,
51
- JSON.stringify([{ name: 'failing', description: 'failing skill', path: skillPath }]),
52
- 'utf8'
53
- );
54
-
55
- const manager = new SkillManager(new FailingSandbox());
56
- await manager.discover(indexPath);
57
-
58
- const invoked = await manager.invoke('failing');
59
- expect(invoked.instructions).toContain('instructions');
60
-
61
- await expect(manager.installDependencies('failing')).rejects.toThrow('install failed');
62
- expect(manager.getInstallState('failing')).toBe('degraded');
63
- });
64
- });
@@ -1,54 +0,0 @@
1
- import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
2
- import { tmpdir } from 'node:os';
3
- import { join } from 'node:path';
4
- import { afterEach, describe, expect, it } from 'vitest';
5
-
6
- import type { SandboxFileBlob, SandboxProvider } from '../../src/interfaces/sandbox-provider';
7
-
8
- const tempDirs: string[] = [];
9
-
10
- afterEach(async () => {
11
- await Promise.all(tempDirs.map((dir) => rm(dir, { recursive: true, force: true })));
12
- tempDirs.length = 0;
13
- });
14
-
15
- class BlobSandbox implements SandboxProvider {
16
- constructor(private readonly root: string) {}
17
-
18
- async exec() {
19
- return { exitCode: 0, stdout: '', stderr: '' };
20
- }
21
-
22
- async readSandboxFile(path: string): Promise<SandboxFileBlob> {
23
- const bytes = await readFile(join(this.root, path));
24
- return {
25
- data: new Uint8Array(bytes),
26
- mimeType: 'application/octet-stream',
27
- filename: path.split('/').pop() ?? path,
28
- };
29
- }
30
-
31
- async writeSandboxFile(path: string, content: SandboxFileBlob): Promise<void> {
32
- await writeFile(join(this.root, path), Buffer.from(content.data));
33
- }
34
- }
35
-
36
- describe('skill sandbox file blob contract', () => {
37
- it('roundtrips binary artifact bytes with mime metadata', async () => {
38
- const dir = await mkdtemp(join(tmpdir(), 'harness-skill-blob-'));
39
- tempDirs.push(dir);
40
-
41
- const sandbox = new BlobSandbox(dir);
42
- const payload = new Uint8Array([0, 255, 10, 20, 30, 40, 50, 60]);
43
- await sandbox.writeSandboxFile('artifact.bin', {
44
- data: payload,
45
- mimeType: 'application/octet-stream',
46
- filename: 'artifact.bin'
47
- });
48
-
49
- const result = await sandbox.readSandboxFile('artifact.bin');
50
- expect(result.mimeType).toBe('application/octet-stream');
51
- expect(result.filename).toBe('artifact.bin');
52
- expect(Array.from(result.data)).toEqual(Array.from(payload));
53
- });
54
- });
@@ -1,61 +0,0 @@
1
- import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises';
2
- import { tmpdir } from 'node:os';
3
- import { join } from 'node:path';
4
- import { afterEach, describe, expect, it } from 'vitest';
5
-
6
- import type { SandboxProvider } from '../../src/interfaces/sandbox-provider';
7
- import { SkillManager } from '../../src/skills/skill-manager';
8
-
9
- const tempDirs: string[] = [];
10
-
11
- afterEach(async () => {
12
- await Promise.all(tempDirs.map((dir) => rm(dir, { recursive: true, force: true })));
13
- tempDirs.length = 0;
14
- });
15
-
16
- class MockSandbox implements SandboxProvider {
17
- async exec() {
18
- return { exitCode: 0, stdout: '', stderr: '' };
19
- }
20
-
21
- async readSandboxFile(path: string) {
22
- const raw = await readFile(path, 'utf8');
23
- return {
24
- data: Buffer.from(raw, 'utf8')
25
- };
26
- }
27
-
28
- async writeSandboxFile(path: string, content: { data: Uint8Array }) {
29
- await writeFile(path, Buffer.from(content.data).toString('utf8'), 'utf8');
30
- }
31
- }
32
-
33
- describe('skill progressive disclosure', () => {
34
- it('keeps summary available and loads instructions only on invoke', async () => {
35
- const dir = await mkdtemp(join(tmpdir(), 'harness-skills-'));
36
- tempDirs.push(dir);
37
-
38
- const skillPath = join(dir, 'skill.md');
39
- const indexPath = join(dir, 'skills.json');
40
-
41
- await writeFile(
42
- skillPath,
43
- ['---', 'name: sample', 'description: sample skill', '---', '', 'full instruction text'].join('\n'),
44
- 'utf8'
45
- );
46
- await writeFile(
47
- indexPath,
48
- JSON.stringify([{ name: 'sample', description: 'sample skill', path: skillPath }]),
49
- 'utf8'
50
- );
51
-
52
- const manager = new SkillManager(new MockSandbox());
53
- const summaries = await manager.discover(indexPath);
54
-
55
- expect(summaries[0]?.name).toBe('sample');
56
- expect(manager.getSkillSummaryForPrompt()[0]?.description).toBe('sample skill');
57
-
58
- const invoked = await manager.invoke('sample');
59
- expect(invoked.instructions).toContain('full instruction text');
60
- });
61
- });
@@ -1,43 +0,0 @@
1
- import { describe, expect, it } from 'vitest';
2
-
3
- import { LLMCompactionStrategy } from '../../src/context/llm-compaction-strategy';
4
- import { prepareStep } from '../../src/context/prepare-step';
5
- import { TokenTracker } from '../../src/context/token-tracker';
6
-
7
- describe('streaming compaction boundary', () => {
8
- it('defers compaction while streaming and compacts before next non-streaming step', async () => {
9
- const tokenTracker = new TokenTracker({ estimator: (text) => text.length });
10
- const strategy = new LLMCompactionStrategy(0.5);
11
-
12
- const messages = [
13
- { role: 'user' as const, content: 'hello there' },
14
- { role: 'assistant' as const, content: 'response one' },
15
- { role: 'user' as const, content: 'please continue with extra detail' }
16
- ];
17
-
18
- const streaming = await prepareStep({
19
- messages,
20
- maxTokens: 10,
21
- isStreaming: true,
22
- state: {},
23
- tokenTracker,
24
- compactionStrategy: strategy
25
- });
26
-
27
- expect(streaming.state.pendingCompaction).toBe(true);
28
- expect(streaming.messages).toEqual(messages);
29
-
30
- const nextStep = await prepareStep({
31
- messages: streaming.messages,
32
- maxTokens: 10,
33
- isStreaming: false,
34
- state: streaming.state,
35
- tokenTracker,
36
- compactionStrategy: strategy
37
- });
38
-
39
- expect(nextStep.state.pendingCompaction).toBe(false);
40
- expect(nextStep.messages.length).toBeLessThan(messages.length + 1);
41
- expect(nextStep.messages[0]?.content).toContain('Compaction summary');
42
- });
43
- });