@poncho-ai/harness 0.2.0 → 0.3.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  import { mkdir, mkdtemp, writeFile } from "node:fs/promises";
2
+ import { createServer } from "node:http";
2
3
  import { tmpdir } from "node:os";
3
4
  import { join } from "node:path";
4
5
  import { describe, expect, it, vi } from "vitest";
@@ -219,7 +220,9 @@ model:
219
220
  `---
220
221
  name: summarize
221
222
  description: Summarize long text into concise output
222
- allowed-tools: summarize_text
223
+ tools:
224
+ mcp:
225
+ - linear/list_issues
223
226
  ---
224
227
 
225
228
  # Summarize Skill
@@ -413,6 +416,63 @@ description: Safe skill
413
416
  });
414
417
  });
415
418
 
419
+ it("enforces scripts denylist policy from config", async () => {
420
+ const dir = await mkdtemp(join(tmpdir(), "poncho-harness-script-policy-"));
421
+ await writeFile(
422
+ join(dir, "AGENT.md"),
423
+ `---
424
+ name: script-policy-agent
425
+ model:
426
+ provider: anthropic
427
+ name: claude-opus-4-5
428
+ ---
429
+
430
+ # Script Policy Agent
431
+ `,
432
+ "utf8",
433
+ );
434
+ await writeFile(
435
+ join(dir, "poncho.config.js"),
436
+ `export default {
437
+ scripts: {
438
+ mode: "denylist",
439
+ exclude: ["math/scripts/add.ts"]
440
+ }
441
+ };
442
+ `,
443
+ "utf8",
444
+ );
445
+ await mkdir(join(dir, "skills", "math", "scripts"), { recursive: true });
446
+ await writeFile(
447
+ join(dir, "skills", "math", "SKILL.md"),
448
+ `---
449
+ name: math
450
+ description: Math scripts
451
+ ---
452
+
453
+ # Math
454
+ `,
455
+ "utf8",
456
+ );
457
+ await writeFile(
458
+ join(dir, "skills", "math", "scripts", "add.ts"),
459
+ "export default async function run() { return { ok: true }; }\n",
460
+ "utf8",
461
+ );
462
+ const harness = new AgentHarness({ workingDir: dir });
463
+ await harness.initialize();
464
+ const listScripts = harness.listTools().find((tool) => tool.name === "list_skill_scripts");
465
+ const runScript = harness.listTools().find((tool) => tool.name === "run_skill_script");
466
+ expect(listScripts).toBeDefined();
467
+ expect(runScript).toBeDefined();
468
+ const listed = await listScripts!.handler({ skill: "math" });
469
+ expect(listed).toEqual({ skill: "math", scripts: [] });
470
+ const result = await runScript!.handler({ skill: "math", script: "add.ts" });
471
+ expect(result).toMatchObject({
472
+ error: expect.stringContaining("is not allowed by policy"),
473
+ });
474
+ });
475
+
416
476
  it("injects local authoring guidance only in development environment", async () => {
417
477
  const dir = await mkdtemp(join(tmpdir(), "poncho-harness-dev-guidance-"));
418
478
  await writeFile(
@@ -677,7 +737,7 @@ model:
677
737
  `---
678
738
  name: summarize
679
739
  description: Summarize text
680
- allowed-tools: summarize_text read_file
740
+ allowed-tools: linear/list_issues linear/get_issue
681
741
  ---
682
742
 
683
743
  # Summarize
@@ -688,11 +748,12 @@ allowed-tools: summarize_text read_file
688
748
  const metadata = await loadSkillMetadata(dir);
689
749
  expect(metadata).toHaveLength(1);
690
750
  expect(metadata[0]?.name).toBe("summarize");
691
- expect(metadata[0]?.tools).toEqual(["summarize_text", "read_file"]);
751
+ expect(metadata[0]?.tools.mcp).toEqual(["linear/list_issues", "linear/get_issue"]);
752
+ expect(metadata[0]?.tools.scripts).toEqual([]);
692
753
  });
693
754
 
694
- it("keeps backward compatibility with legacy tools list frontmatter", async () => {
695
- const dir = await mkdtemp(join(tmpdir(), "poncho-harness-legacy-tools-"));
755
+ it("fails when SKILL.md includes invalid non-slash tool patterns", async () => {
756
+ const dir = await mkdtemp(join(tmpdir(), "poncho-harness-invalid-tools-"));
696
757
  await mkdir(join(dir, "skills", "legacy"), { recursive: true });
697
758
  await writeFile(
698
759
  join(dir, "skills", "legacy", "SKILL.md"),
@@ -700,7 +761,8 @@ allowed-tools: summarize_text read_file
700
761
  name: legacy
701
762
  description: Legacy skill
702
763
  tools:
703
- - legacy_tool
764
+ mcp:
765
+ - legacy_tool
704
766
  ---
705
767
 
706
768
  # Legacy
@@ -708,9 +770,374 @@ tools:
708
770
  "utf8",
709
771
  );
710
772
 
711
- const metadata = await loadSkillMetadata(dir);
712
- expect(metadata).toHaveLength(1);
713
- expect(metadata[0]?.name).toBe("legacy");
714
- expect(metadata[0]?.tools).toEqual(["legacy_tool"]);
773
+ await expect(loadSkillMetadata(dir)).rejects.toThrow(
774
+ /Invalid MCP tool pattern/,
775
+ );
776
+ });
777
+
778
+ it("registers MCP tools dynamically for stacked active skills and supports deactivation", async () => {
779
+ process.env.LINEAR_TOKEN = "token-123";
780
+ const mcpServer = createServer(async (req, res) => {
781
+ if (req.method === "DELETE") {
782
+ res.statusCode = 200;
783
+ res.end();
784
+ return;
785
+ }
786
+ const chunks: Buffer[] = [];
787
+ for await (const chunk of req) chunks.push(Buffer.from(chunk));
788
+ const body = Buffer.concat(chunks).toString("utf8");
789
+ const payload = body.trim().length > 0 ? (JSON.parse(body) as any) : {};
790
+ if (payload.method === "initialize") {
791
+ res.setHeader("Content-Type", "application/json");
792
+ res.setHeader("Mcp-Session-Id", "sess");
793
+ res.end(
794
+ JSON.stringify({
795
+ jsonrpc: "2.0",
796
+ id: payload.id,
797
+ result: {
798
+ protocolVersion: "2025-03-26",
799
+ capabilities: { tools: { listChanged: true } },
800
+ serverInfo: { name: "remote", version: "1.0.0" },
801
+ },
802
+ }),
803
+ );
804
+ return;
805
+ }
806
+ if (payload.method === "notifications/initialized") {
807
+ res.statusCode = 202;
808
+ res.end();
809
+ return;
810
+ }
811
+ if (payload.method === "tools/list") {
812
+ res.setHeader("Content-Type", "application/json");
813
+ res.end(
814
+ JSON.stringify({
815
+ jsonrpc: "2.0",
816
+ id: payload.id,
817
+ result: {
818
+ tools: [
819
+ { name: "a", inputSchema: { type: "object", properties: {} } },
820
+ { name: "b", inputSchema: { type: "object", properties: {} } },
821
+ ],
822
+ },
823
+ }),
824
+ );
825
+ return;
826
+ }
827
+ if (payload.method === "tools/call") {
828
+ res.setHeader("Content-Type", "application/json");
829
+ res.end(
830
+ JSON.stringify({
831
+ jsonrpc: "2.0",
832
+ id: payload.id,
833
+ result: { result: { ok: true } },
834
+ }),
835
+ );
836
+ return;
837
+ }
838
+ res.statusCode = 404;
839
+ res.end();
840
+ });
841
+ await new Promise<void>((resolveOpen) => mcpServer.listen(0, () => resolveOpen()));
842
+ const address = mcpServer.address();
843
+ if (!address || typeof address === "string") throw new Error("Unexpected address");
844
+ const dir = await mkdtemp(join(tmpdir(), "poncho-harness-stacked-activation-"));
845
+ await writeFile(
846
+ join(dir, "AGENT.md"),
847
+ `---
848
+ name: stacked-agent
849
+ model:
850
+ provider: anthropic
851
+ name: claude-opus-4-5
852
+ ---
853
+
854
+ # Stacked Agent
855
+ `,
856
+ "utf8",
857
+ );
858
+ await writeFile(
859
+ join(dir, "poncho.config.js"),
860
+ `export default {
861
+ mcp: [
862
+ {
863
+ name: "remote",
864
+ url: "http://127.0.0.1:${address.port}/mcp",
865
+ auth: { type: "bearer", tokenEnv: "LINEAR_TOKEN" },
866
+ tools: { mode: "allowlist", include: ["remote/*"] }
867
+ }
868
+ ]
869
+ };
870
+ `,
871
+ "utf8",
872
+ );
873
+ await mkdir(join(dir, "skills", "skill-a"), { recursive: true });
874
+ await mkdir(join(dir, "skills", "skill-b"), { recursive: true });
875
+ await writeFile(
876
+ join(dir, "skills", "skill-a", "SKILL.md"),
877
+ `---
878
+ name: skill-a
879
+ description: A
880
+ tools:
881
+ mcp:
882
+ - remote/a
883
+ ---
884
+ # A
885
+ `,
886
+ "utf8",
887
+ );
888
+ await writeFile(
889
+ join(dir, "skills", "skill-b", "SKILL.md"),
890
+ `---
891
+ name: skill-b
892
+ description: B
893
+ tools:
894
+ mcp:
895
+ - remote/b
896
+ ---
897
+ # B
898
+ `,
899
+ "utf8",
900
+ );
901
+ const harness = new AgentHarness({ workingDir: dir });
902
+ await harness.initialize();
903
+ expect(harness.listTools().map((tool) => tool.name)).not.toContain("remote/a");
904
+ expect(harness.listTools().map((tool) => tool.name)).not.toContain("remote/b");
905
+ const activate = harness.listTools().find((tool) => tool.name === "activate_skill");
906
+ const deactivate = harness.listTools().find((tool) => tool.name === "deactivate_skill");
907
+ expect(activate).toBeDefined();
908
+ expect(deactivate).toBeDefined();
909
+ await activate!.handler({ name: "skill-a" }, {} as any);
910
+ expect(harness.listTools().map((tool) => tool.name)).toContain("remote/a");
911
+ expect(harness.listTools().map((tool) => tool.name)).not.toContain("remote/b");
912
+ await activate!.handler({ name: "skill-b" }, {} as any);
913
+ const afterStack = harness.listTools().map((tool) => tool.name);
914
+ expect(afterStack).toContain("remote/a");
915
+ expect(afterStack).toContain("remote/b");
916
+ await deactivate!.handler({ name: "skill-a" }, {} as any);
917
+ const afterDeactivate = harness.listTools().map((tool) => tool.name);
918
+ expect(afterDeactivate).not.toContain("remote/a");
919
+ expect(afterDeactivate).toContain("remote/b");
920
+ await harness.shutdown();
921
+ await new Promise<void>((resolveClose) => mcpServer.close(() => resolveClose()));
922
+ });
923
+
924
+ it("allows in-flight MCP calls to finish after skill deactivation", async () => {
925
+ process.env.LINEAR_TOKEN = "token-123";
926
+ const mcpServer = createServer(async (req, res) => {
927
+ if (req.method === "DELETE") {
928
+ res.statusCode = 200;
929
+ res.end();
930
+ return;
931
+ }
932
+ const chunks: Buffer[] = [];
933
+ for await (const chunk of req) chunks.push(Buffer.from(chunk));
934
+ const body = Buffer.concat(chunks).toString("utf8");
935
+ const payload = body.trim().length > 0 ? (JSON.parse(body) as any) : {};
936
+ if (payload.method === "initialize") {
937
+ res.setHeader("Content-Type", "application/json");
938
+ res.setHeader("Mcp-Session-Id", "sess");
939
+ res.end(
940
+ JSON.stringify({
941
+ jsonrpc: "2.0",
942
+ id: payload.id,
943
+ result: {
944
+ protocolVersion: "2025-03-26",
945
+ capabilities: { tools: { listChanged: true } },
946
+ serverInfo: { name: "remote", version: "1.0.0" },
947
+ },
948
+ }),
949
+ );
950
+ return;
951
+ }
952
+ if (payload.method === "notifications/initialized") {
953
+ res.statusCode = 202;
954
+ res.end();
955
+ return;
956
+ }
957
+ if (payload.method === "tools/list") {
958
+ res.setHeader("Content-Type", "application/json");
959
+ res.end(
960
+ JSON.stringify({
961
+ jsonrpc: "2.0",
962
+ id: payload.id,
963
+ result: { tools: [{ name: "slow", inputSchema: { type: "object", properties: {} } }] },
964
+ }),
965
+ );
966
+ return;
967
+ }
968
+ if (payload.method === "tools/call") {
969
+ await new Promise((resolveTimer) => setTimeout(resolveTimer, 25));
970
+ res.setHeader("Content-Type", "application/json");
971
+ res.end(JSON.stringify({ jsonrpc: "2.0", id: payload.id, result: { result: { done: true } } }));
972
+ return;
973
+ }
974
+ res.statusCode = 404;
975
+ res.end();
976
+ });
977
+ await new Promise<void>((resolveOpen) => mcpServer.listen(0, () => resolveOpen()));
978
+ const address = mcpServer.address();
979
+ if (!address || typeof address === "string") throw new Error("Unexpected address");
980
+ const dir = await mkdtemp(join(tmpdir(), "poncho-harness-inflight-"));
981
+ await writeFile(
982
+ join(dir, "AGENT.md"),
983
+ `---
984
+ name: inflight-agent
985
+ model:
986
+ provider: anthropic
987
+ name: claude-opus-4-5
988
+ ---
989
+
990
+ # Inflight
991
+ `,
992
+ "utf8",
993
+ );
994
+ await writeFile(
995
+ join(dir, "poncho.config.js"),
996
+ `export default {
997
+ mcp: [
998
+ {
999
+ name: "remote",
1000
+ url: "http://127.0.0.1:${address.port}/mcp",
1001
+ auth: { type: "bearer", tokenEnv: "LINEAR_TOKEN" },
1002
+ tools: { mode: "allowlist", include: ["remote/*"] }
1003
+ }
1004
+ ]
1005
+ };
1006
+ `,
1007
+ "utf8",
1008
+ );
1009
+ await mkdir(join(dir, "skills", "skill-slow"), { recursive: true });
1010
+ await writeFile(
1011
+ join(dir, "skills", "skill-slow", "SKILL.md"),
1012
+ `---
1013
+ name: skill-slow
1014
+ description: Slow
1015
+ tools:
1016
+ mcp:
1017
+ - remote/slow
1018
+ ---
1019
+ # Slow
1020
+ `,
1021
+ "utf8",
1022
+ );
1023
+ const harness = new AgentHarness({ workingDir: dir });
1024
+ await harness.initialize();
1025
+ const activate = harness.listTools().find((tool) => tool.name === "activate_skill");
1026
+ const deactivate = harness.listTools().find((tool) => tool.name === "deactivate_skill");
1027
+ await activate!.handler({ name: "skill-slow" }, {} as any);
1028
+ const slowTool = harness.listTools().find((tool) => tool.name === "remote/slow");
1029
+ expect(slowTool).toBeDefined();
1030
+ const inFlight = slowTool!.handler({}, {} as any);
1031
+ await deactivate!.handler({ name: "skill-slow" }, {} as any);
1032
+ const output = await inFlight;
1033
+ expect(output).toEqual({ done: true });
1034
+ await harness.shutdown();
1035
+ await new Promise<void>((resolveClose) => mcpServer.close(() => resolveClose()));
1036
+ });
1037
+
1038
+ it("sanitizes tool names sent to model providers when MCP tools include slashes", async () => {
1039
+ process.env.LINEAR_TOKEN = "token-123";
1040
+ const mcpServer = createServer(async (req, res) => {
1041
+ if (req.method === "DELETE") {
1042
+ res.statusCode = 200;
1043
+ res.end();
1044
+ return;
1045
+ }
1046
+ const chunks: Buffer[] = [];
1047
+ for await (const chunk of req) chunks.push(Buffer.from(chunk));
1048
+ const body = Buffer.concat(chunks).toString("utf8");
1049
+ const payload = body.trim().length > 0 ? (JSON.parse(body) as any) : {};
1050
+ if (payload.method === "initialize") {
1051
+ res.setHeader("Content-Type", "application/json");
1052
+ res.setHeader("Mcp-Session-Id", "sess");
1053
+ res.end(
1054
+ JSON.stringify({
1055
+ jsonrpc: "2.0",
1056
+ id: payload.id,
1057
+ result: {
1058
+ protocolVersion: "2025-03-26",
1059
+ capabilities: { tools: { listChanged: true } },
1060
+ serverInfo: { name: "linear", version: "1.0.0" },
1061
+ },
1062
+ }),
1063
+ );
1064
+ return;
1065
+ }
1066
+ if (payload.method === "notifications/initialized") {
1067
+ res.statusCode = 202;
1068
+ res.end();
1069
+ return;
1070
+ }
1071
+ if (payload.method === "tools/list") {
1072
+ res.setHeader("Content-Type", "application/json");
1073
+ res.end(
1074
+ JSON.stringify({
1075
+ jsonrpc: "2.0",
1076
+ id: payload.id,
1077
+ result: {
1078
+ tools: [{ name: "list_issues", inputSchema: { type: "object", properties: {} } }],
1079
+ },
1080
+ }),
1081
+ );
1082
+ return;
1083
+ }
1084
+ res.setHeader("Content-Type", "application/json");
1085
+ res.end(JSON.stringify({ jsonrpc: "2.0", id: payload.id, result: {} }));
1086
+ });
1087
+ await new Promise<void>((resolveOpen) => mcpServer.listen(0, () => resolveOpen()));
1088
+ const address = mcpServer.address();
1089
+ if (!address || typeof address === "string") throw new Error("Unexpected address");
1090
+ const dir = await mkdtemp(join(tmpdir(), "poncho-harness-tool-name-sanitize-"));
1091
+ await writeFile(
1092
+ join(dir, "AGENT.md"),
1093
+ `---
1094
+ name: sanitize-agent
1095
+ model:
1096
+ provider: anthropic
1097
+ name: claude-opus-4-5
1098
+ tools:
1099
+ mcp:
1100
+ - linear/*
1101
+ ---
1102
+
1103
+ # Sanitize
1104
+ `,
1105
+ "utf8",
1106
+ );
1107
+ await writeFile(
1108
+ join(dir, "poncho.config.js"),
1109
+ `export default {
1110
+ mcp: [
1111
+ {
1112
+ name: "linear",
1113
+ url: "http://127.0.0.1:${address.port}/mcp",
1114
+ auth: { type: "bearer", tokenEnv: "LINEAR_TOKEN" },
1115
+ tools: { mode: "allowlist", include: ["linear/*"] }
1116
+ }
1117
+ ]
1118
+ };
1119
+ `,
1120
+ "utf8",
1121
+ );
1122
+ const harness = new AgentHarness({ workingDir: dir });
1123
+ await harness.initialize();
1124
+ const mockedGenerate = vi.fn().mockResolvedValueOnce({
1125
+ text: "done",
1126
+ toolCalls: [],
1127
+ usage: { input: 5, output: 5 },
1128
+ rawContent: [],
1129
+ });
1130
+ (harness as unknown as { modelClient: { generate: unknown } }).modelClient = {
1131
+ generate: mockedGenerate,
1132
+ };
1133
+ for await (const _event of harness.run({ task: "hello" })) {
1134
+ // consume events
1135
+ }
1136
+ const firstCall = mockedGenerate.mock.calls[0]?.[0] as
1137
+ | { tools?: Array<{ name: string }> }
1138
+ | undefined;
1139
+ expect(firstCall?.tools?.some((tool) => tool.name.includes("/"))).toBe(false);
1140
+ await harness.shutdown();
1141
+ await new Promise<void>((resolveClose) => mcpServer.close(() => resolveClose()));
715
1142
  });
716
1143
  });