pi-ui-extend 0.1.38 → 0.1.41
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/app/app.d.ts +0 -1
- package/dist/app/app.js +28 -21
- package/dist/app/constants.js +1 -1
- package/dist/app/input/input-action-controller.d.ts +1 -0
- package/dist/app/input/input-action-controller.js +3 -0
- package/dist/app/input/input-controller.d.ts +1 -0
- package/dist/app/input/input-controller.js +40 -12
- package/dist/app/model/model-usage-status.js +4 -2
- package/dist/app/process.js +11 -0
- package/dist/app/rendering/conversation-tool-renderer.js +4 -6
- package/dist/app/session/request-history.js +2 -0
- package/dist/app/session/session-event-controller.d.ts +13 -0
- package/dist/app/session/session-event-controller.js +27 -0
- package/dist/app/session/tabs-controller.d.ts +8 -0
- package/dist/app/session/tabs-controller.js +37 -6
- package/dist/app/workspace/workspace-actions-controller.d.ts +1 -0
- package/dist/app/workspace/workspace-actions-controller.js +2 -1
- package/dist/bundled-extensions/terminal-bell/index.js +55 -1
- package/dist/config.js +1 -1
- package/dist/default-pix-config.js +1 -1
- package/dist/markdown-format.js +14 -25
- package/dist/terminal-width.d.ts +14 -0
- package/dist/terminal-width.js +31 -2
- package/dist/theme.js +2 -2
- package/external/pi-tools-suite/README.md +34 -9
- package/external/pi-tools-suite/package.json +3 -3
- package/external/pi-tools-suite/src/async-subagents/async-subagents.sample.jsonc +35 -21
- package/external/pi-tools-suite/src/async-subagents/commands.ts +1 -1
- package/external/pi-tools-suite/src/async-subagents/core/agent-strategy.ts +2 -2
- package/external/pi-tools-suite/src/async-subagents/core/config.ts +70 -12
- package/external/pi-tools-suite/src/async-subagents/core/routing.ts +1 -1
- package/external/pi-tools-suite/src/async-subagents/core/spawn.ts +1 -1
- package/external/pi-tools-suite/src/async-subagents/core/types.ts +1 -1
- package/external/pi-tools-suite/src/async-subagents/index.ts +6 -6
- package/external/pi-tools-suite/src/async-subagents/lib.ts +1 -1
- package/external/pi-tools-suite/src/async-subagents/tools/spawn.ts +4 -2
- package/external/pi-tools-suite/src/async-subagents/tools/subagents.ts +2 -2
- package/external/pi-tools-suite/src/{glm-coding-discipline → coding-discipline}/index.ts +17 -8
- package/external/pi-tools-suite/src/config.ts +1 -1
- package/external/pi-tools-suite/src/dcp/auto-compress.ts +368 -0
- package/external/pi-tools-suite/src/dcp/compress-tool.ts +3 -0
- package/external/pi-tools-suite/src/dcp/config.ts +23 -0
- package/external/pi-tools-suite/src/dcp/index.ts +112 -7
- package/external/pi-tools-suite/src/dcp/prompts.ts +8 -0
- package/external/pi-tools-suite/src/dcp/state.ts +41 -0
- package/external/pi-tools-suite/src/default-pi-tools-suite-config.ts +30 -22
- package/external/pi-tools-suite/src/index.ts +2 -1
- package/external/pi-tools-suite/src/session-name/index.ts +37 -0
- package/external/pi-tools-suite/src/tool-descriptions.ts +16 -4
- package/package.json +4 -4
- package/skills/skill-creator/SKILL.md +36 -40
- package/skills/skill-creator/eval-viewer/viewer.html +2 -2
- package/skills/skill-creator/references/schemas.md +1 -1
- package/skills/skill-creator/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/aggregate_benchmark.cpython-314.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-314.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-314.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/package_skill.cpython-314.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-314.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-314.pyc +0 -0
- package/skills/skill-creator/scripts/__pycache__/utils.cpython-314.pyc +0 -0
- package/skills/skill-creator/scripts/generate_report.py +1 -1
- package/skills/skill-creator/scripts/improve_description.py +14 -24
- package/skills/skill-creator/scripts/run_eval.py +89 -82
|
@@ -1,16 +1,19 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
2
|
"""Run trigger evaluation for a skill description.
|
|
3
3
|
|
|
4
|
-
Tests whether a skill's description causes
|
|
4
|
+
Tests whether a skill's description causes pi to trigger (read the skill)
|
|
5
5
|
for a set of queries. Outputs results as JSON.
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import argparse
|
|
9
9
|
import json
|
|
10
10
|
import os
|
|
11
|
+
import re
|
|
11
12
|
import select
|
|
13
|
+
import shutil
|
|
12
14
|
import subprocess
|
|
13
15
|
import sys
|
|
16
|
+
import tempfile
|
|
14
17
|
import time
|
|
15
18
|
import uuid
|
|
16
19
|
from concurrent.futures import ProcessPoolExecutor, as_completed
|
|
@@ -20,82 +23,92 @@ from scripts.utils import parse_skill_md
|
|
|
20
23
|
|
|
21
24
|
|
|
22
25
|
def find_project_root() -> Path:
    """Pick the directory in which the pi subprocess is launched.

    pi does not use a `.claude/` project marker to discover skills; the
    skill under test is handed over explicitly via `--skill` (or comes
    from pi's own skill locations). No upward search is therefore
    performed: the current working directory is returned unchanged so
    relative paths resolve exactly as they would for the user.

    Returns:
        The current working directory as a ``Path``.
    """
    working_dir = Path.cwd()
    return working_dir
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _safe_skill_name(raw: str, unique_id: str) -> str:
|
|
37
|
+
"""Build a frontmatter-valid skill name (lowercase, hyphens, a-z0-9)."""
|
|
38
|
+
base = re.sub(r"[^a-z0-9]+", "-", (raw or "skill").lower()).strip("-") or "skill"
|
|
39
|
+
return f"{base}-{unique_id}"
|
|
33
40
|
|
|
34
41
|
|
|
35
42
|
def run_single_query(
|
|
36
43
|
query: str,
|
|
37
44
|
skill_name: str,
|
|
38
45
|
skill_description: str,
|
|
46
|
+
skill_body: str,
|
|
39
47
|
timeout: int,
|
|
40
48
|
project_root: str,
|
|
41
49
|
model: str | None = None,
|
|
42
50
|
) -> bool:
|
|
43
51
|
"""Run a single query and return whether the skill was triggered.
|
|
44
52
|
|
|
45
|
-
Creates a
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
53
|
+
Creates a throwaway skill directory whose SKILL.md carries the
|
|
54
|
+
description under test, then runs `pi -p --mode json --skill <dir>`.
|
|
55
|
+
We watch the JSON event stream for a `read` tool call targeting that
|
|
56
|
+
SKILL.md, which is how pi loads a skill once the model decides to use
|
|
57
|
+
it. As soon as we see it, we return True and kill the process so the
|
|
58
|
+
run doesn't keep executing the skill.
|
|
50
59
|
"""
|
|
51
60
|
unique_id = uuid.uuid4().hex[:8]
|
|
52
|
-
clean_name =
|
|
53
|
-
|
|
54
|
-
|
|
61
|
+
clean_name = _safe_skill_name(skill_name, unique_id)
|
|
62
|
+
temp_skill_dir = Path(tempfile.mkdtemp(prefix=f"pi-skill-eval-{unique_id}-"))
|
|
63
|
+
skill_md_path = temp_skill_dir / "SKILL.md"
|
|
55
64
|
|
|
56
65
|
try:
|
|
57
|
-
|
|
58
|
-
#
|
|
66
|
+
# Write a SKILL.md with the description under test. The body is the
|
|
67
|
+
# real skill body so the model behaves naturally if it does read it,
|
|
68
|
+
# but the triggering decision is driven solely by the description.
|
|
59
69
|
indented_desc = "\n ".join(skill_description.split("\n"))
|
|
60
|
-
|
|
70
|
+
skill_md_content = (
|
|
61
71
|
f"---\n"
|
|
72
|
+
f"name: {clean_name}\n"
|
|
62
73
|
f"description: |\n"
|
|
63
74
|
f" {indented_desc}\n"
|
|
64
75
|
f"---\n\n"
|
|
65
|
-
f"
|
|
66
|
-
f"This skill handles: {skill_description}\n"
|
|
76
|
+
f"{skill_body.strip()}\n"
|
|
67
77
|
)
|
|
68
|
-
|
|
78
|
+
skill_md_path.write_text(skill_md_content)
|
|
69
79
|
|
|
70
80
|
cmd = [
|
|
71
|
-
"
|
|
72
|
-
"-p",
|
|
73
|
-
"--
|
|
74
|
-
|
|
75
|
-
|
|
81
|
+
"pi",
|
|
82
|
+
"-p", "--mode", "json",
|
|
83
|
+
"--no-session",
|
|
84
|
+
# Only the skill under test should be available, so its
|
|
85
|
+
# description is what gets evaluated in isolation. Explicit
|
|
86
|
+
# --skill paths still load even with --no-skills.
|
|
87
|
+
"--no-skills",
|
|
88
|
+
"--skill", str(temp_skill_dir),
|
|
89
|
+
query,
|
|
76
90
|
]
|
|
77
91
|
if model:
|
|
78
92
|
cmd.extend(["--model", model])
|
|
79
93
|
|
|
80
|
-
# Remove CLAUDECODE env var to allow nesting claude -p inside a
|
|
81
|
-
# Claude Code session. The guard is for interactive terminal conflicts;
|
|
82
|
-
# programmatic subprocess usage is safe.
|
|
83
|
-
env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"}
|
|
84
|
-
|
|
85
94
|
process = subprocess.Popen(
|
|
86
95
|
cmd,
|
|
87
96
|
stdout=subprocess.PIPE,
|
|
88
97
|
stderr=subprocess.DEVNULL,
|
|
89
98
|
cwd=project_root,
|
|
90
|
-
env=env,
|
|
91
99
|
)
|
|
92
100
|
|
|
93
101
|
triggered = False
|
|
94
102
|
start_time = time.time()
|
|
95
103
|
buffer = ""
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
104
|
+
|
|
105
|
+
def _targets_skill(path: str) -> bool:
|
|
106
|
+
"""True if a read target points at the temp skill's SKILL.md."""
|
|
107
|
+
if not path:
|
|
108
|
+
return False
|
|
109
|
+
# The temp dir name embeds unique_id, so this is unique per run
|
|
110
|
+
# and survives absolute/relative/tilde variations.
|
|
111
|
+
return unique_id in path or clean_name in path
|
|
99
112
|
|
|
100
113
|
try:
|
|
101
114
|
while time.time() - start_time < timeout:
|
|
@@ -125,66 +138,42 @@ def run_single_query(
|
|
|
125
138
|
except json.JSONDecodeError:
|
|
126
139
|
continue
|
|
127
140
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
if
|
|
134
|
-
|
|
135
|
-
if
|
|
136
|
-
|
|
137
|
-
if
|
|
138
|
-
pending_tool_name = tool_name
|
|
139
|
-
accumulated_json = ""
|
|
140
|
-
else:
|
|
141
|
-
return False
|
|
142
|
-
|
|
143
|
-
elif se_type == "content_block_delta" and pending_tool_name:
|
|
144
|
-
delta = se.get("delta", {})
|
|
145
|
-
if delta.get("type") == "input_json_delta":
|
|
146
|
-
accumulated_json += delta.get("partial_json", "")
|
|
147
|
-
if clean_name in accumulated_json:
|
|
141
|
+
etype = event.get("type")
|
|
142
|
+
|
|
143
|
+
# Fully-formed tool call (fires before execution).
|
|
144
|
+
if etype == "message_update":
|
|
145
|
+
ame = event.get("assistantMessageEvent", {})
|
|
146
|
+
if ame.get("type") == "toolcall_end":
|
|
147
|
+
tool_call = ame.get("toolCall", {})
|
|
148
|
+
if tool_call.get("name") == "read":
|
|
149
|
+
path = (tool_call.get("arguments") or {}).get("path", "")
|
|
150
|
+
if _targets_skill(path):
|
|
148
151
|
return True
|
|
149
152
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
elif
|
|
158
|
-
message = event.get("message", {})
|
|
159
|
-
for content_item in message.get("content", []):
|
|
160
|
-
if content_item.get("type") != "tool_use":
|
|
161
|
-
continue
|
|
162
|
-
tool_name = content_item.get("name", "")
|
|
163
|
-
tool_input = content_item.get("input", {})
|
|
164
|
-
if tool_name == "Skill" and clean_name in tool_input.get("skill", ""):
|
|
165
|
-
triggered = True
|
|
166
|
-
elif tool_name == "Read" and clean_name in tool_input.get("file_path", ""):
|
|
167
|
-
triggered = True
|
|
168
|
-
return triggered
|
|
169
|
-
|
|
170
|
-
elif event.get("type") == "result":
|
|
153
|
+
# Tool actually started executing — redundant but robust.
|
|
154
|
+
elif etype == "tool_execution_start":
|
|
155
|
+
if event.get("toolName") == "read":
|
|
156
|
+
path = (event.get("args") or {}).get("path", "")
|
|
157
|
+
if _targets_skill(path):
|
|
158
|
+
return True
|
|
159
|
+
|
|
160
|
+
elif etype == "agent_end":
|
|
171
161
|
return triggered
|
|
172
162
|
finally:
|
|
173
|
-
# Clean up process on any exit path (return, exception, timeout)
|
|
174
163
|
if process.poll() is None:
|
|
175
164
|
process.kill()
|
|
176
165
|
process.wait()
|
|
177
166
|
|
|
178
167
|
return triggered
|
|
179
168
|
finally:
|
|
180
|
-
|
|
181
|
-
command_file.unlink()
|
|
169
|
+
shutil.rmtree(temp_skill_dir, ignore_errors=True)
|
|
182
170
|
|
|
183
171
|
|
|
184
172
|
def run_eval(
|
|
185
173
|
eval_set: list[dict],
|
|
186
174
|
skill_name: str,
|
|
187
175
|
description: str,
|
|
176
|
+
skill_body: str,
|
|
188
177
|
num_workers: int,
|
|
189
178
|
timeout: int,
|
|
190
179
|
project_root: Path,
|
|
@@ -204,6 +193,7 @@ def run_eval(
|
|
|
204
193
|
item["query"],
|
|
205
194
|
skill_name,
|
|
206
195
|
description,
|
|
196
|
+
skill_body,
|
|
207
197
|
timeout,
|
|
208
198
|
str(project_root),
|
|
209
199
|
model,
|
|
@@ -256,6 +246,21 @@ def run_eval(
|
|
|
256
246
|
}
|
|
257
247
|
|
|
258
248
|
|
|
249
|
+
def extract_skill_body(skill_path: Path, full_content: str) -> str:
    """Strip the leading frontmatter from SKILL.md text and return the rest.

    Frontmatter is recognised only when the very first line is ``---``
    (surrounding whitespace ignored) and a later line closes it with
    another ``---``. If either delimiter is missing, the text is handed
    back untouched.

    Args:
        skill_path: Location of the skill; not consulted by this function.
        full_content: Complete text of the SKILL.md file.

    Returns:
        Every line after the closing ``---`` joined with newlines, or
        ``full_content`` itself when no complete frontmatter block exists.
    """
    rows = full_content.split("\n")
    has_opening = bool(rows) and rows[0].strip() == "---"
    if not has_opening:
        return full_content

    # Index of the first line after the opener that closes the frontmatter.
    closing = next(
        (idx for idx, row in enumerate(rows[1:], start=1) if row.strip() == "---"),
        None,
    )
    if closing is None:
        return full_content

    body_rows = rows[closing + 1:]
    return "\n".join(body_rows)
|
|
262
|
+
|
|
263
|
+
|
|
259
264
|
def main():
|
|
260
265
|
parser = argparse.ArgumentParser(description="Run trigger evaluation for a skill description")
|
|
261
266
|
parser.add_argument("--eval-set", required=True, help="Path to eval set JSON file")
|
|
@@ -265,7 +270,7 @@ def main():
|
|
|
265
270
|
parser.add_argument("--timeout", type=int, default=30, help="Timeout per query in seconds")
|
|
266
271
|
parser.add_argument("--runs-per-query", type=int, default=3, help="Number of runs per query")
|
|
267
272
|
parser.add_argument("--trigger-threshold", type=float, default=0.5, help="Trigger rate threshold")
|
|
268
|
-
parser.add_argument("--model", default=None, help="Model to use for
|
|
273
|
+
parser.add_argument("--model", default=None, help="Model to use for pi -p (default: user's configured model)")
|
|
269
274
|
parser.add_argument("--verbose", action="store_true", help="Print progress to stderr")
|
|
270
275
|
args = parser.parse_args()
|
|
271
276
|
|
|
@@ -278,6 +283,7 @@ def main():
|
|
|
278
283
|
|
|
279
284
|
name, original_description, content = parse_skill_md(skill_path)
|
|
280
285
|
description = args.description or original_description
|
|
286
|
+
skill_body = extract_skill_body(skill_path, content)
|
|
281
287
|
project_root = find_project_root()
|
|
282
288
|
|
|
283
289
|
if args.verbose:
|
|
@@ -287,6 +293,7 @@ def main():
|
|
|
287
293
|
eval_set=eval_set,
|
|
288
294
|
skill_name=name,
|
|
289
295
|
description=description,
|
|
296
|
+
skill_body=skill_body,
|
|
290
297
|
num_workers=args.num_workers,
|
|
291
298
|
timeout=args.timeout,
|
|
292
299
|
project_root=project_root,
|