pi-ui-extend 0.1.39 → 0.1.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. package/dist/app/app.js +6 -0
  2. package/dist/app/cli/update.js +17 -7
  3. package/dist/app/constants.js +1 -1
  4. package/dist/app/input/input-action-controller.d.ts +1 -0
  5. package/dist/app/input/input-action-controller.js +3 -0
  6. package/dist/app/process.js +11 -0
  7. package/dist/app/rendering/conversation-tool-renderer.js +4 -6
  8. package/dist/bundled-extensions/terminal-bell/index.js +54 -0
  9. package/dist/config.js +1 -1
  10. package/dist/default-pix-config.js +1 -1
  11. package/external/pi-tools-suite/README.md +1 -1
  12. package/external/pi-tools-suite/package.json +3 -3
  13. package/external/pi-tools-suite/src/coding-discipline/index.ts +228 -68
  14. package/package.json +5 -5
  15. package/skills/skill-creator/SKILL.md +44 -41
  16. package/skills/skill-creator/eval-viewer/viewer.html +2 -2
  17. package/skills/skill-creator/references/schemas.md +1 -1
  18. package/skills/skill-creator/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
  19. package/skills/skill-creator/scripts/__pycache__/aggregate_benchmark.cpython-314.pyc +0 -0
  20. package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-314.pyc +0 -0
  21. package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-314.pyc +0 -0
  22. package/skills/skill-creator/scripts/__pycache__/package_skill.cpython-314.pyc +0 -0
  23. package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-314.pyc +0 -0
  24. package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-314.pyc +0 -0
  25. package/skills/skill-creator/scripts/__pycache__/utils.cpython-314.pyc +0 -0
  26. package/skills/skill-creator/scripts/generate_report.py +1 -1
  27. package/skills/skill-creator/scripts/improve_description.py +14 -24
  28. package/skills/skill-creator/scripts/run_eval.py +93 -82
  29. package/skills/skill-creator/scripts/run_loop.py +1 -0
@@ -1,16 +1,19 @@
1
1
  #!/usr/bin/env python3
2
2
  """Run trigger evaluation for a skill description.
3
3
 
4
- Tests whether a skill's description causes Claude to trigger (read the skill)
4
+ Tests whether a skill's description causes pi to trigger (read the skill)
5
5
  for a set of queries. Outputs results as JSON.
6
6
  """
7
7
 
8
8
  import argparse
9
9
  import json
10
10
  import os
11
+ import re
11
12
  import select
13
+ import shutil
12
14
  import subprocess
13
15
  import sys
16
+ import tempfile
14
17
  import time
15
18
  import uuid
16
19
  from concurrent.futures import ProcessPoolExecutor, as_completed
@@ -20,82 +23,92 @@ from scripts.utils import parse_skill_md
20
23
 
21
24
 
22
25
  def find_project_root() -> Path:
23
- """Find the project root by walking up from cwd looking for .claude/.
26
+ """Return the working directory pi should run in.
24
27
 
25
- Mimics how Claude Code discovers its project root, so the command file
26
- we create ends up where claude -p will look for it.
28
+ Unlike Claude Code, pi has no `.claude/` project marker that controls
29
+ skill discovery — skills are loaded explicitly via `--skill` (or from
30
+ pi's own skill locations). We simply use the current directory so the
31
+ agent sees the same relative paths the user would.
27
32
  """
28
- current = Path.cwd()
29
- for parent in [current, *current.parents]:
30
- if (parent / ".claude").is_dir():
31
- return parent
32
- return current
33
+ return Path.cwd()
34
+
35
+
36
+ def _safe_skill_name(raw: str, unique_id: str) -> str:
37
+ """Build a frontmatter-valid skill name (lowercase, hyphens, a-z0-9)."""
38
+ base = re.sub(r"[^a-z0-9]+", "-", (raw or "skill").lower()).strip("-") or "skill"
39
+ return f"{base}-{unique_id}"
33
40
 
34
41
 
35
42
  def run_single_query(
36
43
  query: str,
37
44
  skill_name: str,
38
45
  skill_description: str,
46
+ skill_body: str,
39
47
  timeout: int,
40
48
  project_root: str,
41
49
  model: str | None = None,
42
50
  ) -> bool:
43
51
  """Run a single query and return whether the skill was triggered.
44
52
 
45
- Creates a command file in .claude/commands/ so it appears in Claude's
46
- available_skills list, then runs `claude -p` with the raw query.
47
- Uses --include-partial-messages to detect triggering early from
48
- stream events (content_block_start) rather than waiting for the
49
- full assistant message, which only arrives after tool execution.
53
+ Creates a throwaway skill directory whose SKILL.md carries the
54
+ description under test, then runs `pi -p --mode json --skill <dir>`.
55
+ We watch the JSON event stream for a `read` tool call targeting that
56
+ SKILL.md, which is how pi loads a skill once the model decides to use
57
+ it. As soon as we see it, we return True and kill the process so the
58
+ run doesn't keep executing the skill.
50
59
  """
51
60
  unique_id = uuid.uuid4().hex[:8]
52
- clean_name = f"{skill_name}-skill-{unique_id}"
53
- project_commands_dir = Path(project_root) / ".claude" / "commands"
54
- command_file = project_commands_dir / f"{clean_name}.md"
61
+ clean_name = _safe_skill_name(skill_name, unique_id)
62
+ temp_skill_dir = Path(tempfile.mkdtemp(prefix=f"pi-skill-eval-{unique_id}-"))
63
+ skill_md_path = temp_skill_dir / "SKILL.md"
55
64
 
56
65
  try:
57
- project_commands_dir.mkdir(parents=True, exist_ok=True)
58
- # Use YAML block scalar to avoid breaking on quotes in description
66
+ # Write a SKILL.md with the description under test. The body is the
67
+ # real skill body so the model behaves naturally if it does read it,
68
+ # but the triggering decision is driven solely by the description.
59
69
  indented_desc = "\n ".join(skill_description.split("\n"))
60
- command_content = (
70
+ skill_md_content = (
61
71
  f"---\n"
72
+ f"name: {clean_name}\n"
62
73
  f"description: |\n"
63
74
  f" {indented_desc}\n"
64
75
  f"---\n\n"
65
- f"# {skill_name}\n\n"
66
- f"This skill handles: {skill_description}\n"
76
+ f"{skill_body.strip()}\n"
67
77
  )
68
- command_file.write_text(command_content)
78
+ skill_md_path.write_text(skill_md_content)
69
79
 
70
80
  cmd = [
71
- "claude",
72
- "-p", query,
73
- "--output-format", "stream-json",
74
- "--verbose",
75
- "--include-partial-messages",
81
+ "pi",
82
+ "-p", "--mode", "json",
83
+ "--no-session",
84
+ # Only the skill under test should be available, so its
85
+ # description is what gets evaluated in isolation. Explicit
86
+ # --skill paths still load even with --no-skills.
87
+ "--no-skills",
88
+ "--skill", str(temp_skill_dir),
89
+ query,
76
90
  ]
77
91
  if model:
78
92
  cmd.extend(["--model", model])
79
93
 
80
- # Remove CLAUDECODE env var to allow nesting claude -p inside a
81
- # Claude Code session. The guard is for interactive terminal conflicts;
82
- # programmatic subprocess usage is safe.
83
- env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"}
84
-
85
94
  process = subprocess.Popen(
86
95
  cmd,
87
96
  stdout=subprocess.PIPE,
88
97
  stderr=subprocess.DEVNULL,
89
98
  cwd=project_root,
90
- env=env,
91
99
  )
92
100
 
93
101
  triggered = False
94
102
  start_time = time.time()
95
103
  buffer = ""
96
- # Track state for stream event detection
97
- pending_tool_name = None
98
- accumulated_json = ""
104
+
105
+ def _targets_skill(path: str) -> bool:
106
+ """True if a read target points at the temp skill's SKILL.md."""
107
+ if not path:
108
+ return False
109
+ # The temp dir name embeds unique_id, so this is unique per run
110
+ # and survives absolute/relative/tilde variations.
111
+ return unique_id in path or clean_name in path
99
112
 
100
113
  try:
101
114
  while time.time() - start_time < timeout:
@@ -125,66 +138,46 @@ def run_single_query(
125
138
  except json.JSONDecodeError:
126
139
  continue
127
140
 
128
- # Early detection via stream events
129
- if event.get("type") == "stream_event":
130
- se = event.get("event", {})
131
- se_type = se.get("type", "")
132
-
133
- if se_type == "content_block_start":
134
- cb = se.get("content_block", {})
135
- if cb.get("type") == "tool_use":
136
- tool_name = cb.get("name", "")
137
- if tool_name in ("Skill", "Read"):
138
- pending_tool_name = tool_name
139
- accumulated_json = ""
140
- else:
141
- return False
142
-
143
- elif se_type == "content_block_delta" and pending_tool_name:
144
- delta = se.get("delta", {})
145
- if delta.get("type") == "input_json_delta":
146
- accumulated_json += delta.get("partial_json", "")
147
- if clean_name in accumulated_json:
141
+ etype = event.get("type")
142
+
143
+ # Fully-formed tool call (fires before execution).
144
+ if etype == "message_update":
145
+ ame = event.get("assistantMessageEvent", {})
146
+ if ame.get("type") == "toolcall_end":
147
+ tool_call = ame.get("toolCall", {})
148
+ # Tool name arrives capitalized ("Read"), not lowercase.
149
+ if (tool_call.get("name") or "").lower() == "read":
150
+ args = tool_call.get("arguments") or {}
151
+ # Read tool's real arg is `file_path`.
152
+ path = args.get("file_path", "") or args.get("path", "")
153
+ if _targets_skill(path):
148
154
  return True
149
155
 
150
- elif se_type in ("content_block_stop", "message_stop"):
151
- if pending_tool_name:
152
- return clean_name in accumulated_json
153
- if se_type == "message_stop":
154
- return False
155
-
156
- # Fallback: full assistant message
157
- elif event.get("type") == "assistant":
158
- message = event.get("message", {})
159
- for content_item in message.get("content", []):
160
- if content_item.get("type") != "tool_use":
161
- continue
162
- tool_name = content_item.get("name", "")
163
- tool_input = content_item.get("input", {})
164
- if tool_name == "Skill" and clean_name in tool_input.get("skill", ""):
165
- triggered = True
166
- elif tool_name == "Read" and clean_name in tool_input.get("file_path", ""):
167
- triggered = True
168
- return triggered
169
-
170
- elif event.get("type") == "result":
156
+ # Tool actually started executing — redundant but robust.
157
+ elif etype == "tool_execution_start":
158
+ if (event.get("toolName") or "").lower() == "read":
159
+ args = event.get("args") or {}
160
+ path = args.get("file_path", "") or args.get("path", "")
161
+ if _targets_skill(path):
162
+ return True
163
+
164
+ elif etype == "agent_end":
171
165
  return triggered
172
166
  finally:
173
- # Clean up process on any exit path (return, exception, timeout)
174
167
  if process.poll() is None:
175
168
  process.kill()
176
169
  process.wait()
177
170
 
178
171
  return triggered
179
172
  finally:
180
- if command_file.exists():
181
- command_file.unlink()
173
+ shutil.rmtree(temp_skill_dir, ignore_errors=True)
182
174
 
183
175
 
184
176
  def run_eval(
185
177
  eval_set: list[dict],
186
178
  skill_name: str,
187
179
  description: str,
180
+ skill_body: str,
188
181
  num_workers: int,
189
182
  timeout: int,
190
183
  project_root: Path,
@@ -204,6 +197,7 @@ def run_eval(
204
197
  item["query"],
205
198
  skill_name,
206
199
  description,
200
+ skill_body,
207
201
  timeout,
208
202
  str(project_root),
209
203
  model,
@@ -256,6 +250,21 @@ def run_eval(
256
250
  }
257
251
 
258
252
 
253
+ def extract_skill_body(skill_path: Path, full_content: str) -> str:
254
+ """Return the SKILL.md body (everything after the frontmatter)."""
255
+ lines = full_content.split("\n")
256
+ if not lines or lines[0].strip() != "---":
257
+ return full_content
258
+ end_idx = None
259
+ for i, line in enumerate(lines[1:], start=1):
260
+ if line.strip() == "---":
261
+ end_idx = i
262
+ break
263
+ if end_idx is None:
264
+ return full_content
265
+ return "\n".join(lines[end_idx + 1:])
266
+
267
+
259
268
  def main():
260
269
  parser = argparse.ArgumentParser(description="Run trigger evaluation for a skill description")
261
270
  parser.add_argument("--eval-set", required=True, help="Path to eval set JSON file")
@@ -265,7 +274,7 @@ def main():
265
274
  parser.add_argument("--timeout", type=int, default=30, help="Timeout per query in seconds")
266
275
  parser.add_argument("--runs-per-query", type=int, default=3, help="Number of runs per query")
267
276
  parser.add_argument("--trigger-threshold", type=float, default=0.5, help="Trigger rate threshold")
268
- parser.add_argument("--model", default=None, help="Model to use for claude -p (default: user's configured model)")
277
+ parser.add_argument("--model", default=None, help="Model to use for pi -p (default: user's configured model)")
269
278
  parser.add_argument("--verbose", action="store_true", help="Print progress to stderr")
270
279
  args = parser.parse_args()
271
280
 
@@ -278,6 +287,7 @@ def main():
278
287
 
279
288
  name, original_description, content = parse_skill_md(skill_path)
280
289
  description = args.description or original_description
290
+ skill_body = extract_skill_body(skill_path, content)
281
291
  project_root = find_project_root()
282
292
 
283
293
  if args.verbose:
@@ -287,6 +297,7 @@ def main():
287
297
  eval_set=eval_set,
288
298
  skill_name=name,
289
299
  description=description,
300
+ skill_body=skill_body,
290
301
  num_workers=args.num_workers,
291
302
  timeout=args.timeout,
292
303
  project_root=project_root,
@@ -90,6 +90,7 @@ def run_loop(
90
90
  eval_set=all_queries,
91
91
  skill_name=name,
92
92
  description=current_description,
93
+ skill_body=content,
93
94
  num_workers=num_workers,
94
95
  timeout=timeout,
95
96
  project_root=project_root,