pi-ui-extend 0.1.38 → 0.1.41

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/dist/app/app.d.ts +0 -1
  2. package/dist/app/app.js +28 -21
  3. package/dist/app/constants.js +1 -1
  4. package/dist/app/input/input-action-controller.d.ts +1 -0
  5. package/dist/app/input/input-action-controller.js +3 -0
  6. package/dist/app/input/input-controller.d.ts +1 -0
  7. package/dist/app/input/input-controller.js +40 -12
  8. package/dist/app/model/model-usage-status.js +4 -2
  9. package/dist/app/process.js +11 -0
  10. package/dist/app/rendering/conversation-tool-renderer.js +4 -6
  11. package/dist/app/session/request-history.js +2 -0
  12. package/dist/app/session/session-event-controller.d.ts +13 -0
  13. package/dist/app/session/session-event-controller.js +27 -0
  14. package/dist/app/session/tabs-controller.d.ts +8 -0
  15. package/dist/app/session/tabs-controller.js +37 -6
  16. package/dist/app/workspace/workspace-actions-controller.d.ts +1 -0
  17. package/dist/app/workspace/workspace-actions-controller.js +2 -1
  18. package/dist/bundled-extensions/terminal-bell/index.js +55 -1
  19. package/dist/config.js +1 -1
  20. package/dist/default-pix-config.js +1 -1
  21. package/dist/markdown-format.js +14 -25
  22. package/dist/terminal-width.d.ts +14 -0
  23. package/dist/terminal-width.js +31 -2
  24. package/dist/theme.js +2 -2
  25. package/external/pi-tools-suite/README.md +34 -9
  26. package/external/pi-tools-suite/package.json +3 -3
  27. package/external/pi-tools-suite/src/async-subagents/async-subagents.sample.jsonc +35 -21
  28. package/external/pi-tools-suite/src/async-subagents/commands.ts +1 -1
  29. package/external/pi-tools-suite/src/async-subagents/core/agent-strategy.ts +2 -2
  30. package/external/pi-tools-suite/src/async-subagents/core/config.ts +70 -12
  31. package/external/pi-tools-suite/src/async-subagents/core/routing.ts +1 -1
  32. package/external/pi-tools-suite/src/async-subagents/core/spawn.ts +1 -1
  33. package/external/pi-tools-suite/src/async-subagents/core/types.ts +1 -1
  34. package/external/pi-tools-suite/src/async-subagents/index.ts +6 -6
  35. package/external/pi-tools-suite/src/async-subagents/lib.ts +1 -1
  36. package/external/pi-tools-suite/src/async-subagents/tools/spawn.ts +4 -2
  37. package/external/pi-tools-suite/src/async-subagents/tools/subagents.ts +2 -2
  38. package/external/pi-tools-suite/src/{glm-coding-discipline → coding-discipline}/index.ts +17 -8
  39. package/external/pi-tools-suite/src/config.ts +1 -1
  40. package/external/pi-tools-suite/src/dcp/auto-compress.ts +368 -0
  41. package/external/pi-tools-suite/src/dcp/compress-tool.ts +3 -0
  42. package/external/pi-tools-suite/src/dcp/config.ts +23 -0
  43. package/external/pi-tools-suite/src/dcp/index.ts +112 -7
  44. package/external/pi-tools-suite/src/dcp/prompts.ts +8 -0
  45. package/external/pi-tools-suite/src/dcp/state.ts +41 -0
  46. package/external/pi-tools-suite/src/default-pi-tools-suite-config.ts +30 -22
  47. package/external/pi-tools-suite/src/index.ts +2 -1
  48. package/external/pi-tools-suite/src/session-name/index.ts +37 -0
  49. package/external/pi-tools-suite/src/tool-descriptions.ts +16 -4
  50. package/package.json +4 -4
  51. package/skills/skill-creator/SKILL.md +36 -40
  52. package/skills/skill-creator/eval-viewer/viewer.html +2 -2
  53. package/skills/skill-creator/references/schemas.md +1 -1
  54. package/skills/skill-creator/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
  55. package/skills/skill-creator/scripts/__pycache__/aggregate_benchmark.cpython-314.pyc +0 -0
  56. package/skills/skill-creator/scripts/__pycache__/generate_report.cpython-314.pyc +0 -0
  57. package/skills/skill-creator/scripts/__pycache__/improve_description.cpython-314.pyc +0 -0
  58. package/skills/skill-creator/scripts/__pycache__/package_skill.cpython-314.pyc +0 -0
  59. package/skills/skill-creator/scripts/__pycache__/run_eval.cpython-314.pyc +0 -0
  60. package/skills/skill-creator/scripts/__pycache__/run_loop.cpython-314.pyc +0 -0
  61. package/skills/skill-creator/scripts/__pycache__/utils.cpython-314.pyc +0 -0
  62. package/skills/skill-creator/scripts/generate_report.py +1 -1
  63. package/skills/skill-creator/scripts/improve_description.py +14 -24
  64. package/skills/skill-creator/scripts/run_eval.py +89 -82
@@ -1,16 +1,19 @@
1
1
  #!/usr/bin/env python3
2
2
  """Run trigger evaluation for a skill description.
3
3
 
4
- Tests whether a skill's description causes Claude to trigger (read the skill)
4
+ Tests whether a skill's description causes pi to trigger (read the skill)
5
5
  for a set of queries. Outputs results as JSON.
6
6
  """
7
7
 
8
8
  import argparse
9
9
  import json
10
10
  import os
11
+ import re
11
12
  import select
13
+ import shutil
12
14
  import subprocess
13
15
  import sys
16
+ import tempfile
14
17
  import time
15
18
  import uuid
16
19
  from concurrent.futures import ProcessPoolExecutor, as_completed
@@ -20,82 +23,92 @@ from scripts.utils import parse_skill_md
20
23
 
21
24
 
22
25
  def find_project_root() -> Path:
23
- """Find the project root by walking up from cwd looking for .claude/.
26
+ """Return the working directory pi should run in.
24
27
 
25
- Mimics how Claude Code discovers its project root, so the command file
26
- we create ends up where claude -p will look for it.
28
+ Unlike Claude Code, pi has no `.claude/` project marker that controls
29
+ skill discovery — skills are loaded explicitly via `--skill` (or from
30
+ pi's own skill locations). We simply use the current directory so the
31
+ agent sees the same relative paths the user would.
27
32
  """
28
- current = Path.cwd()
29
- for parent in [current, *current.parents]:
30
- if (parent / ".claude").is_dir():
31
- return parent
32
- return current
33
+ return Path.cwd()
34
+
35
+
36
+ def _safe_skill_name(raw: str, unique_id: str) -> str:
37
+ """Build a frontmatter-valid skill name (lowercase, hyphens, a-z0-9)."""
38
+ base = re.sub(r"[^a-z0-9]+", "-", (raw or "skill").lower()).strip("-") or "skill"
39
+ return f"{base}-{unique_id}"
33
40
 
34
41
 
35
42
  def run_single_query(
36
43
  query: str,
37
44
  skill_name: str,
38
45
  skill_description: str,
46
+ skill_body: str,
39
47
  timeout: int,
40
48
  project_root: str,
41
49
  model: str | None = None,
42
50
  ) -> bool:
43
51
  """Run a single query and return whether the skill was triggered.
44
52
 
45
- Creates a command file in .claude/commands/ so it appears in Claude's
46
- available_skills list, then runs `claude -p` with the raw query.
47
- Uses --include-partial-messages to detect triggering early from
48
- stream events (content_block_start) rather than waiting for the
49
- full assistant message, which only arrives after tool execution.
53
+ Creates a throwaway skill directory whose SKILL.md carries the
54
+ description under test, then runs `pi -p --mode json --skill <dir>`.
55
+ We watch the JSON event stream for a `read` tool call targeting that
56
+ SKILL.md, which is how pi loads a skill once the model decides to use
57
+ it. As soon as we see it, we return True and kill the process so the
58
+ run doesn't keep executing the skill.
50
59
  """
51
60
  unique_id = uuid.uuid4().hex[:8]
52
- clean_name = f"{skill_name}-skill-{unique_id}"
53
- project_commands_dir = Path(project_root) / ".claude" / "commands"
54
- command_file = project_commands_dir / f"{clean_name}.md"
61
+ clean_name = _safe_skill_name(skill_name, unique_id)
62
+ temp_skill_dir = Path(tempfile.mkdtemp(prefix=f"pi-skill-eval-{unique_id}-"))
63
+ skill_md_path = temp_skill_dir / "SKILL.md"
55
64
 
56
65
  try:
57
- project_commands_dir.mkdir(parents=True, exist_ok=True)
58
- # Use YAML block scalar to avoid breaking on quotes in description
66
+ # Write a SKILL.md with the description under test. The body is the
67
+ # real skill body so the model behaves naturally if it does read it,
68
+ # but the triggering decision is driven solely by the description.
59
69
  indented_desc = "\n ".join(skill_description.split("\n"))
60
- command_content = (
70
+ skill_md_content = (
61
71
  f"---\n"
72
+ f"name: {clean_name}\n"
62
73
  f"description: |\n"
63
74
  f" {indented_desc}\n"
64
75
  f"---\n\n"
65
- f"# {skill_name}\n\n"
66
- f"This skill handles: {skill_description}\n"
76
+ f"{skill_body.strip()}\n"
67
77
  )
68
- command_file.write_text(command_content)
78
+ skill_md_path.write_text(skill_md_content)
69
79
 
70
80
  cmd = [
71
- "claude",
72
- "-p", query,
73
- "--output-format", "stream-json",
74
- "--verbose",
75
- "--include-partial-messages",
81
+ "pi",
82
+ "-p", "--mode", "json",
83
+ "--no-session",
84
+ # Only the skill under test should be available, so its
85
+ # description is what gets evaluated in isolation. Explicit
86
+ # --skill paths still load even with --no-skills.
87
+ "--no-skills",
88
+ "--skill", str(temp_skill_dir),
89
+ query,
76
90
  ]
77
91
  if model:
78
92
  cmd.extend(["--model", model])
79
93
 
80
- # Remove CLAUDECODE env var to allow nesting claude -p inside a
81
- # Claude Code session. The guard is for interactive terminal conflicts;
82
- # programmatic subprocess usage is safe.
83
- env = {k: v for k, v in os.environ.items() if k != "CLAUDECODE"}
84
-
85
94
  process = subprocess.Popen(
86
95
  cmd,
87
96
  stdout=subprocess.PIPE,
88
97
  stderr=subprocess.DEVNULL,
89
98
  cwd=project_root,
90
- env=env,
91
99
  )
92
100
 
93
101
  triggered = False
94
102
  start_time = time.time()
95
103
  buffer = ""
96
- # Track state for stream event detection
97
- pending_tool_name = None
98
- accumulated_json = ""
104
+
105
+ def _targets_skill(path: str) -> bool:
106
+ """True if a read target points at the temp skill's SKILL.md."""
107
+ if not path:
108
+ return False
109
+ # The temp dir name embeds unique_id, so this is unique per run
110
+ # and survives absolute/relative/tilde variations.
111
+ return unique_id in path or clean_name in path
99
112
 
100
113
  try:
101
114
  while time.time() - start_time < timeout:
@@ -125,66 +138,42 @@ def run_single_query(
125
138
  except json.JSONDecodeError:
126
139
  continue
127
140
 
128
- # Early detection via stream events
129
- if event.get("type") == "stream_event":
130
- se = event.get("event", {})
131
- se_type = se.get("type", "")
132
-
133
- if se_type == "content_block_start":
134
- cb = se.get("content_block", {})
135
- if cb.get("type") == "tool_use":
136
- tool_name = cb.get("name", "")
137
- if tool_name in ("Skill", "Read"):
138
- pending_tool_name = tool_name
139
- accumulated_json = ""
140
- else:
141
- return False
142
-
143
- elif se_type == "content_block_delta" and pending_tool_name:
144
- delta = se.get("delta", {})
145
- if delta.get("type") == "input_json_delta":
146
- accumulated_json += delta.get("partial_json", "")
147
- if clean_name in accumulated_json:
141
+ etype = event.get("type")
142
+
143
+ # Fully-formed tool call (fires before execution).
144
+ if etype == "message_update":
145
+ ame = event.get("assistantMessageEvent", {})
146
+ if ame.get("type") == "toolcall_end":
147
+ tool_call = ame.get("toolCall", {})
148
+ if tool_call.get("name") == "read":
149
+ path = (tool_call.get("arguments") or {}).get("path", "")
150
+ if _targets_skill(path):
148
151
  return True
149
152
 
150
- elif se_type in ("content_block_stop", "message_stop"):
151
- if pending_tool_name:
152
- return clean_name in accumulated_json
153
- if se_type == "message_stop":
154
- return False
155
-
156
- # Fallback: full assistant message
157
- elif event.get("type") == "assistant":
158
- message = event.get("message", {})
159
- for content_item in message.get("content", []):
160
- if content_item.get("type") != "tool_use":
161
- continue
162
- tool_name = content_item.get("name", "")
163
- tool_input = content_item.get("input", {})
164
- if tool_name == "Skill" and clean_name in tool_input.get("skill", ""):
165
- triggered = True
166
- elif tool_name == "Read" and clean_name in tool_input.get("file_path", ""):
167
- triggered = True
168
- return triggered
169
-
170
- elif event.get("type") == "result":
153
+ # Tool actually started executing — redundant but robust.
154
+ elif etype == "tool_execution_start":
155
+ if event.get("toolName") == "read":
156
+ path = (event.get("args") or {}).get("path", "")
157
+ if _targets_skill(path):
158
+ return True
159
+
160
+ elif etype == "agent_end":
171
161
  return triggered
172
162
  finally:
173
- # Clean up process on any exit path (return, exception, timeout)
174
163
  if process.poll() is None:
175
164
  process.kill()
176
165
  process.wait()
177
166
 
178
167
  return triggered
179
168
  finally:
180
- if command_file.exists():
181
- command_file.unlink()
169
+ shutil.rmtree(temp_skill_dir, ignore_errors=True)
182
170
 
183
171
 
184
172
  def run_eval(
185
173
  eval_set: list[dict],
186
174
  skill_name: str,
187
175
  description: str,
176
+ skill_body: str,
188
177
  num_workers: int,
189
178
  timeout: int,
190
179
  project_root: Path,
@@ -204,6 +193,7 @@ def run_eval(
204
193
  item["query"],
205
194
  skill_name,
206
195
  description,
196
+ skill_body,
207
197
  timeout,
208
198
  str(project_root),
209
199
  model,
@@ -256,6 +246,21 @@ def run_eval(
256
246
  }
257
247
 
258
248
 
249
+ def extract_skill_body(skill_path: Path, full_content: str) -> str:
250
+ """Return the SKILL.md body (everything after the frontmatter)."""
251
+ lines = full_content.split("\n")
252
+ if not lines or lines[0].strip() != "---":
253
+ return full_content
254
+ end_idx = None
255
+ for i, line in enumerate(lines[1:], start=1):
256
+ if line.strip() == "---":
257
+ end_idx = i
258
+ break
259
+ if end_idx is None:
260
+ return full_content
261
+ return "\n".join(lines[end_idx + 1:])
262
+
263
+
259
264
  def main():
260
265
  parser = argparse.ArgumentParser(description="Run trigger evaluation for a skill description")
261
266
  parser.add_argument("--eval-set", required=True, help="Path to eval set JSON file")
@@ -265,7 +270,7 @@ def main():
265
270
  parser.add_argument("--timeout", type=int, default=30, help="Timeout per query in seconds")
266
271
  parser.add_argument("--runs-per-query", type=int, default=3, help="Number of runs per query")
267
272
  parser.add_argument("--trigger-threshold", type=float, default=0.5, help="Trigger rate threshold")
268
- parser.add_argument("--model", default=None, help="Model to use for claude -p (default: user's configured model)")
273
+ parser.add_argument("--model", default=None, help="Model to use for pi -p (default: user's configured model)")
269
274
  parser.add_argument("--verbose", action="store_true", help="Print progress to stderr")
270
275
  args = parser.parse_args()
271
276
 
@@ -278,6 +283,7 @@ def main():
278
283
 
279
284
  name, original_description, content = parse_skill_md(skill_path)
280
285
  description = args.description or original_description
286
+ skill_body = extract_skill_body(skill_path, content)
281
287
  project_root = find_project_root()
282
288
 
283
289
  if args.verbose:
@@ -287,6 +293,7 @@ def main():
287
293
  eval_set=eval_set,
288
294
  skill_name=name,
289
295
  description=description,
296
+ skill_body=skill_body,
290
297
  num_workers=args.num_workers,
291
298
  timeout=args.timeout,
292
299
  project_root=project_root,