@pugi/cli 0.1.0-beta.2 → 0.1.0-beta.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. package/THIRD_PARTY_NOTICES.md +40 -0
  2. package/assets/pugi-mascot.ansi +15 -40
  3. package/bin/run.js +33 -1
  4. package/dist/commands/jobs-watch.js +201 -0
  5. package/dist/commands/jobs.js +15 -0
  6. package/dist/core/agent-progress/cleanup.js +134 -0
  7. package/dist/core/agent-progress/schema.js +144 -0
  8. package/dist/core/agent-progress/writer.js +101 -0
  9. package/dist/core/compact/auto-trigger.js +96 -0
  10. package/dist/core/compact/buffer-rewriter.js +115 -0
  11. package/dist/core/compact/summarizer.js +196 -0
  12. package/dist/core/compact/token-counter.js +108 -0
  13. package/dist/core/consensus/diff-capture.js +73 -0
  14. package/dist/core/context/index.js +7 -0
  15. package/dist/core/context/markdown-traverse.js +255 -0
  16. package/dist/core/cost/rate-card.js +129 -0
  17. package/dist/core/cost/tracker.js +221 -0
  18. package/dist/core/denial-tracking/index.js +8 -0
  19. package/dist/core/denial-tracking/state.js +264 -0
  20. package/dist/core/diagnostics/probe-runner.js +93 -0
  21. package/dist/core/diagnostics/probes/api.js +46 -0
  22. package/dist/core/diagnostics/probes/auth.js +86 -0
  23. package/dist/core/diagnostics/probes/cli-version.js +127 -0
  24. package/dist/core/diagnostics/probes/config.js +72 -0
  25. package/dist/core/diagnostics/probes/denial-tracking.js +57 -0
  26. package/dist/core/diagnostics/probes/disk.js +81 -0
  27. package/dist/core/diagnostics/probes/git.js +65 -0
  28. package/dist/core/diagnostics/probes/mcp.js +75 -0
  29. package/dist/core/diagnostics/probes/node.js +59 -0
  30. package/dist/core/diagnostics/probes/pnpm.js +36 -0
  31. package/dist/core/diagnostics/probes/session.js +74 -0
  32. package/dist/core/diagnostics/probes/status-snapshot.js +442 -0
  33. package/dist/core/diagnostics/probes/workspace.js +63 -0
  34. package/dist/core/diagnostics/types.js +70 -0
  35. package/dist/core/edits/dispatch.js +218 -2
  36. package/dist/core/edits/journal.js +199 -0
  37. package/dist/core/edits/layer-d-ast.js +557 -14
  38. package/dist/core/edits/verify-hook.js +273 -0
  39. package/dist/core/edits/worktree.js +111 -18
  40. package/dist/core/engine/anvil-client.js +115 -5
  41. package/dist/core/engine/budgets.js +89 -0
  42. package/dist/core/engine/context-prefix.js +155 -0
  43. package/dist/core/engine/intent.js +260 -0
  44. package/dist/core/engine/native-pugi.js +744 -210
  45. package/dist/core/engine/prompts.js +61 -6
  46. package/dist/core/engine/strip-internal-fields.js +124 -0
  47. package/dist/core/engine/tool-bridge.js +818 -31
  48. package/dist/core/file-cache.js +113 -1
  49. package/dist/core/init/scaffold.js +195 -0
  50. package/dist/core/lsp/client.js +174 -29
  51. package/dist/core/mcp/client.js +75 -6
  52. package/dist/core/mcp/http-server.js +553 -0
  53. package/dist/core/mcp/permission.js +190 -0
  54. package/dist/core/mcp/registry.js +24 -2
  55. package/dist/core/mcp/server-tools.js +219 -0
  56. package/dist/core/mcp/server.js +397 -0
  57. package/dist/core/permissions/gate.js +187 -0
  58. package/dist/core/permissions/index.js +18 -0
  59. package/dist/core/permissions/mode.js +102 -0
  60. package/dist/core/permissions/state.js +160 -0
  61. package/dist/core/permissions/tool-class.js +93 -0
  62. package/dist/core/repl/codebase-survey.js +308 -0
  63. package/dist/core/repl/history.js +11 -1
  64. package/dist/core/repl/init-interview.js +457 -0
  65. package/dist/core/repl/model-pricing.js +135 -0
  66. package/dist/core/repl/onboarding-state.js +297 -0
  67. package/dist/core/repl/session.js +719 -29
  68. package/dist/core/repl/slash-commands.js +133 -9
  69. package/dist/core/retry-budget/budget.js +284 -0
  70. package/dist/core/retry-budget/index.js +5 -0
  71. package/dist/core/settings.js +71 -0
  72. package/dist/core/skills/defaults.js +457 -0
  73. package/dist/core/subagents/dispatcher-real.js +600 -0
  74. package/dist/core/subagents/dispatcher.js +113 -24
  75. package/dist/core/subagents/index.js +18 -5
  76. package/dist/core/subagents/isolation-matrix.js +213 -0
  77. package/dist/core/subagents/spawn.js +19 -4
  78. package/dist/core/transport/version-interceptor.js +166 -0
  79. package/dist/index.js +28 -0
  80. package/dist/runtime/bootstrap.js +190 -0
  81. package/dist/runtime/cli.js +1588 -266
  82. package/dist/runtime/commands/compact.js +296 -0
  83. package/dist/runtime/commands/cost.js +199 -0
  84. package/dist/runtime/commands/delegate.js +289 -0
  85. package/dist/runtime/commands/doctor.js +369 -0
  86. package/dist/runtime/commands/lsp.js +187 -5
  87. package/dist/runtime/commands/mcp.js +824 -0
  88. package/dist/runtime/commands/patch.js +17 -0
  89. package/dist/runtime/commands/permissions.js +87 -0
  90. package/dist/runtime/commands/report.js +299 -0
  91. package/dist/runtime/commands/review-consensus.js +17 -2
  92. package/dist/runtime/commands/roster.js +117 -0
  93. package/dist/runtime/commands/status.js +178 -0
  94. package/dist/runtime/commands/worktree.js +50 -6
  95. package/dist/runtime/headless.js +543 -0
  96. package/dist/runtime/load-hooks-or-exit.js +71 -0
  97. package/dist/runtime/plan-decompose.js +531 -0
  98. package/dist/runtime/version.js +65 -0
  99. package/dist/tools/agent-tool.js +206 -0
  100. package/dist/tools/apply-patch.js +281 -39
  101. package/dist/tools/ask-user-question.js +213 -0
  102. package/dist/tools/ask-user.js +115 -0
  103. package/dist/tools/file-tools.js +85 -14
  104. package/dist/tools/mcp-tool.js +260 -0
  105. package/dist/tools/multi-edit.js +361 -0
  106. package/dist/tools/registry.js +22 -2
  107. package/dist/tools/skill-tool.js +96 -0
  108. package/dist/tools/tasks.js +208 -0
  109. package/dist/tools/web-fetch.js +147 -2
  110. package/dist/tools/web-search.js +458 -0
  111. package/dist/tui/agent-progress-card.js +111 -0
  112. package/dist/tui/agent-tree.js +10 -0
  113. package/dist/tui/ask-modal.js +2 -2
  114. package/dist/tui/ask-user-question-prompt.js +192 -0
  115. package/dist/tui/compact-banner.js +54 -0
  116. package/dist/tui/conversation-pane.js +69 -8
  117. package/dist/tui/cost-table.js +111 -0
  118. package/dist/tui/doctor-table.js +31 -0
  119. package/dist/tui/input-box.js +1 -1
  120. package/dist/tui/markdown-render.js +4 -4
  121. package/dist/tui/repl-render.js +276 -37
  122. package/dist/tui/repl-splash.js +2 -2
  123. package/dist/tui/repl.js +25 -6
  124. package/dist/tui/splash.js +1 -1
  125. package/dist/tui/status-bar.js +94 -16
  126. package/dist/tui/status-table.js +7 -0
  127. package/dist/tui/tool-stream-pane.js +7 -0
  128. package/dist/tui/update-banner.js +20 -2
  129. package/docs/examples/codegraph.mcp.json +10 -0
  130. package/package.json +9 -6
@@ -0,0 +1,102 @@
1
+ /**
2
+ * Permission modes — canonical 4-mode taxonomy (Leak L6).
3
+ *
4
+ * Pugi historically shipped a 6-mode taxonomy in `@pugi/sdk`
5
+ * (`plan | ask | acceptEdits | auto | dontAsk | bypassPermissions`)
6
+ * which the legacy `core/permission.ts` engine maps tools onto. Claude
7
+ * Code, Codex, and the openclaude / openwork leaks all converge on a
8
+ * smaller, sharper 4-mode set:
9
+ *
10
+ * - `plan` — read-only proposal mode. Write/dispatch tools refused
11
+ * with a deterministic sentinel; the model is expected
12
+ * to surface a plan, not execute it.
13
+ * - `ask` — every tool execution prompts the operator. Default
14
+ * mode for new operators; the safe ground state.
15
+ * - `allow` — every tool executes without per-call prompts, BUT
16
+ * the policy hook layer (skill-steering, denial audit,
17
+ * destructive deny-list) still fires.
18
+ * - `bypass` — same as allow but ALSO skips policy hooks. Power-user
19
+ * mode for trusted scripted runs; surface a banner on
20
+ * entry so an operator who flips here by accident sees
21
+ * they have disengaged the audit layer.
22
+ *
23
+ * This module owns the union type, the canonical default, and the
24
+ * mode-resolution helper. The runtime gate (`gate.ts`) consumes it; the
25
+ * legacy 6-mode SDK enum remains the system-of-record for bash-class
26
+ * decisions inside `core/permission.ts` — the canonical 4-mode layer
27
+ * sits in front and short-circuits the dispatch decision before bash
28
+ * classification ever runs.
29
+ */
/**
 * Closed list of the canonical permission modes. Frozen so it can be
 * shared safely for input validation and slash-command help.
 */
export const PERMISSION_MODES = Object.freeze([
    'plan',
    'ask',
    'allow',
    'bypass',
]);
/**
 * Fallback mode used when nothing else selects one (no `--mode` flag, no
 * per-workspace session state, no `defaultPermissionMode` in the global
 * config). The cautious choice: `ask` prompts before every tool call.
 */
export const DEFAULT_PERMISSION_MODE = 'ask';
/**
 * Type guard for arbitrary input (CLI flag, deserialized JSON).
 *
 * The comparison is case-sensitive: `'Ask'` is rejected. Callers are
 * expected to lowercase first, or go through `parsePermissionMode`.
 *
 * @param {unknown} value - candidate mode.
 * @returns {boolean} true only for a string that is one of `PERMISSION_MODES`.
 */
export function isPermissionMode(value) {
    return typeof value === 'string' && PERMISSION_MODES.includes(value);
}
/**
 * Parse + validate a mode string, tolerating surrounding whitespace and
 * casing differences.
 *
 * Never throws: any input that is not a recognised mode — including
 * non-string values such as `undefined` or a number — yields `null`, so
 * the caller can surface a typed error (`unknown mode: <value>`).
 *
 * @param {unknown} value - raw user/config input.
 * @returns {string | null} the canonical lowercase mode, or `null`.
 */
export function parsePermissionMode(value) {
    // Guard before calling string methods: an unguarded `.trim()` on a
    // non-string would throw, contradicting the "returns null" contract.
    if (typeof value !== 'string')
        return null;
    const lower = value.trim().toLowerCase();
    return isPermissionMode(lower) ? lower : null;
}
/**
 * Canonical mode -> legacy 6-mode SDK enum value.
 *
 *   plan   -> 'plan'
 *   ask    -> 'ask'
 *   allow  -> 'auto'
 *   bypass -> 'bypassPermissions'
 *
 * A Map (rather than a plain object) keeps the lookup free of inherited
 * keys, so an unrecognised name resolves to `undefined`.
 */
const LEGACY_MODE_BY_CANONICAL = new Map([
    ['plan', 'plan'],
    ['ask', 'ask'],
    ['allow', 'auto'],
    ['bypass', 'bypassPermissions'],
]);
/**
 * Translate a canonical 4-mode value into the legacy SDK mode name.
 * Callers that still need the legacy enum should go through this helper
 * so the mapping lives in exactly one place.
 *
 * @param {string} mode - one of `plan | ask | allow | bypass`.
 * @returns {string | undefined} the legacy mode name; `undefined` when
 *   `mode` is not one of the four canonical values.
 */
export function toLegacyMode(mode) {
    return LEGACY_MODE_BY_CANONICAL.get(mode);
}
/**
 * One-line, human-readable description of each permission mode, keyed by
 * mode name. Kept together in a single frozen record so the wording has
 * one edit point.
 */
export const PERMISSION_MODE_GLOSS = Object.freeze(Object.fromEntries([
    ['plan', 'Read-only — propose, never execute. Write + dispatch tools refused.'],
    ['ask', 'Prompt before every tool call. Default for new operators.'],
    ['allow', 'Execute tools without prompts. Policy hooks still fire.'],
    ['bypass', 'Execute tools without prompts AND skip policy hooks. Power-user only.'],
]));
102
+ //# sourceMappingURL=mode.js.map
@@ -0,0 +1,160 @@
1
+ /**
2
+ * Per-workspace permission-mode session state — Leak L6.
3
+ *
4
+ * State lives in `.pugi/session.json` under the workspace root. The
5
+ * file is re-read on every `getCurrentMode()` call (this module keeps
6
+ * no in-process cache) and written atomically via tmp+rename on
7
+ * `setCurrentMode()` so a kill mid-write does not corrupt the JSON.
8
+ *
9
+ * Resolution order for the effective mode on a fresh process:
10
+ * 1. CLI flag (`pugi --mode plan`) — passed via `resolveMode` arg;
11
+ * not read from disk here.
12
+ * 2. Workspace session state — `<root>/.pugi/session.json` field
13
+ * `permissionMode`.
14
+ * 3. Global config — `~/.pugi/config.json` field
15
+ * `defaultPermissionMode`.
16
+ * 4. Hard default `ask`.
17
+ *
18
+ * This module owns layers 2 + 3. The CLI arg parser owns layer 1; both
19
+ * funnel into `resolveMode()` which performs the merge.
20
+ */
21
+ import { existsSync, mkdirSync, readFileSync, renameSync, writeFileSync } from 'node:fs';
22
+ import { dirname, resolve } from 'node:path';
23
+ import { homedir } from 'node:os';
24
+ import { z } from 'zod';
25
+ import { DEFAULT_PERMISSION_MODE, isPermissionMode, parsePermissionMode, } from './mode.js';
// The four canonical mode names accepted in persisted JSON.
const permissionModeEnum = z.enum(['plan', 'ask', 'allow', 'bypass']);
// Both on-disk files share one shape: a single optional mode field plus
// whatever other keys the file happens to carry.
// NOTE(review): `.passthrough()` presumably lets unknown keys survive
// validation — confirm against the zod documentation.
const optionalModeRecord = (fieldName) => z
    .object({
    [fieldName]: permissionModeEnum.optional(),
})
    .partial()
    .passthrough();
// Shape of `<workspace>/.pugi/session.json`.
const sessionStateSchema = optionalModeRecord('permissionMode');
// Shape of `~/.pugi/config.json`.
const globalConfigSchema = optionalModeRecord('defaultPermissionMode');
/** Session-state file location, relative to the workspace root. */
const SESSION_FILE = '.pugi/session.json';
/**
 * Compute the absolute path of the workspace session-state file.
 *
 * @param {string} workspaceRoot - workspace root directory.
 * @returns {string} `workspaceRoot` resolved against `.pugi/session.json`.
 */
export function sessionStatePath(workspaceRoot) {
    const statePath = resolve(workspaceRoot, SESSION_FILE);
    return statePath;
}
/**
 * Compute the absolute path of the user-global config file.
 *
 * The home directory is injectable so tests and CI can point at a
 * fixture directory instead of the real user-global file; when omitted
 * it defaults to `os.homedir()`.
 *
 * @param {string} [homeDir] - directory that contains `.pugi/`.
 * @returns {string} `homeDir` resolved against `.pugi/config.json`.
 */
export function globalConfigPath(homeDir = homedir()) {
    const segments = [homeDir, '.pugi/config.json'];
    return resolve(...segments);
}
/**
 * Read the permission mode saved for a workspace.
 *
 * Returns `null` when the session file does not exist, cannot be read
 * or parsed, fails schema validation, or simply has no valid
 * `permissionMode` field. Read/parse/validation failures are swallowed
 * on purpose: a malformed session file must not break the gate, so the
 * caller just falls through to the next resolution layer.
 *
 * @param {string} workspaceRoot - workspace root directory.
 * @returns {string | null} the saved mode, or `null`.
 */
export function getCurrentMode(workspaceRoot) {
    const statePath = sessionStatePath(workspaceRoot);
    if (existsSync(statePath)) {
        try {
            const text = readFileSync(statePath, 'utf8');
            const { permissionMode } = sessionStateSchema.parse(JSON.parse(text));
            if (isPermissionMode(permissionMode)) {
                return permissionMode;
            }
        }
        catch {
            // Unreadable / malformed file: treat as "no saved mode".
        }
    }
    return null;
}
/**
 * Persist the workspace's permission mode into `.pugi/session.json`.
 *
 * Creates the containing directory when missing and keeps every
 * unrelated key already present in the file (the existing object is
 * spread into the new one). An existing file that is not a JSON object
 * is treated as empty and overwritten.
 *
 * The write is tmp + rename so an interrupted write never leaves a
 * half-written session file. The tmp name is unique per process and
 * call: a fixed `<file>.tmp` sibling would let two CLI processes writing
 * at the same moment clobber each other's temporary file before the
 * rename.
 *
 * @param {string} workspaceRoot - workspace root directory.
 * @param {string} mode - mode to store under `permissionMode`.
 * @returns {void}
 * @throws whatever the underlying `fs` call throws (mkdir / write / rename).
 */
export function setCurrentMode(workspaceRoot, mode) {
    const path = sessionStatePath(workspaceRoot);
    mkdirSync(dirname(path), { recursive: true });
    const existing = existsSync(path)
        ? safeParseObject(readFileSync(path, 'utf8'))
        : {};
    const next = { ...existing, permissionMode: mode };
    // pid separates concurrent processes; the timestamp separates
    // successive runs. Calls inside one process are synchronous, so each
    // tmp file is already renamed away before the next call can reuse a
    // name.
    const tmpPath = `${path}.${process.pid}.${Date.now()}.tmp`;
    writeFileSync(tmpPath, `${JSON.stringify(next, null, 2)}\n`, { encoding: 'utf8', mode: 0o600 });
    renameSync(tmpPath, path);
}
/**
 * Read `defaultPermissionMode` from the user-global config file.
 *
 * Returns `null` when the file does not exist, cannot be read or
 * parsed, fails schema validation, or has no valid
 * `defaultPermissionMode` field. Failures are swallowed deliberately so
 * a malformed global config never breaks the gate.
 *
 * @param {string} [homeDir] - directory that contains `.pugi/`;
 *   defaults to `os.homedir()`.
 * @returns {string | null} the configured default mode, or `null`.
 */
export function getGlobalDefaultMode(homeDir = homedir()) {
    const configPath = globalConfigPath(homeDir);
    if (existsSync(configPath)) {
        try {
            const text = readFileSync(configPath, 'utf8');
            const { defaultPermissionMode } = globalConfigSchema.parse(JSON.parse(text));
            if (isPermissionMode(defaultPermissionMode)) {
                return defaultPermissionMode;
            }
        }
        catch {
            // Unreadable / malformed config: treat as "no default set".
        }
    }
    return null;
}
/**
 * Persist `defaultPermissionMode` into the user-global config file so a
 * future fresh session defaults to the same mode without an explicit
 * `--mode` flag.
 *
 * Creates the containing directory when missing and keeps every
 * unrelated key already present in the file. An existing file that is
 * not a JSON object is treated as empty and overwritten.
 *
 * The write is tmp + rename, with a tmp name that is unique per process
 * and call: a fixed `<file>.tmp` sibling would let two CLI processes
 * writing at the same moment clobber each other's temporary file before
 * the rename.
 *
 * @param {string} mode - mode to store under `defaultPermissionMode`.
 * @param {string} [homeDir] - directory that contains `.pugi/`;
 *   defaults to `os.homedir()`.
 * @returns {void}
 * @throws whatever the underlying `fs` call throws (mkdir / write / rename).
 */
export function setGlobalDefaultMode(mode, homeDir = homedir()) {
    const path = globalConfigPath(homeDir);
    mkdirSync(dirname(path), { recursive: true });
    const existing = existsSync(path)
        ? safeParseObject(readFileSync(path, 'utf8'))
        : {};
    const next = { ...existing, defaultPermissionMode: mode };
    // pid separates concurrent processes; the timestamp separates
    // successive runs. Calls inside one process are synchronous, so each
    // tmp file is already renamed away before the next call can reuse a
    // name.
    const tmpPath = `${path}.${process.pid}.${Date.now()}.tmp`;
    writeFileSync(tmpPath, `${JSON.stringify(next, null, 2)}\n`, { encoding: 'utf8', mode: 0o600 });
    renameSync(tmpPath, path);
}
/**
 * Resolve the effective permission mode by walking the layers in
 * priority order and returning the first one that yields a mode:
 *
 *   1. `options.cliFlag`, when set and it parses to a valid mode;
 *   2. the workspace session state for `options.workspaceRoot`;
 *   3. the global default under `options.homeDir`;
 *   4. `DEFAULT_PERMISSION_MODE`.
 *
 * Lower layers are only consulted when the higher ones produce nothing.
 * Note that a flag which does not parse is NOT reported here — it is
 * skipped and resolution continues with layer 2.
 *
 * @param {{ cliFlag?: string, workspaceRoot: string, homeDir?: string }} options
 * @returns {string} the effective mode.
 */
export function resolveMode(options) {
    const fromFlag = options.cliFlag
        ? parsePermissionMode(options.cliFlag)
        : null;
    return (fromFlag ||
        getCurrentMode(options.workspaceRoot) ||
        getGlobalDefaultMode(options.homeDir) ||
        DEFAULT_PERMISSION_MODE);
}
/**
 * Parse JSON text, accepting only a plain (non-array, non-null) object.
 *
 * Anything else — invalid JSON, a top-level array, a primitive, `null` —
 * collapses to a fresh empty object, so callers can spread the result
 * without risking a TypeError or merging into a non-object payload.
 *
 * @param {string} raw - JSON text.
 * @returns {object} the parsed object, or `{}`.
 */
function safeParseObject(raw) {
    let candidate;
    try {
        candidate = JSON.parse(raw);
    }
    catch {
        return {};
    }
    const isPlainRecord = candidate !== null &&
        typeof candidate === 'object' &&
        !Array.isArray(candidate);
    return isPlainRecord ? candidate : {};
}
160
+ //# sourceMappingURL=state.js.map
@@ -0,0 +1,93 @@
1
+ /**
2
+ * Tool side-effect classification — Leak L6.
3
+ *
4
+ * Three classes drive the canonical 4-mode permission gate:
5
+ *
6
+ * - `read` — observe-only. Plan mode allows; ask still prompts;
7
+ * allow + bypass execute silently. Examples: read,
8
+ * grep, glob, web_fetch, web_search, skills_list.
9
+ * - `write` — mutates workspace, journal, or operator screen with
10
+ * visible side effects. Plan mode refuses; ask prompts;
11
+ * allow + bypass execute. Examples: write, edit, bash,
12
+ * multi_edit, task_*. `ask_user_question` is also
13
+ * classed as `write` because it interrupts the
14
+ * dispatcher's flow control and demands operator
15
+ * attention — plan mode should not prompt operators.
16
+ * - `dispatch` — spawns a child subagent or off-tree task. Plan mode
17
+ * refuses (a write-capable child violates plan-mode's
18
+ * read-only contract); ask prompts; allow + bypass
19
+ * execute. Example: `agent`.
20
+ *
21
+ * Unknown tool names default to `write` — deny-first safety. A stale
22
+ * schema entry that the gate has not been told about should not silently
23
+ * pass in plan mode just because the gate doesn't recognise it.
24
+ */
/**
 * Closed map of every built-in tool name -> side-effect class
 * (`read` | `write` | `dispatch`). Any name that is not an own key of
 * this map is handled by `getToolClass`, which falls back to the
 * deny-first `write` class.
 *
 * The original note on this table says it is mirrored against the
 * `WIRED_TOOLS` set in `core/engine/tool-bridge.ts`; that set is not
 * visible from this module.
 */
const BUILT_IN_TOOL_CLASSES = Object.freeze({
    // Read-only observations.
    read: 'read',
    grep: 'read',
    glob: 'read',
    ls: 'read',
    search: 'read',
    web_fetch: 'read',
    web_search: 'read',
    file_cache_check: 'read',
    skills_list: 'read',
    skill: 'read',
    task_get: 'read',
    task_list: 'read',
    // Mutating actions.
    write: 'write',
    edit: 'write',
    multi_edit: 'write',
    bash: 'write',
    task_create: 'write',
    task_update: 'write',
    todo_write: 'write',
    // `ask_user_question` halts the loop and demands operator attention.
    // Classed as write so plan mode refuses it while ask + allow +
    // bypass execute it normally.
    ask_user_question: 'write',
    // Dispatch — spawn a child agent. Refused in plan mode up front so a
    // plan-mode session cannot leak a write-capable child.
    agent: 'dispatch',
    pugi_delegate: 'dispatch',
    sub_agent_spawn: 'dispatch',
});
/** Namespace prefix carried by MCP tool names (`mcp__<server>__<tool>`). */
const MCP_TOOL_PREFIX = 'mcp__';
/**
 * Resolve the side-effect class for a tool name.
 *
 * Only OWN keys of the built-in table count as known tools. A plain
 * bracket lookup would also walk the prototype chain, so names such as
 * `constructor`, `toString` or `__proto__` would resolve to an inherited
 * function/object instead of a class — bypassing the deny-first default.
 *
 * Everything that is not a known built-in is classed as `write`:
 * MCP-namespaced tools, unknown names, and non-string input alike.
 *
 * @param {string} toolName - tool identifier as seen by the gate.
 * @returns {'read' | 'write' | 'dispatch'} the tool's class.
 */
export function getToolClass(toolName) {
    // Non-string input cannot name a tool; deny-first instead of throwing
    // on the string method below.
    if (typeof toolName !== 'string')
        return 'write';
    if (Object.hasOwn(BUILT_IN_TOOL_CLASSES, toolName))
        return BUILT_IN_TOOL_CLASSES[toolName];
    // MCP tools are uniformly conservative. The branch is kept explicit,
    // although the fallback below returns the same class.
    if (toolName.startsWith(MCP_TOOL_PREFIX))
        return 'write';
    return 'write';
}
/**
 * Expose the built-in class map for diagnostic surfaces and test
 * fixtures. The returned object is the frozen table itself, not a copy;
 * callers must not attempt to mutate it.
 *
 * @returns {Readonly<Record<string, string>>} the built-in class table.
 */
export function listBuiltInToolClasses() {
    return BUILT_IN_TOOL_CLASSES;
}
93
+ //# sourceMappingURL=tool-class.js.map
@@ -0,0 +1,308 @@
1
+ /**
2
+ * Codebase survey for the `/init` interview - Phase 2.
3
+ *
4
+ * Inspired by Claude Code's /init Phase 2 (the upstream spawns a
5
+ * Task-tool subagent to read manifest files + CI config + existing
6
+ * agent rules and produce a structured "what this repo is" digest).
7
+ * Independent implementation: Pugi's subagent infra is admin-api
8
+ * scheduled and not available in the local REPL boot path, so the
9
+ * survey runs as a direct filesystem scan instead. The shape of the
10
+ * output matches the upstream pattern - manifest, languages, build /
11
+ * test / lint commands, existing AI-tool configs - so the downstream
12
+ * Phase 3 / Phase 4 logic stays portable.
13
+ *
14
+ * # Design notes
15
+ *
16
+ * - Pure fs reads. No spawn, no network, no LLM call. Safe to run on
17
+ * every `/init` invocation without rate-limit concern.
18
+ * - Bounded: every read caps at 16 KB to defend against an enormous
19
+ * manifest pinning memory. Real package.json / pyproject.toml are
20
+ * well under that.
21
+ * - Defensive: a missing or unreadable file maps to `undefined` in the
22
+ * returned record. Phase 3 treats unknowns as "ask the operator".
23
+ * - Manifest grammar is closed: package.json (Node) and pyproject.toml
24
+ * (Python) are recognised explicitly because Pugi customers ship one
25
+ * of those nine times out of ten. Cargo.toml / go.mod / pom.xml are
26
+ * detected by filename only - we surface "rust"/"go"/"java" as the
27
+ * language hint but do not parse them, because the Phase 3 question
28
+ * set asks for build commands directly when the manifest is opaque.
29
+ *
30
+ * # What we collect
31
+ *
32
+ * 1. `manifest`: which manifest file was found (closed enum).
33
+ * 2. `languages`: deduped list inferred from manifest + file extension
34
+ * heuristics under the workspace root (one-level deep scan).
35
+ * 3. `packageManager`: pnpm / npm / yarn / bun (Node only) or `unknown`.
36
+ * 4. `buildCommand` / `testCommand` / `lintCommand`: parsed out of
37
+ * `package.json` scripts when a Node manifest is present.
38
+ * 5. `aiToolConfigs`: a record of which sibling-agent config files
39
+ * already exist (CLAUDE.md, AGENTS.md, .cursorrules,
40
+ * .github/copilot-instructions.md, .windsurfrules, .clinerules,
41
+ * .mcp.json). Phase 4 mines these for "important parts" without
42
+ * duplicating them into PUGI.md.
43
+ * 6. `hasReadme`, `hasGit`, `hasCi`: simple booleans for the
44
+ * gap-question logic.
45
+ * 7. `hasExistingPugiMd`: true when re-running `/init` against a
46
+ * workspace that already produced PUGI.md.
47
+ *
48
+ * # Why not spawn a Pugi subagent
49
+ *
50
+ * The Pugi subagent dispatcher (apps/pugi-cli/src/core/subagents/)
51
+ * speaks to admin-api over the SSE transport. Running the codebase
52
+ * survey through that path would (a) burn a tenant token quota on
53
+ * every `/init`, (b) require an online connection, and (c) round-trip
54
+ * structured data through the persona prompt - which is the wrong
55
+ * tool for "list which files exist". A direct fs scan is faster,
56
+ * deterministic, and works offline. The upstream Task-tool decision
57
+ * makes sense in a hosted product where every operation is metered;
58
+ * Pugi runs locally so we keep the survey local too.
59
+ */
60
+ import { existsSync, readdirSync, readFileSync, statSync } from 'node:fs';
61
+ import { join } from 'node:path';
/**
 * Upper bound, in bytes, on any single file the survey reads (16 KiB).
 * Guards against a huge file stalling or bloating the scan.
 */
const MAX_READ_BYTES = 16_384;
/**
 * Upper bound on how many workspace-root directory entries the
 * language-inference scan inspects. The scan is top-level only; the
 * manifest remains the primary signal for the active stack.
 */
const MAX_TOP_LEVEL_ENTRIES = 200;
/**
 * Survey the workspace at `workspaceRoot` and return a snapshot record.
 * Only inspects the filesystem; how the result is rendered is up to the
 * caller.
 *
 * `buildCommand` / `testCommand` / `lintCommand` / `formatCommand` hold
 * the NAME of the matching `package.json` script (not its shell text),
 * or `undefined` when no candidate script exists. Non-fatal read
 * problems are collected in `readErrors` instead of being thrown.
 *
 * @param {string} workspaceRoot - directory to survey.
 * @returns {object} the survey snapshot.
 */
export function surveyCodebase(workspaceRoot) {
    const readErrors = [];
    const at = (relative) => join(workspaceRoot, relative);
    const manifest = detectManifest(workspaceRoot);
    // Only a Node manifest is parsed; other manifests are detected by
    // filename alone.
    let packageJson;
    if (manifest === 'package.json') {
        packageJson = safeReadJson(at('package.json'), readErrors);
    }
    const packageManager = inferPackageManager(workspaceRoot, packageJson);
    const languages = inferLanguages(workspaceRoot, manifest, readErrors);
    // Anything other than a parsed object contributes no scripts.
    let scripts = {};
    if (packageJson && typeof packageJson === 'object') {
        scripts = packageJson.scripts ?? {};
    }
    const aiToolConfigs = scanAiToolConfigs(workspaceRoot);
    return {
        workspaceRoot,
        manifest,
        packageManager,
        languages,
        buildCommand: pickScript(scripts, ['build', 'compile']),
        testCommand: pickScript(scripts, ['test', 'tests']),
        lintCommand: pickScript(scripts, ['lint', 'check']),
        formatCommand: pickScript(scripts, ['format', 'fmt', 'prettier']),
        aiToolConfigs,
        hasReadme: existsSafe(at('README.md')) || existsSafe(at('readme.md')),
        hasGit: existsSafe(at('.git')),
        hasCi: detectCi(workspaceRoot),
        hasExistingPugiMd: aiToolConfigs['PUGI.md'],
        readErrors,
    };
}
113
+ /* ------------------------------------------------------------------ */
114
+ /* Manifest detection */
115
+ /* ------------------------------------------------------------------ */
/**
 * Manifest filenames in probe priority order: when several exist in the
 * same directory, the earliest entry wins.
 */
const MANIFEST_PROBE_ORDER = Object.freeze([
    'package.json',
    'pyproject.toml',
    'Cargo.toml',
    'go.mod',
    'pom.xml',
    'Gemfile',
    'composer.json',
]);
/**
 * Identify the manifest file present directly under `root`.
 *
 * @param {string} root - directory to probe.
 * @returns {string} the first filename from `MANIFEST_PROBE_ORDER` that
 *   exists, or `'unknown'` when none does.
 */
function detectManifest(root) {
    const firstHit = MANIFEST_PROBE_ORDER.find((name) => existsSafe(join(root, name)));
    return firstHit ?? 'unknown';
}
/**
 * Work out which Node package manager the workspace uses.
 *
 * The `packageManager` field of the parsed `package.json` wins when it
 * is a string starting with `pnpm@`, `yarn@`, `npm@` or `bun@`.
 * Otherwise the first lockfile found under `root` decides, probed in the
 * order pnpm, yarn, bun, npm.
 *
 * @param {string} root - workspace root directory.
 * @param {unknown} packageJson - parsed `package.json`, or `undefined`.
 * @returns {'pnpm' | 'yarn' | 'npm' | 'bun' | 'unknown'}
 */
function inferPackageManager(root, packageJson) {
    const hasDeclaration = Boolean(packageJson) &&
        typeof packageJson === 'object' &&
        'packageManager' in packageJson;
    if (hasDeclaration) {
        const declared = packageJson.packageManager;
        if (typeof declared === 'string') {
            const named = ['pnpm', 'yarn', 'npm', 'bun'].find((manager) => declared.startsWith(`${manager}@`));
            if (named) {
                return named;
            }
        }
    }
    // Lockfile fallback; order encodes priority.
    const lockfiles = [
        ['pnpm-lock.yaml', 'pnpm'],
        ['yarn.lock', 'yarn'],
        ['bun.lockb', 'bun'],
        ['bun.lock', 'bun'],
        ['package-lock.json', 'npm'],
    ];
    for (const [lockfile, manager] of lockfiles) {
        if (existsSafe(join(root, lockfile))) {
            return manager;
        }
    }
    return 'unknown';
}
161
+ /* ------------------------------------------------------------------ */
162
+ /* Language inference */
163
+ /* ------------------------------------------------------------------ */
/** File extension (leading dot included) -> language name. */
const EXT_TO_LANG = Object.freeze({
    '.ts': 'typescript',
    '.tsx': 'typescript',
    '.js': 'javascript',
    '.jsx': 'javascript',
    '.mjs': 'javascript',
    '.cjs': 'javascript',
    '.py': 'python',
    '.rs': 'rust',
    '.go': 'go',
    '.java': 'java',
    '.kt': 'kotlin',
    '.swift': 'swift',
    '.rb': 'ruby',
    '.php': 'php',
    '.cs': 'csharp',
    '.cpp': 'cpp',
    '.c': 'c',
});
/** Manifest filename -> languages that manifest implies. */
const MANIFEST_TO_LANG = Object.freeze({
    'package.json': ['javascript'],
    'pyproject.toml': ['python'],
    'Cargo.toml': ['rust'],
    'go.mod': ['go'],
    'pom.xml': ['java'],
    'Gemfile': ['ruby'],
    'composer.json': ['php'],
    'unknown': [],
});
/**
 * Infer the languages in use from the manifest plus the extensions of
 * entries directly under `root`.
 *
 * At most `MAX_TOP_LEVEL_ENTRIES` directory entries are looked at.
 * Dot-entries, `node_modules` and `dist` are ignored, as are names with
 * no extension. A failing directory read is appended to `errors` and
 * the manifest-derived languages are still returned.
 *
 * @param {string} root - workspace root directory.
 * @param {string} manifest - manifest name as returned by `detectManifest`.
 * @param {string[]} errors - sink for non-fatal read errors (mutated).
 * @returns {string[]} deduplicated language names in sorted order.
 */
function inferLanguages(root, manifest, errors) {
    const found = new Set(MANIFEST_TO_LANG[manifest]);
    try {
        // The budget applies to raw entries, before any filtering.
        const names = readdirSync(root).slice(0, MAX_TOP_LEVEL_ENTRIES);
        for (const name of names) {
            const ignored = name.startsWith('.') || name === 'node_modules' || name === 'dist';
            if (ignored) {
                continue;
            }
            const dotAt = name.lastIndexOf('.');
            if (dotAt > 0) {
                const language = EXT_TO_LANG[name.slice(dotAt)];
                if (language) {
                    found.add(language);
                }
            }
        }
    }
    catch (error) {
        errors.push(`readdir ${root}: ${normalizeError(error)}`);
    }
    // `typescript` and `javascript` are not collapsed into one another:
    // when both were detected, both are reported.
    return [...found].sort();
}
223
+ /* ------------------------------------------------------------------ */
224
+ /* Script picker */
225
+ /* ------------------------------------------------------------------ */
/**
 * Pick the first candidate script name that is actually defined.
 *
 * A candidate counts as defined when `scripts[name]` is a string with
 * non-whitespace content. The script NAME is returned, not its command
 * text; the caller combines it with the package manager.
 *
 * @param {object} scripts - the `scripts` record from `package.json`.
 * @param {string[]} candidates - script names in priority order.
 * @returns {string | undefined} the first defined name, or `undefined`.
 */
function pickScript(scripts, candidates) {
    return candidates.find((name) => {
        const command = scripts[name];
        return typeof command === 'string' && command.trim().length > 0;
    });
}
238
+ /* ------------------------------------------------------------------ */
239
+ /* AI tool config scan */
240
+ /* ------------------------------------------------------------------ */
/**
 * Workspace-relative paths of agent/AI-tool configuration files the
 * survey checks for.
 */
const AI_TOOL_CONFIG_PATHS = Object.freeze([
    'CLAUDE.md',
    'CLAUDE.local.md',
    'AGENTS.md',
    '.cursorrules',
    '.cursor/rules',
    '.github/copilot-instructions.md',
    '.windsurfrules',
    '.clinerules',
    '.mcp.json',
    'PUGI.md',
    'PUGI.local.md',
]);
/**
 * Record which of the known AI-tool config files exist under `root`.
 *
 * @param {string} root - workspace root directory.
 * @returns {Readonly<Record<string, boolean>>} frozen map with one entry
 *   per path in `AI_TOOL_CONFIG_PATHS`: `true` when it exists.
 */
function scanAiToolConfigs(root) {
    const presence = AI_TOOL_CONFIG_PATHS.map((relative) => [
        relative,
        existsSafe(join(root, relative)),
    ]);
    return Object.freeze(Object.fromEntries(presence));
}
261
+ /* ------------------------------------------------------------------ */
262
+ /* CI detection */
263
+ /* ------------------------------------------------------------------ */
/** Workspace-relative paths whose presence indicates a CI setup. */
const CI_PROBE_PATHS = Object.freeze([
    '.github/workflows',
    '.gitlab-ci.yml',
    '.circleci/config.yml',
    'azure-pipelines.yml',
    '.travis.yml',
    '.buildkite',
]);
/**
 * Report whether any known CI configuration path exists under `root`.
 *
 * @param {string} root - workspace root directory.
 * @returns {boolean} true as soon as one probe path exists.
 */
function detectCi(root) {
    for (const relative of CI_PROBE_PATHS) {
        if (existsSafe(join(root, relative))) {
            return true;
        }
    }
    return false;
}
275
+ /* ------------------------------------------------------------------ */
276
+ /* Safe IO helpers */
277
+ /* ------------------------------------------------------------------ */
/**
 * Existence check that can never propagate an exception: any error
 * raised by the underlying probe is reported as "does not exist".
 *
 * @param {string} path - filesystem path to probe.
 * @returns {boolean} true when the path exists.
 */
function existsSafe(path) {
    let present = false;
    try {
        present = existsSync(path);
    }
    catch {
        present = false;
    }
    return present;
}
/**
 * Read and parse a JSON file without ever throwing.
 *
 * Returns `undefined` when the path is not a regular file, when the
 * file is larger than `MAX_READ_BYTES`, or when stat / read / parse
 * fails. The oversize and failure cases each append a message to
 * `errors`; a path that is simply not a regular file is skipped
 * silently.
 *
 * @param {string} path - file to read.
 * @param {string[]} errors - sink for non-fatal read errors (mutated).
 * @returns {unknown} the parsed JSON value, or `undefined`.
 */
function safeReadJson(path, errors) {
    let parsed;
    try {
        const info = statSync(path);
        if (info.isFile()) {
            if (info.size <= MAX_READ_BYTES) {
                parsed = JSON.parse(readFileSync(path, 'utf8'));
            }
            else {
                errors.push(`oversize ${path}: ${info.size} bytes`);
            }
        }
    }
    catch (failure) {
        errors.push(`read ${path}: ${normalizeError(failure)}`);
        parsed = undefined;
    }
    return parsed;
}
/**
 * Reduce any thrown value to a printable message.
 *
 * @param {unknown} error - the caught value.
 * @returns {string} `error.message` for `Error` instances, otherwise the
 *   value converted with `String()`.
 */
function normalizeError(error) {
    return error instanceof Error ? error.message : String(error);
}
308
+ //# sourceMappingURL=codebase-survey.js.map
@@ -31,6 +31,7 @@
31
31
  * keys stay readable English (`brief`, `ts`). No forbidden words.
32
32
  */
33
33
  import { existsSync, mkdirSync, readFileSync, writeFileSync, appendFileSync, renameSync, unlinkSync, } from 'node:fs';
34
+ import { randomBytes } from 'node:crypto';
34
35
  import { homedir } from 'node:os';
35
36
  import { dirname, join } from 'node:path';
36
37
  /** Cap on stored entries per workspace. Drops oldest on overflow. */
@@ -77,7 +78,16 @@ export function append(input) {
77
78
  // sibling guarantees that). P2 fix from PR #335 triple-review.
78
79
  if (existing.length + 1 > MAX_HISTORY_ENTRIES) {
79
80
  const trimmed = [...existing.slice(existing.length + 1 - MAX_HISTORY_ENTRIES), entry];
80
- const tmpPath = `${path}.tmp`;
81
+ // β1b #52 (2026-05-26): unique-per-call tmp suffix.
82
+ // Previous form was a fixed `${path}.tmp`, which means two CLI
83
+ // processes hitting the overflow rewrite at the same moment race
84
+ // on the same sibling file. Whichever writeFileSync lands second
85
+ // can corrupt the renameSync target's content (one process's
86
+ // serialized buffer overwrites the other mid-flight). Append a
87
+ // pid + monotonic-ish timestamp + 8 random hex chars (4 bytes) so the tmp
88
+ // names are collision-proof across PIDs, concurrent calls inside
89
+ // one PID, and rapid re-runs that share the same ms timestamp.
90
+ const tmpPath = `${path}.${process.pid}.${Date.now()}.${randomBytes(4).toString('hex')}.tmp`;
81
91
  try {
82
92
  writeFileSync(tmpPath, trimmed.map(serialize).join('\n') + '\n', { mode: 0o600 });
83
93
  renameSync(tmpPath, path);