@bastani/atomic 0.8.31-alpha.1 → 0.8.31-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. package/CHANGELOG.md +17 -5
  2. package/README.md +12 -10
  3. package/dist/builtin/cursor/CHANGELOG.md +1 -1
  4. package/dist/builtin/cursor/package.json +2 -2
  5. package/dist/builtin/intercom/CHANGELOG.md +1 -1
  6. package/dist/builtin/intercom/package.json +2 -2
  7. package/dist/builtin/mcp/CHANGELOG.md +1 -1
  8. package/dist/builtin/mcp/package.json +3 -3
  9. package/dist/builtin/subagents/CHANGELOG.md +10 -1
  10. package/dist/builtin/subagents/agents/codebase-online-researcher.md +8 -8
  11. package/dist/builtin/subagents/agents/debugger.md +6 -6
  12. package/dist/builtin/subagents/package.json +4 -4
  13. package/dist/builtin/subagents/skills/effective-liteparse/SKILL.md +118 -0
  14. package/dist/builtin/subagents/skills/effective-liteparse/scripts/search.py +128 -0
  15. package/dist/builtin/subagents/skills/playwright-cli/SKILL.md +404 -0
  16. package/dist/builtin/subagents/skills/playwright-cli/references/element-attributes.md +23 -0
  17. package/dist/builtin/subagents/skills/playwright-cli/references/playwright-tests.md +39 -0
  18. package/dist/builtin/subagents/skills/playwright-cli/references/request-mocking.md +87 -0
  19. package/dist/builtin/subagents/skills/playwright-cli/references/running-code.md +241 -0
  20. package/dist/builtin/subagents/skills/playwright-cli/references/session-management.md +225 -0
  21. package/dist/builtin/subagents/skills/playwright-cli/references/spec-driven-testing.md +305 -0
  22. package/dist/builtin/subagents/skills/playwright-cli/references/storage-state.md +275 -0
  23. package/dist/builtin/subagents/skills/playwright-cli/references/test-generation.md +134 -0
  24. package/dist/builtin/subagents/skills/playwright-cli/references/tracing.md +139 -0
  25. package/dist/builtin/subagents/skills/playwright-cli/references/video-recording.md +143 -0
  26. package/dist/builtin/web-access/CHANGELOG.md +1 -1
  27. package/dist/builtin/web-access/package.json +2 -2
  28. package/dist/builtin/workflows/CHANGELOG.md +7 -1
  29. package/dist/builtin/workflows/README.md +4 -4
  30. package/dist/builtin/workflows/builtin/open-claude-design.ts +59 -56
  31. package/dist/builtin/workflows/builtin/ralph.ts +56 -3
  32. package/dist/builtin/workflows/builtin/shared-prompts.ts +1 -1
  33. package/dist/builtin/workflows/package.json +2 -2
  34. package/dist/builtin/workflows/skills/research-codebase/SKILL.md +1 -1
  35. package/dist/cli/args.d.ts.map +1 -1
  36. package/dist/cli/args.js +1 -1
  37. package/dist/cli/args.js.map +1 -1
  38. package/dist/core/agent-session.d.ts +1 -0
  39. package/dist/core/agent-session.d.ts.map +1 -1
  40. package/dist/core/agent-session.js +49 -21
  41. package/dist/core/agent-session.js.map +1 -1
  42. package/dist/core/context-window.d.ts +26 -1
  43. package/dist/core/context-window.d.ts.map +1 -1
  44. package/dist/core/context-window.js +30 -6
  45. package/dist/core/context-window.js.map +1 -1
  46. package/dist/core/copilot-model-catalog.d.ts +39 -21
  47. package/dist/core/copilot-model-catalog.d.ts.map +1 -1
  48. package/dist/core/copilot-model-catalog.js +44 -16
  49. package/dist/core/copilot-model-catalog.js.map +1 -1
  50. package/dist/core/model-registry.d.ts.map +1 -1
  51. package/dist/core/model-registry.js +6 -4
  52. package/dist/core/model-registry.js.map +1 -1
  53. package/dist/core/project-trust.d.ts.map +1 -1
  54. package/dist/core/project-trust.js +2 -1
  55. package/dist/core/project-trust.js.map +1 -1
  56. package/dist/core/sdk.d.ts.map +1 -1
  57. package/dist/core/sdk.js +18 -7
  58. package/dist/core/sdk.js.map +1 -1
  59. package/dist/core/settings-manager.d.ts +11 -2
  60. package/dist/core/settings-manager.d.ts.map +1 -1
  61. package/dist/core/settings-manager.js +62 -8
  62. package/dist/core/settings-manager.js.map +1 -1
  63. package/dist/core/system-prompt.d.ts.map +1 -1
  64. package/dist/core/system-prompt.js +1 -0
  65. package/dist/core/system-prompt.js.map +1 -1
  66. package/dist/core/tools/edit-diff.d.ts +1 -2
  67. package/dist/core/tools/edit-diff.d.ts.map +1 -1
  68. package/dist/core/tools/edit-diff.js +1 -2
  69. package/dist/core/tools/edit-diff.js.map +1 -1
  70. package/dist/index.d.ts +2 -1
  71. package/dist/index.d.ts.map +1 -1
  72. package/dist/index.js +2 -1
  73. package/dist/index.js.map +1 -1
  74. package/dist/modes/interactive/components/config-selector.d.ts.map +1 -1
  75. package/dist/modes/interactive/components/config-selector.js +5 -7
  76. package/dist/modes/interactive/components/config-selector.js.map +1 -1
  77. package/dist/modes/interactive/components/model-selector.d.ts.map +1 -1
  78. package/dist/modes/interactive/components/model-selector.js +2 -1
  79. package/dist/modes/interactive/components/model-selector.js.map +1 -1
  80. package/dist/modes/interactive/components/scoped-models-selector.d.ts.map +1 -1
  81. package/dist/modes/interactive/components/scoped-models-selector.js +4 -1
  82. package/dist/modes/interactive/components/scoped-models-selector.js.map +1 -1
  83. package/dist/modes/interactive/components/settings-selector.d.ts +2 -0
  84. package/dist/modes/interactive/components/settings-selector.d.ts.map +1 -1
  85. package/dist/modes/interactive/components/settings-selector.js +165 -15
  86. package/dist/modes/interactive/components/settings-selector.js.map +1 -1
  87. package/dist/modes/interactive/components/tree-selector.d.ts.map +1 -1
  88. package/dist/modes/interactive/components/tree-selector.js +44 -4
  89. package/dist/modes/interactive/components/tree-selector.js.map +1 -1
  90. package/dist/modes/interactive/interactive-mode.d.ts +1 -1
  91. package/dist/modes/interactive/interactive-mode.d.ts.map +1 -1
  92. package/dist/modes/interactive/interactive-mode.js +24 -54
  93. package/dist/modes/interactive/interactive-mode.js.map +1 -1
  94. package/dist/modes/interactive/model-search.d.ts +7 -0
  95. package/dist/modes/interactive/model-search.d.ts.map +1 -0
  96. package/dist/modes/interactive/model-search.js +6 -0
  97. package/dist/modes/interactive/model-search.js.map +1 -0
  98. package/dist/modes/interactive/theme/theme-controller.d.ts +30 -0
  99. package/dist/modes/interactive/theme/theme-controller.d.ts.map +1 -0
  100. package/dist/modes/interactive/theme/theme-controller.js +108 -0
  101. package/dist/modes/interactive/theme/theme-controller.js.map +1 -0
  102. package/dist/modes/interactive/theme/theme-schema.json +2 -1
  103. package/dist/modes/interactive/theme/theme.d.ts +5 -0
  104. package/dist/modes/interactive/theme/theme.d.ts.map +1 -1
  105. package/dist/modes/interactive/theme/theme.js +70 -29
  106. package/dist/modes/interactive/theme/theme.js.map +1 -1
  107. package/dist/modes/rpc/rpc-client.d.ts +1 -1
  108. package/dist/modes/rpc/rpc-client.d.ts.map +1 -1
  109. package/dist/modes/rpc/rpc-client.js +1 -1
  110. package/dist/modes/rpc/rpc-client.js.map +1 -1
  111. package/dist/modes/rpc/rpc-mode.d.ts.map +1 -1
  112. package/dist/modes/rpc/rpc-mode.js +1 -1
  113. package/dist/modes/rpc/rpc-mode.js.map +1 -1
  114. package/dist/package-manager-cli.d.ts.map +1 -1
  115. package/dist/package-manager-cli.js +39 -9
  116. package/dist/package-manager-cli.js.map +1 -1
  117. package/docs/extensions.md +21 -0
  118. package/docs/models.md +3 -3
  119. package/docs/packages.md +13 -9
  120. package/docs/providers.md +3 -3
  121. package/docs/quickstart.md +14 -0
  122. package/docs/rpc.md +3 -3
  123. package/docs/sdk.md +15 -11
  124. package/docs/session-format.md +1 -1
  125. package/docs/settings.md +8 -3
  126. package/docs/themes.md +3 -1
  127. package/docs/tui.md +1 -1
  128. package/docs/usage.md +12 -9
  129. package/docs/workflows.md +9 -7
  130. package/examples/extensions/custom-provider-anthropic/package-lock.json +2 -2
  131. package/examples/extensions/custom-provider-anthropic/package.json +1 -1
  132. package/examples/extensions/custom-provider-gitlab-duo/package.json +1 -1
  133. package/examples/extensions/gondolin/package-lock.json +2 -2
  134. package/examples/extensions/gondolin/package.json +1 -1
  135. package/examples/extensions/preset.ts +10 -4
  136. package/examples/extensions/provider-payload.ts +5 -5
  137. package/examples/extensions/sandbox/index.ts +2 -2
  138. package/examples/extensions/sandbox/package-lock.json +3 -3
  139. package/examples/extensions/sandbox/package.json +2 -2
  140. package/examples/extensions/subagent/agents.ts +2 -2
  141. package/examples/extensions/subagent/index.ts +4 -2
  142. package/examples/extensions/with-deps/package-lock.json +2 -2
  143. package/examples/extensions/with-deps/package.json +1 -1
  144. package/package.json +5 -5
  145. package/dist/builtin/subagents/skills/browser/EXAMPLES.md +0 -151
  146. package/dist/builtin/subagents/skills/browser/LICENSE.txt +0 -21
  147. package/dist/builtin/subagents/skills/browser/REFERENCE.md +0 -451
  148. package/dist/builtin/subagents/skills/browser/SKILL.md +0 -170
package/CHANGELOG.md CHANGED
@@ -4,20 +4,32 @@
4
4
 
5
5
  ### Added
6
6
 
7
- - Added configurable context-window support for models that declare `contextWindowOptions`, including explicit `--context-window` CLI/settings control, a GitHub Copilot CLI-style `/model`-flow picker (numbered `Default`/`Long context` tiers with token counts), session replay, SDK/runtime/RPC APIs, and docs while preserving each model's scalar default context window. For GitHub Copilot, context windows are measured in **input (prompt) tokens** (consistent with every other provider) and derived **dynamically from GitHub's live CAPI model catalog** (`GET /models`) instead of a hardcoded model list: Atomic resolves each model's input budget as `max_prompt_tokens || max_context_window_tokens || 128_000` and, for tiered models, exposes the per-tier input budgets (`token_prices.<tier>.context_max`) as a selectable default/long window — gated on the user actually having the GitHub Copilot provider and cached on disk for 30 minutes (for example `github-copilot/gpt-5.5` resolves to `272k` default / `922k` long, and the Claude/Gemini long-context models to `200k` default / `936k` long). Atomic raises the local budget and sends `X-GitHub-Api-Version: 2026-06-01`, while GitHub applies the long-context billing tier server-side by prompt token count. Long-context Copilot requests consume more AI credits and require Copilot long-context/usage-based billing entitlement; offline, unauthenticated, or non-Copilot sessions leave the built-in window untouched and show no picker; custom providers and explicit model overrides can still expose their own selectable windows ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
7
+ - Added configurable context-window support for models that declare `contextWindowOptions`, including explicit `--context-window` CLI/settings control, a GitHub Copilot CLI-style `/model`-flow picker (numbered `Default`/`Long context` tiers with token counts), session replay, SDK/runtime/RPC APIs, and docs while preserving each model's scalar default context window. For GitHub Copilot, context windows are measured in **input (prompt) tokens** (consistent with every other provider) and derived **dynamically from GitHub's live CAPI model catalog** (`GET /models`) instead of a hardcoded model list: Atomic resolves each model's input budget as `max_prompt_tokens || max_context_window_tokens || 128_000` and, for tiered models, exposes a selectable default window (`token_prices.default.context_max`) plus a long window set to the model's full `max_context_window_tokens` (retaining `max_prompt_tokens` as the internal effective compaction/overflow budget) — gated on the user actually having the GitHub Copilot provider and cached on disk for 30 minutes (for example `github-copilot/gpt-5.5` exposes `272k` default / `1.05m` long, and the Claude/Gemini long-context models `200k` default / `1m` long). Atomic raises the local budget and sends `X-GitHub-Api-Version: 2026-06-01`, while GitHub applies the long-context billing tier server-side by prompt token count. Long-context Copilot requests consume more AI credits and require Copilot long-context/usage-based billing entitlement; offline, unauthenticated, or non-Copilot sessions leave the built-in window untouched and show no picker; custom providers and explicit model overrides can still expose their own selectable windows ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
8
8
  - Exported context-window helper functions and types from the package root, including parser/formatter/normalizer/selection utilities and the `Model<Api>` augmentation for `contextWindowOptions`/`defaultContextWindow`, so SDK consumers can use the public API without importing internal source paths ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
9
- - Added RPC mode runtime context-window commands so headless clients can read supported token budgets with `get_available_context_windows` and select the active runtime budget with `set_context_window` without persisting `defaultContextWindow` settings ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
9
+ - Added RPC mode runtime context-window commands so headless clients can read supported token budgets with `get_available_context_windows` and select the active runtime budget with `set_context_window` without persisting context-window settings ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
10
+ - Added upstream pi v0.79.7 automatic theme mode support so `/settings` can choose separate light and dark themes and follow terminal color-scheme changes.
11
+ - Exported the upstream `CONFIG_DIR_NAME` constant and edit diff helpers (`generateDiffString`, `generateUnifiedPatch`, and `EditDiffResult`) from the public SDK entrypoint so extensions can avoid hardcoded project config paths and reuse edit-style diff rendering.
10
12
 
11
13
  ### Changed
12
14
 
13
- - Changed built-in GitHub Copilot context windows to be measured in **input (prompt) tokens** (matching every other provider) and derived from GitHub's live CAPI model catalog (`GET /models`, cached 30 minutes, gated on the Copilot provider) instead of a hardcoded long-context model list, so newly added/removed Copilot models and retiered windows are reflected automatically without shipping a stale snapshot. Each model's window now resolves to `max_prompt_tokens || max_context_window_tokens || 128_000`, and tiered models expose their per-tier input budgets (`token_prices.<tier>.context_max`) as the selectable default/long windows (e.g. `gpt-5.5` 272k/922k, Claude/Gemini 200k/936k) replacing the previous input+output totals while preserving custom provider entries and explicit `models.json` overrides and relying on GitHub's API-version header and server-side tier selection rather than payload fields or model-id variants ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
15
+ - Changed the GitHub Copilot **long-context tier to advertise the model's full context window** (`max_context_window_tokens`, for example `github-copilot/gpt-5.5` `1.05m`, and `github-copilot/claude-opus-4.8`/`github-copilot/gemini-3.1-pro-preview` `1m`) instead of GitHub's prompt-token cap, so Copilot models report and display the same window as the native `openai/*` and `anthropic/*` providers (the chat footer denominator now shows the full window). GitHub's lower server-side input cap (`max_prompt_tokens`, e.g. `922k`/`936k`, which equals `max_context_window_tokens - max_output_tokens`) is now parsed and carried as an internal effective input budget (`Model.maxInputTokens`, exposed via the new `getEffectiveInputBudget()` helper): auto-compaction thresholds and the Copilot overflow-recovery guard run against that budget while the picker/footer show the full window. As a result, a prompt that reaches the real prompt cap is now compacted-and-retried automatically (previously the long window equalled the cap), and the friendly “enable long-context/usage-based billing / server-cap” hint fires only when GitHub rejects a prompt *below* the cap (a genuine entitlement/tier drop) rather than at the cap. Sparse catalog payloads without `max_context_window_tokens` still fall back to the long-context prompt threshold, and the on-disk Copilot catalog cache schema version was bumped so existing caches refetch the new windows ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
16
+ - Changed built-in GitHub Copilot context windows to be measured in **input (prompt) tokens** (matching every other provider) and derived from GitHub's live CAPI model catalog (`GET /models`, cached 30 minutes, gated on the Copilot provider) instead of a hardcoded long-context model list, so newly added/removed Copilot models and retiered windows are reflected automatically without shipping a stale snapshot. Each model's window now resolves to `max_prompt_tokens || max_context_window_tokens || 128_000`, and tiered models expose a selectable default window (`token_prices.default.context_max`) plus a long window set to the model's full `max_context_window_tokens` (e.g. `gpt-5.5` 272k/1.05m, Claude/Gemini 200k/1m), with `max_prompt_tokens` retained as the internal effective compaction/overflow budget — while preserving custom provider entries and explicit `models.json` overrides and relying on GitHub's API-version header and server-side tier selection rather than payload fields or model-id variants ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
14
17
  - Bumped the bundled upstream pi runtime libraries `@earendil-works/pi-agent-core`, `@earendil-works/pi-ai`, and `@earendil-works/pi-tui` from `^0.79.4` to `^0.79.6` so Atomic's installed pi runtime packages pick up upstream v0.79.5/v0.79.6 provider, model, thinking-payload, and shared TUI compatibility fixes; no Atomic coding-agent source changes were made for upstream coding-agent-only marked export or fetch-override behavior in this dependency sync ([#1413](https://github.com/bastani-inc/atomic/issues/1413)).
18
+ - Synced Atomic's coding-agent fork with upstream pi v0.79.7, including the new self-only default for bare `atomic update` (`atomic update --all` restores the previous all-packages behavior), automatic light/dark theme settings, configured project config directory labels, extension example updates, model-search parity, tree navigator horizontal panning, and the latest user-facing docs.
19
+ - Bumped the bundled upstream pi runtime libraries `@earendil-works/pi-agent-core`, `@earendil-works/pi-ai`, and `@earendil-works/pi-tui` from `^0.79.6` to `^0.79.7` so Atomic inherits upstream v0.79.7 TUI color-scheme, Warp image, generated model catalog, and agent-core fixes.
20
+ - Reserved `/` in theme names for automatic light/dark theme settings.
21
+ - Replaced the bundled `browser` skill / `browse` CLI with the `playwright-cli` skill and `playwright-cli` command across `@bastani/atomic`, and bundled the new `effective-liteparse` document-extraction skill. The builtin `ralph`, `goal`, and `open-claude-design` workflows and the `debugger`/`codebase-online-researcher` subagents now drive browsers via `playwright-cli`; `open-claude-design`'s deterministic setup step ensures `playwright-cli` is installed (`npm install -g @playwright/cli@latest`) and renames its `browse_cli_status` output to `playwright_cli_status`; and `ralph` now records a `playwright-cli` QA end-to-end proof video (`qa_video_path`) for UI-applicable/full-stack changes, references it in the implementation notes, and attaches or links it to the final pull request when `create_pr=true`. Updated the user-facing docs (workflows, SDK bash-policy examples, quickstart skills, README) to match.
22
+
15
23
 
16
24
  ### Fixed
17
25
 
26
+ - Fixed RPC unknown-command errors to include the request id so RPC clients do not hang waiting for a response.
27
+ - Fixed `/model` autocomplete and model-selection searches to match provider/model queries regardless of whether the provider or model token is typed first.
28
+ - Fixed the tree navigator to horizontally pan deep entries so the selected item remains readable.
29
+ - Fixed long-context selection for GitHub Copilot's rounded 1M model names: requesting `1m` now selects the advertised full context window when the catalog exposes it, and otherwise resolves to the largest advertised long-context window at or below the request (for example `936k` for sparse catalog payloads) instead of falling back to the short `200k` tier. Interactive/context-picker persistence now writes the effective selected budget to per-model `defaultContextWindows["provider/modelId"]` settings instead of the global `defaultContextWindow` fallback, so Copilot-specific prompt caps such as `936k`/`922k` do not leak into Anthropic, Cursor, or other providers on restart. Legacy/stale global `defaultContextWindow` values from earlier builds are now treated as optional fallbacks and ignored without warning when unsupported by the active model.
18
30
  - Fixed a GitHub Copilot context-window warning on restart: after selecting a long-context window (e.g. `claude-opus-4.8` → `936k`) and reopening Atomic, startup validated the persisted selection before the (async, auth-gated) Copilot catalog loaded, so the model still looked limited to its default window and Atomic warned “Context window 936k is not supported… Supported values: 200k” and reset the choice. The model registry now seeds the Copilot context-window catalog synchronously from its on-disk cache at construction (ignoring the refresh TTL, gated on a `github-copilot` credential), so a returning user's selection is recognized immediately while the live refresh still runs in the background ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
19
- - Fixed context-window startup, session-switch, settings, and RPC edge cases: unknown provider fallback models no longer inherit selectable context-window options from provider defaults, fatal startup diagnostics no longer persist `defaultContextWindow`, `AgentSession.setModel()` preserves an incoming target model's explicit selected context window, model-switch paths that change effective context windows now notify listeners via `context_window_changed`, the interactive context-window picker keys selection on raw token counts so colliding formatted labels never change which window is selected, RPC `set_model` returns the effective post-switch session model, and explicit startup `contextWindow` selections are journaled even when they equal the model scalar default ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
20
- - Fixed `AgentSession.setContextWindow()` so bare SDK/runtime calls update the active session, append `context_window_change`, and emit `context_window_changed` without persisting `defaultContextWindow`; callers must pass `{ persistDefault: true }` to update settings ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
31
+ - Fixed context-window startup, session-switch, settings, and RPC edge cases: unknown provider fallback models no longer inherit selectable context-window options from provider defaults, fatal startup diagnostics no longer persist context-window settings, `AgentSession.setModel()` preserves an incoming target model's explicit selected context window, model-switch paths that change effective context windows now notify listeners via `context_window_changed`, the interactive context-window picker keys selection on raw token counts so colliding formatted labels never change which window is selected, RPC `set_model` returns the effective post-switch session model, and explicit startup `contextWindow` selections are journaled even when they equal the model scalar default ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
32
+ - Fixed `AgentSession.setContextWindow()` so bare SDK/runtime calls update the active session, append `context_window_change`, and emit `context_window_changed` without persisting settings; callers must pass `{ persistDefault: true }` to update the active model's `defaultContextWindows["provider/modelId"]` setting ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
21
33
  - Fixed `packages/coding-agent` source-CLI subprocess tests (`session-id-readonly`, `startup-session-name`, `stdout-cleanliness`) crashing with `ERR_MODULE_NOT_FOUND` (for example `src/core/tools/oversized-tool-result.js`) when the Vitest worker pool runs under Node. They now launch the TypeScript source CLI with Bun explicitly via a `bunExecutable()` helper (matching `context-window-cli`/`rpc-context-window`) instead of assuming `process.execPath` is Bun, so the package test suite is portable across environments. The repo-wide `.js`->`.ts` source-import convention and shipped `dist/` are unchanged ([#1419](https://github.com/bastani-inc/atomic/issues/1419)).
22
34
 
23
35
  ## [0.8.30] - 2026-06-17
package/README.md CHANGED
@@ -385,15 +385,16 @@ atomic install ssh://git@github.com/user/repo@v1 # tag or commit
385
385
  atomic remove npm:@foo/atomic-tools
386
386
  atomic uninstall npm:@foo/atomic-tools # alias for remove
387
387
  atomic list
388
- atomic update # update Atomic and packages (skips pinned packages)
388
+ atomic update # update Atomic only
389
+ atomic update --all # update Atomic and packages
389
390
  atomic update --extensions # update packages only
390
391
  atomic update --self # update Atomic only
391
392
  atomic update --self --force # reinstall Atomic even if current
392
- atomic update npm:@foo/atomic-tools # update one package
393
+ atomic update npm:@foo/atomic-tools # update one package
393
394
  atomic config # enable/disable extensions, skills, prompts, themes
394
395
  ```
395
396
 
396
- Packages install to `~/.atomic/agent/git/` (git) or global npm. Use `-l` for project-local installs (`.atomic/git/`, `.atomic/npm/`; legacy `.pi/git/` and `.pi/npm/` are compatibility fallbacks). Git packages install dependencies with `npm install --omit=dev` by default, so runtime deps must be listed under `dependencies`; when `npmCommand` is configured, git packages use plain `install` for compatibility with wrappers. If you use a Node version manager and want package installs to reuse a stable npm context, set `npmCommand` in `settings.json`, for example `["mise", "exec", "node@20", "--", "npm"]`.
397
+ Packages install to `~/.atomic/agent/git/` (git) or global npm. Use `-l` for project-local installs (`.atomic/git/`, `.atomic/npm/`; legacy `.pi/git/` and `.pi/npm/` are compatibility fallbacks). Git `@ref` values are pinned tags or commits; pinned packages are skipped by `atomic update --extensions` and `atomic update --all`, so use `atomic install git:host/user/repo@new-ref` to move an existing package to a new ref. Git packages install dependencies with `npm install --omit=dev` by default, so runtime deps must be listed under `dependencies`; when `npmCommand` is configured, git packages use plain `install` for compatibility with wrappers. If you use a Node version manager and want package installs to reuse a stable npm context, set `npmCommand` in `settings.json`, for example `["mise", "exec", "node@20", "--", "npm"]`.
397
398
 
398
399
  Create a package by adding an app-name manifest key to `package.json` (`atomic` for this package). The legacy `pi` key is still accepted as a backwards-compatible shim:
399
400
 
@@ -480,16 +481,17 @@ atomic [options] [@files...] [messages...]
480
481
  ### Package Commands
481
482
 
482
483
  ```bash
483
- atomic install <source> [-l] # Install package, -l for project-local
484
- atomic remove <source> [-l] # Remove package
485
- atomic uninstall <source> [-l] # Alias for remove
486
- atomic update [source|self|atomic] # Update Atomic and packages (skips pinned packages)
484
+ atomic install <source> [-l] # Install package, -l for project-local
485
+ atomic remove <source> [-l] # Remove package
486
+ atomic uninstall <source> [-l] # Alias for remove
487
+ atomic update [source|self|atomic] # Update Atomic only, or one package source
488
+ atomic update --all # Update Atomic and packages
487
489
  atomic update --extensions # Update packages only
488
490
  atomic update --self # Update Atomic only
489
491
  atomic update --self --force # Reinstall Atomic even if current
490
- atomic update --extension <src> # Update one package
491
- atomic list # List installed packages
492
- atomic config # Enable/disable package resources
492
+ atomic update --extension <src> # Update one package
493
+ atomic list # List installed packages
494
+ atomic config # Enable/disable package resources
493
495
  ```
494
496
 
495
497
  ### Modes
@@ -4,7 +4,7 @@
4
4
 
5
5
  ### Changed
6
6
 
7
- - Published a synchronized Atomic 0.8.31-alpha.1 prerelease; no functional Cursor provider changes were made after 0.8.30.
7
+ - Published a synchronized Atomic 0.8.31-alpha.3 prerelease; no functional Cursor provider changes were made after 0.8.30.
8
8
 
9
9
  ## [0.8.30] - 2026-06-17
10
10
 
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bastani/cursor",
3
- "version": "0.8.31-alpha.1",
3
+ "version": "0.8.31-alpha.3",
4
4
  "private": true,
5
5
  "description": "Experimental first-party Atomic extension for Cursor OAuth, model discovery, and streaming provider registration.",
6
6
  "contributors": [
@@ -40,7 +40,7 @@
40
40
  }
41
41
  },
42
42
  "dependencies": {
43
- "@bastani/atomic-natives": "0.8.31-alpha.1",
43
+ "@bastani/atomic-natives": "0.8.31-alpha.3",
44
44
  "@bufbuild/protobuf": "^2.0.0"
45
45
  }
46
46
  }
@@ -6,7 +6,7 @@ All notable changes to the `pi-intercom` extension will be documented in this fi
6
6
 
7
7
  ### Changed
8
8
 
9
- - Aligned the intercom extension peer dependency with upstream pi TUI `^0.79.6` so coordination UI surfaces consume the latest shared TUI compatibility fixes; no intercom extension code changes were made for this metadata sync ([#1413](https://github.com/bastani-inc/atomic/issues/1413)).
9
+ - Aligned the intercom extension peer dependency with upstream pi TUI `^0.79.7` so coordination UI surfaces consume the latest shared TUI color-scheme, Warp image capability, and compatibility fixes; no intercom extension code changes were made for this metadata sync ([#1413](https://github.com/bastani-inc/atomic/issues/1413)).
10
10
 
11
11
  ## [0.8.30] - 2026-06-17
12
12
 
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bastani/intercom",
3
- "version": "0.8.31-alpha.1",
3
+ "version": "0.8.31-alpha.3",
4
4
  "private": true,
5
5
  "description": "Atomic extension providing a private coordination channel between parent and child agent sessions. Fork of: https://github.com/nicobailon/pi-intercom",
6
6
  "contributors": [
@@ -39,7 +39,7 @@
39
39
  },
40
40
  "peerDependencies": {
41
41
  "@bastani/atomic": "*",
42
- "@earendil-works/pi-tui": "^0.79.6"
42
+ "@earendil-works/pi-tui": "^0.79.7"
43
43
  },
44
44
  "peerDependenciesMeta": {
45
45
  "@bastani/atomic": {
@@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
9
9
 
10
10
  ### Changed
11
11
 
12
- - Aligned the MCP extension peer dependencies with upstream pi AI/TUI `^0.79.6` so MCP-backed sessions can use the host's latest provider, model, thinking-payload, and shared TUI compatibility fixes; no MCP extension code changes were made for this metadata sync ([#1413](https://github.com/bastani-inc/atomic/issues/1413)).
12
+ - Aligned the MCP extension peer dependencies with upstream pi AI/TUI `^0.79.7` so MCP-backed sessions can use the host's latest provider catalog, model-search, theme/color-scheme, Warp image capability, and shared TUI compatibility fixes; no MCP extension code changes were made for this metadata sync ([#1413](https://github.com/bastani-inc/atomic/issues/1413)).
13
13
 
14
14
  ## [0.8.30] - 2026-06-17
15
15
 
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bastani/mcp",
3
- "version": "0.8.31-alpha.1",
3
+ "version": "0.8.31-alpha.3",
4
4
  "private": true,
5
5
  "description": "Atomic extension that adapts MCP (Model Context Protocol) servers into the coding agent. Fork of: https://github.com/nicobailon/pi-mcp-adapter",
6
6
  "contributors": [
@@ -32,8 +32,8 @@
32
32
  },
33
33
  "peerDependencies": {
34
34
  "@bastani/atomic": "*",
35
- "@earendil-works/pi-ai": "^0.79.6",
36
- "@earendil-works/pi-tui": "^0.79.6",
35
+ "@earendil-works/pi-ai": "^0.79.7",
36
+ "@earendil-works/pi-tui": "^0.79.7",
37
37
  "zod": "^3.25.0 || ^4.0.0"
38
38
  },
39
39
  "peerDependenciesMeta": {
@@ -2,9 +2,18 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ### Added
6
+
7
+ - Added the `playwright-cli` builtin skill (browser automation, end-to-end UI checks, screenshots, reviewable video recording, and Playwright test workflows) and the `effective-liteparse` builtin skill (fast, local, model-free text/table/value extraction from PDF, DOCX, PPTX, XLSX, and image files via the `lit` CLI).
8
+
5
9
  ### Changed
6
10
 
7
- - Aligned the subagents extension peer dependencies with upstream pi `^0.79.6` runtime packages (`@earendil-works/pi-agent-core`, `@earendil-works/pi-ai`, and `@earendil-works/pi-tui`) so child sessions can use the host's latest provider, model, thinking-payload, and shared TUI compatibility fixes; no subagents extension code changes were made for this metadata sync ([#1413](https://github.com/bastani-inc/atomic/issues/1413)).
11
+ - Changed the `debugger` and `codebase-online-researcher` subagents to load the `playwright-cli` skill and drive the `playwright-cli` command for JS-heavy, auth-gated, or interactive web work instead of the removed `browser` skill / `browse` CLI.
12
+ - Aligned the subagents extension peer dependencies with upstream pi `^0.79.7` runtime packages (`@earendil-works/pi-agent-core`, `@earendil-works/pi-ai`, and `@earendil-works/pi-tui`) so child sessions can use the host's latest provider catalog, RPC id handling, model-search, theme/color-scheme, Warp image capability, and shared TUI compatibility fixes; no subagents extension code changes were made for this metadata sync ([#1413](https://github.com/bastani-inc/atomic/issues/1413)).
13
+
14
+ ### Removed
15
+
16
+ - Removed the bundled `browser` skill and all references to its `browse` CLI in favor of the `playwright-cli` skill and `playwright-cli` command.
8
17
 
9
18
  ## [0.8.30] - 2026-06-17
10
19
 
@@ -4,7 +4,7 @@ description: Online research for up-to-date documentation and library-source kno
4
4
  tools: read, grep, find, ls, bash, web_search, fetch_content, get_search_content
5
5
  model: openai/gpt-5.5:low
6
6
  fallbackModels: openai-codex/gpt-5.5:low, github-copilot/gpt-5.5:low, anthropic/claude-opus-4-8:low, github-copilot/claude-opus-4.7:low
7
- skills: browser
7
+ skills: playwright-cli
8
8
  ---
9
9
 
10
10
  You are an expert research specialist focused on finding accurate, relevant information from authoritative sources — including open-source library internals with GitHub permalinks. You have three web tools available:
@@ -13,11 +13,11 @@ You are an expert research specialist focused on finding accurate, relevant info
13
13
  - `fetch_content` — fetch a specific URL and return clean reader-mode text/markdown (HTML pages, GitHub issues/PRs, Stack Overflow, npm, arXiv, Reddit, Wikipedia, JSON endpoints, PDFs, RSS/Atom, YouTube). `fetch_content` on a GitHub repo URL also clones the repo locally under `/tmp/atomic-github-repos/<owner>/<repo>` and returns the file tree. Prefer this over a raw HTTP fetch.
14
14
  - `get_search_content` — fetch the underlying content for the most promising results of a previous `web_search` in one call.
15
15
 
16
- For JS-heavy or auth-gated pages, load the `browser` skill and invoke its `browse` CLI through `bash`.
16
+ For JS-heavy or auth-gated pages, load the `playwright-cli` skill and drive its `playwright-cli` command through `bash`.
17
17
 
18
18
  <EXTREMELY_IMPORTANT>
19
19
  - PREFER `fetch_content` for static pages; it's faster and cheaper than spinning up a real browser.
20
- - Reach for the `browser` skill's `browse` CLI via `bash` ONLY when a real DOM/JS is required.
20
+ - Reach for the `playwright-cli` skill's `playwright-cli` command via `bash` ONLY when a real DOM/JS is required.
21
21
  - ALWAYS check `research/web/` for a recent cached copy before fetching anything new.
22
22
  - EVERY code-related claim about an open-source library needs a GitHub **permalink with a full commit SHA** — branch links break when code changes.
23
23
  </EXTREMELY_IMPORTANT>
@@ -39,7 +39,7 @@ When fetching any external page, apply these techniques in order. They produce p
39
39
  1. **`fetch_content <url>` first.** Returns clean reader-mode text/markdown for nearly every well-formed page (and handles PDFs and JSON). Try it before anything else.
40
40
  2. **Check `/llms.txt`.** Many modern docs sites publish an AI-friendly index at `/llms.txt` (spec: [llmstxt.org](https://llmstxt.org/llms.txt)). `fetch_content https://<site>/llms.txt` often links directly to the most relevant pages in plain text, saving a round-trip through the full site.
41
41
  3. **Request Markdown via `Accept: text/markdown`.** Sites behind Cloudflare with [Markdown for Agents](https://developers.cloudflare.com/fundamentals/reference/markdown-for-agents/) return pre-converted Markdown when you set the header. Use `bash` with `curl <url> -H "Accept: text/markdown"` (look for `content-type: text/markdown` and the `x-markdown-tokens` header).
42
- 4. **Fall back to a real browser.** Load the `browser` skill and drive its `browse` CLI through `bash` to render and interact with JS-heavy or auth-gated pages.
42
+ 4. **Fall back to a real browser.** Load the `playwright-cli` skill and drive its `playwright-cli` command through `bash` to render and interact with JS-heavy or auth-gated pages.
43
43
 
44
44
  ## Library Source Research with Permalinks
45
45
 
@@ -151,12 +151,12 @@ When you receive a research query:
151
151
  2. **Check the local cache first**. Look in `research/web/` for existing documents on the topic. If a recent (still-relevant) copy exists, cite it before re-fetching.
152
152
  3. **Execute strategic searches**.
153
153
  - Identify the authoritative source (e.g. the library's official docs site, its GitHub repo, its release notes).
154
- - Apply the Web Fetch Strategy: `fetch_content <url>` → `/llms.txt` → `Accept: text/markdown` → `browser` fallback.
154
+ - Apply the Web Fetch Strategy: `fetch_content <url>` → `/llms.txt` → `Accept: text/markdown` → `playwright-cli` fallback.
155
155
  - Use multiple query variations to capture different perspectives via `web_search`.
156
156
  - Use `get_search_content` to bulk-fetch the underlying content of the top results of a `web_search` in one shot.
157
157
  - For source repositories, prefer raw GitHub URLs (`https://raw.githubusercontent.com/<owner>/<repo>/<ref>/<path>`) over the HTML UI. For library internals, clone via `fetch_content` and use `grep`/`read` + permalinks.
158
158
  4. **Fetch and analyze content**.
159
- - Use `fetch_content <url>` (or the browser skill's `browse` CLI via `bash` when interactivity is required) to pull the full content of promising sources.
159
+ - Use `fetch_content <url>` (or the playwright-cli skill's `playwright-cli` command via `bash` when interactivity is required) to pull the full content of promising sources.
160
160
  - Prioritize official documentation, reputable technical blogs, and authoritative sources.
161
161
  - Extract specific quotes and sections relevant to the query.
162
162
  - Note publication dates to ensure currency of information.
@@ -275,7 +275,7 @@ For library-source answers, every code claim should look like the citation examp
275
275
  ## Search Efficiency
276
276
 
277
277
  - Check `research/web/` for an existing copy before fetching anything new.
278
- - Start by fetching the authoritative source (`fetch_content <url>` → `/llms.txt` → `Accept: text/markdown` → `browser`) rather than search-engine-style exploration.
278
+ - Start by fetching the authoritative source (`fetch_content <url>` → `/llms.txt` → `Accept: text/markdown` → `playwright-cli`) rather than search-engine-style exploration.
279
279
  - Use `fetch_content` (or `get_search_content` after a `web_search`) to pull full content from the most promising 3-5 web pages.
280
280
  - Reuse already-cloned repos under `/tmp/atomic-github-repos/` instead of re-cloning.
281
281
  - If initial results are insufficient, refine search terms and try again.
@@ -298,4 +298,4 @@ For library-source answers, every code claim should look like the citation examp
298
298
  | Page returns 403 / bot block | Gemini fallback triggers automatically; no action needed if Gemini is configured. |
299
299
  | `web_search` fails | Check provider config; try explicit `provider: "gemini"` if a Perplexity key is missing. |
300
300
 
301
- Remember: you are the user's expert guide to technical research. Lean on `fetch_content` first with the `/llms.txt` → `Accept: text/markdown` → `browser` fallback chain to efficiently pull authoritative content, clone open-source repos when implementation evidence is needed, store anything reusable under `research/web/`, and deliver comprehensive, up-to-date answers with exact citations and GitHub permalinks. Answer directly — skip preamble like "I'll help you with…" and go straight to findings.
301
+ Remember: you are the user's expert guide to technical research. Lean on `fetch_content` first with the `/llms.txt` → `Accept: text/markdown` → `playwright-cli` fallback chain to efficiently pull authoritative content, clone open-source repos when implementation evidence is needed, store anything reusable under `research/web/`, and deliver comprehensive, up-to-date answers with exact citations and GitHub permalinks. Answer directly — skip preamble like "I'll help you with…" and go straight to findings.
@@ -4,7 +4,7 @@ description: Debug errors, test failures, and unexpected behavior. Use PROACTIVE
4
4
  tools: read, grep, find, ls, bash, web_search, fetch_content, get_search_content
5
5
  model: openai/gpt-5.5:xhigh
6
6
  fallbackModels: openai-codex/gpt-5.5:xhigh, github-copilot/gpt-5.5:xhigh, anthropic/claude-opus-4-8:xhigh, github-copilot/claude-opus-4.7:xhigh
7
- skills: tdd, browser, tmux
7
+ skills: tdd, playwright-cli, tmux
8
8
  ---
9
9
 
10
10
  You are tasked with debugging and identifying errors, test failures, and unexpected behavior in the codebase. Your goal is to identify root causes and generate a report detailing the issues and proposed fixes, so another agent can implement the solutions you suggest.
@@ -13,13 +13,13 @@ You are tasked with debugging and identifying errors, test failures, and unexpec
13
13
 
14
14
  - `tdd` — load the TDD skill before creating or modifying any tests.
15
15
  - `tmux` — load the tmux skill for debugging terminal environments or TUI apps.
16
- - `browser` — load the browser skill for debugging web apps. Assume the `browse` CLI is installed; if it fails, follow the skill setup (`which browse || npm install -g browse`) or use `npx browse`.
16
+ - `playwright-cli` — load the playwright-cli skill for debugging web apps. If the `playwright-cli` command is missing, install it per the skill (`npx --no-install playwright-cli --version || npm install -g @playwright/cli@latest`); install a browser with `npx playwright install chromium` if one is missing.
17
17
  - `fetch_content <url>` — the `pi-web-access` fetch tool returns reader-mode text/markdown for URLs (HTML, JSON, PDFs, GitHub issues/PRs, npm, arXiv, RSS, Reddit, Stack Overflow, etc.). Prefer it over a real browser when you only need page content.
18
18
  - `web_search` / `get_search_content` — issue web queries and bulk-fetch the top results for triage.
19
- - `browse` (via `bash` after loading the `browser` skill) — full Chromium when you need JS execution, auth, or interactive actions. Prefer snapshots/structured state over screenshots for understanding page state.
19
+ - `playwright-cli` (via `bash` after loading the playwright-cli skill) — full Chromium when you need JS execution, auth, or interactive actions. Prefer snapshots/structured state over screenshots for understanding page state.
20
20
 
21
21
  <EXTREMELY_IMPORTANT>
22
- - PREFER `fetch_content <url>` for static content. Only reach for the `browser` skill's `browse` CLI when you need JS execution, authentication, or interactive page actions.
22
+ - PREFER `fetch_content <url>` for static content. Only reach for the `playwright-cli` skill when you need JS execution, authentication, or interactive page actions.
23
23
  - ALWAYS `tdd` BEFORE creating or modifying any tests.
24
24
  - NEVER suppress a failing test to make it pass. Reproduce the failure first; only then fix the underlying defect.
25
25
  </EXTREMELY_IMPORTANT>
@@ -45,7 +45,7 @@ When you need to consult docs, forums, or issue trackers, apply these techniques
45
45
  1. **`fetch_content <url>` first.** The fetch tool returns clean reader-mode text/markdown for HTML, GitHub issues/PRs, Stack Overflow, npm, arXiv, RSS, Wikipedia, Reddit, JSON endpoints, and PDFs — no browser needed.
46
46
  2. **Check `/llms.txt`.** Many modern docs sites publish an AI-friendly index at `/llms.txt` (spec: [llmstxt.org](https://llmstxt.org/llms.txt)). Try `fetch_content https://<site>/llms.txt` before anything else; it often links directly to the most relevant pages in plain text.
47
47
  3. **`Accept: text/markdown` header.** Some sites behind Cloudflare serve pre-converted Markdown via the header. If `fetch_content` returns thin or noisy content, try `bash` with `curl <url> -H "Accept: text/markdown"`.
48
- 4. **Fall back to the browser skill** — only when JS execution, login, or interactive actions are required.
48
+ 4. **Fall back to the playwright-cli skill** — only when JS execution, login, or interactive actions are required.
49
49
 
50
50
  ## Workflow
51
51
 
@@ -77,7 +77,7 @@ Debugging process:
77
77
  - Form and test hypotheses
78
78
  - Add strategic debug logging or drive the project's own debugger (`bun --inspect`, `node --inspect-brk`, `python -m pdb`, etc.) through `bash` instead of `print` spam
79
79
  - Inspect variable state by capturing it through the project's debugger session in `bash` or by writing a short repro script
80
- - Use the web research order above (`fetch_content <url>` → `/llms.txt` → `Accept: text/markdown` → browser) to look up external library docs, error messages, Stack Overflow threads, and GitHub issues
80
+ - Use the web research order above (`fetch_content <url>` → `/llms.txt` → `Accept: text/markdown` → playwright-cli) to look up external library docs, error messages, Stack Overflow threads, and GitHub issues
81
81
 
82
82
  For each issue, provide:
83
83
 
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@bastani/subagents",
3
- "version": "0.8.31-alpha.1",
3
+ "version": "0.8.31-alpha.3",
4
4
  "private": true,
5
5
  "description": "Atomic extension for delegating tasks to subagents with chains, parallel execution, and TUI clarification. Fork of: https://github.com/nicobailon/pi-subagents",
6
6
  "contributors": [
@@ -38,9 +38,9 @@
38
38
  },
39
39
  "peerDependencies": {
40
40
  "@bastani/atomic": "*",
41
- "@earendil-works/pi-agent-core": "^0.79.6",
42
- "@earendil-works/pi-ai": "^0.79.6",
43
- "@earendil-works/pi-tui": "^0.79.6"
41
+ "@earendil-works/pi-agent-core": "^0.79.7",
42
+ "@earendil-works/pi-ai": "^0.79.7",
43
+ "@earendil-works/pi-tui": "^0.79.7"
44
44
  },
45
45
  "peerDependenciesMeta": {
46
46
  "@bastani/atomic": {
@@ -0,0 +1,118 @@
1
+ ---
2
+ name: effective-liteparse
3
+ description: Use this skill whenever a task involves a document file (PDF, DOCX, PPTX, XLSX, or image) and you need to read it or pull text, tables, or specific values out of it — to answer a question about its contents, look up a figure, or extract data. Provides fast, local, model-free extraction via the `lit` CLI with disciplined, low-cost search patterns.
4
+ compatibility: Requires Node 18+ and `@llamaindex/liteparse` (`npm i -g @llamaindex/liteparse`, verify `lit --version`). LibreOffice for Office files; ImageMagick for images. The bundled search.py helper needs `uv`.
5
+ license: MIT
6
+ metadata:
7
+ author: LlamaIndex
8
+ version: "1.0.0"
9
+ ---
10
+
11
+ # Effective LiteParse
12
+
13
+ Extract text from documents locally with the `lit` CLI — a fast, model-free parser. This skill is
14
+ about using it **cheaply**: each `lit parse` re-runs full extraction, and every line you dump into
15
+ the conversation is paid for on every subsequent turn. The patterns below come from analyzing real
16
+ agent traces where the same PDF was parsed up to **9 times** and single image reads cost
17
+ **140k+ characters** of context. Don't repeat those mistakes.
18
+
19
+ ## The golden rule: parse ONCE to a file, then search the file
20
+
21
+ `lit parse` re-extracts the whole document every time you call it. Re-parsing per search is the #1
22
+ waste seen in traces. Parse a document exactly once, to a temp file, then run all your searches
23
+ against that file:
24
+
25
+ ```bash
26
+ # ONE TIME, per document. --no-ocr for born-digital PDFs (almost all reports) — much faster.
27
+ lit parse "/abs/path/doc.pdf" --format text --no-ocr -o /tmp/doc.txt && wc -l /tmp/doc.txt
28
+ ```
29
+
30
+ Then search the file with cheap shell tools — **never** re-run `lit parse` to search again.
31
+
32
+ ## Search discipline — minimize ROUND-TRIPS, then keep results small
33
+
34
+ Every Bash call is a full model round-trip (latency + re-read of context). The biggest waste after
35
+ parsing is a **serial** loop: grep → look → grep again → `sed` to read the window → grep again. In
36
+ traces this doubled the turn count versus just reading the doc. Two rules fix it:
37
+
38
+ **1. Get context in the SAME command — don't grep then `sed`.** Use `grep -C` so the surrounding
39
+ lines come back with the hit. This removes the follow-up `sed` turn for the common case:
40
+
41
+ ```bash
42
+ grep -n -i -C4 "total assets" /tmp/doc.txt | head -40 # location AND its window, one turn
43
+ ```
44
+
45
+ Only fall back to `sed -n 'A,Bp'` when you already know the exact line and need a *wider* window
46
+ than `-C` gave you.
47
+
48
+ **2. Batch independent lookups into ONE command.** When a question needs several distinct facts
49
+ (e.g. emissions *and* revenue), don't spend one turn per term. Probe them together with labels:
50
+
51
+ ```bash
52
+ for q in "carbon intensity" "scope 1" "total revenue"; do \
53
+ echo "=== $q ==="; grep -n -i -C3 "$q" /tmp/doc.txt | head -25; done
54
+ ```
55
+
56
+ Then keep results small:
57
+
58
+ - **Always bound output** with `head` and use `-n` for line numbers.
59
+ - **Don't fan out blindly.** Aim to resolve a question in ≤3 search commands. If two targeted greps
60
+ don't pin it down, switch to `search.py` (below) — don't keep firing keyword variations one per turn.
61
+ - Prefer **Bash `grep`/`sed` on the saved file over the Read and Grep tools** — fewer round-trips and
62
+ you control output size precisely.
63
+
64
+ ## Ranked search when keywords are uncertain (bundled helper)
65
+
66
+ When two targeted greps haven't pinned the answer, **stop grepping** — don't iterate keyword variants
67
+ one turn at a time. Run the bundled BM25 ranker ONCE to surface the most relevant line-windows in a
68
+ single command:
69
+
70
+ ```bash
71
+ ./.claude/skills/effective-liteparse/scripts/search.py /tmp/doc.txt -q "materiality assessment priority topics" -k 8 -e 5
72
+ ```
73
+
74
+ `-k` = number of matches, `-e` = lines of context around each (so the window comes back inline — no
75
+ follow-up `sed` turn). It returns ranked windows with line numbers. Use a rich natural-language query
76
+ (several synonyms in one string), not a single keyword. This replaces a long chain of speculative greps.
77
+
78
+ ## Born-digital vs scanned
79
+
80
+ - **Born-digital PDF** (real text layer — nearly all corporate/finance/ESG reports): always pass
81
+ `--no-ocr`. It's much faster and the text is identical. Leaving OCR on wastes time.
82
+ - **Scanned PDF / image**: drop `--no-ocr`. If the value is missing or digits look wrong, read the
83
+ page visually (see below) rather than trusting OCR.
84
+
85
+ ## Reading a page visually — last resort, ONE screenshot, modest DPI
86
+
87
+ Screenshots are the most expensive thing you can put in context: a single high-DPI page PNG ran
88
+ **~140k characters** in one trace, and agents often rendered the same page twice (default + hi-res).
89
+
90
+ Only screenshot when text/tables genuinely can't answer the question (dense multi-column tables,
91
+ figures, charts). Then:
92
+
93
+ - Render **one** page at a time with `--target-pages "N"` (note: it's `--target-pages`, NOT `--pages`).
94
+ - Use **modest DPI (~150–200)**. Do not start at 300+; do not re-render the same page at higher DPI
95
+ unless the text is actually illegible.
96
+
97
+ ```bash
98
+ lit screenshot "/abs/path/doc.pdf" --target-pages "13" --dpi 150 -o /tmp/shots/ # then Read the PNG
99
+ ```
100
+
101
+ ## Many questions about the same document
102
+
103
+ Parsing once to a file already covers this: keep the `/tmp/doc.txt` and reuse it across every
104
+ question instead of re-parsing.
105
+
106
+ ## Don't waste turns on preamble
107
+
108
+ Skip `lit --version`, `ls -la`, and `lit … --help` unless something actually failed. Go straight to
109
+ the parse. Core flags you need:
110
+
111
+ `--format text|json` · `--no-ocr` · `--target-pages "1-5,10"` · `--dpi <n>` (default 150) ·
112
+ `--ocr-language <iso>`. Use `--format json` only when you need bounding boxes/layout — it's much
113
+ larger; still search it, never load it whole.
114
+
115
+ ## Setup
116
+
117
+ PDFs work out of the box. If `lit` is missing: `npm i -g @llamaindex/liteparse`. Office docs need
118
+ LibreOffice; images need ImageMagick (both auto-converted to PDF).
@@ -0,0 +1,128 @@
1
+ #!/usr/bin/env -S uv run --script
2
+ # /// script
3
+ # requires-python = ">=3.13"
4
+ # dependencies = [
5
+ # "bm25s>=0.3.9,<1",
6
+ # "aiofiles>=25.1.0,<26",
7
+ # ]
8
+ # ///
9
+ import argparse
10
+ import asyncio
11
+ from typing import TypedDict, cast
12
+
13
+ import aiofiles
14
+ import bm25s
15
+
16
+
17
class LineRecord(TypedDict):
    """One line of the searched text together with its position.

    Records are produced by splitting the text into lines: ``index`` is the
    0-based position of the line within that split and ``content`` is the
    text of the line itself.
    """

    index: int  # 0-based position of the line within the split text
    content: str  # text of the line, without its line terminator
20
+
21
+
22
def _chunk(content: str) -> list[LineRecord]:
    """Split *content* into one record per line.

    Lines are obtained with ``str.splitlines()``; each record carries the
    0-based position of the line and the line text.
    """
    records: list[LineRecord] = []
    for position, text in enumerate(content.splitlines()):
        records.append({"index": position, "content": text})
    return records
24
+
25
+
26
def _expand(corpus: list[LineRecord], match: LineRecord, n: int) -> str:
    """Render *match* together with up to *n* lines on either side of it.

    The window is clipped to the bounds of *corpus*. The returned text starts
    with a ``Lines <start> - <end>`` header (0-based, inclusive corpus
    indices), followed by the window's lines joined with newlines.
    """
    center = match["index"]
    first = max(center - n, 0)
    last = min(center + n, len(corpus) - 1)
    body = "\n".join(record["content"] for record in corpus[first : last + 1])
    return f"Lines {first} - {last}\n\n\n" + body
33
+
34
+
35
def _retrieve(
    corpus: list[LineRecord], query: str, top_k: int | None, expand: int = 0
) -> list[tuple[str, float]]:
    """Rank the lines of *corpus* against *query* with BM25.

    Args:
        corpus: Line records to search; every line is indexed as one document.
        query: Free-text query, tokenized with ``bm25s.tokenize``.
        top_k: Maximum number of hits to return. ``None`` (or 0) means 10.
            The value is capped at the number of lines in *corpus*.
        expand: Number of neighbouring lines to include on each side of a
            hit. Values <= 0 return the matching line on its own.

    Returns:
        ``(text, score)`` pairs in the order returned by bm25s. An empty
        corpus yields an empty list.
    """
    if not corpus:
        # Nothing to index, so there is nothing to rank.
        return []

    # bm25s rejects a k larger than the number of indexed documents, which
    # would otherwise make short inputs (fewer lines than k) fail outright.
    k = min(top_k or 10, len(corpus))

    corpus_tokens = bm25s.tokenize([c["content"] for c in corpus])
    # Passing the records as `corpus` makes retrieve() hand back the records
    # themselves rather than bare indices.
    retriever = bm25s.BM25(corpus=corpus)
    retriever.index(corpus_tokens)
    query_tokens = bm25s.tokenize(query)
    docs, scores = retriever.retrieve(query_tokens, k=k)

    results: list[tuple[str, float]] = []
    for doc, score in zip(docs[0].tolist(), scores[0].tolist()):
        # Without expansion, return the line text itself so the first element
        # of every pair really is a string, as the return type promises.
        text = _expand(corpus, doc, expand) if expand > 0 else doc["content"]
        results.append((text, score))
    return results
49
+
50
+
51
def _chunk_and_retrieve(
    content: str, query: str, top_k: int | None, expand_n: int
) -> list[tuple[str, float]]:
    """Split *content* into line records and rank them against *query*.

    Convenience wrapper that chains ``_chunk`` and ``_retrieve`` so the whole
    pipeline can be submitted to an executor as a single callable.
    """
    return _retrieve(_chunk(content), query, top_k, expand_n)
56
+
57
+
58
async def process_chunk(
    content: bytes, query: str, top_k: int | None, expand_n: int
) -> list[tuple[str, float]]:
    """Decode *content* and rank its lines against *query* in a worker thread.

    Args:
        content: Raw UTF-8 bytes of the text to search.
        query: Free-text query.
        top_k: Maximum number of hits to return (``None`` uses the default).
        expand_n: Lines of context to include around each hit.

    Returns:
        ``(text, score)`` pairs produced by ``_chunk_and_retrieve``.
    """
    # errors="replace": the bytes may come from a fixed-size read that cut a
    # multi-byte character in half; a stray U+FFFD is preferable to aborting
    # the whole search with UnicodeDecodeError.
    text = content.decode("utf-8", errors="replace")
    # Tokenizing and indexing are synchronous; run them off the event loop.
    return await asyncio.to_thread(
        _chunk_and_retrieve, text, query, top_k, expand_n
    )
70
+
71
+
72
async def read_and_process(
    file_path: str, query: str, top_k: int | None, expand_n: int | None
) -> list[str]:
    """Search the text file at *file_path* and return the best-matching windows.

    The file is read in full and ranked as a single corpus. Ranking fixed-size
    byte chunks independently would split lines (and multi-byte characters) at
    chunk boundaries, report line numbers relative to each chunk instead of
    the file, and compare BM25 scores computed from different per-chunk
    statistics.

    Args:
        file_path: Path of the text file to search.
        query: Free-text query.
        top_k: Maximum number of windows to return. ``None`` (or 0) means 10.
        expand_n: Lines of context around each hit. ``None`` (or 0) means 5.

    Returns:
        The text of the hits scoring at least 0.5, best first, at most
        ``top_k`` of them. Empty when the file is empty or blank.
    """
    async with aiofiles.open(file_path, "rb") as f:
        data = await f.read()

    if not data.strip():
        # Empty or whitespace-only file: there is nothing to rank.
        return []

    scored = await process_chunk(data, query, top_k, expand_n or 5)
    # Drop weak hits, then order the rest by descending score.
    relevant = [hit for hit in scored if hit[1] >= 0.5]
    relevant.sort(key=lambda hit: hit[1], reverse=True)
    limit = top_k or 10
    return [text for text, _score in relevant][:limit]
88
+
89
+
90
def main() -> None:
    """Parse the command line, run the search, and print the ranked matches.

    Matches are printed as ``Match #<i>`` (numbered from 0) followed by the
    match text, with a horizontal rule between consecutive matches. When the
    search returns nothing, a single "no matches" line is printed instead.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("file_path", help="Path to the text file to search")
    cli.add_argument(
        "-q", "--query", help="Keyword-based query to search for", required=True
    )
    cli.add_argument(
        "-k",
        "--top-k",
        help="Top K matches to retrieve. Defaults to 10.",
        required=False,
        type=int,
        default=None,
    )
    cli.add_argument(
        "-e",
        "--expand",
        help="Expand the matched content by N lines (before and after). Defaults to 5",
        required=False,
        type=int,
        default=None,
    )
    opts = cli.parse_args()

    matches = asyncio.run(
        read_and_process(opts.file_path, opts.query, opts.top_k, opts.expand)
    )
    if not matches:
        print("No relevant matches found")
        return

    divider = "\n" + "─" * 60 + "\n"
    last = len(matches) - 1
    for number, text in enumerate(matches):
        print(f"Match #{number}")
        # Trim blank lines at both ends of the window.
        print(text.strip("\n"))
        if number < last:
            print(divider)


if __name__ == "__main__":
    main()