@bastani/atomic 0.8.31-alpha.1 → 0.8.31-alpha.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -5
- package/README.md +12 -10
- package/dist/builtin/cursor/CHANGELOG.md +1 -1
- package/dist/builtin/cursor/package.json +2 -2
- package/dist/builtin/intercom/CHANGELOG.md +1 -1
- package/dist/builtin/intercom/package.json +2 -2
- package/dist/builtin/mcp/CHANGELOG.md +1 -1
- package/dist/builtin/mcp/package.json +3 -3
- package/dist/builtin/subagents/CHANGELOG.md +10 -1
- package/dist/builtin/subagents/agents/codebase-online-researcher.md +8 -8
- package/dist/builtin/subagents/agents/debugger.md +6 -6
- package/dist/builtin/subagents/package.json +4 -4
- package/dist/builtin/subagents/skills/effective-liteparse/SKILL.md +118 -0
- package/dist/builtin/subagents/skills/effective-liteparse/scripts/search.py +128 -0
- package/dist/builtin/subagents/skills/playwright-cli/SKILL.md +404 -0
- package/dist/builtin/subagents/skills/playwright-cli/references/element-attributes.md +23 -0
- package/dist/builtin/subagents/skills/playwright-cli/references/playwright-tests.md +39 -0
- package/dist/builtin/subagents/skills/playwright-cli/references/request-mocking.md +87 -0
- package/dist/builtin/subagents/skills/playwright-cli/references/running-code.md +241 -0
- package/dist/builtin/subagents/skills/playwright-cli/references/session-management.md +225 -0
- package/dist/builtin/subagents/skills/playwright-cli/references/spec-driven-testing.md +305 -0
- package/dist/builtin/subagents/skills/playwright-cli/references/storage-state.md +275 -0
- package/dist/builtin/subagents/skills/playwright-cli/references/test-generation.md +134 -0
- package/dist/builtin/subagents/skills/playwright-cli/references/tracing.md +139 -0
- package/dist/builtin/subagents/skills/playwright-cli/references/video-recording.md +143 -0
- package/dist/builtin/web-access/CHANGELOG.md +1 -1
- package/dist/builtin/web-access/package.json +2 -2
- package/dist/builtin/workflows/CHANGELOG.md +7 -1
- package/dist/builtin/workflows/README.md +4 -4
- package/dist/builtin/workflows/builtin/open-claude-design.ts +59 -56
- package/dist/builtin/workflows/builtin/ralph.ts +56 -3
- package/dist/builtin/workflows/builtin/shared-prompts.ts +1 -1
- package/dist/builtin/workflows/package.json +2 -2
- package/dist/builtin/workflows/skills/research-codebase/SKILL.md +1 -1
- package/dist/cli/args.d.ts.map +1 -1
- package/dist/cli/args.js +1 -1
- package/dist/cli/args.js.map +1 -1
- package/dist/core/agent-session.d.ts +1 -0
- package/dist/core/agent-session.d.ts.map +1 -1
- package/dist/core/agent-session.js +49 -21
- package/dist/core/agent-session.js.map +1 -1
- package/dist/core/context-window.d.ts +26 -1
- package/dist/core/context-window.d.ts.map +1 -1
- package/dist/core/context-window.js +30 -6
- package/dist/core/context-window.js.map +1 -1
- package/dist/core/copilot-model-catalog.d.ts +39 -21
- package/dist/core/copilot-model-catalog.d.ts.map +1 -1
- package/dist/core/copilot-model-catalog.js +44 -16
- package/dist/core/copilot-model-catalog.js.map +1 -1
- package/dist/core/model-registry.d.ts.map +1 -1
- package/dist/core/model-registry.js +6 -4
- package/dist/core/model-registry.js.map +1 -1
- package/dist/core/project-trust.d.ts.map +1 -1
- package/dist/core/project-trust.js +2 -1
- package/dist/core/project-trust.js.map +1 -1
- package/dist/core/sdk.d.ts.map +1 -1
- package/dist/core/sdk.js +18 -7
- package/dist/core/sdk.js.map +1 -1
- package/dist/core/settings-manager.d.ts +11 -2
- package/dist/core/settings-manager.d.ts.map +1 -1
- package/dist/core/settings-manager.js +62 -8
- package/dist/core/settings-manager.js.map +1 -1
- package/dist/core/system-prompt.d.ts.map +1 -1
- package/dist/core/system-prompt.js +1 -0
- package/dist/core/system-prompt.js.map +1 -1
- package/dist/core/tools/edit-diff.d.ts +1 -2
- package/dist/core/tools/edit-diff.d.ts.map +1 -1
- package/dist/core/tools/edit-diff.js +1 -2
- package/dist/core/tools/edit-diff.js.map +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -1
- package/dist/index.js.map +1 -1
- package/dist/modes/interactive/components/config-selector.d.ts.map +1 -1
- package/dist/modes/interactive/components/config-selector.js +5 -7
- package/dist/modes/interactive/components/config-selector.js.map +1 -1
- package/dist/modes/interactive/components/model-selector.d.ts.map +1 -1
- package/dist/modes/interactive/components/model-selector.js +2 -1
- package/dist/modes/interactive/components/model-selector.js.map +1 -1
- package/dist/modes/interactive/components/scoped-models-selector.d.ts.map +1 -1
- package/dist/modes/interactive/components/scoped-models-selector.js +4 -1
- package/dist/modes/interactive/components/scoped-models-selector.js.map +1 -1
- package/dist/modes/interactive/components/settings-selector.d.ts +2 -0
- package/dist/modes/interactive/components/settings-selector.d.ts.map +1 -1
- package/dist/modes/interactive/components/settings-selector.js +165 -15
- package/dist/modes/interactive/components/settings-selector.js.map +1 -1
- package/dist/modes/interactive/components/tree-selector.d.ts.map +1 -1
- package/dist/modes/interactive/components/tree-selector.js +44 -4
- package/dist/modes/interactive/components/tree-selector.js.map +1 -1
- package/dist/modes/interactive/interactive-mode.d.ts +1 -1
- package/dist/modes/interactive/interactive-mode.d.ts.map +1 -1
- package/dist/modes/interactive/interactive-mode.js +24 -54
- package/dist/modes/interactive/interactive-mode.js.map +1 -1
- package/dist/modes/interactive/model-search.d.ts +7 -0
- package/dist/modes/interactive/model-search.d.ts.map +1 -0
- package/dist/modes/interactive/model-search.js +6 -0
- package/dist/modes/interactive/model-search.js.map +1 -0
- package/dist/modes/interactive/theme/theme-controller.d.ts +30 -0
- package/dist/modes/interactive/theme/theme-controller.d.ts.map +1 -0
- package/dist/modes/interactive/theme/theme-controller.js +108 -0
- package/dist/modes/interactive/theme/theme-controller.js.map +1 -0
- package/dist/modes/interactive/theme/theme-schema.json +2 -1
- package/dist/modes/interactive/theme/theme.d.ts +5 -0
- package/dist/modes/interactive/theme/theme.d.ts.map +1 -1
- package/dist/modes/interactive/theme/theme.js +70 -29
- package/dist/modes/interactive/theme/theme.js.map +1 -1
- package/dist/modes/rpc/rpc-client.d.ts +1 -1
- package/dist/modes/rpc/rpc-client.d.ts.map +1 -1
- package/dist/modes/rpc/rpc-client.js +1 -1
- package/dist/modes/rpc/rpc-client.js.map +1 -1
- package/dist/modes/rpc/rpc-mode.d.ts.map +1 -1
- package/dist/modes/rpc/rpc-mode.js +1 -1
- package/dist/modes/rpc/rpc-mode.js.map +1 -1
- package/dist/package-manager-cli.d.ts.map +1 -1
- package/dist/package-manager-cli.js +39 -9
- package/dist/package-manager-cli.js.map +1 -1
- package/docs/extensions.md +21 -0
- package/docs/models.md +3 -3
- package/docs/packages.md +13 -9
- package/docs/providers.md +3 -3
- package/docs/quickstart.md +14 -0
- package/docs/rpc.md +3 -3
- package/docs/sdk.md +15 -11
- package/docs/session-format.md +1 -1
- package/docs/settings.md +8 -3
- package/docs/themes.md +3 -1
- package/docs/tui.md +1 -1
- package/docs/usage.md +12 -9
- package/docs/workflows.md +9 -7
- package/examples/extensions/custom-provider-anthropic/package-lock.json +2 -2
- package/examples/extensions/custom-provider-anthropic/package.json +1 -1
- package/examples/extensions/custom-provider-gitlab-duo/package.json +1 -1
- package/examples/extensions/gondolin/package-lock.json +2 -2
- package/examples/extensions/gondolin/package.json +1 -1
- package/examples/extensions/preset.ts +10 -4
- package/examples/extensions/provider-payload.ts +5 -5
- package/examples/extensions/sandbox/index.ts +2 -2
- package/examples/extensions/sandbox/package-lock.json +3 -3
- package/examples/extensions/sandbox/package.json +2 -2
- package/examples/extensions/subagent/agents.ts +2 -2
- package/examples/extensions/subagent/index.ts +4 -2
- package/examples/extensions/with-deps/package-lock.json +2 -2
- package/examples/extensions/with-deps/package.json +1 -1
- package/package.json +5 -5
- package/dist/builtin/subagents/skills/browser/EXAMPLES.md +0 -151
- package/dist/builtin/subagents/skills/browser/LICENSE.txt +0 -21
- package/dist/builtin/subagents/skills/browser/REFERENCE.md +0 -451
- package/dist/builtin/subagents/skills/browser/SKILL.md +0 -170
package/CHANGELOG.md
CHANGED
|
@@ -4,20 +4,32 @@
|
|
|
4
4
|
|
|
5
5
|
### Added
|
|
6
6
|
|
|
7
|
-
- Added configurable context-window support for models that declare `contextWindowOptions`, including explicit `--context-window` CLI/settings control, a GitHub Copilot CLI-style `/model`-flow picker (numbered `Default`/`Long context` tiers with token counts), session replay, SDK/runtime/RPC APIs, and docs while preserving each model's scalar default context window. For GitHub Copilot, context windows are measured in **input (prompt) tokens** (consistent with every other provider) and derived **dynamically from GitHub's live CAPI model catalog** (`GET /models`) instead of a hardcoded model list: Atomic resolves each model's input budget as `max_prompt_tokens || max_context_window_tokens || 128_000` and, for tiered models, exposes
|
|
7
|
+
- Added configurable context-window support for models that declare `contextWindowOptions`, including explicit `--context-window` CLI/settings control, a GitHub Copilot CLI-style `/model`-flow picker (numbered `Default`/`Long context` tiers with token counts), session replay, SDK/runtime/RPC APIs, and docs while preserving each model's scalar default context window. For GitHub Copilot, context windows are measured in **input (prompt) tokens** (consistent with every other provider) and derived **dynamically from GitHub's live CAPI model catalog** (`GET /models`) instead of a hardcoded model list: Atomic resolves each model's input budget as `max_prompt_tokens || max_context_window_tokens || 128_000` and, for tiered models, exposes a selectable default window (`token_prices.default.context_max`) plus a long window set to the model's full `max_context_window_tokens` (retaining `max_prompt_tokens` as the internal effective compaction/overflow budget) — gated on the user actually having the GitHub Copilot provider and cached on disk for 30 minutes (for example `github-copilot/gpt-5.5` exposes `272k` default / `1.05m` long, and the Claude/Gemini long-context models `200k` default / `1m` long). Atomic raises the local budget and sends `X-GitHub-Api-Version: 2026-06-01`, while GitHub applies the long-context billing tier server-side by prompt token count. Long-context Copilot requests consume more AI credits and require Copilot long-context/usage-based billing entitlement; offline, unauthenticated, or non-Copilot sessions leave the built-in window untouched and show no picker; custom providers and explicit model overrides can still expose their own selectable windows ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
|
|
8
8
|
- Exported context-window helper functions and types from the package root, including parser/formatter/normalizer/selection utilities and the `Model<Api>` augmentation for `contextWindowOptions`/`defaultContextWindow`, so SDK consumers can use the public API without importing internal source paths ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
|
|
9
|
-
- Added RPC mode runtime context-window commands so headless clients can read supported token budgets with `get_available_context_windows` and select the active runtime budget with `set_context_window` without persisting
|
|
9
|
+
- Added RPC mode runtime context-window commands so headless clients can read supported token budgets with `get_available_context_windows` and select the active runtime budget with `set_context_window` without persisting context-window settings ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
|
|
10
|
+
- Added upstream pi v0.79.7 automatic theme mode support so `/settings` can choose separate light and dark themes and follow terminal color-scheme changes.
|
|
11
|
+
- Exported the upstream `CONFIG_DIR_NAME` constant and edit diff helpers (`generateDiffString`, `generateUnifiedPatch`, and `EditDiffResult`) from the public SDK entrypoint so extensions can avoid hardcoded project config paths and reuse edit-style diff rendering.
|
|
10
12
|
|
|
11
13
|
### Changed
|
|
12
14
|
|
|
13
|
-
- Changed
|
|
15
|
+
- Changed the GitHub Copilot **long-context tier to advertise the model's full context window** (`max_context_window_tokens`, for example `github-copilot/gpt-5.5` `1.05m`, and `github-copilot/claude-opus-4.8`/`github-copilot/gemini-3.1-pro-preview` `1m`) instead of GitHub's prompt-token cap, so Copilot models report and display the same window as the native `openai/*` and `anthropic/*` providers (the chat footer denominator now shows the full window). GitHub's lower server-side input cap (`max_prompt_tokens`, e.g. `922k`/`936k`, which equals `max_context_window_tokens − max_output_tokens`) is now parsed and carried as an internal effective input budget (`Model.maxInputTokens`, exposed via the new `getEffectiveInputBudget()` helper): auto-compaction thresholds and the Copilot overflow-recovery guard run against that budget while the picker/footer show the full window. As a result, a prompt that reaches the real prompt cap is now compacted-and-retried automatically (previously the long window equalled the cap), and the friendly “enable long-context/usage-based billing / server-cap” hint fires only when GitHub rejects a prompt *below* the cap (a genuine entitlement/tier drop) rather than at the cap. Sparse catalog payloads without `max_context_window_tokens` still fall back to the long-context prompt threshold, and the on-disk Copilot catalog cache schema version was bumped so existing caches refetch the new windows ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
|
|
16
|
+
- Changed built-in GitHub Copilot context windows to be measured in **input (prompt) tokens** (matching every other provider) and derived from GitHub's live CAPI model catalog (`GET /models`, cached 30 minutes, gated on the Copilot provider) instead of a hardcoded long-context model list, so newly added/removed Copilot models and retiered windows are reflected automatically without shipping a stale snapshot. Each model's window now resolves to `max_prompt_tokens || max_context_window_tokens || 128_000`, and tiered models expose a selectable default window (`token_prices.default.context_max`) plus a long window set to the model's full `max_context_window_tokens` (e.g. `gpt-5.5` 272k/1.05m, Claude/Gemini 200k/1m), with `max_prompt_tokens` retained as the internal effective compaction/overflow budget — while preserving custom provider entries and explicit `models.json` overrides and relying on GitHub's API-version header and server-side tier selection rather than payload fields or model-id variants ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
|
|
14
17
|
- Bumped the bundled upstream pi runtime libraries `@earendil-works/pi-agent-core`, `@earendil-works/pi-ai`, and `@earendil-works/pi-tui` from `^0.79.4` to `^0.79.6` so Atomic's installed pi runtime packages pick up upstream v0.79.5/v0.79.6 provider, model, thinking-payload, and shared TUI compatibility fixes; no Atomic coding-agent source changes were made for upstream coding-agent-only marked export or fetch-override behavior in this dependency sync ([#1413](https://github.com/bastani-inc/atomic/issues/1413)).
|
|
18
|
+
- Synced Atomic's coding-agent fork with upstream pi v0.79.7, including the new self-only default for bare `atomic update` (`atomic update --all` restores the previous all-packages behavior), automatic light/dark theme settings, configured project config directory labels, extension example updates, model-search parity, tree navigator horizontal panning, and the latest user-facing docs.
|
|
19
|
+
- Bumped the bundled upstream pi runtime libraries `@earendil-works/pi-agent-core`, `@earendil-works/pi-ai`, and `@earendil-works/pi-tui` from `^0.79.6` to `^0.79.7` so Atomic inherits upstream v0.79.7 TUI color-scheme, Warp image, generated model catalog, and agent-core fixes.
|
|
20
|
+
- Reserved `/` in theme names for automatic light/dark theme settings.
|
|
21
|
+
- Replaced the bundled `browser` skill / `browse` CLI with the `playwright-cli` skill and `playwright-cli` command across `@bastani/atomic`, and bundled the new `effective-liteparse` document-extraction skill. The builtin `ralph`, `goal`, and `open-claude-design` workflows and the `debugger`/`codebase-online-researcher` subagents now drive browsers via `playwright-cli`; `open-claude-design`'s deterministic setup step ensures `playwright-cli` (`npm install -g @playwright/cli@latest`) and renames its `browse_cli_status` output to `playwright_cli_status`; and `ralph` now records a `playwright-cli` QA end-to-end proof video (`qa_video_path`) for UI-applicable/full-stack changes, references it in the implementation notes, and attaches or links it to the final pull request when `create_pr=true`. Updated the user-facing docs (workflows, SDK bash-policy examples, quickstart skills, README) to match.
|
|
22
|
+
|
|
15
23
|
|
|
16
24
|
### Fixed
|
|
17
25
|
|
|
26
|
+
- Fixed RPC unknown-command errors to include the request id so RPC clients do not hang waiting for a response.
|
|
27
|
+
- Fixed `/model` autocomplete and model-selection searches to match provider/model queries regardless of whether the provider or model token is typed first.
|
|
28
|
+
- Fixed the tree navigator to horizontally pan deep entries so the selected item remains readable.
|
|
29
|
+
- Fixed long-context selection for GitHub Copilot's rounded 1M model names: requesting `1m` now selects the advertised full context window when the catalog exposes it, and otherwise resolves to the largest advertised long-context window at or below the request (for example `936k` for sparse catalog payloads) instead of falling back to the short `200k` tier. Interactive/context-picker persistence now writes the effective selected budget to per-model `defaultContextWindows["provider/modelId"]` settings instead of the global `defaultContextWindow` fallback, so Copilot-specific prompt caps such as `936k`/`922k` do not leak into Anthropic, Cursor, or other providers on restart. Legacy/stale global `defaultContextWindow` values from earlier builds are now treated as optional fallbacks and ignored without warning when unsupported by the active model.
|
|
18
30
|
- Fixed a GitHub Copilot context-window warning on restart: after selecting a long-context window (e.g. `claude-opus-4.8` → `936k`) and reopening Atomic, startup validated the persisted selection before the (async, auth-gated) Copilot catalog loaded, so the model still looked limited to its default window and Atomic warned “Context window 936k is not supported… Supported values: 200k” and reset the choice. The model registry now seeds the Copilot context-window catalog synchronously from its on-disk cache at construction (ignoring the refresh TTL, gated on a `github-copilot` credential), so a returning user's selection is recognized immediately while the live refresh still runs in the background ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
|
|
19
|
-
- Fixed context-window startup, session-switch, settings, and RPC edge cases: unknown provider fallback models no longer inherit selectable context-window options from provider defaults, fatal startup diagnostics no longer persist
|
|
20
|
-
- Fixed `AgentSession.setContextWindow()` so bare SDK/runtime calls update the active session, append `context_window_change`, and emit `context_window_changed` without persisting
|
|
31
|
+
- Fixed context-window startup, session-switch, settings, and RPC edge cases: unknown provider fallback models no longer inherit selectable context-window options from provider defaults, fatal startup diagnostics no longer persist context-window settings, `AgentSession.setModel()` preserves an incoming target model's explicit selected context window, model-switch paths that change effective context windows now notify listeners via `context_window_changed`, the interactive context-window picker keys selection on raw token counts so colliding formatted labels never change which window is selected, RPC `set_model` returns the effective post-switch session model, and explicit startup `contextWindow` selections are journaled even when they equal the model scalar default ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
|
|
32
|
+
- Fixed `AgentSession.setContextWindow()` so bare SDK/runtime calls update the active session, append `context_window_change`, and emit `context_window_changed` without persisting settings; callers must pass `{ persistDefault: true }` to update the active model's `defaultContextWindows["provider/modelId"]` setting ([#1409](https://github.com/bastani-inc/atomic/issues/1409)).
|
|
21
33
|
- Fixed `packages/coding-agent` source-CLI subprocess tests (`session-id-readonly`, `startup-session-name`, `stdout-cleanliness`) crashing with `ERR_MODULE_NOT_FOUND` (for example `src/core/tools/oversized-tool-result.js`) when the Vitest worker pool runs under Node. They now launch the TypeScript source CLI with Bun explicitly via a `bunExecutable()` helper (matching `context-window-cli`/`rpc-context-window`) instead of assuming `process.execPath` is Bun, so the package test suite is portable across environments. The repo-wide `.js`->`.ts` source-import convention and shipped `dist/` are unchanged ([#1419](https://github.com/bastani-inc/atomic/issues/1419)).
|
|
22
34
|
|
|
23
35
|
## [0.8.30] - 2026-06-17
|
package/README.md
CHANGED
|
@@ -385,15 +385,16 @@ atomic install ssh://git@github.com/user/repo@v1 # tag or commit
|
|
|
385
385
|
atomic remove npm:@foo/atomic-tools
|
|
386
386
|
atomic uninstall npm:@foo/atomic-tools # alias for remove
|
|
387
387
|
atomic list
|
|
388
|
-
atomic update # update Atomic
|
|
388
|
+
atomic update # update Atomic only
|
|
389
|
+
atomic update --all # update Atomic and packages
|
|
389
390
|
atomic update --extensions # update packages only
|
|
390
391
|
atomic update --self # update Atomic only
|
|
391
392
|
atomic update --self --force # reinstall Atomic even if current
|
|
392
|
-
atomic update npm:@foo/atomic-tools
|
|
393
|
+
atomic update npm:@foo/atomic-tools # update one package
|
|
393
394
|
atomic config # enable/disable extensions, skills, prompts, themes
|
|
394
395
|
```
|
|
395
396
|
|
|
396
|
-
Packages install to `~/.atomic/agent/git/` (git) or global npm. Use `-l` for project-local installs (`.atomic/git/`, `.atomic/npm/`; legacy `.pi/git/` and `.pi/npm/` are compatibility fallbacks). Git packages install dependencies with `npm install --omit=dev` by default, so runtime deps must be listed under `dependencies`; when `npmCommand` is configured, git packages use plain `install` for compatibility with wrappers. If you use a Node version manager and want package installs to reuse a stable npm context, set `npmCommand` in `settings.json`, for example `["mise", "exec", "node@20", "--", "npm"]`.
|
|
397
|
+
Packages install to `~/.atomic/agent/git/` (git) or global npm. Use `-l` for project-local installs (`.atomic/git/`, `.atomic/npm/`; legacy `.pi/git/` and `.pi/npm/` are compatibility fallbacks). Git `@ref` values are pinned tags or commits; pinned packages are skipped by `atomic update --extensions` and `atomic update --all`, so use `atomic install git:host/user/repo@new-ref` to move an existing package to a new ref. Git packages install dependencies with `npm install --omit=dev` by default, so runtime deps must be listed under `dependencies`; when `npmCommand` is configured, git packages use plain `install` for compatibility with wrappers. If you use a Node version manager and want package installs to reuse a stable npm context, set `npmCommand` in `settings.json`, for example `["mise", "exec", "node@20", "--", "npm"]`.
|
|
397
398
|
|
|
398
399
|
Create a package by adding an app-name manifest key to `package.json` (`atomic` for this package). The legacy `pi` key is still accepted as a backwards-compatible shim:
|
|
399
400
|
|
|
@@ -480,16 +481,17 @@ atomic [options] [@files...] [messages...]
|
|
|
480
481
|
### Package Commands
|
|
481
482
|
|
|
482
483
|
```bash
|
|
483
|
-
atomic install <source> [-l]
|
|
484
|
-
atomic remove <source> [-l]
|
|
485
|
-
atomic uninstall <source> [-l]
|
|
486
|
-
atomic update [source|self|atomic] # Update Atomic
|
|
484
|
+
atomic install <source> [-l] # Install package, -l for project-local
|
|
485
|
+
atomic remove <source> [-l] # Remove package
|
|
486
|
+
atomic uninstall <source> [-l] # Alias for remove
|
|
487
|
+
atomic update [source|self|atomic] # Update Atomic only, or one package source
|
|
488
|
+
atomic update --all # Update Atomic and packages
|
|
487
489
|
atomic update --extensions # Update packages only
|
|
488
490
|
atomic update --self # Update Atomic only
|
|
489
491
|
atomic update --self --force # Reinstall Atomic even if current
|
|
490
|
-
atomic update --extension <src>
|
|
491
|
-
atomic list
|
|
492
|
-
atomic config
|
|
492
|
+
atomic update --extension <src> # Update one package
|
|
493
|
+
atomic list # List installed packages
|
|
494
|
+
atomic config # Enable/disable package resources
|
|
493
495
|
```
|
|
494
496
|
|
|
495
497
|
### Modes
|
|
package/dist/builtin/cursor/CHANGELOG.md
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
### Changed
|
|
6
6
|
|
|
7
|
-
- Published a synchronized Atomic 0.8.31-alpha.
|
|
7
|
+
- Published a synchronized Atomic 0.8.31-alpha.3 prerelease; no functional Cursor provider changes were made after 0.8.30.
|
|
8
8
|
|
|
9
9
|
## [0.8.30] - 2026-06-17
|
|
10
10
|
|
|
package/dist/builtin/cursor/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bastani/cursor",
|
|
3
|
-
"version": "0.8.31-alpha.
|
|
3
|
+
"version": "0.8.31-alpha.3",
|
|
4
4
|
"private": true,
|
|
5
5
|
"description": "Experimental first-party Atomic extension for Cursor OAuth, model discovery, and streaming provider registration.",
|
|
6
6
|
"contributors": [
|
|
@@ -40,7 +40,7 @@
|
|
|
40
40
|
}
|
|
41
41
|
},
|
|
42
42
|
"dependencies": {
|
|
43
|
-
"@bastani/atomic-natives": "0.8.31-alpha.
|
|
43
|
+
"@bastani/atomic-natives": "0.8.31-alpha.3",
|
|
44
44
|
"@bufbuild/protobuf": "^2.0.0"
|
|
45
45
|
}
|
|
46
46
|
}
|
|
package/dist/builtin/intercom/CHANGELOG.md
CHANGED
|
@@ -6,7 +6,7 @@ All notable changes to the `pi-intercom` extension will be documented in this fi
|
|
|
6
6
|
|
|
7
7
|
### Changed
|
|
8
8
|
|
|
9
|
-
- Aligned the intercom extension peer dependency with upstream pi TUI `^0.79.
|
|
9
|
+
- Aligned the intercom extension peer dependency with upstream pi TUI `^0.79.7` so coordination UI surfaces consume the latest shared TUI color-scheme, Warp image capability, and compatibility fixes; no intercom extension code changes were made for this metadata sync ([#1413](https://github.com/bastani-inc/atomic/issues/1413)).
|
|
10
10
|
|
|
11
11
|
## [0.8.30] - 2026-06-17
|
|
12
12
|
|
|
package/dist/builtin/intercom/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bastani/intercom",
|
|
3
|
-
"version": "0.8.31-alpha.
|
|
3
|
+
"version": "0.8.31-alpha.3",
|
|
4
4
|
"private": true,
|
|
5
5
|
"description": "Atomic extension providing a private coordination channel between parent and child agent sessions. Fork of: https://github.com/nicobailon/pi-intercom",
|
|
6
6
|
"contributors": [
|
|
@@ -39,7 +39,7 @@
|
|
|
39
39
|
},
|
|
40
40
|
"peerDependencies": {
|
|
41
41
|
"@bastani/atomic": "*",
|
|
42
|
-
"@earendil-works/pi-tui": "^0.79.
|
|
42
|
+
"@earendil-works/pi-tui": "^0.79.7"
|
|
43
43
|
},
|
|
44
44
|
"peerDependenciesMeta": {
|
|
45
45
|
"@bastani/atomic": {
|
|
package/dist/builtin/mcp/CHANGELOG.md
CHANGED
|
@@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
9
9
|
|
|
10
10
|
### Changed
|
|
11
11
|
|
|
12
|
-
- Aligned the MCP extension peer dependencies with upstream pi AI/TUI `^0.79.
|
|
12
|
+
- Aligned the MCP extension peer dependencies with upstream pi AI/TUI `^0.79.7` so MCP-backed sessions can use the host's latest provider catalog, model-search, theme/color-scheme, Warp image capability, and shared TUI compatibility fixes; no MCP extension code changes were made for this metadata sync ([#1413](https://github.com/bastani-inc/atomic/issues/1413)).
|
|
13
13
|
|
|
14
14
|
## [0.8.30] - 2026-06-17
|
|
15
15
|
|
|
package/dist/builtin/mcp/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bastani/mcp",
|
|
3
|
-
"version": "0.8.31-alpha.
|
|
3
|
+
"version": "0.8.31-alpha.3",
|
|
4
4
|
"private": true,
|
|
5
5
|
"description": "Atomic extension that adapts MCP (Model Context Protocol) servers into the coding agent. Fork of: https://github.com/nicobailon/pi-mcp-adapter",
|
|
6
6
|
"contributors": [
|
|
@@ -32,8 +32,8 @@
|
|
|
32
32
|
},
|
|
33
33
|
"peerDependencies": {
|
|
34
34
|
"@bastani/atomic": "*",
|
|
35
|
-
"@earendil-works/pi-ai": "^0.79.
|
|
36
|
-
"@earendil-works/pi-tui": "^0.79.
|
|
35
|
+
"@earendil-works/pi-ai": "^0.79.7",
|
|
36
|
+
"@earendil-works/pi-tui": "^0.79.7",
|
|
37
37
|
"zod": "^3.25.0 || ^4.0.0"
|
|
38
38
|
},
|
|
39
39
|
"peerDependenciesMeta": {
|
|
package/dist/builtin/subagents/CHANGELOG.md
CHANGED
|
@@ -2,9 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
- Added the `playwright-cli` builtin skill (browser automation, end-to-end UI checks, screenshots, reviewable video recording, and Playwright test workflows) and the `effective-liteparse` builtin skill (fast, local, model-free text/table/value extraction from PDF, DOCX, PPTX, XLSX, and image files via the `lit` CLI).
|
|
8
|
+
|
|
5
9
|
### Changed
|
|
6
10
|
|
|
7
|
-
-
|
|
11
|
+
- Changed the `debugger` and `codebase-online-researcher` subagents to load the `playwright-cli` skill and drive the `playwright-cli` command for JS-heavy, auth-gated, or interactive web work instead of the removed `browser` skill / `browse` CLI.
|
|
12
|
+
- Aligned the subagents extension peer dependencies with upstream pi `^0.79.7` runtime packages (`@earendil-works/pi-agent-core`, `@earendil-works/pi-ai`, and `@earendil-works/pi-tui`) so child sessions can use the host's latest provider catalog, RPC id handling, model-search, theme/color-scheme, Warp image capability, and shared TUI compatibility fixes; no subagents extension code changes were made for this metadata sync ([#1413](https://github.com/bastani-inc/atomic/issues/1413)).
|
|
13
|
+
|
|
14
|
+
### Removed
|
|
15
|
+
|
|
16
|
+
- Removed the bundled `browser` skill and all references to its `browse` CLI in favor of the `playwright-cli` skill and `playwright-cli` command.
|
|
8
17
|
|
|
9
18
|
## [0.8.30] - 2026-06-17
|
|
10
19
|
|
|
package/dist/builtin/subagents/agents/codebase-online-researcher.md
CHANGED
|
@@ -4,7 +4,7 @@ description: Online research for up-to-date documentation and library-source kno
|
|
|
4
4
|
tools: read, grep, find, ls, bash, web_search, fetch_content, get_search_content
|
|
5
5
|
model: openai/gpt-5.5:low
|
|
6
6
|
fallbackModels: openai-codex/gpt-5.5:low, github-copilot/gpt-5.5:low, anthropic/claude-opus-4-8:low, github-copilot/claude-opus-4.7:low
|
|
7
|
-
skills:
|
|
7
|
+
skills: playwright-cli
|
|
8
8
|
---
|
|
9
9
|
|
|
10
10
|
You are an expert research specialist focused on finding accurate, relevant information from authoritative sources — including open-source library internals with GitHub permalinks. You have three web tools available:
|
|
@@ -13,11 +13,11 @@ You are an expert research specialist focused on finding accurate, relevant info
|
|
|
13
13
|
- `fetch_content` — fetch a specific URL and return clean reader-mode text/markdown (HTML pages, GitHub issues/PRs, Stack Overflow, npm, arXiv, Reddit, Wikipedia, JSON endpoints, PDFs, RSS/Atom, YouTube). `fetch_content` on a GitHub repo URL also clones the repo locally under `/tmp/atomic-github-repos/<owner>/<repo>` and returns the file tree. Prefer this over a raw HTTP fetch.
|
|
14
14
|
- `get_search_content` — fetch the underlying content for the most promising results of a previous `web_search` in one call.
|
|
15
15
|
|
|
16
|
-
For JS-heavy or auth-gated pages, load the `
|
|
16
|
+
For JS-heavy or auth-gated pages, load the `playwright-cli` skill and drive its `playwright-cli` command through `bash`.
|
|
17
17
|
|
|
18
18
|
<EXTREMELY_IMPORTANT>
|
|
19
19
|
- PREFER `fetch_content` for static pages; it's faster and cheaper than spinning up a real browser.
|
|
20
|
-
- Reach for the `
|
|
20
|
+
- Reach for the `playwright-cli` skill's `playwright-cli` command via `bash` ONLY when a real DOM/JS is required.
|
|
21
21
|
- ALWAYS check `research/web/` for a recent cached copy before fetching anything new.
|
|
22
22
|
- EVERY code-related claim about an open-source library needs a GitHub **permalink with a full commit SHA** — branch links break when code changes.
|
|
23
23
|
</EXTREMELY_IMPORTANT>
|
|
@@ -39,7 +39,7 @@ When fetching any external page, apply these techniques in order. They produce p
|
|
|
39
39
|
1. **`fetch_content <url>` first.** Returns clean reader-mode text/markdown for nearly every well-formed page (and handles PDFs and JSON). Try it before anything else.
|
|
40
40
|
2. **Check `/llms.txt`.** Many modern docs sites publish an AI-friendly index at `/llms.txt` (spec: [llmstxt.org](https://llmstxt.org/llms.txt)). `fetch_content https://<site>/llms.txt` often links directly to the most relevant pages in plain text, saving a round-trip through the full site.
|
|
41
41
|
3. **Request Markdown via `Accept: text/markdown`.** Sites behind Cloudflare with [Markdown for Agents](https://developers.cloudflare.com/fundamentals/reference/markdown-for-agents/) return pre-converted Markdown when you set the header. Use `bash` with `curl <url> -H "Accept: text/markdown"` (look for `content-type: text/markdown` and the `x-markdown-tokens` header).
|
|
42
|
-
4. **Fall back to a real browser.** Load the `
|
|
42
|
+
4. **Fall back to a real browser.** Load the `playwright-cli` skill and drive its `playwright-cli` command through `bash` to render and interact with JS-heavy or auth-gated pages.
|
|
43
43
|
|
|
44
44
|
## Library Source Research with Permalinks
|
|
45
45
|
|
|
@@ -151,12 +151,12 @@ When you receive a research query:
|
|
|
151
151
|
2. **Check the local cache first**. Look in `research/web/` for existing documents on the topic. If a recent (still-relevant) copy exists, cite it before re-fetching.
|
|
152
152
|
3. **Execute strategic searches**.
|
|
153
153
|
- Identify the authoritative source (e.g. the library's official docs site, its GitHub repo, its release notes).
|
|
154
|
-
- Apply the Web Fetch Strategy: `fetch_content <url>` → `/llms.txt` → `Accept: text/markdown` → `
|
|
154
|
+
- Apply the Web Fetch Strategy: `fetch_content <url>` → `/llms.txt` → `Accept: text/markdown` → `playwright-cli` fallback.
|
|
155
155
|
- Use multiple query variations to capture different perspectives via `web_search`.
|
|
156
156
|
- Use `get_search_content` to bulk-fetch the underlying content of the top results of a `web_search` in one shot.
|
|
157
157
|
- For source repositories, prefer raw GitHub URLs (`https://raw.githubusercontent.com/<owner>/<repo>/<ref>/<path>`) over the HTML UI. For library internals, clone via `fetch_content` and use `grep`/`read` + permalinks.
|
|
158
158
|
4. **Fetch and analyze content**.
|
|
159
|
-
- Use `fetch_content <url>` (or the
|
|
159
|
+
- Use `fetch_content <url>` (or the playwright-cli skill's `playwright-cli` command via `bash` when interactivity is required) to pull the full content of promising sources.
|
|
160
160
|
- Prioritize official documentation, reputable technical blogs, and authoritative sources.
|
|
161
161
|
- Extract specific quotes and sections relevant to the query.
|
|
162
162
|
- Note publication dates to ensure currency of information.
|
|
@@ -275,7 +275,7 @@ For library-source answers, every code claim should look like the citation examp
|
|
|
275
275
|
## Search Efficiency
|
|
276
276
|
|
|
277
277
|
- Check `research/web/` for an existing copy before fetching anything new.
|
|
278
|
-
- Start by fetching the authoritative source (`fetch_content <url>` → `/llms.txt` → `Accept: text/markdown` → `
|
|
278
|
+
- Start by fetching the authoritative source (`fetch_content <url>` → `/llms.txt` → `Accept: text/markdown` → `playwright-cli`) rather than search-engine-style exploration.
|
|
279
279
|
- Use `fetch_content` (or `get_search_content` after a `web_search`) to pull full content from the most promising 3-5 web pages.
|
|
280
280
|
- Reuse already-cloned repos under `/tmp/atomic-github-repos/` instead of re-cloning.
|
|
281
281
|
- If initial results are insufficient, refine search terms and try again.
|
|
@@ -298,4 +298,4 @@ For library-source answers, every code claim should look like the citation examp
|
|
|
298
298
|
| Page returns 403 / bot block | Gemini fallback triggers automatically; no action needed if Gemini is configured. |
|
|
299
299
|
| `web_search` fails | Check provider config; try explicit `provider: "gemini"` if a Perplexity key is missing. |
|
|
300
300
|
|
|
301
|
-
Remember: you are the user's expert guide to technical research. Lean on `fetch_content` first with the `/llms.txt` → `Accept: text/markdown` → `
|
|
301
|
+
Remember: you are the user's expert guide to technical research. Lean on `fetch_content` first with the `/llms.txt` → `Accept: text/markdown` → `playwright-cli` fallback chain to efficiently pull authoritative content, clone open-source repos when implementation evidence is needed, store anything reusable under `research/web/`, and deliver comprehensive, up-to-date answers with exact citations and GitHub permalinks. Answer directly — skip preamble like "I'll help you with…" and go straight to findings.
|
|
@@ -4,7 +4,7 @@ description: Debug errors, test failures, and unexpected behavior. Use PROACTIVE
|
|
|
4
4
|
tools: read, grep, find, ls, bash, web_search, fetch_content, get_search_content
|
|
5
5
|
model: openai/gpt-5.5:xhigh
|
|
6
6
|
fallbackModels: openai-codex/gpt-5.5:xhigh, github-copilot/gpt-5.5:xhigh, anthropic/claude-opus-4-8:xhigh, github-copilot/claude-opus-4.7:xhigh
|
|
7
|
-
skills: tdd,
|
|
7
|
+
skills: tdd, playwright-cli, tmux
|
|
8
8
|
---
|
|
9
9
|
|
|
10
10
|
You are tasked with debugging and identifying errors, test failures, and unexpected behavior in the codebase. Your goal is to identify root causes and generate a report detailing the issues and proposed fixes, so another agent can implement the solutions you suggest.
|
|
@@ -13,13 +13,13 @@ You are tasked with debugging and identifying errors, test failures, and unexpec
|
|
|
13
13
|
|
|
14
14
|
- `tdd` — load the TDD skill before creating or modifying any tests.
|
|
15
15
|
- `tmux` — load the tmux skill for debugging terminal environments or TUI apps.
|
|
16
|
-
- `
|
|
16
|
+
- `playwright-cli` — load the playwright-cli skill for debugging web apps. If the `playwright-cli` command is missing, install it per the skill (`npx --no-install playwright-cli --version` || `npm install -g @playwright/cli@latest`); install a browser with `npx playwright install chromium` if one is missing.
|
|
17
17
|
- `fetch_content <url>` — the `pi-web-access` fetch tool returns reader-mode text/markdown for URLs (HTML, JSON, PDFs, GitHub issues/PRs, npm, arXiv, RSS, Reddit, Stack Overflow, etc.). Prefer it over a real browser when you only need page content.
|
|
18
18
|
- `web_search` / `get_search_content` — issue web queries and bulk-fetch the top results for triage.
|
|
19
|
-
- `
|
|
19
|
+
- `playwright-cli` (via `bash` after loading the playwright-cli skill) — full Chromium when you need JS execution, auth, or interactive actions. Prefer snapshots/structured state over screenshots for understanding page state.
|
|
20
20
|
|
|
21
21
|
<EXTREMELY_IMPORTANT>
|
|
22
|
-
- PREFER `fetch_content <url>` for static content. Only reach for the `
|
|
22
|
+
- PREFER `fetch_content <url>` for static content. Only reach for the `playwright-cli` skill when you need JS execution, authentication, or interactive page actions.
|
|
23
23
|
- ALWAYS `tdd` BEFORE creating or modifying any tests.
|
|
24
24
|
- NEVER suppress a failing test to make it pass. Reproduce the failure first; only then fix the underlying defect.
|
|
25
25
|
</EXTREMELY_IMPORTANT>
|
|
@@ -45,7 +45,7 @@ When you need to consult docs, forums, or issue trackers, apply these techniques
|
|
|
45
45
|
1. **`fetch_content <url>` first.** The fetch tool returns clean reader-mode text/markdown for HTML, GitHub issues/PRs, Stack Overflow, npm, arXiv, RSS, Wikipedia, Reddit, JSON endpoints, and PDFs — no browser needed.
|
|
46
46
|
2. **Check `/llms.txt`.** Many modern docs sites publish an AI-friendly index at `/llms.txt` (spec: [llmstxt.org](https://llmstxt.org/llms.txt)). Try `fetch_content https://<site>/llms.txt` before anything else; it often links directly to the most relevant pages in plain text.
|
|
47
47
|
3. **`Accept: text/markdown` header.** Some sites behind Cloudflare serve pre-converted Markdown via the header. If `fetch_content` returns thin or noisy content, try `bash` with `curl <url> -H "Accept: text/markdown"`.
|
|
48
|
-
4. **Fall back to the
|
|
48
|
+
4. **Fall back to the playwright-cli skill** — only when JS execution, login, or interactive actions are required.
|
|
49
49
|
|
|
50
50
|
## Workflow
|
|
51
51
|
|
|
@@ -77,7 +77,7 @@ Debugging process:
|
|
|
77
77
|
- Form and test hypotheses
|
|
78
78
|
- Add strategic debug logging or drive the project's own debugger (`bun --inspect`, `node --inspect-brk`, `python -m pdb`, etc.) through `bash` instead of `print` spam
|
|
79
79
|
- Inspect variable state by capturing it through the project's debugger session in `bash` or by writing a short repro script
|
|
80
|
-
- Use the web research order above (`fetch_content <url>` → `/llms.txt` → `Accept: text/markdown` →
|
|
80
|
+
- Use the web research order above (`fetch_content <url>` → `/llms.txt` → `Accept: text/markdown` → playwright-cli) to look up external library docs, error messages, Stack Overflow threads, and GitHub issues
|
|
81
81
|
|
|
82
82
|
For each issue, provide:
|
|
83
83
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bastani/subagents",
|
|
3
|
-
"version": "0.8.31-alpha.
|
|
3
|
+
"version": "0.8.31-alpha.3",
|
|
4
4
|
"private": true,
|
|
5
5
|
"description": "Atomic extension for delegating tasks to subagents with chains, parallel execution, and TUI clarification. Fork of: https://github.com/nicobailon/pi-subagents",
|
|
6
6
|
"contributors": [
|
|
@@ -38,9 +38,9 @@
|
|
|
38
38
|
},
|
|
39
39
|
"peerDependencies": {
|
|
40
40
|
"@bastani/atomic": "*",
|
|
41
|
-
"@earendil-works/pi-agent-core": "^0.79.
|
|
42
|
-
"@earendil-works/pi-ai": "^0.79.
|
|
43
|
-
"@earendil-works/pi-tui": "^0.79.
|
|
41
|
+
"@earendil-works/pi-agent-core": "^0.79.7",
|
|
42
|
+
"@earendil-works/pi-ai": "^0.79.7",
|
|
43
|
+
"@earendil-works/pi-tui": "^0.79.7"
|
|
44
44
|
},
|
|
45
45
|
"peerDependenciesMeta": {
|
|
46
46
|
"@bastani/atomic": {
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: effective-liteparse
|
|
3
|
+
description: Use this skill whenever a task involves a document file (PDF, DOCX, PPTX, XLSX, or image) and you need to read it or pull text, tables, or specific values out of it — to answer a question about its contents, look up a figure, or extract data. Provides fast, local, model-free extraction via the `lit` CLI with disciplined, low-cost search patterns.
|
|
4
|
+
compatibility: Requires Node 18+ and `@llamaindex/liteparse` (`npm i -g @llamaindex/liteparse`, verify `lit --version`). LibreOffice for Office files; ImageMagick for images. The bundled search.py helper needs `uv`.
|
|
5
|
+
license: MIT
|
|
6
|
+
metadata:
|
|
7
|
+
author: LlamaIndex
|
|
8
|
+
version: "1.0.0"
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
# Effective LiteParse
|
|
12
|
+
|
|
13
|
+
Extract text from documents locally with the `lit` CLI — a fast, model-free parser. This skill is
|
|
14
|
+
about using it **cheaply**: each `lit parse` re-runs full extraction, and every line you dump into
|
|
15
|
+
the conversation is paid for on every subsequent turn. The patterns below come from analyzing real
|
|
16
|
+
agent traces where the same PDF was parsed up to **9 times** and single image reads cost
|
|
17
|
+
**140k+ characters** of context. Don't repeat those mistakes.
|
|
18
|
+
|
|
19
|
+
## The golden rule: parse ONCE to a file, then search the file
|
|
20
|
+
|
|
21
|
+
`lit parse` re-extracts the whole document every time you call it. Re-parsing per search is the #1
|
|
22
|
+
waste seen in traces. Parse a document exactly once, to a temp file, then run all your searches
|
|
23
|
+
against that file:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
# ONE TIME, per document. --no-ocr for born-digital PDFs (almost all reports) — much faster.
|
|
27
|
+
lit parse "/abs/path/doc.pdf" --format text --no-ocr -o /tmp/doc.txt && wc -l /tmp/doc.txt
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
Then search the file with cheap shell tools — **never** re-run `lit parse` to search again.
|
|
31
|
+
|
|
32
|
+
## Search discipline — minimize ROUND-TRIPS, then keep results small
|
|
33
|
+
|
|
34
|
+
Every Bash call is a full model round-trip (latency + re-read of context). The biggest waste after
|
|
35
|
+
parsing is a **serial** loop: grep → look → grep again → `sed` to read the window → grep again. In
|
|
36
|
+
traces this doubled the turn count versus just reading the doc. Two rules fix it:
|
|
37
|
+
|
|
38
|
+
**1. Get context in the SAME command — don't grep then `sed`.** Use `grep -C` so the surrounding
|
|
39
|
+
lines come back with the hit. This removes the follow-up `sed` turn for the common case:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
grep -n -i -C4 "total assets" /tmp/doc.txt | head -40 # location AND its window, one turn
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Only fall back to `sed -n 'A,Bp'` when you already know the exact line and need a *wider* window
|
|
46
|
+
than `-C` gave you.
|
|
47
|
+
|
|
48
|
+
**2. Batch independent lookups into ONE command.** When a question needs several distinct facts
|
|
49
|
+
(e.g. emissions *and* revenue), don't spend one turn per term. Probe them together with labels:
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
for q in "carbon intensity" "scope 1" "total revenue"; do \
|
|
53
|
+
echo "=== $q ==="; grep -n -i -C3 "$q" /tmp/doc.txt | head -25; done
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Then keep results small:
|
|
57
|
+
|
|
58
|
+
- **Always bound output** with `head` and use `-n` for line numbers.
|
|
59
|
+
- **Don't fan out blindly.** Aim to resolve a question in ≤3 search commands. If two targeted greps
|
|
60
|
+
don't pin it down, switch to `search.py` (below) — don't keep firing keyword variations one per turn.
|
|
61
|
+
- Prefer **Bash `grep`/`sed` on the saved file over the Read and Grep tools** — fewer round-trips and
|
|
62
|
+
you control output size precisely.
|
|
63
|
+
|
|
64
|
+
## Ranked search when keywords are uncertain (bundled helper)
|
|
65
|
+
|
|
66
|
+
When two targeted greps haven't pinned the answer, **stop grepping** — don't iterate keyword variants
|
|
67
|
+
one turn at a time. Run the bundled BM25 ranker ONCE to surface the most relevant line-windows in a
|
|
68
|
+
single command:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
./.claude/skills/effective-liteparse/scripts/search.py /tmp/doc.txt -q "materiality assessment priority topics" -k 8 -e 5
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
`-k` = number of matches, `-e` = lines of context around each (so the window comes back inline — no
|
|
75
|
+
follow-up `sed` turn). It returns ranked windows with line numbers. Use a rich natural-language query
|
|
76
|
+
(several synonyms in one string), not a single keyword. This replaces a long chain of speculative greps.
|
|
77
|
+
|
|
78
|
+
## Born-digital vs scanned
|
|
79
|
+
|
|
80
|
+
- **Born-digital PDF** (real text layer — nearly all corporate/finance/ESG reports): always pass
|
|
81
|
+
`--no-ocr`. It's much faster and the text is identical. Leaving OCR on wastes time.
|
|
82
|
+
- **Scanned PDF / image**: drop `--no-ocr`. If the value is missing or digits look wrong, read the
|
|
83
|
+
page visually (see below) rather than trusting OCR.
|
|
84
|
+
|
|
85
|
+
## Reading a page visually — last resort, ONE screenshot, modest DPI
|
|
86
|
+
|
|
87
|
+
Screenshots are the most expensive thing you can put in context: a single high-DPI page PNG ran
|
|
88
|
+
**~140k characters** in one trace, and agents often rendered the same page twice (default + hi-res).
|
|
89
|
+
|
|
90
|
+
Only screenshot when text/tables genuinely can't answer the question (dense multi-column tables,
|
|
91
|
+
figures, charts). Then:
|
|
92
|
+
|
|
93
|
+
- Render **one** page at a time with `--target-pages "N"` (note: it's `--target-pages`, NOT `--pages`).
|
|
94
|
+
- Use **modest DPI (~150–200)**. Do not start at 300+; do not re-render the same page at higher DPI
|
|
95
|
+
unless the text is actually illegible.
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
lit screenshot "/abs/path/doc.pdf" --target-pages "13" --dpi 150 -o /tmp/shots/ # then Read the PNG
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## Many questions about the same document
|
|
102
|
+
|
|
103
|
+
Parsing once to a file already covers this: keep the `/tmp/doc.txt` and reuse it across every
|
|
104
|
+
question instead of re-parsing.
|
|
105
|
+
|
|
106
|
+
## Don't waste turns on preamble
|
|
107
|
+
|
|
108
|
+
Skip `lit --version`, `ls -la`, and `lit … --help` unless something actually failed. Go straight to
|
|
109
|
+
the parse. Core flags you need:
|
|
110
|
+
|
|
111
|
+
`--format text|json` · `--no-ocr` · `--target-pages "1-5,10"` · `--dpi <n>` (default 150) ·
|
|
112
|
+
`--ocr-language <iso>`. Use `--format json` only when you need bounding boxes/layout — it's much
|
|
113
|
+
larger; still search it, never load it whole.
|
|
114
|
+
|
|
115
|
+
## Setup
|
|
116
|
+
|
|
117
|
+
PDFs work out of the box. If `lit` is missing: `npm i -g @llamaindex/liteparse`. Office docs need
|
|
118
|
+
LibreOffice; images need ImageMagick (both auto-converted to PDF).
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
#!/usr/bin/env -S uv run --script
|
|
2
|
+
# /// script
|
|
3
|
+
# requires-python = ">=3.13"
|
|
4
|
+
# dependencies = [
|
|
5
|
+
# "bm25s>=0.3.9,<1",
|
|
6
|
+
# "aiofiles>=25.1.0,<26",
|
|
7
|
+
# ]
|
|
8
|
+
# ///
|
|
9
|
+
import argparse
|
|
10
|
+
import asyncio
|
|
11
|
+
from typing import TypedDict, cast
|
|
12
|
+
|
|
13
|
+
import aiofiles
|
|
14
|
+
import bm25s
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class LineRecord(TypedDict):
    """A single line of the searched text paired with its position."""

    # Zero-based position of the line within the list it was split into.
    index: int
    # Text of the line, without its line terminator.
    content: str
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _chunk(content: str) -> list[LineRecord]:
    """Split *content* into one record per line.

    Each record pairs the line's zero-based position with its text; line
    terminators are dropped by ``str.splitlines``.
    """
    records: list[LineRecord] = []
    for position, line in enumerate(content.splitlines()):
        records.append({"index": position, "content": line})
    return records
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _expand(corpus: list[LineRecord], match: LineRecord, n: int) -> str:
    """Render *match* together with up to *n* neighbouring lines on each side.

    The window is clamped to the bounds of *corpus*. The returned text starts
    with a ``Lines <start> - <end>`` header (zero-based, inclusive positions in
    *corpus*), followed by the window's lines joined with newlines.
    """
    center = match["index"]
    first = max(center - n, 0)
    last = min(center + n, len(corpus) - 1)
    body = "\n".join(record["content"] for record in corpus[first : last + 1])
    return f"Lines {first} - {last}\n\n\n" + body
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _retrieve(
    corpus: list[LineRecord], query: str, top_k: int | None, expand: int = 0
) -> list[tuple[str, float]]:
    """Rank the lines of *corpus* against *query* with BM25.

    Args:
        corpus: Line records to search.
        query: Free-text query; tokenized with ``bm25s.tokenize``.
        top_k: Maximum number of hits to return. ``None`` (or ``0``) means 10.
        expand: Number of context lines to include on each side of a hit.
            ``0`` returns the matching line on its own.

    Returns:
        ``(text, score)`` pairs in the order produced by the retriever. The
        list is empty when *corpus* is empty or *top_k* is negative.
    """
    # bm25s raises ValueError when asked for more results than there are
    # indexed documents, so never request more hits than the corpus has lines
    # (short files and the short final chunk of a larger file hit this).
    limit = min(top_k or 10, len(corpus))
    if limit <= 0:
        return []

    corpus_tokens = bm25s.tokenize([c["content"] for c in corpus])
    # Passing the records as ``corpus`` makes ``retrieve`` hand back the
    # records themselves rather than bare document indices.
    retriever = bm25s.BM25(corpus=corpus)
    retriever.index(corpus_tokens)
    query_tokens = bm25s.tokenize(query)
    docs, scores = retriever.retrieve(query_tokens, k=limit)

    results: list[tuple[str, float]] = []
    for doc, score in zip(docs[0].tolist(), scores[0].tolist()):
        # Without expansion, return the matched line's text so every result
        # really is a string, as the return annotation promises.
        text = _expand(corpus, doc, expand) if expand > 0 else doc["content"]
        results.append((text, score))
    return results
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _chunk_and_retrieve(
    content: str, query: str, top_k: int | None, expand_n: int
) -> list[tuple[str, float]]:
    """Split *content* into line records and rank them against *query*.

    Convenience wrapper that feeds the output of ``_chunk`` straight into
    ``_retrieve`` so both steps can be submitted to an executor as one call.
    """
    return _retrieve(_chunk(content), query, top_k, expand_n)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
async def process_chunk(
    content: bytes, query: str, top_k: int | None, expand_n: int
) -> list[tuple[str, float]]:
    """Decode one raw chunk and rank its lines in the default executor.

    Args:
        content: Raw bytes read from the file being searched.
        query: Free-text query to rank lines against.
        top_k: Maximum number of hits to return for this chunk.
        expand_n: Number of context lines to include around each hit.

    Returns:
        ``(text, score)`` pairs for this chunk.
    """
    # ``content`` is not guaranteed to be valid UTF-8 (for example a byte
    # slice cut inside a multi-byte sequence, or a file in another encoding).
    # Substitute U+FFFD for undecodable bytes instead of aborting the search.
    text = content.decode("utf-8", errors="replace")
    # Inside a coroutine the running loop is the one to use;
    # ``get_event_loop`` is discouraged here by the asyncio documentation.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(
        None,
        _chunk_and_retrieve,
        text,
        query,
        top_k,
        expand_n,
    )
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
async def read_and_process(
    file_path: str, query: str, top_k: int | None, expand_n: int | None
) -> list[str]:
    """Search *file_path* for *query* and return the best-scoring windows.

    The file is read in roughly 64 KiB pieces; each piece is ranked in its own
    task and the per-piece hits are merged, filtered, and sorted by score.

    Args:
        file_path: Path of the text file to search.
        query: Free-text query to rank lines against.
        top_k: Maximum number of windows to return; ``None`` (or ``0``) means 10.
        expand_n: Context lines around each hit; ``None`` (or ``0``) means 5.

    Returns:
        Up to *top_k* result strings, highest score first. Hits scoring below
        0.5 are dropped.
    """
    # NOTE(review): every piece is indexed on its own, so the "Lines a - b"
    # headers count from the start of the piece rather than the start of the
    # file, and scores from different pieces come from separately built
    # indexes.
    tasks: list[asyncio.Task[list[tuple[str, float]]]] = []
    leftover = b""
    async with asyncio.TaskGroup() as tg:
        async with aiofiles.open(file_path, "rb") as f:
            while block := await f.read(65536):
                data = leftover + block
                # Cut only at a newline so a line (and any multi-byte character
                # in it) is never split between two pieces; the partial trailing
                # line is carried over to the next read.
                cut = data.rfind(b"\n") + 1
                if cut == 0:
                    leftover = data
                    continue
                leftover = data[cut:]
                tasks.append(
                    tg.create_task(
                        process_chunk(data[:cut], query, top_k, expand_n or 5)
                    )
                )
        if leftover:
            # Final line without a trailing newline.
            tasks.append(
                tg.create_task(process_chunk(leftover, query, top_k, expand_n or 5))
            )
    results = [task.result() for task in tasks]
    flattened = [r for result in results for r in result if r[1] >= 0.5]
    flattened.sort(key=lambda x: x[1], reverse=True)
    n = top_k or 10
    return [f[0] for f in flattened][:n]
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def main() -> None:
    """Parse the command line, run the search, and print the ranked matches.

    Prints each match under a ``Match #<i>`` heading, separated by a rule, or
    a single notice when nothing relevant was found.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("file_path", help="Path to the text file to search")
    cli.add_argument(
        "-q", "--query", required=True, help="Keyword-based query to search for"
    )
    cli.add_argument(
        "-k",
        "--top-k",
        type=int,
        default=None,
        required=False,
        help="Top K matches to retrieve. Defaults to 10.",
    )
    cli.add_argument(
        "-e",
        "--expand",
        type=int,
        default=None,
        required=False,
        help="Expand the matched content by N lines (before and after). Defaults to 5",
    )
    options = cli.parse_args()

    matches = asyncio.run(
        read_and_process(
            options.file_path, options.query, options.top_k, options.expand
        )
    )
    if not matches:
        print("No relevant matches found")
        return

    separator = "\n" + "─" * 60 + "\n"
    final = len(matches) - 1
    for number, text in enumerate(matches):
        print(f"Match #{number}")
        # Trim only blank lines at either end; keep in-line whitespace intact.
        print(text.strip("\n"))
        if number < final:
            print(separator)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
if __name__ == "__main__":
|
|
128
|
+
main()
|