claude-code-tools 1.0.6__py3-none-any.whl → 1.4.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- claude_code_tools/__init__.py +1 -1
- claude_code_tools/action_rpc.py +16 -10
- claude_code_tools/aichat.py +793 -51
- claude_code_tools/claude_continue.py +4 -0
- claude_code_tools/codex_continue.py +48 -0
- claude_code_tools/export_session.py +94 -11
- claude_code_tools/find_claude_session.py +36 -12
- claude_code_tools/find_codex_session.py +33 -18
- claude_code_tools/find_session.py +30 -16
- claude_code_tools/gdoc2md.py +220 -0
- claude_code_tools/md2gdoc.py +549 -0
- claude_code_tools/search_index.py +119 -15
- claude_code_tools/session_menu_cli.py +1 -1
- claude_code_tools/session_utils.py +3 -3
- claude_code_tools/smart_trim.py +18 -8
- claude_code_tools/smart_trim_core.py +4 -2
- claude_code_tools/tmux_cli_controller.py +35 -25
- claude_code_tools/trim_session.py +28 -2
- claude_code_tools-1.4.6.dist-info/METADATA +1112 -0
- {claude_code_tools-1.0.6.dist-info → claude_code_tools-1.4.6.dist-info}/RECORD +31 -24
- {claude_code_tools-1.0.6.dist-info → claude_code_tools-1.4.6.dist-info}/entry_points.txt +2 -0
- docs/linked-in-20260102.md +32 -0
- docs/local-llm-setup.md +286 -0
- docs/reddit-aichat-resume-v2.md +80 -0
- docs/reddit-aichat-resume.md +29 -0
- docs/reddit-aichat.md +79 -0
- docs/rollover-details.md +67 -0
- node_ui/action_config.js +3 -3
- node_ui/menu.js +67 -113
- claude_code_tools/session_tui.py +0 -516
- claude_code_tools-1.0.6.dist-info/METADATA +0 -685
- {claude_code_tools-1.0.6.dist-info → claude_code_tools-1.4.6.dist-info}/WHEEL +0 -0
- {claude_code_tools-1.0.6.dist-info → claude_code_tools-1.4.6.dist-info}/licenses/LICENSE +0 -0
{claude_code_tools-1.0.6.dist-info → claude_code_tools-1.4.6.dist-info}/RECORD

@@ -1,8 +1,8 @@
-claude_code_tools/__init__.py,sha256=
-claude_code_tools/action_rpc.py,sha256=
-claude_code_tools/aichat.py,sha256=
-claude_code_tools/claude_continue.py,sha256=
-claude_code_tools/codex_continue.py,sha256=
+claude_code_tools/__init__.py,sha256=lJ_ty7aXHz-OaAsz_ikznt_x0M6lgCfbFZqflfmM_-U,89
+claude_code_tools/action_rpc.py,sha256=6NfWUjt22qqkDKq7ftNH2V9B8VSQycbFx_jDA0UrlJQ,17016
+claude_code_tools/aichat.py,sha256=s8pfRTmXR55J4yNElJdztXnfjFifaJbjrBtVo6NUe-s,95346
+claude_code_tools/claude_continue.py,sha256=GwgKGbTpj4ES615yCadjz0Q6wOb69T76rf_-wPnHix8,11727
+claude_code_tools/codex_continue.py,sha256=_iKNTKigtz6AMHXGfQGhc2JmQaoiktnnWmYXtT2pwsE,11571
 claude_code_tools/config.py,sha256=9v8Xe0f0vqGPsTcFJvmcD0AhmyqiP_4_1DzirLoeqt0,2542
 claude_code_tools/delete_session.py,sha256=noKkwzu6DVYCT6G5icI0PgSwkAKG7Mx7nPthEL-x__U,5819
 claude_code_tools/dotenv_vault.py,sha256=KPI9NDFu5HE6FfhQUYw6RhdR-miN0ScJHsBg0OVG61k,9617
@@ -10,24 +10,25 @@ claude_code_tools/env_safe.py,sha256=TSSkOjEpzBwNgbeSR-0tR1-pAW_qmbZNmn3fiAsHJ4w
 claude_code_tools/export_all.py,sha256=GOWj_5IZrrngeRUsDxbE48cOOZIxo7drZJWZh9QiuHg,9848
 claude_code_tools/export_claude_session.py,sha256=rEJLMcaCMuWbWxs1rfd0LuT6gSmjEsej6nueGrH1ujo,16256
 claude_code_tools/export_codex_session.py,sha256=V2deRcI6FMCEWYAEvvL74XXuW798B1esgTs6PH3_-7E,15650
-claude_code_tools/export_session.py,sha256=
-claude_code_tools/find_claude_session.py,sha256=
-claude_code_tools/find_codex_session.py,sha256=
+claude_code_tools/export_session.py,sha256=I2ncN3lbbrfc8M-3URQVimyM2fAbcu4BXITtCdNfL6E,21860
+claude_code_tools/find_claude_session.py,sha256=QRv6u4T5X9c9QLj-1X8-uYj3wul5YsbFI5LgUUTFMW0,70559
+claude_code_tools/find_codex_session.py,sha256=dpZVek3cJ-se4JMwzGEDkZ50_XvtK6dfP36mo8KDHnI,48177
 claude_code_tools/find_original_session.py,sha256=JlHeati0X1KkPkmz4knvdfCqRHjuJRLfRRcn3ZsuG8o,4120
-claude_code_tools/find_session.py,sha256=
+claude_code_tools/find_session.py,sha256=bc86c4dYUty7x6PfWXCQoBrDNYzuDBb0wk3proFp2LI,38970
 claude_code_tools/find_trimmed_sessions.py,sha256=JvMSetHD4DgXzKDFaZlAndBT_dYaw_lIT02cta55q3I,7435
+claude_code_tools/gdoc2md.py,sha256=J83CZJomHquOBIl15fISqtDyGsmkqqMuRY-nN7-7K1I,6346
+claude_code_tools/md2gdoc.py,sha256=sA6gU2QsWanJpAwfSC6HnSPQuSywv0xUopXSvbRUX_o,17945
 claude_code_tools/node_menu_ui.py,sha256=CQ6PxxNQ5jbLRLYESJ-klLSxSIIuLegU8s-Sj5yRl8Q,12621
-claude_code_tools/search_index.py,sha256=
+claude_code_tools/search_index.py,sha256=_ORSD2E6PF-Gjtzrnvp03KyfGueO5FA3WCzTbg7n208,50557
 claude_code_tools/session_lineage.py,sha256=BYKpAolPGLJUv97-xMXvNFMzgauUVNAsRx8Shw0X_hk,8430
 claude_code_tools/session_menu.py,sha256=5M1AlqhmCWly3r3P1u-GhxWB0_rbGKsKSlIPEgTaN9w,6095
-claude_code_tools/session_menu_cli.py,sha256=
-claude_code_tools/
-claude_code_tools/
-claude_code_tools/
-claude_code_tools/
-claude_code_tools/tmux_cli_controller.py,sha256=47G9sxEOf68-cBkk_y3iWSKnxqgWoiA_L3OaqkJKOlA,34916
+claude_code_tools/session_menu_cli.py,sha256=SnCdm1xyJQAC0ogZ5-PRc8SkAZVKHXYu6mtc0Lp_las,15426
+claude_code_tools/session_utils.py,sha256=s8_hTYfVqg7dcUjwaZJyDAQYNKca-L8VCQrXWNOVXgM,44739
+claude_code_tools/smart_trim.py,sha256=A6PVtBbRA1Uq4ic_co4qSsULNVDp8DgdSLKaP0nDvV8,16385
+claude_code_tools/smart_trim_core.py,sha256=t68mw3qaQFmOPSodcyOX7SR81BJu-WwrkItPNHbob2A,23580
+claude_code_tools/tmux_cli_controller.py,sha256=8pXNKazpEMW0XKy4ohYVdEty3VTxxEvL2f6GkX33qZ4,35524
 claude_code_tools/tmux_remote_controller.py,sha256=eY1ouLtUzJ40Ik4nqUBvc3Gl1Rx0_L4TFW4j708lgvI,9942
-claude_code_tools/trim_session.py,sha256=
+claude_code_tools/trim_session.py,sha256=7x2GtAxoI5H9ta8pomsa02k7WOFr1ra9FhHF5hS__do,27710
 claude_code_tools/trim_session_claude.py,sha256=CtGelBtcKi5txpkkQoupOLSOyPoViAUlv_fjTLULNs8,12272
 claude_code_tools/trim_session_codex.py,sha256=CnrgQzoqL9GeI9xRTmGfmY4x9wft6eChfSG6pFf4diY,12249
 docs/cc-codex-instructions.md,sha256=5E9QotkrcVYIE5VrvJGi-sg7tdyITDrsbhaqBKr4MUk,1109
@@ -35,12 +36,18 @@ docs/claude-code-chutes.md,sha256=jCnYAAHZm32NGHE0CzGGl3vpO_zlF_xdmr23YxuCjPg,80
 docs/claude-code-tmux-tutorials.md,sha256=S-9U3a1AaPEBPo3oKpWuyOfKK7yPFOIu21P_LDfGUJk,7558
 docs/dot-zshrc.md,sha256=DC2fOiGrUlIzol6N_47CW53a4BsnMEvCnhlRRVxFCTc,7160
 docs/find-claude-session.md,sha256=fACbQP0Bj5jqIpNWk0lGDOQQaji-K9Va3gUv2RA47VQ,4284
+docs/linked-in-20260102.md,sha256=wCihbQGGqS-GpQ7z9-q6UObiJBJ8_VfbUufXTvqB6hY,1159
 docs/lmsh.md,sha256=Kf5tKt1lh7eDV-B6mrMi2hsjUMZv1EGfkrsNS29HYBA,2226
+docs/local-llm-setup.md,sha256=JnMF4m1e0s8DZxfB-8S3Y20W74KBMm2RXwBjTK0o27U,7596
+docs/reddit-aichat-resume-v2.md,sha256=Rpq4E-tMDpgjWiSfb-jS50AeUxgdnOJIwDHs7rdLTZw,2980
+docs/reddit-aichat-resume.md,sha256=9Q9Q4Qrp3qSV6z1-qBq7lLAdTX2AvE5df3d0gbO81iI,1104
+docs/reddit-aichat.md,sha256=QfBk9jZn_2c6qjftHcC38ypcEHz68e0YgXMz_FApExg,7117
 docs/reddit-post.md,sha256=ZA7kPoJNi06t6F9JQMBiIOv039ADC9lM8YXFt8UA_Jg,2345
+docs/rollover-details.md,sha256=Cf7POkMTv-G8WzEhSEHvn8MiJcsWn-pbXxU3QMWG5-c,5933
 docs/tmux-cli-instructions.md,sha256=hKGOdaPdBlb5XFzHfi0Mm7CVlysBuJUAfop3GHreyuw,5008
 docs/vault-documentation.md,sha256=5XzNpHyhGU38JU2hKEWEL1gdPq3rC2zBg8yotK4eNF4,3600
-node_ui/action_config.js,sha256=
-node_ui/menu.js,sha256=
+node_ui/action_config.js,sha256=NL9rStyaqrfZAcvN-yb5GJdKXhDtnl9eFjA5nevlIMw,2114
+node_ui/menu.js,sha256=wy8-BnlCI_kNjQ8aG4cSF69YWnft55t9SKDG4mij9Es,77892
 node_ui/package.json,sha256=1XWJ4nNQsrF3B5dgpA7Q74N0UjzkQHOyVzJqDBVYGRg,436
 node_ui/node_modules/.package-lock.json,sha256=y7_WLVliP_6WrPjOCY36dgJNjJXYdtYabUTEeuSnTfU,25226
 node_ui/node_modules/.bin/is-in-ci,sha256=rDmm4QOiAxkC6Qu_oHH8ojMzWqEnIQUffof6t1I0zIg,120
@@ -1800,8 +1807,8 @@ node_ui/node_modules/yoga-wasm-web/dist/wrapAsm-f766f97f.js,sha256=-82_XGQhP7kkD
 node_ui/node_modules/yoga-wasm-web/dist/wrapAsm.d.ts,sha256=2l7bSIMruV8KTC2I4XKJBDQx8nsgwVR43q9rvkClpUE,4877
 node_ui/node_modules/yoga-wasm-web/dist/yoga.wasm,sha256=R_tPgdJ0kyGEzRnHXtNPkC0T8FGTAVkHiaN_cHeXfic,88658
 node_ui/node_modules/yoga-wasm-web/dist/generated/YGEnums.d.ts,sha256=kE3_7yS8iqNd5sMfXtD9B3Tq_JcJkVOQkdwxhch1pI4,8893
-claude_code_tools-1.
-claude_code_tools-1.
-claude_code_tools-1.
-claude_code_tools-1.
-claude_code_tools-1.
+claude_code_tools-1.4.6.dist-info/METADATA,sha256=O2zrPX_UYG6uRdKknR5S0P9ACZJEvOlzyxaCuy89_NU,42998
+claude_code_tools-1.4.6.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+claude_code_tools-1.4.6.dist-info/entry_points.txt,sha256=-hVowB6m8tgqV_dCyzCLbt7vthEDiBxodGMqMvD4F2M,280
+claude_code_tools-1.4.6.dist-info/licenses/LICENSE,sha256=BBQdOBLdFB3CEPmb3pqxeOThaFCIdsiLzmDANsCHhoM,1073
+claude_code_tools-1.4.6.dist-info/RECORD,,
{claude_code_tools-1.0.6.dist-info → claude_code_tools-1.4.6.dist-info}/entry_points.txt

@@ -1,5 +1,7 @@
 [console_scripts]
 aichat = claude_code_tools.aichat:main
 env-safe = claude_code_tools.env_safe:main
+gdoc2md = claude_code_tools.gdoc2md:main
+md2gdoc = claude_code_tools.md2gdoc:main
 tmux-cli = claude_code_tools.tmux_cli_controller:main
 vault = claude_code_tools.dotenv_vault:main
docs/linked-in-20260102.md
ADDED

@@ -0,0 +1,32 @@
The top pain for users of Claude Code and similar CLI agents is...

Session Continuity: What do you do when you've filled your context window?

Compaction? You lose valuable detail that you have to explain all over again.

Here's what I do instead to recover the precise, full context I need to continue my work:

In my Claude Code session, I type ">resume" -- this triggers a hook that copies the current session ID to the clipboard.

Then I run:

aichat resume <paste-session-id>

This launches a TUI that shows a few ways to continue my work. I select the "rollover" option, which creates a new session and injects the session log file path into the first user message.

Then I prompt it to retrieve the exact context I need, or use the slash command /aichat:recover-context

This works with Codex-CLI as well, and you can even do cross-agent handoff: start in Claude-Code, continue with Codex-CLI, or vice versa.

The aichat command is one of several productivity tools in my claude-code-tools repo. If you'd like to try them out, see the repo for instructions on how to install the suite of tools:

https://github.com/pchalasani/claude-code-tools
docs/local-llm-setup.md
ADDED
@@ -0,0 +1,286 @@
# Running Claude Code and Codex with Local LLMs

This guide covers running **Claude Code** and **OpenAI Codex CLI** with local
models using [llama.cpp](https://github.com/ggml-org/llama.cpp)'s server:

- **Claude Code** uses the Anthropic-compatible `/v1/messages` endpoint
- **Codex CLI** uses the OpenAI-compatible `/v1/chat/completions` endpoint

## Table of Contents

- [When to Use Local Models](#when-to-use-local-models)
- [How It Works](#how-it-works)
- [Prerequisites](#prerequisites)
- [Shell Function for Claude Code](#shell-function-for-claude-code)
- [Model Commands](#model-commands)
- [Quick Reference](#quick-reference)
- [Usage](#usage)
- [Troubleshooting](#troubleshooting)
- [Using Codex CLI with Local LLMs](#using-codex-cli-with-local-llms)

## When to Use Local Models

These local models (20B-80B parameters) aren't suited for complex coding tasks
where frontier models excel, but they're useful for non-coding tasks like
summarization, answering questions about your private notes, working with
sensitive documents that can't be sent to external APIs, or high-volume tasks
where API costs would add up.

## How It Works

1. **Start llama-server** with a model (see [Model Commands](#model-commands)
   below) - this makes the model available at a local endpoint (e.g., port 8123)
2. **Run Claude Code** pointing to that endpoint using the `cclocal` helper
   function

## Prerequisites

- [llama.cpp](https://github.com/ggml-org/llama.cpp) built and `llama-server`
  available in your PATH
- Sufficient RAM (64GB+ recommended for 30B+ models)
- Models will be downloaded automatically from HuggingFace on first run
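If you'd rather not build llama.cpp from source, a package-manager install may be available (this assumes the Homebrew formula exists for your platform; check the llama.cpp README for current options):

```bash
# One way to get llama-server on macOS (assumes the Homebrew formula):
brew install llama.cpp
command -v llama-server   # verify it is on your PATH
```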
## Shell Function for Claude Code

At its simplest, connecting Claude Code to a local model is just one line:

```bash
ANTHROPIC_BASE_URL=http://127.0.0.1:8123 claude
```

The helper function below is just a convenience wrapper for this. Add it to your
`~/.zshrc` or `~/.bashrc`:

```bash
cclocal() {
  local port=8123
  if [[ "$1" =~ ^[0-9]+$ ]]; then
    port="$1"
    shift
  fi
  (
    export ANTHROPIC_BASE_URL="http://127.0.0.1:${port}"
    claude "$@"
  )
}
```

Usage:

```bash
cclocal                       # Connect to localhost:8123
cclocal 8124                  # Connect to localhost:8124
cclocal 8124 --resume abc123  # With additional claude args
```

> [!IMPORTANT]
> Add this to your `~/.claude/settings.json` to disable telemetry:
>
> ```json
> {
>   // ... other settings ...
>   "env": {
>     "CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": "1"
>   }
>   // ... other settings ...
> }
> ```
>
> Without this, Claude Code sends telemetry requests to your local server,
> which returns 404s and retries aggressively, causing ephemeral port exhaustion
> on macOS and system-wide network failures.

## Model Commands

### GPT-OSS-20B (Fast, Good Baseline)

Uses the built-in preset with optimized settings:

```bash
llama-server --gpt-oss-20b-default --port 8123
```

**Performance:** ~17-38 tok/s generation on M1 Max

### Qwen3-30B-A3B

```bash
llama-server -hf unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF \
  --port 8124 \
  -c 131072 \
  -b 32768 \
  -ub 1024 \
  --parallel 1 \
  --jinja \
  --chat-template-file ~/Git/llama.cpp/models/templates/Qwen3-Coder.jinja
```

**Performance:** ~15-27 tok/s generation on M1 Max

### Qwen3-Coder-30B-A3B (Recommended)

Uses the built-in preset with Q8_0 quantization (higher quality):

```bash
llama-server --fim-qwen-30b-default --port 8127
```

Downloads `ggml-org/Qwen3-Coder-30B-A3B-Instruct-Q8_0-GGUF` automatically on first
run.

### Qwen3-Next-80B-A3B (Better Long Context)

Newer SOTA model. Slower generation, but performance doesn't degrade as much
with long contexts:

```bash
llama-server -hf unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF:Q4_K_XL \
  --port 8126 \
  -c 131072 \
  -b 32768 \
  -ub 1024 \
  --parallel 1 \
  --jinja
```

**Performance:** ~5x slower generation than Qwen3-30B-A3B, but better on long
contexts

### Nemotron-3-Nano-30B-A3B (NVIDIA Reasoning Model)

```bash
llama-server -hf unsloth/Nemotron-3-Nano-30B-A3B-GGUF:Q4_K_XL \
  --port 8125 \
  -c 131072 \
  -b 32768 \
  -ub 1024 \
  --parallel 1 \
  --jinja \
  --chat-template-file ~/Git/llama.cpp/models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja \
  --temp 0.6 \
  --top-p 0.95 \
  --min-p 0.01
```

**Recommended settings (from NVIDIA):**

- Tool calling: `temp=0.6`, `top_p=0.95`
- Reasoning tasks: `temp=1.0`, `top_p=1.0`

## Quick Reference

| Model | Port | Command |
|-------|------|---------|
| GPT-OSS-20B | 8123 | `llama-server --gpt-oss-20b-default --port 8123` |
| Qwen3-30B-A3B | 8124 | See full command above |
| Nemotron-3-Nano | 8125 | See full command above |
| Qwen3-Next-80B-A3B | 8126 | See full command above |
| Qwen3-Coder-30B | 8127 | `llama-server --fim-qwen-30b-default --port 8127` |

## Usage

1. Start the llama-server with your chosen model (the first request will be
   slow while the model loads)
2. In another terminal, run `cclocal <port>` to start Claude Code
3. Use Claude Code as normal
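Between steps 1 and 2, you can optionally confirm the server is ready before launching Claude Code (a quick sketch using llama-server's `/health` endpoint):

```bash
# Optional readiness check: llama-server reports an OK status on /health
# once the model has finished loading.
curl -s http://127.0.0.1:8123/health
```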
## Notes

- First request is slow while the model loads into memory (~10-30 seconds
  depending on model size)
- Subsequent requests are fast
- The `/v1/messages` endpoint in llama-server handles Anthropic API translation
  automatically
- Each model's chat template handles the model-specific prompt formatting

## Troubleshooting

**"failed to find a memory slot" errors:**

Increase context size (`-c`) or reduce parallel slots (`--parallel 1`). Claude
Code sends large system prompts (~20k+ tokens).

**Slow generation:**

- Increase batch size: `-b 32768`
- Reduce parallel slots: `--parallel 1`
- Check whether the model is fully loaded in RAM/VRAM

**Model not responding correctly:**

Ensure you're using the correct chat template for your model. The template
handles formatting the Anthropic API messages into the model's expected format.

---

# Using Codex CLI with Local LLMs

[OpenAI Codex CLI](https://github.com/openai/codex) can also use local models via
llama-server's OpenAI-compatible `/v1/chat/completions` endpoint.

## Configuration

Add a local provider to `~/.codex/config.toml`:

```toml
[model_providers.llama-local]
name = "Local LLM via llama.cpp"
base_url = "http://localhost:8123/v1"
wire_api = "chat"
```

For multiple ports (different models), define multiple providers:

```toml
[model_providers.llama-8123]
name = "Local LLM port 8123"
base_url = "http://localhost:8123/v1"
wire_api = "chat"

[model_providers.llama-8124]
name = "Local LLM port 8124"
base_url = "http://localhost:8124/v1"
wire_api = "chat"
```

## Switching Models at the Command Line

Use the `--model` flag and `-c` (config) flag to switch models without editing
the TOML file:

```bash
# Use GPT-OSS-20B on port 8123 (model name is immaterial)
codex --model gpt-oss-20b -c model_provider=llama-8123

# Use Qwen3-30B on port 8124 (model name is immaterial)
codex --model qwen3-30b -c model_provider=llama-8124
```

You can also override nested config values with dots:

```bash
codex --model gpt-oss-20b \
  -c model_provider=llama-local \
  -c model_providers.llama-local.base_url="http://localhost:8124/v1"
```
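Analogous to `cclocal`, a small shell wrapper can make this a one-worder. This is a convenience sketch, not part of Codex or this repo; it assumes the per-port providers (`llama-8123`, `llama-8124`, ...) defined above:

```bash
# Hypothetical helper, mirroring cclocal: route codex to a local
# provider by port. The model name is immaterial to llama-server.
cxlocal() {
  local port=8123
  if [[ "$1" =~ ^[0-9]+$ ]]; then
    port="$1"
    shift
  fi
  codex --model local -c model_provider="llama-${port}" "$@"
}
```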
## Running llama-server for Codex

Use the same llama-server commands as for Claude Code:

```bash
# GPT-OSS-20B
llama-server --gpt-oss-20b-default --port 8123

# Qwen3-Coder-30B
llama-server --fim-qwen-30b-default --port 8127
```

## Notes

- Codex uses the `/v1/chat/completions` endpoint (OpenAI format), not
  `/v1/messages` (Anthropic format)
- Both endpoints are served by llama-server simultaneously
- The same model can serve both Claude Code and Codex at the same time
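To see the two dialects side by side, you can hit both endpoints of a single running server with `curl`. A sketch with minimal payloads (fields follow the respective API specs; the `model` value is immaterial to llama-server, as noted above):

```bash
# Anthropic-style request, as used by Claude Code:
curl -s http://localhost:8123/v1/messages \
  -H "Content-Type: application/json" \
  -d '{"model": "local", "max_tokens": 32,
       "messages": [{"role": "user", "content": "Say hi"}]}'

# OpenAI-style request, as used by Codex CLI:
curl -s http://localhost:8123/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "local",
       "messages": [{"role": "user", "content": "Say hi"}]}'
```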
docs/reddit-aichat-resume-v2.md
ADDED

@@ -0,0 +1,80 @@
# Reddit Post: aichat resume (v2)

**Title:** I don't compact my Claude Code sessions. I chain them.

---

Compaction throws away context. I'd rather keep everything and let the
agent retrieve it when needed.

**Core principles:**

- **Lossless** -- compaction summarizes and discards; I want nothing lost
- **Searchable** -- sessions must be full-text searchable, fast (Claude Code's
  built-in search only matches titles)
- **Fast** -- 50+ sessions in a lineage, thousands of lines each -- grep doesn't
  scale, so I built a Tantivy-indexed Rust CLI that returns results in ms
- **Portable** -- hand off between agents: start in Claude Code, continue in
  Codex CLI, or vice versa

**The problem with compaction:**

When you hit context limits, Claude Code's default is to compact --
summarize and discard. But summaries lose nuance. That debugging session
where you finally figured out the race condition? Gone. The architectural
decision you made three hours ago? Flattened into a sentence.

**My approach: session chaining**

Instead of compacting, I chain sessions together:

1. When context fills up, type `>resume`
2. Pick a strategy (trim, smart-trim, or rollover)
3. Start fresh -- but with full lineage back to every ancestor session

Nothing gets deleted. The agent traces back and pulls context on demand.

**Three resume strategies:**

| Strategy | What it does | When to use |
|----------|--------------|-------------|
| **Trim** | Truncates bloated tool outputs and early messages | Quick fix, frees 30-50% |
| **Smart trim** | AI decides what's safe to cut | When you want surgical precision |
| **Rollover** | Fresh session with lineage pointers | Clean slate, full history preserved |

**Why Rust + Tantivy?**

Session chains get long. You might have 50+ sessions in a lineage, each with
thousands of lines of conversation. Grepping through JSON files doesn't scale.
So I built `aichat-search` -- a Rust CLI using Tantivy (the engine behind
Quickwit and other search tools). It indexes sessions on first run, then
returns results in milliseconds. The agent can search your entire history
without you waiting.

**What you get:**

- Fast full-text search across all sessions (Tantivy-indexed, not grep)
- `/recover-context` command -- agent pulls context from parent sessions
- Session-searcher sub-agent -- searches history without polluting your context
- Cross-agent handoff -- start in Claude Code, continue in Codex CLI, or vice versa

**Quick demo:** [video in README]

**Install:**

```bash
# Install the CLI tools
uv tool install claude-code-tools
brew install pchalasani/tap/aichat-search  # or: cargo install aichat-search

# Add the plugin
claude plugin marketplace add pchalasani/claude-code-tools
claude plugin install "aichat@cctools-plugins"
```

Repo: https://github.com/pchalasani/claude-code-tools

---

Curious how others handle context limits. Do you compact and hope for the
best, or have you built something similar?
docs/reddit-aichat-resume.md
ADDED

@@ -0,0 +1,29 @@
# Reddit Post: aichat resume

**Title:** Tool for continuing Claude Code sessions when context fills up

---

If you use Claude Code, you've hit this: context fills up mid-task, and your options are (a) lossy compaction that throws away information, or (b) a fresh start that loses the conversation history.

I built `aichat resume` to handle this. When you're running low on context:

1. Type `>resume` in your session
2. Quit Claude Code
3. Run `aichat resume` (the session ID is already in your clipboard)
4. Pick a strategy: trim large tool outputs, smart-trim with AI analysis, or roll over to a fresh session

The key thing: nothing gets lost. All strategies keep pointers to parent sessions, so the agent can look up prior work when needed. You get a chain of linked sessions instead of losing context.

Quick demo of the `>resume` trigger: [video in README]

Install:
```
uv tool install claude-code-tools
claude plugin marketplace add pchalasani/claude-code-tools
claude plugin install "aichat@cctools-plugins"
```

Repo: https://github.com/pchalasani/claude-code-tools

Works with Codex too. Feedback welcome.
docs/reddit-aichat.md
ADDED
@@ -0,0 +1,79 @@
# Aichat: Session continuation without compaction, and fast full-text session search for Claude Code and Codex CLI

In the [claude-code-tools](https://github.com/pchalasani/claude-code-tools) repo,
I've been sharing various tools I've built to improve productivity when working
with Claude-Code or Codex-CLI. I wanted to share the `aichat` command, which I use heavily to continue work **without having to compact**.

Here is the thought process underlying this tool -- I think knowing the motivation helps in understanding what the `aichat` command-group does and why it might be useful to you.

### Compaction is lossy: clone the session and truncate long messages

Session compaction is **lossy:** there are very often situations where compaction loses important details, so I wanted to find ways to continue my work without compaction. A typical scenario is this -- I am at 90% context usage, and I wish I could go on a bit longer to finish the current work-phase. So I thought,

> I wish I could **truncate** some long messages (e.g. tool calls/results for file writes/reads, long assistant responses, etc.) and clear out some space to continue my work.

This led to the [`aichat trim`](https://github.com/pchalasani/claude-code-tools#three-resume-strategies) utility. It provides two variants:

- a "blind" [`trim`](https://github.com/pchalasani/claude-code-tools#three-resume-strategies) mode that truncates all messages longer than a threshold (default 500 chars), and optionally all-but-recent assistant messages -- all user-configurable. This can free up 40-60% context, depending on what's been going on in the session.

- a [`smart-trim`](https://github.com/pchalasani/claude-code-tools#three-resume-strategies) mode that uses a headless Claude/Codex agent to determine which messages can be safely truncated in order to continue the current work. The precise truncation criteria can be customized (e.g. the user may want to continue some prior work rather than the current task).

Both of these modes *clone* the current session before truncation, and inject two types of [*lineage*](https://github.com/pchalasani/claude-code-tools#lineage-nothing-is-lost):
- *Session-lineage* is injected into the first user message: a chronological listing of the sessions from which the current session was derived. This allows the (sub-)agent to extract needed context from ancestor sessions, either when prompted by the user, or on its own initiative.
- Each truncated message also carries a pointer to the specific message index in the parent session, so full details can always be looked up if needed.

### A cleaner alternative: start a new session with lineage and a context summary

Session trimming can be a quick way to clear out context in order to continue the current task a bit longer, but after a couple of trims it does not yield as much benefit. The lineage-injection, though, led to a different idea for avoiding compaction:

> Create a fresh session, inject parent-session lineage into the first user message, along with instructions to extract (using sub-agents if available) context of the latest task from the parent session -- or skip context extraction and leave it to the user to extract context once the session starts.

This is the idea behind the [`aichat rollover`](https://github.com/pchalasani/claude-code-tools#three-resume-strategies) functionality, which is the variant I use most frequently; I use it instead of first trimming a session. I usually choose to skip the summarization (this is the `quick` rollover option in the TUI) so that the new session starts quickly and I can instruct Claude-Code/Codex-CLI to extract the needed context (usually from the latest chat session shown in the lineage), as shown in the demo video below.

### A hook to simplify continuing work from a session

I wanted to make it seamless to pick any of the above task-continuation modes from inside a Claude Code session, so I set up a `UserPromptSubmit` [hook](https://github.com/pchalasani/claude-code-tools#resume-options) (via the `aichat` plugin) that is triggered when the user types `>resume` (or `>continue` or `>handoff`). When I am close to full context usage, I type `>resume`; the hook script copies the current session id into the clipboard and shows instructions asking the user to run `aichat resume <pasted-session-id>`, which launches a TUI offering a choice among the above [session resumption modes](https://github.com/pchalasani/claude-code-tools#three-resume-strategies).

**Demo video (resume/rollover flow):**

https://github.com/user-attachments/assets/310dfa5b-a13b-4a2b-aef8-f73954ef8fe9

### Fast full-text session search for humans/agents to find prior work context

The above session resumption methods are useful for continuing your work from the *current* session, but often you want to continue work that was done in an *older* Claude-Code/Codex-CLI session. This is why I added the following:

> Super-fast Rust/Tantivy-based [full-text search](https://github.com/pchalasani/claude-code-tools#aichat-search--find-and-select-sessions) of all sessions across Claude-Code and Codex-CLI, with a pleasant self-explanatory TUI for humans, and a CLI mode for agents to find past work. (The Rust/Tantivy-based search and TUI were inspired by the excellent TUI in the [zippoxer/recall](https://github.com/zippoxer/recall) repo.)

Users can launch the search TUI using [`aichat search ...`](https://github.com/pchalasani/claude-code-tools#aichat-search--find-and-select-sessions), and (sub-)[agents can run](https://github.com/pchalasani/claude-code-tools#agent-access-to-history-the-session-searcher-sub-agent) `aichat search ... --json` to get results in JSONL format for quick analysis and filtering with `jq`, which CLI agents are of course great at using. There is a corresponding *skill* called `session-search` and a *sub-agent* called `session-searcher`, both available via the `aichat` [plugin](https://github.com/pchalasani/claude-code-tools#claude-code-plugins). For example, in Claude Code, users can recover context of some older work by simply saying something like:

> Use your session-searcher sub-agent to recover the context of how we worked on connecting the Rust search TUI with the node-based Resume Action menus.
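For direct CLI use, the JSONL output composes with standard tools. A rough sketch (the query and the `jq` field names below are illustrative assumptions, not the tool's documented output schema):

```bash
# Illustrative sketch: full-text search across all sessions, then filter
# the JSONL hits with jq and print one line per match. The field names
# (.session_id, .title) are assumptions, not the documented schema.
aichat search "resume action menu" --json \
  | jq -r '"\(.session_id)  \(.title)"' \
  | head -20
```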
**Demo GIF (search TUI):**



---

**Links:**
- GitHub repo: https://github.com/pchalasani/claude-code-tools

**Install:**
```bash
# Step 1: Python package
uv tool install claude-code-tools

# Step 2: Rust search engine (pick one)
brew install pchalasani/tap/aichat-search   # Homebrew
cargo install aichat-search                 # Cargo
# Or download the binary from Releases

# Step 3: Claude Code plugins (for the >resume hook, session-searcher agent, etc.)
# From terminal:
claude plugin marketplace add pchalasani/claude-code-tools
claude plugin install "aichat@cctools-plugins"
# Or from within Claude Code:
/plugin marketplace add pchalasani/claude-code-tools
/plugin install aichat@cctools-plugins
```