claude-code-tools 1.0.6__py3-none-any.whl → 1.4.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. claude_code_tools/__init__.py +1 -1
  2. claude_code_tools/action_rpc.py +16 -10
  3. claude_code_tools/aichat.py +793 -51
  4. claude_code_tools/claude_continue.py +4 -0
  5. claude_code_tools/codex_continue.py +48 -0
  6. claude_code_tools/export_session.py +94 -11
  7. claude_code_tools/find_claude_session.py +36 -12
  8. claude_code_tools/find_codex_session.py +33 -18
  9. claude_code_tools/find_session.py +30 -16
  10. claude_code_tools/gdoc2md.py +220 -0
  11. claude_code_tools/md2gdoc.py +549 -0
  12. claude_code_tools/search_index.py +119 -15
  13. claude_code_tools/session_menu_cli.py +1 -1
  14. claude_code_tools/session_utils.py +3 -3
  15. claude_code_tools/smart_trim.py +18 -8
  16. claude_code_tools/smart_trim_core.py +4 -2
  17. claude_code_tools/tmux_cli_controller.py +35 -25
  18. claude_code_tools/trim_session.py +28 -2
  19. claude_code_tools-1.4.6.dist-info/METADATA +1112 -0
  20. {claude_code_tools-1.0.6.dist-info → claude_code_tools-1.4.6.dist-info}/RECORD +31 -24
  21. {claude_code_tools-1.0.6.dist-info → claude_code_tools-1.4.6.dist-info}/entry_points.txt +2 -0
  22. docs/linked-in-20260102.md +32 -0
  23. docs/local-llm-setup.md +286 -0
  24. docs/reddit-aichat-resume-v2.md +80 -0
  25. docs/reddit-aichat-resume.md +29 -0
  26. docs/reddit-aichat.md +79 -0
  27. docs/rollover-details.md +67 -0
  28. node_ui/action_config.js +3 -3
  29. node_ui/menu.js +67 -113
  30. claude_code_tools/session_tui.py +0 -516
  31. claude_code_tools-1.0.6.dist-info/METADATA +0 -685
  32. {claude_code_tools-1.0.6.dist-info → claude_code_tools-1.4.6.dist-info}/WHEEL +0 -0
  33. {claude_code_tools-1.0.6.dist-info → claude_code_tools-1.4.6.dist-info}/licenses/LICENSE +0 -0
{claude_code_tools-1.0.6.dist-info → claude_code_tools-1.4.6.dist-info}/RECORD RENAMED
@@ -1,8 +1,8 @@
- claude_code_tools/__init__.py,sha256=mDa1W6Lim5KxHaei4u-MELWo5jxU5joDYzJeLMiZ_1A,89
- claude_code_tools/action_rpc.py,sha256=HO_FWdIon9-xWWKIjJYpsqt-vTKpq0t0CCxo0TNULTI,16653
- claude_code_tools/aichat.py,sha256=u6zHDwGg8s27ktWoTYeTrnVy7OA-UBOFzX4fPo0YCWs,66127
- claude_code_tools/claude_continue.py,sha256=TsfGtbQ6QlYvk7fWkOSkVzEhPxzTQM8Igle_AHszaS0,11454
- claude_code_tools/codex_continue.py,sha256=LO3ocWdw3MVct_GpEvHvN3zsmeD8RRB0P4lYtNRJ3GM,9530
+ claude_code_tools/__init__.py,sha256=lJ_ty7aXHz-OaAsz_ikznt_x0M6lgCfbFZqflfmM_-U,89
+ claude_code_tools/action_rpc.py,sha256=6NfWUjt22qqkDKq7ftNH2V9B8VSQycbFx_jDA0UrlJQ,17016
+ claude_code_tools/aichat.py,sha256=s8pfRTmXR55J4yNElJdztXnfjFifaJbjrBtVo6NUe-s,95346
+ claude_code_tools/claude_continue.py,sha256=GwgKGbTpj4ES615yCadjz0Q6wOb69T76rf_-wPnHix8,11727
+ claude_code_tools/codex_continue.py,sha256=_iKNTKigtz6AMHXGfQGhc2JmQaoiktnnWmYXtT2pwsE,11571
  claude_code_tools/config.py,sha256=9v8Xe0f0vqGPsTcFJvmcD0AhmyqiP_4_1DzirLoeqt0,2542
  claude_code_tools/delete_session.py,sha256=noKkwzu6DVYCT6G5icI0PgSwkAKG7Mx7nPthEL-x__U,5819
  claude_code_tools/dotenv_vault.py,sha256=KPI9NDFu5HE6FfhQUYw6RhdR-miN0ScJHsBg0OVG61k,9617
@@ -10,24 +10,25 @@ claude_code_tools/env_safe.py,sha256=TSSkOjEpzBwNgbeSR-0tR1-pAW_qmbZNmn3fiAsHJ4w
  claude_code_tools/export_all.py,sha256=GOWj_5IZrrngeRUsDxbE48cOOZIxo7drZJWZh9QiuHg,9848
  claude_code_tools/export_claude_session.py,sha256=rEJLMcaCMuWbWxs1rfd0LuT6gSmjEsej6nueGrH1ujo,16256
  claude_code_tools/export_codex_session.py,sha256=V2deRcI6FMCEWYAEvvL74XXuW798B1esgTs6PH3_-7E,15650
- claude_code_tools/export_session.py,sha256=VkIyT_O9G8nFWCZmxDkdISchmkE03zmSUrrrk18oA_w,18888
- claude_code_tools/find_claude_session.py,sha256=R-43ysiM9v263Z743PkmleClS2btZpTaMwUIu2PHwXg,69557
- claude_code_tools/find_codex_session.py,sha256=6tpERakUzczQvIWZMrzrI3WAtU6CnvXdjsc27pImKiw,47388
+ claude_code_tools/export_session.py,sha256=I2ncN3lbbrfc8M-3URQVimyM2fAbcu4BXITtCdNfL6E,21860
+ claude_code_tools/find_claude_session.py,sha256=QRv6u4T5X9c9QLj-1X8-uYj3wul5YsbFI5LgUUTFMW0,70559
+ claude_code_tools/find_codex_session.py,sha256=dpZVek3cJ-se4JMwzGEDkZ50_XvtK6dfP36mo8KDHnI,48177
  claude_code_tools/find_original_session.py,sha256=JlHeati0X1KkPkmz4knvdfCqRHjuJRLfRRcn3ZsuG8o,4120
- claude_code_tools/find_session.py,sha256=agFcXANkFsBXO-B9PEnzvk4dRfOLkvdvLQMk-biZgJI,38434
+ claude_code_tools/find_session.py,sha256=bc86c4dYUty7x6PfWXCQoBrDNYzuDBb0wk3proFp2LI,38970
  claude_code_tools/find_trimmed_sessions.py,sha256=JvMSetHD4DgXzKDFaZlAndBT_dYaw_lIT02cta55q3I,7435
+ claude_code_tools/gdoc2md.py,sha256=J83CZJomHquOBIl15fISqtDyGsmkqqMuRY-nN7-7K1I,6346
+ claude_code_tools/md2gdoc.py,sha256=sA6gU2QsWanJpAwfSC6HnSPQuSywv0xUopXSvbRUX_o,17945
  claude_code_tools/node_menu_ui.py,sha256=CQ6PxxNQ5jbLRLYESJ-klLSxSIIuLegU8s-Sj5yRl8Q,12621
- claude_code_tools/search_index.py,sha256=oGChd5dOVk5PI4kZSeR4KxvlLrQhKEhA-cDeXXQxieM,45329
+ claude_code_tools/search_index.py,sha256=_ORSD2E6PF-Gjtzrnvp03KyfGueO5FA3WCzTbg7n208,50557
  claude_code_tools/session_lineage.py,sha256=BYKpAolPGLJUv97-xMXvNFMzgauUVNAsRx8Shw0X_hk,8430
  claude_code_tools/session_menu.py,sha256=5M1AlqhmCWly3r3P1u-GhxWB0_rbGKsKSlIPEgTaN9w,6095
- claude_code_tools/session_menu_cli.py,sha256=D-WfEUAYBTJdzkvP4xpfGEErVDFwx86GJYTdg1IM9zs,15419
- claude_code_tools/session_tui.py,sha256=h2g2QvLL7AAOW-MJgr_Kazasp46_m47Y_0R3lSN_uP8,17080
- claude_code_tools/session_utils.py,sha256=FHNTcivMv3iTfFcpi-IWf-pRq5lA_zC6foRXXCPafjk,44575
- claude_code_tools/smart_trim.py,sha256=kRUhPPJA5fqEBhL_CkJ7ZVuzIN4TDg54-T48PhBZQYE,15847
- claude_code_tools/smart_trim_core.py,sha256=hhTid3WxagxtLtGrFPYbqycu-UD6aqeZyludnf4vhFQ,23443
- claude_code_tools/tmux_cli_controller.py,sha256=47G9sxEOf68-cBkk_y3iWSKnxqgWoiA_L3OaqkJKOlA,34916
+ claude_code_tools/session_menu_cli.py,sha256=SnCdm1xyJQAC0ogZ5-PRc8SkAZVKHXYu6mtc0Lp_las,15426
+ claude_code_tools/session_utils.py,sha256=s8_hTYfVqg7dcUjwaZJyDAQYNKca-L8VCQrXWNOVXgM,44739
+ claude_code_tools/smart_trim.py,sha256=A6PVtBbRA1Uq4ic_co4qSsULNVDp8DgdSLKaP0nDvV8,16385
+ claude_code_tools/smart_trim_core.py,sha256=t68mw3qaQFmOPSodcyOX7SR81BJu-WwrkItPNHbob2A,23580
+ claude_code_tools/tmux_cli_controller.py,sha256=8pXNKazpEMW0XKy4ohYVdEty3VTxxEvL2f6GkX33qZ4,35524
  claude_code_tools/tmux_remote_controller.py,sha256=eY1ouLtUzJ40Ik4nqUBvc3Gl1Rx0_L4TFW4j708lgvI,9942
- claude_code_tools/trim_session.py,sha256=gFq-Bspsn0vRQA1h9e_JkI87PbslybcMQqRbdjsuzVA,26459
+ claude_code_tools/trim_session.py,sha256=7x2GtAxoI5H9ta8pomsa02k7WOFr1ra9FhHF5hS__do,27710
  claude_code_tools/trim_session_claude.py,sha256=CtGelBtcKi5txpkkQoupOLSOyPoViAUlv_fjTLULNs8,12272
  claude_code_tools/trim_session_codex.py,sha256=CnrgQzoqL9GeI9xRTmGfmY4x9wft6eChfSG6pFf4diY,12249
  docs/cc-codex-instructions.md,sha256=5E9QotkrcVYIE5VrvJGi-sg7tdyITDrsbhaqBKr4MUk,1109
@@ -35,12 +36,18 @@ docs/claude-code-chutes.md,sha256=jCnYAAHZm32NGHE0CzGGl3vpO_zlF_xdmr23YxuCjPg,80
  docs/claude-code-tmux-tutorials.md,sha256=S-9U3a1AaPEBPo3oKpWuyOfKK7yPFOIu21P_LDfGUJk,7558
  docs/dot-zshrc.md,sha256=DC2fOiGrUlIzol6N_47CW53a4BsnMEvCnhlRRVxFCTc,7160
  docs/find-claude-session.md,sha256=fACbQP0Bj5jqIpNWk0lGDOQQaji-K9Va3gUv2RA47VQ,4284
+ docs/linked-in-20260102.md,sha256=wCihbQGGqS-GpQ7z9-q6UObiJBJ8_VfbUufXTvqB6hY,1159
  docs/lmsh.md,sha256=Kf5tKt1lh7eDV-B6mrMi2hsjUMZv1EGfkrsNS29HYBA,2226
+ docs/local-llm-setup.md,sha256=JnMF4m1e0s8DZxfB-8S3Y20W74KBMm2RXwBjTK0o27U,7596
+ docs/reddit-aichat-resume-v2.md,sha256=Rpq4E-tMDpgjWiSfb-jS50AeUxgdnOJIwDHs7rdLTZw,2980
+ docs/reddit-aichat-resume.md,sha256=9Q9Q4Qrp3qSV6z1-qBq7lLAdTX2AvE5df3d0gbO81iI,1104
+ docs/reddit-aichat.md,sha256=QfBk9jZn_2c6qjftHcC38ypcEHz68e0YgXMz_FApExg,7117
  docs/reddit-post.md,sha256=ZA7kPoJNi06t6F9JQMBiIOv039ADC9lM8YXFt8UA_Jg,2345
+ docs/rollover-details.md,sha256=Cf7POkMTv-G8WzEhSEHvn8MiJcsWn-pbXxU3QMWG5-c,5933
  docs/tmux-cli-instructions.md,sha256=hKGOdaPdBlb5XFzHfi0Mm7CVlysBuJUAfop3GHreyuw,5008
  docs/vault-documentation.md,sha256=5XzNpHyhGU38JU2hKEWEL1gdPq3rC2zBg8yotK4eNF4,3600
- node_ui/action_config.js,sha256=bBWObjk2Eni5DZD_I0hT4jitMHvygq1awjeE4jsDrMQ,2105
- node_ui/menu.js,sha256=PomYgT_SPRo-GVlHdEJonT-o_WDWx2-Gjs-Ewm709ns,79398
+ node_ui/action_config.js,sha256=NL9rStyaqrfZAcvN-yb5GJdKXhDtnl9eFjA5nevlIMw,2114
+ node_ui/menu.js,sha256=wy8-BnlCI_kNjQ8aG4cSF69YWnft55t9SKDG4mij9Es,77892
  node_ui/package.json,sha256=1XWJ4nNQsrF3B5dgpA7Q74N0UjzkQHOyVzJqDBVYGRg,436
  node_ui/node_modules/.package-lock.json,sha256=y7_WLVliP_6WrPjOCY36dgJNjJXYdtYabUTEeuSnTfU,25226
  node_ui/node_modules/.bin/is-in-ci,sha256=rDmm4QOiAxkC6Qu_oHH8ojMzWqEnIQUffof6t1I0zIg,120
@@ -1800,8 +1807,8 @@ node_ui/node_modules/yoga-wasm-web/dist/wrapAsm-f766f97f.js,sha256=-82_XGQhP7kkD
  node_ui/node_modules/yoga-wasm-web/dist/wrapAsm.d.ts,sha256=2l7bSIMruV8KTC2I4XKJBDQx8nsgwVR43q9rvkClpUE,4877
  node_ui/node_modules/yoga-wasm-web/dist/yoga.wasm,sha256=R_tPgdJ0kyGEzRnHXtNPkC0T8FGTAVkHiaN_cHeXfic,88658
  node_ui/node_modules/yoga-wasm-web/dist/generated/YGEnums.d.ts,sha256=kE3_7yS8iqNd5sMfXtD9B3Tq_JcJkVOQkdwxhch1pI4,8893
- claude_code_tools-1.0.6.dist-info/METADATA,sha256=zj6WULGphiZnBLoZcrKrbVBDLh5ms3dH1as-xMfftDE,24262
- claude_code_tools-1.0.6.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
- claude_code_tools-1.0.6.dist-info/entry_points.txt,sha256=vpRVF--rBPKRugPqcMWuNqZBZsHvgB39vxVq7snSUXA,198
- claude_code_tools-1.0.6.dist-info/licenses/LICENSE,sha256=BBQdOBLdFB3CEPmb3pqxeOThaFCIdsiLzmDANsCHhoM,1073
- claude_code_tools-1.0.6.dist-info/RECORD,,
+ claude_code_tools-1.4.6.dist-info/METADATA,sha256=O2zrPX_UYG6uRdKknR5S0P9ACZJEvOlzyxaCuy89_NU,42998
+ claude_code_tools-1.4.6.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ claude_code_tools-1.4.6.dist-info/entry_points.txt,sha256=-hVowB6m8tgqV_dCyzCLbt7vthEDiBxodGMqMvD4F2M,280
+ claude_code_tools-1.4.6.dist-info/licenses/LICENSE,sha256=BBQdOBLdFB3CEPmb3pqxeOThaFCIdsiLzmDANsCHhoM,1073
+ claude_code_tools-1.4.6.dist-info/RECORD,,
{claude_code_tools-1.0.6.dist-info → claude_code_tools-1.4.6.dist-info}/entry_points.txt RENAMED
@@ -1,5 +1,7 @@
  [console_scripts]
  aichat = claude_code_tools.aichat:main
  env-safe = claude_code_tools.env_safe:main
+ gdoc2md = claude_code_tools.gdoc2md:main
+ md2gdoc = claude_code_tools.md2gdoc:main
  tmux-cli = claude_code_tools.tmux_cli_controller:main
  vault = claude_code_tools.dotenv_vault:main
docs/linked-in-20260102.md ADDED
@@ -0,0 +1,32 @@
+ The top pain for users of Claude Code and similar CLI agents is...
+
+ Session continuity: what do you do when you've filled your context window?
+
+ Compaction? You lose valuable detail that you then have to explain all over again.
+
+ Here's what I do instead to recover the precise, full context I need to continue my work:
+
+ In my Claude Code session, I type ">resume" -- this triggers a hook that copies the current session ID to the clipboard.
+
+ Then I run:
+
+ aichat resume <paste-session-id>
+
+ This launches a TUI that shows a few ways to continue my work. I select
+ the "rollover" option: it creates a new session and injects the session log file
+ path into the first user message.
+
+ Then I prompt it to retrieve the exact context I need, or use the slash command /aichat:recover-context.
+
+ This works with Codex-CLI as well, and you can even do cross-agent handoff: start in
+ Claude-Code, continue with Codex-CLI, or vice versa.
+
+ The aichat command is one of several productivity tools in my claude-code-tools repo.
+ If you'd like to try them out, see the repo for instructions on how to install the suite of tools:
+
+ https://github.com/pchalasani/claude-code-tools
docs/local-llm-setup.md ADDED
@@ -0,0 +1,286 @@
+ # Running Claude Code and Codex with Local LLMs
+
+ This guide covers running **Claude Code** and **OpenAI Codex CLI** with local
+ models using [llama.cpp](https://github.com/ggml-org/llama.cpp)'s server:
+
+ - **Claude Code** uses the Anthropic-compatible `/v1/messages` endpoint
+ - **Codex CLI** uses the OpenAI-compatible `/v1/chat/completions` endpoint
+
+ ## Table of Contents
+
+ - [When to Use Local Models](#when-to-use-local-models)
+ - [How It Works](#how-it-works)
+ - [Prerequisites](#prerequisites)
+ - [Shell Function for Claude Code](#shell-function-for-claude-code)
+ - [Model Commands](#model-commands)
+ - [Quick Reference](#quick-reference)
+ - [Usage](#usage)
+ - [Troubleshooting](#troubleshooting)
+ - [Using Codex CLI with Local LLMs](#using-codex-cli-with-local-llms)
+
+ ## When to Use Local Models
+
+ These local models (20B-80B parameters) aren't suited for complex coding tasks
+ where frontier models excel, but they're useful for non-coding tasks like
+ summarization, answering questions about your private notes, working with
+ sensitive documents that can't be sent to external APIs, or high-volume tasks
+ where API costs would add up.
+
+ ## How It Works
+
+ 1. **Start llama-server** with a model (see [Model Commands](#model-commands)
+    below) - this makes the model available at a local endpoint (e.g., port 8123)
+ 2. **Run Claude Code** pointing to that endpoint using the `cclocal` helper
+    function
+
+ ## Prerequisites
+
+ - [llama.cpp](https://github.com/ggml-org/llama.cpp) built and `llama-server`
+   available in your PATH
+ - Sufficient RAM (64GB+ recommended for 30B+ models)
+ - Models will be downloaded automatically from HuggingFace on first run
+
+ ## Shell Function for Claude Code
+
+ At its simplest, connecting Claude Code to a local model is just one line:
+
+ ```bash
+ ANTHROPIC_BASE_URL=http://127.0.0.1:8123 claude
+ ```
+
+ The helper function below is just a convenience wrapper for this. Add it to your
+ `~/.zshrc` or `~/.bashrc`:
+
+ ```bash
+ cclocal() {
+   local port=8123
+   if [[ "$1" =~ ^[0-9]+$ ]]; then
+     port="$1"
+     shift
+   fi
+   (
+     export ANTHROPIC_BASE_URL="http://127.0.0.1:${port}"
+     claude "$@"
+   )
+ }
+ ```
+
+ Usage:
+
+ ```bash
+ cclocal                       # Connect to localhost:8123
+ cclocal 8124                  # Connect to localhost:8124
+ cclocal 8124 --resume abc123  # With additional claude args
+ ```
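+
+ Before launching, you can sanity-check that the server is actually up. A
+ minimal sketch, assuming your llama-server build exposes the standard
+ `/health` endpoint (recent builds do):
+
+ ```bash
+ # Returns HTTP 200 with a small JSON body once the model has loaded
+ curl -sf http://127.0.0.1:8123/health || echo "llama-server not reachable"
+ ```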
+
+ > [!IMPORTANT]
+ > Add this to your `~/.claude/settings.json` to disable telemetry:
+ >
+ > ```json
+ > {
+ >   // ... other settings ...
+ >   "env": {
+ >     "CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC": "1"
+ >   }
+ >   // ... other settings ...
+ > }
+ > ```
+ >
+ > Without this, Claude Code sends telemetry requests to your local server,
+ > which returns 404s and retries aggressively, causing ephemeral port exhaustion
+ > on macOS and system-wide network failures.
+
+ ## Model Commands
+
+ ### GPT-OSS-20B (Fast, Good Baseline)
+
+ Uses the built-in preset with optimized settings:
+
+ ```bash
+ llama-server --gpt-oss-20b-default --port 8123
+ ```
+
+ **Performance:** ~17-38 tok/s generation on M1 Max
+
+ ### Qwen3-30B-A3B
+
+ ```bash
+ llama-server -hf unsloth/Qwen3-30B-A3B-Instruct-2507-GGUF \
+   --port 8124 \
+   -c 131072 \
+   -b 32768 \
+   -ub 1024 \
+   --parallel 1 \
+   --jinja \
+   --chat-template-file ~/Git/llama.cpp/models/templates/Qwen3-Coder.jinja
+ ```
+
+ **Performance:** ~15-27 tok/s generation on M1 Max
+
+ ### Qwen3-Coder-30B-A3B (Recommended)
+
+ Uses the built-in preset with Q8_0 quantization (higher quality):
+
+ ```bash
+ llama-server --fim-qwen-30b-default --port 8127
+ ```
+
+ Downloads `ggml-org/Qwen3-Coder-30B-A3B-Instruct-Q8_0-GGUF` automatically on first
+ run.
+
+ ### Qwen3-Next-80B-A3B (Better Long Context)
+
+ Newer SOTA model. Slower generation, but performance doesn't degrade as much
+ with long contexts:
+
+ ```bash
+ llama-server -hf unsloth/Qwen3-Next-80B-A3B-Instruct-GGUF:Q4_K_XL \
+   --port 8126 \
+   -c 131072 \
+   -b 32768 \
+   -ub 1024 \
+   --parallel 1 \
+   --jinja
+ ```
+
+ **Performance:** ~5x slower generation than Qwen3-30B-A3B, but better on long
+ contexts
+
+ ### Nemotron-3-Nano-30B-A3B (NVIDIA Reasoning Model)
+
+ ```bash
+ llama-server -hf unsloth/Nemotron-3-Nano-30B-A3B-GGUF:Q4_K_XL \
+   --port 8125 \
+   -c 131072 \
+   -b 32768 \
+   -ub 1024 \
+   --parallel 1 \
+   --jinja \
+   --chat-template-file ~/Git/llama.cpp/models/templates/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16.jinja \
+   --temp 0.6 \
+   --top-p 0.95 \
+   --min-p 0.01
+ ```
+
+ **Recommended settings (from NVIDIA):**
+
+ - Tool calling: `temp=0.6`, `top_p=0.95`
+ - Reasoning tasks: `temp=1.0`, `top_p=1.0`
+
+ ## Quick Reference
+
+ | Model | Port | Command |
+ |-------|------|---------|
+ | GPT-OSS-20B | 8123 | `llama-server --gpt-oss-20b-default --port 8123` |
+ | Qwen3-30B-A3B | 8124 | See full command above |
+ | Nemotron-3-Nano | 8125 | See full command above |
+ | Qwen3-Next-80B-A3B | 8126 | See full command above |
+ | Qwen3-Coder-30B | 8127 | `llama-server --fim-qwen-30b-default --port 8127` |
+
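+ If you juggle these ports often, a small dispatcher keeps the table in one
+ place. A sketch (the `llmserve` name is made up; the two commands are the
+ preset ones from above):
+
+ ```bash
+ llmserve() {
+   case "$1" in
+     gpt-oss)     llama-server --gpt-oss-20b-default --port 8123 ;;
+     qwen-coder)  llama-server --fim-qwen-30b-default --port 8127 ;;
+     *)           echo "usage: llmserve {gpt-oss|qwen-coder}" >&2; return 1 ;;
+   esac
+ }
+ ```
+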
+ ## Usage
+
+ 1. Start the llama-server with your chosen model (the first request will be
+    slow while the model loads)
+ 2. In another terminal, run `cclocal <port>` to start Claude Code
+ 3. Use Claude Code as normal
+
+ ## Notes
+
+ - The first request is slow while the model loads into memory (~10-30 seconds,
+   depending on model size)
+ - Subsequent requests are fast
+ - The `/v1/messages` endpoint in llama-server handles Anthropic API translation
+   automatically (see the smoke test below)
+ - Each model's chat template handles the model-specific prompt formatting
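+
+ To verify both translation layers directly, you can hit the two endpoints with
+ curl. A minimal smoke test, assuming the server is on port 8123 (the model
+ name is immaterial for llama-server):
+
+ ```bash
+ # Anthropic-style request (what Claude Code sends)
+ curl -s http://127.0.0.1:8123/v1/messages \
+   -H 'content-type: application/json' \
+   -d '{"model": "local", "max_tokens": 64,
+        "messages": [{"role": "user", "content": "Say hi"}]}'
+
+ # OpenAI-style request (what Codex CLI sends)
+ curl -s http://127.0.0.1:8123/v1/chat/completions \
+   -H 'content-type: application/json' \
+   -d '{"model": "local",
+        "messages": [{"role": "user", "content": "Say hi"}]}'
+ ```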
+
+ ## Troubleshooting
+
+ **"failed to find a memory slot" errors:**
+
+ Increase context size (`-c`) or reduce parallel slots (`--parallel 1`). Claude
+ Code sends large system prompts (~20k+ tokens).
+
+ **Slow generation:**
+
+ - Increase batch size: `-b 32768`
+ - Reduce parallel slots: `--parallel 1`
+ - Check if the model is fully loaded in RAM/VRAM
+
+ **Model not responding correctly:**
+
+ Ensure you're using the correct chat template for your model. The template
+ handles formatting the Anthropic API messages into the model's expected format.
+
+ ---
+
+ # Using Codex CLI with Local LLMs
+
+ [OpenAI Codex CLI](https://github.com/openai/codex) can also use local models via
+ llama-server's OpenAI-compatible `/v1/chat/completions` endpoint.
+
+ ## Configuration
+
+ Add a local provider to `~/.codex/config.toml`:
+
+ ```toml
+ [model_providers.llama-local]
+ name = "Local LLM via llama.cpp"
+ base_url = "http://localhost:8123/v1"
+ wire_api = "chat"
+ ```
+
+ For multiple ports (different models), define multiple providers:
+
+ ```toml
+ [model_providers.llama-8123]
+ name = "Local LLM port 8123"
+ base_url = "http://localhost:8123/v1"
+ wire_api = "chat"
+
+ [model_providers.llama-8124]
+ name = "Local LLM port 8124"
+ base_url = "http://localhost:8124/v1"
+ wire_api = "chat"
+ ```
+
+ ## Switching Models at Command Line
+
+ Use the `--model` flag and `-c` (config) flag to switch models without editing
+ the TOML file:
+
+ ```bash
+ # Use GPT-OSS-20B on port 8123 (model name is immaterial)
+ codex --model gpt-oss-20b -c model_provider=llama-8123
+
+ # Use Qwen3-30B on port 8124 (model name is immaterial)
+ codex --model qwen3-30b -c model_provider=llama-8124
+ ```
+
+ You can also override nested config values with dots:
+
+ ```bash
+ codex --model gpt-oss-20b \
+   -c model_provider=llama-local \
+   -c model_providers.llama-local.base_url="http://localhost:8124/v1"
+ ```
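+
+ Mirroring `cclocal`, you can wrap this in a shell function. A sketch (the
+ `cxlocal` name is made up; it only combines the flags shown above):
+
+ ```bash
+ cxlocal() {
+   local port=8123
+   if [[ "$1" =~ ^[0-9]+$ ]]; then
+     port="$1"
+     shift
+   fi
+   # Point the llama-local provider at the chosen port, pass remaining args through
+   codex -c model_provider=llama-local \
+     -c model_providers.llama-local.base_url="http://localhost:${port}/v1" "$@"
+ }
+ ```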
+
+ ## Running llama-server for Codex
+
+ Use the same llama-server commands as for Claude Code:
+
+ ```bash
+ # GPT-OSS-20B
+ llama-server --gpt-oss-20b-default --port 8123
+
+ # Qwen3-Coder-30B
+ llama-server --fim-qwen-30b-default --port 8127
+ ```
+
+ ## Notes
+
+ - Codex uses the `/v1/chat/completions` endpoint (OpenAI format), not
+   `/v1/messages` (Anthropic format)
+ - Both endpoints are served by llama-server simultaneously
+ - The same model can serve both Claude Code and Codex at the same time
docs/reddit-aichat-resume-v2.md ADDED
@@ -0,0 +1,80 @@
+ # Reddit Post: aichat resume (v2)
+
+ **Title:** I don't compact my Claude Code sessions. I chain them.
+
+ ---
+
+ Compaction throws away context. I'd rather keep everything and let the
+ agent retrieve it when needed.
+
+ **Core principles:**
+
+ - **Lossless** — compaction summarizes and discards; I want nothing lost
+ - **Searchable** — sessions must be full-text searchable, fast (Claude Code's
+   built-in search only matches titles)
+ - **Fast** — 50+ sessions in a lineage, thousands of lines each — grep doesn't
+   scale, so I built a Tantivy-indexed Rust CLI that returns results in ms
+ - **Portable** — hand off between agents: start in Claude Code, continue in
+   Codex CLI, or vice versa
+
+ **The problem with compaction:**
+
+ When you hit context limits, Claude Code's default is to compact —
+ summarize and discard. But summaries lose nuance. That debugging session
+ where you finally figured out the race condition? Gone. The architectural
+ decision you made three hours ago? Flattened into a sentence.
+
+ **My approach: session chaining**
+
+ Instead of compacting, I chain sessions together:
+
+ 1. When context fills up, type `>resume`
+ 2. Pick a strategy (trim, smart-trim, or rollover)
+ 3. Start fresh — but with full lineage back to every ancestor session
+
+ Nothing gets deleted. The agent traces back and pulls context on demand.
+
+ **Three resume strategies:**
+
+ | Strategy | What it does | When to use |
+ |----------|--------------|-------------|
+ | **Trim** | Truncates bloated tool outputs and early messages | Quick fix, frees 30-50% |
+ | **Smart trim** | AI decides what's safe to cut | When you want surgical precision |
+ | **Rollover** | Fresh session with lineage pointers | Clean slate, full history preserved |
+
+ **Why Rust + Tantivy?**
+
+ Session chains get long. You might have 50+ sessions in a lineage, each with
+ thousands of lines of conversation. Grepping through JSON files doesn't scale.
+ So I built `aichat-search` — a Rust CLI using Tantivy (the engine behind
+ Quickwit and other search tools). It indexes sessions on first run, then
+ returns results in milliseconds. The agent can search your entire history
+ without you waiting.
+
+ **What you get:**
+
+ - Fast full-text search across all sessions (Tantivy-indexed, not grep)
+ - `/recover-context` command — agent pulls context from parent sessions
+ - Session-searcher sub-agent — searches history without polluting your context
+ - Cross-agent handoff — start in Claude Code, continue in Codex CLI, or vice versa
+
+ **Quick demo:** [video in README]
+
+ **Install:**
+
+ ```bash
+ # Install the CLI tools
+ uv tool install claude-code-tools
+ brew install pchalasani/tap/aichat-search  # or: cargo install aichat-search
+
+ # Add the plugin
+ claude plugin marketplace add pchalasani/claude-code-tools
+ claude plugin install "aichat@cctools-plugins"
+ ```
+
+ Repo: https://github.com/pchalasani/claude-code-tools
+
+ ---
+
+ Curious how others handle context limits. Do you compact and hope for the
+ best, or have you built something similar?
docs/reddit-aichat-resume.md ADDED
@@ -0,0 +1,29 @@
+ # Reddit Post: aichat resume
+
+ **Title:** Tool for continuing Claude Code sessions when context fills up
+
+ ---
+
+ If you use Claude Code, you've hit this: context fills up mid-task, and your options are (a) lossy compaction that throws away information, or (b) starting fresh and losing the conversation history.
+
+ I built `aichat resume` to handle this. When you're running low on context:
+
+ 1. Type `>resume` in your session
+ 2. Quit Claude Code
+ 3. Run `aichat resume` (the session ID is already in your clipboard)
+ 4. Pick a strategy: trim large tool outputs, smart-trim with AI analysis, or roll over to a fresh session
+
+ The key thing: nothing gets lost. All strategies keep pointers to parent sessions, so the agent can look up prior work when needed. You get a chain of linked sessions instead of losing context.
+
+ Quick demo of the `>resume` trigger: [video in README]
+
+ Install:
+ ```
+ uv tool install claude-code-tools
+ claude plugin marketplace add pchalasani/claude-code-tools
+ claude plugin install "aichat@cctools-plugins"
+ ```
+
+ Repo: https://github.com/pchalasani/claude-code-tools
+
+ Works with Codex too. Feedback welcome.
docs/reddit-aichat.md ADDED
@@ -0,0 +1,79 @@
+ # Aichat: Session continuation without compaction, and fast full-text session search for Claude Code and Codex CLI
+
+ In the [claude-code-tools](https://github.com/pchalasani/claude-code-tools) repo,
+ I've been sharing various tools I've built to improve productivity when working
+ with Claude-Code or Codex-CLI. Here I want to share the `aichat` command, which I use heavily to continue work **without having to compact**.
+
+ Here is the thought process underlying this tool -- knowing the motivation helps explain what the `aichat` command-group does and why it might be useful to you.
+
+ ### Compaction is lossy: clone the session and truncate long messages
+
+ Session compaction is **lossy:** very often compaction loses important details, so I wanted ways to continue my work without it. A typical scenario: I am at 90% context usage, and I wish I could go on a bit longer to finish the current work-phase. So I thought,
+
+ > I wish I could **truncate** some long messages (e.g. tool calls/results for file writes/reads, long assistant responses, etc.) and clear out some space to continue my work.
+
+ This led to the [`aichat trim`](https://github.com/pchalasani/claude-code-tools#three-resume-strategies) utility. It provides two variants:
+
+ - a "blind" [`trim`](https://github.com/pchalasani/claude-code-tools#three-resume-strategies) mode that truncates all messages longer than a threshold (default 500 chars), and optionally all-but-recent assistant messages -- all user-configurable. This can free up 40-60% of context, depending on what's been going on in the session.
+
+ - a [`smart-trim`](https://github.com/pchalasani/claude-code-tools#three-resume-strategies) mode that uses a headless Claude/Codex agent to determine which messages can be safely truncated in order to continue the current work. The precise truncation criteria can be customized (e.g. the user may want to continue some prior work rather than the current task).
+
+ Both of these modes *clone* the current session before truncation, and inject two types of [*lineage*](https://github.com/pchalasani/claude-code-tools#lineage-nothing-is-lost):
+
+ - *Session-lineage* is injected into the first user message: a chronological listing of the sessions from which the current session was derived. This allows the (sub-)agent to extract needed context from ancestor sessions, either when prompted by the user or on its own initiative.
+ - Each truncated message also carries a pointer to the specific message index in the parent session, so full details can always be looked up if needed.
+
+ ### A cleaner alternative: start a new session with lineage and a context summary
+
+ Session trimming can be a quick way to clear out context in order to continue the current task a bit longer, but after a couple of trims it does not yield as much benefit. The lineage-injection led to a different idea for avoiding compaction:
+
+ > Create a fresh session, inject parent-session lineage into the first user message, along with instructions to extract (using sub-agents if available) the context of the latest task from the parent session -- or skip context extraction and leave it to the user once the session starts.
+
+ This is the idea behind the [`aichat rollover`](https://github.com/pchalasani/claude-code-tools#three-resume-strategies) functionality, the variant I use most frequently -- I reach for it instead of first trimming a session. I usually skip the summarization (the `quick` rollover option in the TUI) so that the new session starts quickly and I can instruct Claude-Code/Codex-CLI to extract the needed context (usually from the latest chat session shown in the lineage), as shown in the demo video below.
+
+ ### A hook to simplify continuing work from a session
+
+ I wanted to make it seamless to pick any of the above task continuation modes from inside a Claude Code session, so I set up a `UserPromptSubmit` [hook](https://github.com/pchalasani/claude-code-tools#resume-options) (via the `aichat` plugin) that is triggered when the user types `>resume` (or `>continue` or `>handoff`). When I am close to full context usage, I type `>resume`; the hook script copies the current session id to the clipboard and shows instructions asking the user to run `aichat resume <pasted-session-id>`. This launches a TUI offering a choice among the above [session resumption modes](https://github.com/pchalasani/claude-code-tools#three-resume-strategies).
+
+ **Demo video (resume/rollover flow):**
+
+ https://github.com/user-attachments/assets/310dfa5b-a13b-4a2b-aef8-f73954ef8fe9
+
+ ### Fast full-text session search for humans/agents to find prior work context
+
+ The above session resumption methods help you continue work from the *current* session, but often you want to continue work that was done in an *older* Claude-Code/Codex-CLI session. This is why I added:
+
+ > Super-fast Rust/Tantivy-based [full-text search](https://github.com/pchalasani/claude-code-tools#aichat-search--find-and-select-sessions) of all sessions across Claude-Code and Codex-CLI, with a pleasant self-explanatory TUI for humans, and a CLI mode for agents to find past work. (The Rust/Tantivy-based search and TUI was inspired by the excellent TUI in the [zippoxer/recall](https://github.com/zippoxer/recall) repo.)
+
+ Users can launch the search TUI using [`aichat search ...`](https://github.com/pchalasani/claude-code-tools#aichat-search--find-and-select-sessions), and (sub-)[agents can run](https://github.com/pchalasani/claude-code-tools#agent-access-to-history-the-session-searcher-sub-agent) `aichat search ... --json` to get results in JSONL format for quick analysis and filtering with `jq`, which CLI agents are of course great at using. There is a corresponding *skill* called `session-search` and a *sub-agent* called `session-searcher`, both available via the `aichat` [plugin](https://github.com/pchalasani/claude-code-tools#claude-code-plugins). For example, in Claude Code, users can recover context of some older work by simply saying something like:
+
+ > Use your session-searcher sub-agent to recover the context of how we worked on connecting the Rust search TUI with the node-based Resume Action menus.
+
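+ For scripted use, the `--json` mode pairs naturally with `jq`. An illustrative
+ query (the query string is made up; inspect the JSONL output for the actual
+ result schema):
+
+ ```bash
+ # Find past sessions mentioning the search TUI; JSONL means one result per line
+ aichat search "Rust search TUI" --json | head -5 | jq .
+ ```
+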
+ **Demo GIF (search TUI):**
+
+ ![aichat search demo](https://raw.githubusercontent.com/pchalasani/claude-code-tools/main/demos/aichat-search-asciinema.gif)
+
+ ---
+
+ **Links:**
+ - GitHub repo: https://github.com/pchalasani/claude-code-tools
+
+ **Install:**
+ ```bash
+ # Step 1: Python package
+ uv tool install claude-code-tools
+
+ # Step 2: Rust search engine (pick one)
+ brew install pchalasani/tap/aichat-search  # Homebrew
+ cargo install aichat-search               # Cargo
+ # Or download a binary from Releases
+
+ # Step 3: Claude Code plugins (for the >resume hook, session-searcher agent, etc.)
+ # From the terminal:
+ claude plugin marketplace add pchalasani/claude-code-tools
+ claude plugin install "aichat@cctools-plugins"
+ # Or from within Claude Code:
+ /plugin marketplace add pchalasani/claude-code-tools
+ /plugin install aichat@cctools-plugins
+ ```