kon-coding-agent 0.3.3__tar.gz → 0.3.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/PKG-INFO +1 -1
- kon_coding_agent-0.3.4/docs/local-models.md +80 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/pyproject.toml +1 -1
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/openai_completions.py +24 -9
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/session.py +4 -4
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/app.py +1 -1
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_openai_compat.py +108 -1
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/uv.lock +1 -1
- kon_coding_agent-0.3.3/docs/local-models.md +0 -42
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/.gitignore +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/.kon/skills/kon-release-publish/SKILL.md +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/.kon/skills/kon-tmux-test/SKILL.md +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/.kon/skills/kon-tmux-test/run-e2e-tests.sh +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/.kon/skills/kon-tmux-test/setup-test-project.sh +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/.python-version +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/AGENTS.md +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/CHANGELOG.md +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/LICENSE +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/README.md +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/docs/architecture-review.md +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/docs/images/kon-screenshot.png +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/scripts/show_themes.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/__init__.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/async_utils.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/builtin_skills/init/SKILL.md +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/config.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/context/__init__.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/context/_xml.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/context/agent_mds.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/context/git.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/context/loader.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/context/skills.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/core/__init__.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/core/compaction.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/core/handoff.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/core/types.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/defaults/__init__.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/defaults/config.toml +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/events.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/__init__.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/base.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/models.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/oauth/__init__.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/oauth/copilot.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/oauth/openai.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/__init__.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/anthropic.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/azure_ai_foundry.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/copilot.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/copilot_anthropic.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/github_copilot_headers.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/mock.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/openai_codex_responses.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/openai_compat.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/openai_responses.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/sanitize.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/loop.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/permissions.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/py.typed +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/themes.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/__init__.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/_read_image.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/_tool_utils.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/base.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/bash.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/edit.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/find.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/grep.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/read.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/web_fetch.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/web_search.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/write.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools_manager.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/turn.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/__init__.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/app_protocol.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/autocomplete.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/blocks.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/chat.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/clipboard.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/commands.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/export.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/floating_list.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/formatting.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/input.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/path_complete.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/prompt_history.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/selection_mode.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/session_ui.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/styles.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/widgets.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/update_check.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/conftest.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/context/test_agents.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/context/test_skills.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/llm/__init__.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/llm/test_anthropic_provider.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/llm/test_azure_ai_foundry_provider.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/llm/test_mock_provider.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/llm/test_openai_codex_provider_errors.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/llm/test_openai_oauth.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_agentic_loop.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_cli_auth_flags.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_cli_provider_resolution.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_compaction.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_config_binaries.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_config_error_fallback.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_config_injection.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_config_migration.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_handoff.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_handoff_link_interrupt.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_launch_warnings.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_local_auth_config.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_model_provider_resolution.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_permissions.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_session_persistence.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_session_resume.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_system_prompt.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_system_prompt_git_context.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_tools_manager.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_update_check.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_update_notice_behavior.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/tools/test_diff.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/tools/test_edit.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/tools/test_edit_display.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/tools/test_read.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/tools/test_read_image.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/tools/test_read_image_integration.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/tools/test_subprocess_cancellation.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/tools/test_write.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/ui/test_autocomplete.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/ui/test_floating_list.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/ui/test_input_handoff.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/ui/test_input_paste.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/ui/test_prompt_history.py +0 -0
- {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/ui/test_status_line.py +0 -0
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# Local Models
|
|
2
|
+
|
|
3
|
+
This document provides detailed information about running and configuring local models with Kon.
|
|
4
|
+
|
|
5
|
+
## Tested Models
|
|
6
|
+
|
|
7
|
+
> Tested on llama server build b8740
|
|
8
|
+
|
|
9
|
+
| Model | Quantization | Context Length | TPS | System Specs |
|
|
10
|
+
| ----- | -------------- | -------------- | --- | ------------ |
|
|
11
|
+
| `zai-org/glm-4.7-flash` | Q4_K_M | 65,536 | N/A | i7-14700F × 28, 64GB RAM, 24GB VRAM (RTX 3090) |
|
|
12
|
+
| `unsloth/Qwen3.5-27B-GGUF` | Q4_K_M | 32,768 | ~30 | i7-14700F × 28, 64GB RAM, 24GB VRAM (RTX 3090) |
|
|
13
|
+
| `unsloth/gemma-4-26B-A4B-it-GGUF` | UD-Q4_K_M | 32,768 | ~100 | i7-14700F × 28, 64GB RAM, 24GB VRAM (RTX 3090) |
|
|
14
|
+
|
|
15
|
+
Run Qwen3.5 27B on an RTX 3090 with a 32k context window using llama-server:
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
/path-to-llama-server/llama-server \
|
|
19
|
+
--model /path-to-model/Qwen3.5-27B-Q4_K_M.gguf \
|
|
20
|
+
--port 5000 \
|
|
21
|
+
--ctx-size 32768 \
|
|
22
|
+
--gpu-layers all \
|
|
23
|
+
--threads 8 \
|
|
24
|
+
--threads-batch 8 \
|
|
25
|
+
--batch-size 1024 \
|
|
26
|
+
--ubatch-size 512 \
|
|
27
|
+
--flash-attn on
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
On this machine, that setup generates roughly 30 tokens per second.
|
|
31
|
+
|
|
32
|
+
Then start Kon for a one-off local session:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
kon --model unsloth/Qwen3.5-27B-GGUF --provider openai \
|
|
36
|
+
--base-url http://localhost:5000/v1 \
|
|
37
|
+
--openai-compat-auth none
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Run Gemma 4 26B A4B on the same machine using llama-server:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
/path-to-llama-server/llama-server \
|
|
44
|
+
--model /path-to-model/gemma-4-26B-A4B-it-UD-Q4_K_M.gguf \
|
|
45
|
+
--port 5000 \
|
|
46
|
+
--ctx-size 32768 \
|
|
47
|
+
--gpu-layers all \
|
|
48
|
+
--threads 8 \
|
|
49
|
+
--threads-batch 8 \
|
|
50
|
+
--batch-size 1024 \
|
|
51
|
+
--ubatch-size 512 \
|
|
52
|
+
--flash-attn on \
|
|
53
|
+
--temperature 1.5
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Then start Kon against that local server:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
kon --model unsloth/gemma-4-26B-A4B-it-GGUF --provider openai \
|
|
60
|
+
--base-url http://localhost:5000/v1 \
|
|
61
|
+
--openai-compat-auth none
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
To avoid passing provider, model, and auth flags every time you start Kon, you can define your local setup in `~/.kon/config.toml`. This also allows you to tune compaction to trigger at a specific point relative to your model's context window.
|
|
65
|
+
|
|
66
|
+
If this is your default setup, put it in `~/.kon/config.toml` instead:
|
|
67
|
+
|
|
68
|
+
```toml
|
|
69
|
+
[llm]
|
|
70
|
+
default_provider = "openai"
|
|
71
|
+
default_model = "unsloth/gemma-4-26B-A4B-it-GGUF"
|
|
72
|
+
default_base_url = "http://localhost:5000/v1"
|
|
73
|
+
|
|
74
|
+
[llm.auth]
|
|
75
|
+
openai_compat = "none" # or "auto"
|
|
76
|
+
|
|
77
|
+
[compaction]
|
|
78
|
+
# Set this close to your model's context size (e.g., 30000 for a 32k window)
|
|
79
|
+
buffer_tokens = 27768 # 32768 - 5000 (safety margin)
|
|
80
|
+
```
|
{kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/openai_completions.py
RENAMED
|
@@ -30,7 +30,7 @@ from ...core.types import (
|
|
|
30
30
|
Usage,
|
|
31
31
|
UserMessage,
|
|
32
32
|
)
|
|
33
|
-
from ..base import BaseProvider, LLMStream, ProviderConfig, resolve_api_key
|
|
33
|
+
from ..base import BaseProvider, LLMStream, ProviderConfig, is_local_base_url, resolve_api_key
|
|
34
34
|
from .openai_compat import supports_developer_role
|
|
35
35
|
from .sanitize import sanitize_surrogates
|
|
36
36
|
|
|
@@ -41,12 +41,13 @@ class OpenAICompletionsCompat:
|
|
|
41
41
|
supports_developer_role: bool = True
|
|
42
42
|
supports_reasoning_effort: bool = True
|
|
43
43
|
max_tokens_field: Literal["max_tokens", "max_completion_tokens"] = "max_completion_tokens"
|
|
44
|
-
thinking_format: Literal["openai", "zai", "qwen"] = "openai"
|
|
44
|
+
thinking_format: Literal["openai", "zai", "qwen", "llama_gemma"] = "openai"
|
|
45
45
|
|
|
46
46
|
|
|
47
|
-
def _detect_compat(provider: str, base_url: str) -> OpenAICompletionsCompat:
|
|
47
|
+
def _detect_compat(provider: str, base_url: str, model: str = "") -> OpenAICompletionsCompat:
|
|
48
48
|
normalized_provider = provider.lower()
|
|
49
49
|
normalized_base_url = base_url.lower()
|
|
50
|
+
normalized_model = model.lower()
|
|
50
51
|
is_zai = (
|
|
51
52
|
normalized_provider == "zai"
|
|
52
53
|
or normalized_provider == "zhipu"
|
|
@@ -61,6 +62,13 @@ def _detect_compat(provider: str, base_url: str) -> OpenAICompletionsCompat:
|
|
|
61
62
|
thinking_format="zai",
|
|
62
63
|
)
|
|
63
64
|
|
|
65
|
+
if is_local_base_url(base_url) and "gemma" in normalized_model:
|
|
66
|
+
return OpenAICompletionsCompat(
|
|
67
|
+
supports_developer_role=supports_developer_role(provider, base_url),
|
|
68
|
+
supports_reasoning_effort=False,
|
|
69
|
+
thinking_format="llama_gemma",
|
|
70
|
+
)
|
|
71
|
+
|
|
64
72
|
return OpenAICompletionsCompat(
|
|
65
73
|
supports_developer_role=supports_developer_role(provider, base_url)
|
|
66
74
|
)
|
|
@@ -92,7 +100,9 @@ class OpenAICompletionsProvider(BaseProvider):
|
|
|
92
100
|
'or configure llm.auth.openai_compat = "auto"/"none" for local endpoints.'
|
|
93
101
|
)
|
|
94
102
|
self._client = AsyncOpenAI(api_key=api_key, base_url=config.base_url)
|
|
95
|
-
self._compat = _detect_compat(
|
|
103
|
+
self._compat = _detect_compat(
|
|
104
|
+
config.provider or "", config.base_url or "", config.model or ""
|
|
105
|
+
)
|
|
96
106
|
|
|
97
107
|
async def _stream_impl(
|
|
98
108
|
self,
|
|
@@ -138,7 +148,7 @@ class OpenAICompletionsProvider(BaseProvider):
|
|
|
138
148
|
if compat.thinking_format == "zai":
|
|
139
149
|
if thinking_level and thinking_level != "none":
|
|
140
150
|
extra_body["thinking"] = {"type": "enabled"}
|
|
141
|
-
elif compat.thinking_format == "qwen":
|
|
151
|
+
elif compat.thinking_format in {"qwen", "llama_gemma"}:
|
|
142
152
|
extra_body["enable_thinking"] = bool(thinking_level and thinking_level != "none")
|
|
143
153
|
elif (
|
|
144
154
|
self.supports_reasoning_effort
|
|
@@ -238,11 +248,16 @@ class OpenAICompletionsProvider(BaseProvider):
|
|
|
238
248
|
|
|
239
249
|
if system_prompt:
|
|
240
250
|
role = "developer" if (compat and compat.supports_developer_role) else "system"
|
|
251
|
+
prompt_content = sanitize_surrogates(system_prompt)
|
|
252
|
+
if (
|
|
253
|
+
compat
|
|
254
|
+
and compat.thinking_format == "llama_gemma"
|
|
255
|
+
and self.config.thinking_level != "none"
|
|
256
|
+
and not prompt_content.startswith("<|think|>")
|
|
257
|
+
):
|
|
258
|
+
prompt_content = "<|think|>" + prompt_content
|
|
241
259
|
result.append(
|
|
242
|
-
cast(
|
|
243
|
-
ChatCompletionMessageParam,
|
|
244
|
-
{"role": role, "content": sanitize_surrogates(system_prompt)},
|
|
245
|
-
)
|
|
260
|
+
cast(ChatCompletionMessageParam, {"role": role, "content": prompt_content})
|
|
246
261
|
)
|
|
247
262
|
|
|
248
263
|
pending_images: list[ImageContent] = []
|
|
@@ -226,7 +226,7 @@ class Session:
|
|
|
226
226
|
self._flushed = True
|
|
227
227
|
self._persisted_entries_count = len(self._entries)
|
|
228
228
|
else:
|
|
229
|
-
with open(self._session_file, "a") as f:
|
|
229
|
+
with open(self._session_file, "a", encoding="utf-8") as f:
|
|
230
230
|
f.write(entry.model_dump_json() + "\n")
|
|
231
231
|
self._persisted_entries_count += 1
|
|
232
232
|
|
|
@@ -236,7 +236,7 @@ class Session:
|
|
|
236
236
|
|
|
237
237
|
self._session_file.parent.mkdir(parents=True, exist_ok=True)
|
|
238
238
|
|
|
239
|
-
with open(self._session_file, "w") as f:
|
|
239
|
+
with open(self._session_file, "w", encoding="utf-8") as f:
|
|
240
240
|
if self._header:
|
|
241
241
|
f.write(self._header.model_dump_json() + "\n")
|
|
242
242
|
for entry in self._entries:
|
|
@@ -469,7 +469,7 @@ class Session:
|
|
|
469
469
|
header: SessionHeader | None = None
|
|
470
470
|
entries: list[SessionEntry] = []
|
|
471
471
|
|
|
472
|
-
with open(path) as f:
|
|
472
|
+
with open(path, encoding="utf-8") as f:
|
|
473
473
|
for line in f:
|
|
474
474
|
line = line.strip()
|
|
475
475
|
if not line:
|
|
@@ -605,7 +605,7 @@ class Session:
|
|
|
605
605
|
message_count = 0
|
|
606
606
|
first_message = ""
|
|
607
607
|
|
|
608
|
-
with open(path) as f:
|
|
608
|
+
with open(path, encoding="utf-8") as f:
|
|
609
609
|
for line in f:
|
|
610
610
|
line = line.strip()
|
|
611
611
|
if not line:
|
|
@@ -94,7 +94,7 @@ _CHANGELOG_URL = "https://github.com/0xku/kon/blob/main/CHANGELOG.md"
|
|
|
94
94
|
try:
|
|
95
95
|
VERSION = version(_PYPI_PACKAGE_NAME)
|
|
96
96
|
except PackageNotFoundError:
|
|
97
|
-
VERSION = "0.3.3"
|
|
97
|
+
VERSION = "0.3.4"
|
|
98
98
|
|
|
99
99
|
_COPILOT_API_TYPES: frozenset[ApiType] = frozenset(
|
|
100
100
|
{ApiType.GITHUB_COPILOT, ApiType.GITHUB_COPILOT_RESPONSES, ApiType.ANTHROPIC_COPILOT}
|
|
@@ -1,9 +1,11 @@
|
|
|
1
|
+
from typing import Any, cast
|
|
2
|
+
|
|
1
3
|
import pytest
|
|
2
4
|
|
|
3
5
|
from kon.llm.base import ProviderConfig, is_local_base_url, resolve_api_key
|
|
4
6
|
from kon.llm.providers.openai_codex_responses import OpenAICodexResponsesProvider
|
|
5
7
|
from kon.llm.providers.openai_compat import supports_developer_role
|
|
6
|
-
from kon.llm.providers.openai_completions import _detect_compat
|
|
8
|
+
from kon.llm.providers.openai_completions import OpenAICompletionsProvider, _detect_compat
|
|
7
9
|
from kon.llm.providers.openai_responses import OpenAIResponsesProvider
|
|
8
10
|
|
|
9
11
|
|
|
@@ -26,6 +28,111 @@ def test_detect_compat_disables_developer_role_for_local_api() -> None:
|
|
|
26
28
|
assert compat.supports_reasoning_effort is True
|
|
27
29
|
|
|
28
30
|
|
|
31
|
+
def test_detect_compat_uses_llama_gemma_for_local_gemma_models() -> None:
|
|
32
|
+
compat = _detect_compat(
|
|
33
|
+
"openai", "http://127.0.0.1:1234/v1", "unsloth/gemma-4-26B-A4B-it-GGUF"
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
assert compat.supports_developer_role is False
|
|
37
|
+
assert compat.supports_reasoning_effort is False
|
|
38
|
+
assert compat.thinking_format == "llama_gemma"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_openai_completions_prefixes_think_token_for_local_gemma() -> None:
|
|
42
|
+
provider = OpenAICompletionsProvider(
|
|
43
|
+
ProviderConfig(
|
|
44
|
+
api_key="test-key",
|
|
45
|
+
base_url="http://127.0.0.1:1234/v1",
|
|
46
|
+
model="unsloth/gemma-4-26B-A4B-it-GGUF",
|
|
47
|
+
provider="openai",
|
|
48
|
+
thinking_level="medium",
|
|
49
|
+
)
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
messages = provider._convert_messages([], "You are helpful", provider._compat)
|
|
53
|
+
|
|
54
|
+
assert messages[0]["role"] == "system"
|
|
55
|
+
assert messages[0]["content"] == "<|think|>You are helpful"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def test_openai_completions_does_not_prefix_think_token_when_thinking_disabled() -> None:
|
|
59
|
+
provider = OpenAICompletionsProvider(
|
|
60
|
+
ProviderConfig(
|
|
61
|
+
api_key="test-key",
|
|
62
|
+
base_url="http://127.0.0.1:1234/v1",
|
|
63
|
+
model="unsloth/gemma-4-26B-A4B-it-GGUF",
|
|
64
|
+
provider="openai",
|
|
65
|
+
thinking_level="none",
|
|
66
|
+
)
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
messages = provider._convert_messages([], "You are helpful", provider._compat)
|
|
70
|
+
|
|
71
|
+
assert messages[0]["role"] == "system"
|
|
72
|
+
assert messages[0]["content"] == "You are helpful"
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def test_openai_completions_uses_developer_without_think_prefix_for_openai_api() -> None:
|
|
76
|
+
provider = OpenAICompletionsProvider(
|
|
77
|
+
ProviderConfig(
|
|
78
|
+
api_key="test-key",
|
|
79
|
+
base_url="https://api.openai.com/v1",
|
|
80
|
+
model="gpt-5",
|
|
81
|
+
provider="openai",
|
|
82
|
+
thinking_level="medium",
|
|
83
|
+
)
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
messages = provider._convert_messages([], "You are helpful", provider._compat)
|
|
87
|
+
|
|
88
|
+
assert messages[0]["role"] == "developer"
|
|
89
|
+
assert messages[0]["content"] == "You are helpful"
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class _EmptyAsyncIterator:
|
|
93
|
+
def __aiter__(self):
|
|
94
|
+
return self
|
|
95
|
+
|
|
96
|
+
async def __anext__(self):
|
|
97
|
+
raise StopAsyncIteration
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
class _DummyChatCompletions:
|
|
101
|
+
def __init__(self) -> None:
|
|
102
|
+
self.calls: list[dict[str, Any]] = []
|
|
103
|
+
|
|
104
|
+
async def create(self, **kwargs):
|
|
105
|
+
self.calls.append(kwargs)
|
|
106
|
+
return _EmptyAsyncIterator()
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
@pytest.mark.asyncio
|
|
110
|
+
async def test_openai_completions_sends_enable_thinking_for_local_gemma() -> None:
|
|
111
|
+
provider = OpenAICompletionsProvider(
|
|
112
|
+
ProviderConfig(
|
|
113
|
+
api_key="test-key",
|
|
114
|
+
base_url="http://127.0.0.1:1234/v1",
|
|
115
|
+
model="unsloth/gemma-4-26B-A4B-it-GGUF",
|
|
116
|
+
provider="openai",
|
|
117
|
+
thinking_level="medium",
|
|
118
|
+
)
|
|
119
|
+
)
|
|
120
|
+
dummy_chat = _DummyChatCompletions()
|
|
121
|
+
provider._client = cast(
|
|
122
|
+
Any,
|
|
123
|
+
type("DummyClient", (), {"chat": type("DummyChat", (), {"completions": dummy_chat})()})(),
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
stream = await provider._stream_impl(messages=[], system_prompt="You are helpful")
|
|
127
|
+
async for _ in stream:
|
|
128
|
+
pass
|
|
129
|
+
|
|
130
|
+
kwargs = dummy_chat.calls[0]
|
|
131
|
+
assert kwargs["extra_body"] == {"enable_thinking": True}
|
|
132
|
+
assert "reasoning_effort" not in kwargs
|
|
133
|
+
assert kwargs["messages"][0]["content"] == "<|think|>You are helpful"
|
|
134
|
+
|
|
135
|
+
|
|
29
136
|
def test_openai_responses_uses_system_for_local_api() -> None:
|
|
30
137
|
provider = OpenAIResponsesProvider(
|
|
31
138
|
ProviderConfig(
|
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
# Local Models
|
|
2
|
-
|
|
3
|
-
This document provides detailed information about running and configuring local models with Kon.
|
|
4
|
-
|
|
5
|
-
## Tested Models
|
|
6
|
-
|
|
7
|
-
| Model | Quantization | Context Length | TPS | System Specs |
|
|
8
|
-
| ----- | -------------- | -------------- | --- | ------------ |
|
|
9
|
-
| `qwen/qwen3-coder-next` | Q4_K_M | 65,536 | N/A | i7-14700F × 28, 64GB RAM, 24GB VRAM (RTX 3090) |
|
|
10
|
-
| `zai-org/glm-4.7-flash` | Q4_K_M | 65,536 | N/A | i7-14700F × 28, 64GB RAM, 24GB VRAM (RTX 3090) |
|
|
11
|
-
| `unsloth/Qwen3.5-9B-GGUF` | Q4_K_M | 65,536 | N/A | i7-14700F × 28, 64GB RAM, 24GB VRAM (RTX 3090) |
|
|
12
|
-
|
|
13
|
-
Run a local model using llama-server with the following command:
|
|
14
|
-
|
|
15
|
-
```bash
|
|
16
|
-
./llama-server \
|
|
17
|
-
--hf-repo unsloth/Qwen3.5-9B-GGUF \
|
|
18
|
-
--hf-file Qwen3.5-9B-Q4_K_M.gguf \
|
|
19
|
-
--port 5000 \
|
|
20
|
-
-c 65536
|
|
21
|
-
```
|
|
22
|
-
|
|
23
|
-
Then start Kon for a one-off local session:
|
|
24
|
-
|
|
25
|
-
```bash
|
|
26
|
-
kon --model unsloth/Qwen3.5-9B-GGUF --provider openai \
|
|
27
|
-
--base-url http://localhost:5000/v1 \
|
|
28
|
-
--openai-compat-auth none
|
|
29
|
-
```
|
|
30
|
-
|
|
31
|
-
If this is your default setup, put it in `~/.kon/config.toml` instead:
|
|
32
|
-
|
|
33
|
-
```toml
|
|
34
|
-
[llm]
|
|
35
|
-
default_provider = "openai"
|
|
36
|
-
default_model = "unsloth/Qwen3.5-9B-GGUF"
|
|
37
|
-
default_base_url = "http://localhost:5000/v1"
|
|
38
|
-
|
|
39
|
-
[llm.auth]
|
|
40
|
-
openai_compat = "auto" # or "none" to always inject a placeholder key
|
|
41
|
-
```
|
|
42
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/.kon/skills/kon-tmux-test/run-e2e-tests.sh
RENAMED
|
File without changes
|
{kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/.kon/skills/kon-tmux-test/setup-test-project.sh
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/copilot_anthropic.py
RENAMED
|
File without changes
|
{kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/github_copilot_headers.py
RENAMED
|
File without changes
|
|
File without changes
|
{kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/openai_codex_responses.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/llm/test_azure_ai_foundry_provider.py
RENAMED
|
File without changes
|
|
File without changes
|
{kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/llm/test_openai_codex_provider_errors.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/tools/test_read_image_integration.py
RENAMED
|
File without changes
|
{kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/tools/test_subprocess_cancellation.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|