kon-coding-agent 0.3.3__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/PKG-INFO +1 -1
  2. kon_coding_agent-0.3.4/docs/local-models.md +80 -0
  3. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/pyproject.toml +1 -1
  4. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/openai_completions.py +24 -9
  5. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/session.py +4 -4
  6. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/app.py +1 -1
  7. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_openai_compat.py +108 -1
  8. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/uv.lock +1 -1
  9. kon_coding_agent-0.3.3/docs/local-models.md +0 -42
  10. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/.gitignore +0 -0
  11. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/.kon/skills/kon-release-publish/SKILL.md +0 -0
  12. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/.kon/skills/kon-tmux-test/SKILL.md +0 -0
  13. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/.kon/skills/kon-tmux-test/run-e2e-tests.sh +0 -0
  14. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/.kon/skills/kon-tmux-test/setup-test-project.sh +0 -0
  15. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/.python-version +0 -0
  16. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/AGENTS.md +0 -0
  17. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/CHANGELOG.md +0 -0
  18. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/LICENSE +0 -0
  19. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/README.md +0 -0
  20. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/docs/architecture-review.md +0 -0
  21. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/docs/images/kon-screenshot.png +0 -0
  22. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/scripts/show_themes.py +0 -0
  23. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/__init__.py +0 -0
  24. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/async_utils.py +0 -0
  25. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/builtin_skills/init/SKILL.md +0 -0
  26. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/config.py +0 -0
  27. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/context/__init__.py +0 -0
  28. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/context/_xml.py +0 -0
  29. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/context/agent_mds.py +0 -0
  30. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/context/git.py +0 -0
  31. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/context/loader.py +0 -0
  32. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/context/skills.py +0 -0
  33. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/core/__init__.py +0 -0
  34. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/core/compaction.py +0 -0
  35. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/core/handoff.py +0 -0
  36. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/core/types.py +0 -0
  37. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/defaults/__init__.py +0 -0
  38. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/defaults/config.toml +0 -0
  39. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/events.py +0 -0
  40. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/__init__.py +0 -0
  41. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/base.py +0 -0
  42. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/models.py +0 -0
  43. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/oauth/__init__.py +0 -0
  44. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/oauth/copilot.py +0 -0
  45. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/oauth/openai.py +0 -0
  46. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/__init__.py +0 -0
  47. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/anthropic.py +0 -0
  48. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/azure_ai_foundry.py +0 -0
  49. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/copilot.py +0 -0
  50. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/copilot_anthropic.py +0 -0
  51. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/github_copilot_headers.py +0 -0
  52. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/mock.py +0 -0
  53. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/openai_codex_responses.py +0 -0
  54. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/openai_compat.py +0 -0
  55. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/openai_responses.py +0 -0
  56. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/llm/providers/sanitize.py +0 -0
  57. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/loop.py +0 -0
  58. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/permissions.py +0 -0
  59. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/py.typed +0 -0
  60. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/themes.py +0 -0
  61. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/__init__.py +0 -0
  62. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/_read_image.py +0 -0
  63. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/_tool_utils.py +0 -0
  64. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/base.py +0 -0
  65. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/bash.py +0 -0
  66. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/edit.py +0 -0
  67. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/find.py +0 -0
  68. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/grep.py +0 -0
  69. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/read.py +0 -0
  70. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/web_fetch.py +0 -0
  71. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/web_search.py +0 -0
  72. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools/write.py +0 -0
  73. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/tools_manager.py +0 -0
  74. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/turn.py +0 -0
  75. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/__init__.py +0 -0
  76. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/app_protocol.py +0 -0
  77. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/autocomplete.py +0 -0
  78. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/blocks.py +0 -0
  79. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/chat.py +0 -0
  80. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/clipboard.py +0 -0
  81. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/commands.py +0 -0
  82. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/export.py +0 -0
  83. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/floating_list.py +0 -0
  84. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/formatting.py +0 -0
  85. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/input.py +0 -0
  86. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/path_complete.py +0 -0
  87. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/prompt_history.py +0 -0
  88. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/selection_mode.py +0 -0
  89. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/session_ui.py +0 -0
  90. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/styles.py +0 -0
  91. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/ui/widgets.py +0 -0
  92. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/src/kon/update_check.py +0 -0
  93. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/conftest.py +0 -0
  94. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/context/test_agents.py +0 -0
  95. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/context/test_skills.py +0 -0
  96. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/llm/__init__.py +0 -0
  97. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/llm/test_anthropic_provider.py +0 -0
  98. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/llm/test_azure_ai_foundry_provider.py +0 -0
  99. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/llm/test_mock_provider.py +0 -0
  100. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/llm/test_openai_codex_provider_errors.py +0 -0
  101. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/llm/test_openai_oauth.py +0 -0
  102. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_agentic_loop.py +0 -0
  103. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_cli_auth_flags.py +0 -0
  104. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_cli_provider_resolution.py +0 -0
  105. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_compaction.py +0 -0
  106. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_config_binaries.py +0 -0
  107. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_config_error_fallback.py +0 -0
  108. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_config_injection.py +0 -0
  109. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_config_migration.py +0 -0
  110. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_handoff.py +0 -0
  111. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_handoff_link_interrupt.py +0 -0
  112. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_launch_warnings.py +0 -0
  113. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_local_auth_config.py +0 -0
  114. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_model_provider_resolution.py +0 -0
  115. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_permissions.py +0 -0
  116. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_session_persistence.py +0 -0
  117. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_session_resume.py +0 -0
  118. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_system_prompt.py +0 -0
  119. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_system_prompt_git_context.py +0 -0
  120. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_tools_manager.py +0 -0
  121. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_update_check.py +0 -0
  122. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/test_update_notice_behavior.py +0 -0
  123. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/tools/test_diff.py +0 -0
  124. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/tools/test_edit.py +0 -0
  125. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/tools/test_edit_display.py +0 -0
  126. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/tools/test_read.py +0 -0
  127. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/tools/test_read_image.py +0 -0
  128. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/tools/test_read_image_integration.py +0 -0
  129. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/tools/test_subprocess_cancellation.py +0 -0
  130. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/tools/test_write.py +0 -0
  131. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/ui/test_autocomplete.py +0 -0
  132. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/ui/test_floating_list.py +0 -0
  133. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/ui/test_input_handoff.py +0 -0
  134. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/ui/test_input_paste.py +0 -0
  135. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/ui/test_prompt_history.py +0 -0
  136. {kon_coding_agent-0.3.3 → kon_coding_agent-0.3.4}/tests/ui/test_status_line.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kon-coding-agent
3
- Version: 0.3.3
3
+ Version: 0.3.4
4
4
  Summary: Minimal coding agent
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.12
@@ -0,0 +1,80 @@
1
+ # Local Models
2
+
3
+ This document provides detailed information about running and configuring local models with Kon.
4
+
5
+ ## Tested Models
6
+
7
+ > Tested with llama-server build b8740
8
+
9
+ | Model | Quantization | Context Length | TPS | System Specs |
10
+ | ----- | -------------- | -------------- | --- | ------------ |
11
+ | `zai-org/glm-4.7-flash` | Q4_K_M | 65,536 | N/A | i7-14700F × 28, 64GB RAM, 24GB VRAM (RTX 3090) |
12
+ | `unsloth/Qwen3.5-27B-GGUF` | Q4_K_M | 32,768 | ~30 | i7-14700F × 28, 64GB RAM, 24GB VRAM (RTX 3090) |
13
+ | `unsloth/gemma-4-26B-A4B-it-GGUF` | UD-Q4_K_M | 32,768 | ~100 | i7-14700F × 28, 64GB RAM, 24GB VRAM (RTX 3090) |
14
+
15
+ Run Qwen3.5 27B on an RTX 3090 with a 32k context window using llama-server:
16
+
17
+ ```bash
18
+ /path-to-llama-server/llama-server \
19
+ --model /path-to-model/Qwen3.5-27B-Q4_K_M.gguf \
20
+ --port 5000 \
21
+ --ctx-size 32768 \
22
+ --gpu-layers all \
23
+ --threads 8 \
24
+ --threads-batch 8 \
25
+ --batch-size 1024 \
26
+ --ubatch-size 512 \
27
+ --flash-attn on
28
+ ```
29
+
30
+ On this machine, that setup generates roughly 30 tokens per second.
31
+
32
+ Then start Kon for a one-off local session:
33
+
34
+ ```bash
35
+ kon --model unsloth/Qwen3.5-27B-GGUF --provider openai \
36
+ --base-url http://localhost:5000/v1 \
37
+ --openai-compat-auth none
38
+ ```
39
+
40
+ Run Gemma 4 26B A4B on the same machine using llama-server:
41
+
42
+ ```bash
43
+ /path-to-llama-server/llama-server \
44
+ --model /path-to-model/gemma-4-26B-A4B-it-UD-Q4_K_M.gguf \
45
+ --port 5000 \
46
+ --ctx-size 32768 \
47
+ --gpu-layers all \
48
+ --threads 8 \
49
+ --threads-batch 8 \
50
+ --batch-size 1024 \
51
+ --ubatch-size 512 \
52
+ --flash-attn on \
53
+ --temperature 1.5
54
+ ```
55
+
56
+ Then start Kon against that local server:
57
+
58
+ ```bash
59
+ kon --model unsloth/gemma-4-26B-A4B-it-GGUF --provider openai \
60
+ --base-url http://localhost:5000/v1 \
61
+ --openai-compat-auth none
62
+ ```
63
+
64
+ To avoid passing provider, model, and auth flags every time you start Kon, you can define your local setup in `~/.kon/config.toml`. This also allows you to tune compaction to trigger at a specific point relative to your model's context window.
65
+
66
+ For example, to make the Gemma setup above your default, put this in `~/.kon/config.toml`:
67
+
68
+ ```toml
69
+ [llm]
70
+ default_provider = "openai"
71
+ default_model = "unsloth/gemma-4-26B-A4B-it-GGUF"
72
+ default_base_url = "http://localhost:5000/v1"
73
+
74
+ [llm.auth]
75
+ openai_compat = "none" # or "auto"
76
+
77
+ [compaction]
78
+ # Set this close to your model's context size (e.g., 27768 for a 32k window, leaving a 5000-token safety margin)
79
+ buffer_tokens = 27768 # 32768 - 5000 (safety margin)
80
+ ```
@@ -14,7 +14,7 @@ default = true
14
14
 
15
15
  [project]
16
16
  name = "kon-coding-agent"
17
- version = "0.3.3"
17
+ version = "0.3.4"
18
18
  description = "Minimal coding agent"
19
19
  readme = "README.md"
20
20
  requires-python = ">=3.12"
@@ -30,7 +30,7 @@ from ...core.types import (
30
30
  Usage,
31
31
  UserMessage,
32
32
  )
33
- from ..base import BaseProvider, LLMStream, ProviderConfig, resolve_api_key
33
+ from ..base import BaseProvider, LLMStream, ProviderConfig, is_local_base_url, resolve_api_key
34
34
  from .openai_compat import supports_developer_role
35
35
  from .sanitize import sanitize_surrogates
36
36
 
@@ -41,12 +41,13 @@ class OpenAICompletionsCompat:
41
41
  supports_developer_role: bool = True
42
42
  supports_reasoning_effort: bool = True
43
43
  max_tokens_field: Literal["max_tokens", "max_completion_tokens"] = "max_completion_tokens"
44
- thinking_format: Literal["openai", "zai", "qwen"] = "openai"
44
+ thinking_format: Literal["openai", "zai", "qwen", "llama_gemma"] = "openai"
45
45
 
46
46
 
47
- def _detect_compat(provider: str, base_url: str) -> OpenAICompletionsCompat:
47
+ def _detect_compat(provider: str, base_url: str, model: str = "") -> OpenAICompletionsCompat:
48
48
  normalized_provider = provider.lower()
49
49
  normalized_base_url = base_url.lower()
50
+ normalized_model = model.lower()
50
51
  is_zai = (
51
52
  normalized_provider == "zai"
52
53
  or normalized_provider == "zhipu"
@@ -61,6 +62,13 @@ def _detect_compat(provider: str, base_url: str) -> OpenAICompletionsCompat:
61
62
  thinking_format="zai",
62
63
  )
63
64
 
65
+ if is_local_base_url(base_url) and "gemma" in normalized_model:
66
+ return OpenAICompletionsCompat(
67
+ supports_developer_role=supports_developer_role(provider, base_url),
68
+ supports_reasoning_effort=False,
69
+ thinking_format="llama_gemma",
70
+ )
71
+
64
72
  return OpenAICompletionsCompat(
65
73
  supports_developer_role=supports_developer_role(provider, base_url)
66
74
  )
@@ -92,7 +100,9 @@ class OpenAICompletionsProvider(BaseProvider):
92
100
  'or configure llm.auth.openai_compat = "auto"/"none" for local endpoints.'
93
101
  )
94
102
  self._client = AsyncOpenAI(api_key=api_key, base_url=config.base_url)
95
- self._compat = _detect_compat(config.provider or "", config.base_url or "")
103
+ self._compat = _detect_compat(
104
+ config.provider or "", config.base_url or "", config.model or ""
105
+ )
96
106
 
97
107
  async def _stream_impl(
98
108
  self,
@@ -138,7 +148,7 @@ class OpenAICompletionsProvider(BaseProvider):
138
148
  if compat.thinking_format == "zai":
139
149
  if thinking_level and thinking_level != "none":
140
150
  extra_body["thinking"] = {"type": "enabled"}
141
- elif compat.thinking_format == "qwen":
151
+ elif compat.thinking_format in {"qwen", "llama_gemma"}:
142
152
  extra_body["enable_thinking"] = bool(thinking_level and thinking_level != "none")
143
153
  elif (
144
154
  self.supports_reasoning_effort
@@ -238,11 +248,16 @@ class OpenAICompletionsProvider(BaseProvider):
238
248
 
239
249
  if system_prompt:
240
250
  role = "developer" if (compat and compat.supports_developer_role) else "system"
251
+ prompt_content = sanitize_surrogates(system_prompt)
252
+ if (
253
+ compat
254
+ and compat.thinking_format == "llama_gemma"
255
+ and self.config.thinking_level != "none"
256
+ and not prompt_content.startswith("<|think|>")
257
+ ):
258
+ prompt_content = "<|think|>" + prompt_content
241
259
  result.append(
242
- cast(
243
- ChatCompletionMessageParam,
244
- {"role": role, "content": sanitize_surrogates(system_prompt)},
245
- )
260
+ cast(ChatCompletionMessageParam, {"role": role, "content": prompt_content})
246
261
  )
247
262
 
248
263
  pending_images: list[ImageContent] = []
@@ -226,7 +226,7 @@ class Session:
226
226
  self._flushed = True
227
227
  self._persisted_entries_count = len(self._entries)
228
228
  else:
229
- with open(self._session_file, "a") as f:
229
+ with open(self._session_file, "a", encoding="utf-8") as f:
230
230
  f.write(entry.model_dump_json() + "\n")
231
231
  self._persisted_entries_count += 1
232
232
 
@@ -236,7 +236,7 @@ class Session:
236
236
 
237
237
  self._session_file.parent.mkdir(parents=True, exist_ok=True)
238
238
 
239
- with open(self._session_file, "w") as f:
239
+ with open(self._session_file, "w", encoding="utf-8") as f:
240
240
  if self._header:
241
241
  f.write(self._header.model_dump_json() + "\n")
242
242
  for entry in self._entries:
@@ -469,7 +469,7 @@ class Session:
469
469
  header: SessionHeader | None = None
470
470
  entries: list[SessionEntry] = []
471
471
 
472
- with open(path) as f:
472
+ with open(path, encoding="utf-8") as f:
473
473
  for line in f:
474
474
  line = line.strip()
475
475
  if not line:
@@ -605,7 +605,7 @@ class Session:
605
605
  message_count = 0
606
606
  first_message = ""
607
607
 
608
- with open(path) as f:
608
+ with open(path, encoding="utf-8") as f:
609
609
  for line in f:
610
610
  line = line.strip()
611
611
  if not line:
@@ -94,7 +94,7 @@ _CHANGELOG_URL = "https://github.com/0xku/kon/blob/main/CHANGELOG.md"
94
94
  try:
95
95
  VERSION = version(_PYPI_PACKAGE_NAME)
96
96
  except PackageNotFoundError:
97
- VERSION = "0.3.3"
97
+ VERSION = "0.3.4"
98
98
 
99
99
  _COPILOT_API_TYPES: frozenset[ApiType] = frozenset(
100
100
  {ApiType.GITHUB_COPILOT, ApiType.GITHUB_COPILOT_RESPONSES, ApiType.ANTHROPIC_COPILOT}
@@ -1,9 +1,11 @@
1
+ from typing import Any, cast
2
+
1
3
  import pytest
2
4
 
3
5
  from kon.llm.base import ProviderConfig, is_local_base_url, resolve_api_key
4
6
  from kon.llm.providers.openai_codex_responses import OpenAICodexResponsesProvider
5
7
  from kon.llm.providers.openai_compat import supports_developer_role
6
- from kon.llm.providers.openai_completions import _detect_compat
8
+ from kon.llm.providers.openai_completions import OpenAICompletionsProvider, _detect_compat
7
9
  from kon.llm.providers.openai_responses import OpenAIResponsesProvider
8
10
 
9
11
 
@@ -26,6 +28,111 @@ def test_detect_compat_disables_developer_role_for_local_api() -> None:
26
28
  assert compat.supports_reasoning_effort is True
27
29
 
28
30
 
31
+ def test_detect_compat_uses_llama_gemma_for_local_gemma_models() -> None:
32
+ compat = _detect_compat(
33
+ "openai", "http://127.0.0.1:1234/v1", "unsloth/gemma-4-26B-A4B-it-GGUF"
34
+ )
35
+
36
+ assert compat.supports_developer_role is False
37
+ assert compat.supports_reasoning_effort is False
38
+ assert compat.thinking_format == "llama_gemma"
39
+
40
+
41
+ def test_openai_completions_prefixes_think_token_for_local_gemma() -> None:
42
+ provider = OpenAICompletionsProvider(
43
+ ProviderConfig(
44
+ api_key="test-key",
45
+ base_url="http://127.0.0.1:1234/v1",
46
+ model="unsloth/gemma-4-26B-A4B-it-GGUF",
47
+ provider="openai",
48
+ thinking_level="medium",
49
+ )
50
+ )
51
+
52
+ messages = provider._convert_messages([], "You are helpful", provider._compat)
53
+
54
+ assert messages[0]["role"] == "system"
55
+ assert messages[0]["content"] == "<|think|>You are helpful"
56
+
57
+
58
+ def test_openai_completions_does_not_prefix_think_token_when_thinking_disabled() -> None:
59
+ provider = OpenAICompletionsProvider(
60
+ ProviderConfig(
61
+ api_key="test-key",
62
+ base_url="http://127.0.0.1:1234/v1",
63
+ model="unsloth/gemma-4-26B-A4B-it-GGUF",
64
+ provider="openai",
65
+ thinking_level="none",
66
+ )
67
+ )
68
+
69
+ messages = provider._convert_messages([], "You are helpful", provider._compat)
70
+
71
+ assert messages[0]["role"] == "system"
72
+ assert messages[0]["content"] == "You are helpful"
73
+
74
+
75
+ def test_openai_completions_uses_developer_without_think_prefix_for_openai_api() -> None:
76
+ provider = OpenAICompletionsProvider(
77
+ ProviderConfig(
78
+ api_key="test-key",
79
+ base_url="https://api.openai.com/v1",
80
+ model="gpt-5",
81
+ provider="openai",
82
+ thinking_level="medium",
83
+ )
84
+ )
85
+
86
+ messages = provider._convert_messages([], "You are helpful", provider._compat)
87
+
88
+ assert messages[0]["role"] == "developer"
89
+ assert messages[0]["content"] == "You are helpful"
90
+
91
+
92
+ class _EmptyAsyncIterator:
93
+ def __aiter__(self):
94
+ return self
95
+
96
+ async def __anext__(self):
97
+ raise StopAsyncIteration
98
+
99
+
100
+ class _DummyChatCompletions:
101
+ def __init__(self) -> None:
102
+ self.calls: list[dict[str, Any]] = []
103
+
104
+ async def create(self, **kwargs):
105
+ self.calls.append(kwargs)
106
+ return _EmptyAsyncIterator()
107
+
108
+
109
+ @pytest.mark.asyncio
110
+ async def test_openai_completions_sends_enable_thinking_for_local_gemma() -> None:
111
+ provider = OpenAICompletionsProvider(
112
+ ProviderConfig(
113
+ api_key="test-key",
114
+ base_url="http://127.0.0.1:1234/v1",
115
+ model="unsloth/gemma-4-26B-A4B-it-GGUF",
116
+ provider="openai",
117
+ thinking_level="medium",
118
+ )
119
+ )
120
+ dummy_chat = _DummyChatCompletions()
121
+ provider._client = cast(
122
+ Any,
123
+ type("DummyClient", (), {"chat": type("DummyChat", (), {"completions": dummy_chat})()})(),
124
+ )
125
+
126
+ stream = await provider._stream_impl(messages=[], system_prompt="You are helpful")
127
+ async for _ in stream:
128
+ pass
129
+
130
+ kwargs = dummy_chat.calls[0]
131
+ assert kwargs["extra_body"] == {"enable_thinking": True}
132
+ assert "reasoning_effort" not in kwargs
133
+ assert kwargs["messages"][0]["content"] == "<|think|>You are helpful"
134
+
135
+
29
136
  def test_openai_responses_uses_system_for_local_api() -> None:
30
137
  provider = OpenAIResponsesProvider(
31
138
  ProviderConfig(
@@ -714,7 +714,7 @@ wheels = [
714
714
 
715
715
  [[package]]
716
716
  name = "kon-coding-agent"
717
- version = "0.3.3"
717
+ version = "0.3.4"
718
718
  source = { editable = "." }
719
719
  dependencies = [
720
720
  { name = "aiofiles" },
@@ -1,42 +0,0 @@
1
- # Local Models
2
-
3
- This document provides detailed information about running and configuring local models with Kon.
4
-
5
- ## Tested Models
6
-
7
- | Model | Quantization | Context Length | TPS | System Specs |
8
- | ----- | -------------- | -------------- | --- | ------------ |
9
- | `qwen/qwen3-coder-next` | Q4_K_M | 65,536 | N/A | i7-14700F × 28, 64GB RAM, 24GB VRAM (RTX 3090) |
10
- | `zai-org/glm-4.7-flash` | Q4_K_M | 65,536 | N/A | i7-14700F × 28, 64GB RAM, 24GB VRAM (RTX 3090) |
11
- | `unsloth/Qwen3.5-9B-GGUF` | Q4_K_M | 65,536 | N/A | i7-14700F × 28, 64GB RAM, 24GB VRAM (RTX 3090) |
12
-
13
- Run a local model using llama-server with the following command:
14
-
15
- ```bash
16
- ./llama-server \
17
- --hf-repo unsloth/Qwen3.5-9B-GGUF \
18
- --hf-file Qwen3.5-9B-Q4_K_M.gguf \
19
- --port 5000 \
20
- -c 65536
21
- ```
22
-
23
- Then start Kon for a one-off local session:
24
-
25
- ```bash
26
- kon --model unsloth/Qwen3.5-9B-GGUF --provider openai \
27
- --base-url http://localhost:5000/v1 \
28
- --openai-compat-auth none
29
- ```
30
-
31
- If this is your default setup, put it in `~/.kon/config.toml` instead:
32
-
33
- ```toml
34
- [llm]
35
- default_provider = "openai"
36
- default_model = "unsloth/Qwen3.5-9B-GGUF"
37
- default_base_url = "http://localhost:5000/v1"
38
-
39
- [llm.auth]
40
- openai_compat = "auto" # or "none" to always inject a placeholder key
41
- ```
42
-