cluxion-agentplugin-preprocessing 0.3.14__tar.gz → 0.3.16__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/PKG-INFO +1 -1
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/pyproject.toml +1 -1
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/context_compress.py +75 -13
- cluxion_agentplugin_preprocessing-0.3.16/src/cluxion_runtime/core/llm_compress.py +261 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_context_compress_llm_forget.py +110 -0
- cluxion_agentplugin_preprocessing-0.3.14/src/cluxion_runtime/core/llm_compress.py +0 -138
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/.github/profile/README.md +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/.gitignore +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/Docs/README.md +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/LICENSE +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/README.md +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/adapters/claude/.claude-plugin/plugin.json +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/adapters/claude/skills/preprocess/SKILL.md +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/adapters/codex/config-snippet.toml +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/cluxion-Docs/README.md +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/cluxion-Docs/architecture.md +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/cluxion-Docs/harness-logic.md +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/cluxion-Docs/honesty-preprocessing.md +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/cluxion-Docs/install-and-operations.md +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/cluxion-Docs/security.md +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/rust/cluxion_queue/Cargo.lock +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/rust/cluxion_queue/Cargo.toml +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/rust/cluxion_queue/pyproject.toml +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/rust/cluxion_queue/src/context.rs +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/rust/cluxion_queue/src/dispatch.rs +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/rust/cluxion_queue/src/guard.rs +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/rust/cluxion_queue/src/lib.rs +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/rust/cluxion_queue/src/main.rs +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/rust/cluxion_queue/src/queue.rs +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/rust/cluxion_queue/src/types.rs +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/__init__.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/cli.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/doctor/__init__.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/doctor/catalog.json +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/doctor/framework.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/doctor/probes.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/guard_watch.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/hermes_config.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/plugin.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/plugin.yaml +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/runner.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/schemas.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/__init__.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/__main__.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/adapters/__init__.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/adapters/contract.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/adapters/grok_build.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/adapters/hermes.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/adapters/spec.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/bootstrap.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/cli.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/__init__.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/clarification.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/dispatch_store.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/harness.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/hybrid_forget.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/intent.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/ledger.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/ledger_codec.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/plan_codec.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/preprocess.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/types.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/work_queue.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/guard_daemon_host.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/models/__init__.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/models/supervisor.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/models/vllm_mlx.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/resources/__init__.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/resources/guard_bridge.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/resources/py_queue.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/resources/queue_bridge.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/resources/rust_bridge.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/web/__init__.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/web/browser_bridge.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_auto_compress_middleware.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_browser_bridge.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_clarification.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_cluxion_runtime_spine.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_context_compress.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_contract.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_dispatch_store.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_guard.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_guard_daemon_host.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_ledger.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_py_queue_concurrency.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_queue_backends.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_runtime_adapter_cli.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_rust_queue.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_supervisor.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/test_bootstrap.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/test_doctor.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/test_guard_watch.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/test_hermes_config.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/test_packaging_policy.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/test_plugin.py +0 -0
- {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/test_runner.py +0 -0
{cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cluxion-agentplugin-preprocessing
|
|
3
|
-
Version: 0.3.14
|
|
3
|
+
Version: 0.3.16
|
|
4
4
|
Summary: Universal agent plugin for Cluxion preprocessing, honesty contracts, clarification, Rust work queue, and resource-aware harness handoff.
|
|
5
5
|
Project-URL: Homepage, https://github.com/cluxion/cluxion-Agentplugin-preprocessing
|
|
6
6
|
Project-URL: Repository, https://github.com/cluxion/cluxion-Agentplugin-preprocessing
|
{cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/pyproject.toml
RENAMED
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "cluxion-agentplugin-preprocessing"
|
|
7
|
-
version = "0.3.14"
|
|
7
|
+
version = "0.3.16"
|
|
8
8
|
description = "Universal agent plugin for Cluxion preprocessing, honesty contracts, clarification, Rust work queue, and resource-aware harness handoff."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -6,8 +6,10 @@ lockstep so the three backends produce identical Stage-1 output (parity-tested).
|
|
|
6
6
|
|
|
7
7
|
Stages 2 (LLM summarization via ``hermes -z``) and 3 (hybrid forgetting) are
|
|
8
8
|
Python-only; the Rust mirror intentionally does not replicate LLM or forgetforge
|
|
9
|
-
calls.
|
|
10
|
-
|
|
9
|
+
calls. Stage 4 (last-resort truncation of pinned recent turns) is also
|
|
10
|
+
Python-only — it runs when every remaining message is pinned yet still exceeds
|
|
11
|
+
the target. Disable stages 2-4 with ``enable_llm_summary`` / ``enable_forget``
|
|
12
|
+
for Stage-1 parity.
|
|
11
13
|
|
|
12
14
|
What stays untouched: pinned messages (explicit ``pinned``, the first
|
|
13
15
|
user message = task intent, the most recent ``keep_recent`` turns).
|
|
@@ -152,13 +154,26 @@ def compress(payload: Mapping[str, object]) -> dict[str, object]:
|
|
|
152
154
|
stages.append("forget")
|
|
153
155
|
pinned = _pinned_indices(messages, keep_recent)
|
|
154
156
|
|
|
157
|
+
intent_idx = _first_user_index(messages)
|
|
158
|
+
if total > target_tokens and (
|
|
159
|
+
over_target_pinned_only or not any(idx not in pinned for idx in range(len(messages)))
|
|
160
|
+
):
|
|
161
|
+
total, changed = _stage_truncate_pinned_recent(
|
|
162
|
+
messages, keep_recent, total, target_tokens, intent_idx=intent_idx
|
|
163
|
+
)
|
|
164
|
+
if changed:
|
|
165
|
+
stages.append("truncate_pinned_recent")
|
|
166
|
+
over_target_pinned_only = False
|
|
167
|
+
|
|
155
168
|
if total > target_tokens:
|
|
156
169
|
if summary_request is None:
|
|
157
170
|
summary_request = _build_summary_request(messages, pinned, total, target_tokens)
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
171
|
+
intent_tokens = (
|
|
172
|
+
estimate_tokens(messages[intent_idx].content) if intent_idx is not None else 0
|
|
173
|
+
)
|
|
174
|
+
if intent_tokens > target_tokens:
|
|
161
175
|
forced_over_target = True
|
|
176
|
+
over_target_pinned_only = True
|
|
162
177
|
|
|
163
178
|
return _result_payload(
|
|
164
179
|
messages,
|
|
@@ -208,6 +223,10 @@ def _bool_flag(payload: Mapping[str, object], key: str, default: bool) -> bool:
|
|
|
208
223
|
return default
|
|
209
224
|
|
|
210
225
|
|
|
226
|
+
def _first_user_index(messages: list[_Msg]) -> int | None:
|
|
227
|
+
return next((idx for idx, msg in enumerate(messages) if msg.role == "user"), None)
|
|
228
|
+
|
|
229
|
+
|
|
211
230
|
def _pinned_indices(messages: list[_Msg], keep_recent: int) -> list[int]:
|
|
212
231
|
pinned = [idx for idx, msg in enumerate(messages) if msg.pinned]
|
|
213
232
|
first_user = next((idx for idx, msg in enumerate(messages) if msg.role == "user"), None)
|
|
@@ -221,6 +240,17 @@ def _pinned_indices(messages: list[_Msg], keep_recent: int) -> list[int]:
|
|
|
221
240
|
return pinned
|
|
222
241
|
|
|
223
242
|
|
|
243
|
+
def _apply_head_tail_truncate(content: str) -> str | None:
|
|
244
|
+
if estimate_tokens(content) <= TRUNCATE_MIN_TOKENS:
|
|
245
|
+
return None
|
|
246
|
+
if len(content) <= TRUNCATE_HEAD_CHARS + TRUNCATE_TAIL_CHARS:
|
|
247
|
+
return None
|
|
248
|
+
elided = len(content) - TRUNCATE_HEAD_CHARS - TRUNCATE_TAIL_CHARS
|
|
249
|
+
head = content[:TRUNCATE_HEAD_CHARS]
|
|
250
|
+
tail = content[len(content) - TRUNCATE_TAIL_CHARS :]
|
|
251
|
+
return f"{head}\n[...cluxion: {elided} chars elided...]\n{tail}"
|
|
252
|
+
|
|
253
|
+
|
|
224
254
|
def _stage_truncate(messages: list[_Msg], pinned: list[int], total: int, target: int) -> tuple[int, bool]:
|
|
225
255
|
changed = False
|
|
226
256
|
for idx, msg in enumerate(messages):
|
|
@@ -228,21 +258,53 @@ def _stage_truncate(messages: list[_Msg], pinned: list[int], total: int, target:
|
|
|
228
258
|
break
|
|
229
259
|
if idx in pinned:
|
|
230
260
|
continue
|
|
231
|
-
|
|
232
|
-
if tokens <= TRUNCATE_MIN_TOKENS:
|
|
261
|
+
replacement = _apply_head_tail_truncate(msg.content)
|
|
262
|
+
if replacement is None:
|
|
233
263
|
continue
|
|
234
|
-
|
|
235
|
-
continue
|
|
236
|
-
elided = len(msg.content) - TRUNCATE_HEAD_CHARS - TRUNCATE_TAIL_CHARS
|
|
237
|
-
head = msg.content[:TRUNCATE_HEAD_CHARS]
|
|
238
|
-
tail = msg.content[len(msg.content) - TRUNCATE_TAIL_CHARS :]
|
|
239
|
-
replacement = f"{head}\n[...cluxion: {elided} chars elided...]\n{tail}"
|
|
264
|
+
tokens = estimate_tokens(msg.content)
|
|
240
265
|
total = total - tokens + estimate_tokens(replacement)
|
|
241
266
|
msg.content = replacement
|
|
242
267
|
changed = True
|
|
243
268
|
return total, changed
|
|
244
269
|
|
|
245
270
|
|
|
271
|
+
def _pinned_recent_indices(messages: list[_Msg], keep_recent: int, intent_idx: int | None) -> list[int]:
|
|
272
|
+
recent_start = max(0, len(messages) - keep_recent)
|
|
273
|
+
return [idx for idx in range(recent_start, len(messages)) if idx != intent_idx]
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _stage_truncate_pinned_recent(
|
|
277
|
+
messages: list[_Msg],
|
|
278
|
+
keep_recent: int,
|
|
279
|
+
total: int,
|
|
280
|
+
target: int,
|
|
281
|
+
*,
|
|
282
|
+
intent_idx: int | None,
|
|
283
|
+
) -> tuple[int, bool]:
|
|
284
|
+
"""Last-resort: truncate pinned recent turns (never intent) until total <= target."""
|
|
285
|
+
if total <= target:
|
|
286
|
+
return total, False
|
|
287
|
+
|
|
288
|
+
candidates = _pinned_recent_indices(messages, keep_recent, intent_idx)
|
|
289
|
+
changed = False
|
|
290
|
+
while total > target:
|
|
291
|
+
progressed = False
|
|
292
|
+
for idx in candidates:
|
|
293
|
+
if total <= target:
|
|
294
|
+
break
|
|
295
|
+
replacement = _apply_head_tail_truncate(messages[idx].content)
|
|
296
|
+
if replacement is None:
|
|
297
|
+
continue
|
|
298
|
+
tokens = estimate_tokens(messages[idx].content)
|
|
299
|
+
total = total - tokens + estimate_tokens(replacement)
|
|
300
|
+
messages[idx].content = replacement
|
|
301
|
+
changed = True
|
|
302
|
+
progressed = True
|
|
303
|
+
if not progressed:
|
|
304
|
+
break
|
|
305
|
+
return total, changed
|
|
306
|
+
|
|
307
|
+
|
|
246
308
|
def _stage_dedup(messages: list[_Msg], pinned: list[int], total: int, target: int) -> tuple[int, bool]:
|
|
247
309
|
changed = False
|
|
248
310
|
seen: dict[str, int] = {}
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
"""LLM-backed message summarization for context compression stage 2.
|
|
2
|
+
|
|
3
|
+
Calls the main model via ``hermes -z`` (or ``cluxion_hermes_call`` when available).
|
|
4
|
+
Stage 2 is Python-only; the Rust ``context.rs`` mirror intentionally does not
|
|
5
|
+
replicate LLM calls.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
import os
|
|
13
|
+
import re
|
|
14
|
+
import shutil
|
|
15
|
+
import subprocess
|
|
16
|
+
from typing import TYPE_CHECKING, Protocol
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from collections.abc import Mapping, Sequence
|
|
20
|
+
|
|
21
|
+
DEFAULT_TIMEOUT_S = 120.0
|
|
22
|
+
_HERMES_BIN = "hermes"
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
_SUMMARY_INSTRUCTIONS = (
|
|
26
|
+
"Summarize each message by importance. PRESERVE ABOVE ALL: the user's intent and "
|
|
27
|
+
"direction, decisions made, unresolved items, file paths / identifiers / commands — "
|
|
28
|
+
"regardless of language (Korean or English). "
|
|
29
|
+
"ONLY summarize content actually present in the message. NEVER invent, add, infer, "
|
|
30
|
+
"or fabricate any identifier, number, name, port, path, or fact that is not in the "
|
|
31
|
+
"original. If unsure whether something is in the source, OMIT it. "
|
|
32
|
+
"Compress everything else. Each summary < 10% of the original. "
|
|
33
|
+
'Output STRICT JSON: {"<index>": "<summary>", ...} only.'
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
_HARD_TOKEN_RE = re.compile(
|
|
37
|
+
r"\b(?:"
|
|
38
|
+
r"\d+(?:\.\d+)?(?:k|m|만|억)?"
|
|
39
|
+
r"|[A-Za-z][\w.-]*\d[\w.-]*"
|
|
40
|
+
r"|\d[\w.-]+"
|
|
41
|
+
r")\b",
|
|
42
|
+
re.IGNORECASE,
|
|
43
|
+
)
|
|
44
|
+
_NUMERIC_SUFFIX_RE = re.compile(r"^(\d+(?:\.\d+)?)(k|m|만|억)?$", re.IGNORECASE)
|
|
45
|
+
_STRIP_LABEL_PREFIX_RE = re.compile(r"(?:\w+:\s*)+", re.IGNORECASE)
|
|
46
|
+
_SUFFIX_MULTIPLIERS = {"k": 1000, "m": 1_000_000, "만": 10_000, "억": 100_000_000}
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class _MessageLike(Protocol):
|
|
50
|
+
role: str
|
|
51
|
+
content: str
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def hermes_available() -> bool:
|
|
55
|
+
return shutil.which(_HERMES_BIN) is not None
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def summarize_messages(
|
|
59
|
+
messages: Sequence[_MessageLike],
|
|
60
|
+
indices: Sequence[int],
|
|
61
|
+
instructions: str | None = None,
|
|
62
|
+
*,
|
|
63
|
+
model: str | None = None,
|
|
64
|
+
timeout_s: float = DEFAULT_TIMEOUT_S,
|
|
65
|
+
) -> dict[int, str] | None:
|
|
66
|
+
"""Summarize selected messages via hermes -z. Returns None on any failure."""
|
|
67
|
+
if not indices:
|
|
68
|
+
return {}
|
|
69
|
+
if not hermes_available():
|
|
70
|
+
return None
|
|
71
|
+
prompt = _build_prompt(messages, indices, instructions or _SUMMARY_INSTRUCTIONS)
|
|
72
|
+
try:
|
|
73
|
+
stdout = _call_hermes_oneshot(prompt, model=model, timeout_s=timeout_s)
|
|
74
|
+
except (OSError, subprocess.TimeoutExpired, subprocess.CalledProcessError):
|
|
75
|
+
return None
|
|
76
|
+
parsed = _parse_summary_json(stdout)
|
|
77
|
+
if parsed is None:
|
|
78
|
+
return None
|
|
79
|
+
result: dict[int, str] = {}
|
|
80
|
+
hallucination_stripped = 0
|
|
81
|
+
for idx in indices:
|
|
82
|
+
key = str(idx)
|
|
83
|
+
if key not in parsed or not isinstance(parsed[key], str) or not parsed[key].strip():
|
|
84
|
+
continue
|
|
85
|
+
summary = parsed[key].strip()
|
|
86
|
+
if idx < 0 or idx >= len(messages):
|
|
87
|
+
result[idx] = summary
|
|
88
|
+
continue
|
|
89
|
+
try:
|
|
90
|
+
guarded, stripped = _apply_hallucination_guard(summary, messages[idx].content)
|
|
91
|
+
except Exception:
|
|
92
|
+
logger.exception("llm_compress: hallucination guard failed for message %s", idx)
|
|
93
|
+
return None
|
|
94
|
+
if guarded is None:
|
|
95
|
+
return None
|
|
96
|
+
hallucination_stripped += stripped
|
|
97
|
+
result[idx] = guarded
|
|
98
|
+
if not result:
|
|
99
|
+
return None
|
|
100
|
+
if hallucination_stripped > 0:
|
|
101
|
+
logger.info(
|
|
102
|
+
"llm_compress: stripped %d hallucinated token(s) from summaries",
|
|
103
|
+
hallucination_stripped,
|
|
104
|
+
)
|
|
105
|
+
return result
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _normalize_for_match(text: str) -> str:
|
|
109
|
+
return re.sub(r"[,\s]+", "", text.lower())
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _numeric_variants(token: str) -> set[str]:
|
|
113
|
+
norm = _normalize_for_match(token)
|
|
114
|
+
variants = {norm}
|
|
115
|
+
match = _NUMERIC_SUFFIX_RE.match(norm)
|
|
116
|
+
if not match:
|
|
117
|
+
return variants
|
|
118
|
+
base, suffix = match.group(1), match.group(2)
|
|
119
|
+
variants.add(_normalize_for_match(base))
|
|
120
|
+
if suffix:
|
|
121
|
+
multiplier = _SUFFIX_MULTIPLIERS.get(suffix.lower())
|
|
122
|
+
if multiplier is not None:
|
|
123
|
+
try:
|
|
124
|
+
expanded = str(int(float(base) * multiplier))
|
|
125
|
+
except ValueError:
|
|
126
|
+
expanded = None
|
|
127
|
+
if expanded:
|
|
128
|
+
variants.add(expanded)
|
|
129
|
+
return variants
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def _token_traceable_in_source(token: str, source: str) -> bool:
|
|
133
|
+
norm_source = _normalize_for_match(source)
|
|
134
|
+
norm_token = _normalize_for_match(token)
|
|
135
|
+
|
|
136
|
+
if norm_token in norm_source:
|
|
137
|
+
return True
|
|
138
|
+
|
|
139
|
+
for variant in _numeric_variants(token):
|
|
140
|
+
if variant in norm_source:
|
|
141
|
+
return True
|
|
142
|
+
|
|
143
|
+
digit_groups = re.findall(r"\d+", norm_token)
|
|
144
|
+
if digit_groups:
|
|
145
|
+
all_digits_traceable = True
|
|
146
|
+
for digits in digit_groups:
|
|
147
|
+
if digits in norm_source:
|
|
148
|
+
continue
|
|
149
|
+
traceable = any(variant in norm_source for variant in _numeric_variants(digits))
|
|
150
|
+
if not traceable:
|
|
151
|
+
all_digits_traceable = False
|
|
152
|
+
break
|
|
153
|
+
if all_digits_traceable:
|
|
154
|
+
alpha_prefix = re.sub(r"[\d._-]+", "", norm_token)
|
|
155
|
+
if not alpha_prefix or alpha_prefix in norm_source:
|
|
156
|
+
return True
|
|
157
|
+
|
|
158
|
+
if "." in norm_token:
|
|
159
|
+
without_dots = norm_token.replace(".", "")
|
|
160
|
+
if without_dots in norm_source or without_dots in norm_source.replace(".", ""):
|
|
161
|
+
return True
|
|
162
|
+
|
|
163
|
+
return False
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _extract_hard_tokens(summary: str) -> list[str]:
|
|
167
|
+
return list(dict.fromkeys(_HARD_TOKEN_RE.findall(summary)))
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _strip_fabricated_token(summary: str, token: str) -> str | None:
|
|
171
|
+
escaped = re.escape(token)
|
|
172
|
+
pattern = rf"(?:{_STRIP_LABEL_PREFIX_RE.pattern})?{escaped}\b"
|
|
173
|
+
stripped = re.sub(pattern, "", summary, count=1, flags=re.IGNORECASE)
|
|
174
|
+
stripped = re.sub(r"\s+", " ", stripped).strip(" \t\n\r,;:-")
|
|
175
|
+
stripped = re.sub(r"[,;:\-]\s*$", "", stripped).strip()
|
|
176
|
+
if not stripped or not re.search(r"\w", stripped):
|
|
177
|
+
return None
|
|
178
|
+
return stripped
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def _apply_hallucination_guard(summary: str, source: str) -> tuple[str | None, int]:
|
|
182
|
+
guarded = summary
|
|
183
|
+
stripped_count = 0
|
|
184
|
+
for token in _extract_hard_tokens(summary):
|
|
185
|
+
if _token_traceable_in_source(token, source):
|
|
186
|
+
continue
|
|
187
|
+
updated = _strip_fabricated_token(guarded, token)
|
|
188
|
+
if updated is None:
|
|
189
|
+
return None, stripped_count
|
|
190
|
+
guarded = updated
|
|
191
|
+
stripped_count += 1
|
|
192
|
+
return guarded, stripped_count
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def _build_prompt(
|
|
196
|
+
messages: Sequence[_MessageLike],
|
|
197
|
+
indices: Sequence[int],
|
|
198
|
+
instructions: str,
|
|
199
|
+
) -> str:
|
|
200
|
+
blocks: list[str] = [instructions, "", "Messages to summarize:"]
|
|
201
|
+
for idx in indices:
|
|
202
|
+
if idx < 0 or idx >= len(messages):
|
|
203
|
+
continue
|
|
204
|
+
msg = messages[idx]
|
|
205
|
+
blocks.append(f"--- message index {idx} ({msg.role}) ---")
|
|
206
|
+
blocks.append(msg.content)
|
|
207
|
+
blocks.append("")
|
|
208
|
+
return "\n".join(blocks)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _call_hermes_oneshot(prompt: str, *, model: str | None, timeout_s: float) -> str:
|
|
212
|
+
prev = os.environ.get("CLUXION_PREPROCESS_IN_COMPRESS")
|
|
213
|
+
os.environ["CLUXION_PREPROCESS_IN_COMPRESS"] = "1"
|
|
214
|
+
try:
|
|
215
|
+
try:
|
|
216
|
+
from cluxion_hermes_call.core import hermes_oneshot # type: ignore[import-not-found]
|
|
217
|
+
|
|
218
|
+
return str(hermes_oneshot(prompt, model=model, timeout_s=timeout_s))
|
|
219
|
+
except ImportError:
|
|
220
|
+
pass
|
|
221
|
+
|
|
222
|
+
cmd = [_HERMES_BIN, "-z", prompt]
|
|
223
|
+
if model:
|
|
224
|
+
cmd[1:1] = ["-m", model]
|
|
225
|
+
completed = subprocess.run(
|
|
226
|
+
cmd,
|
|
227
|
+
check=True,
|
|
228
|
+
capture_output=True,
|
|
229
|
+
text=True,
|
|
230
|
+
timeout=timeout_s,
|
|
231
|
+
)
|
|
232
|
+
return completed.stdout
|
|
233
|
+
finally:
|
|
234
|
+
if prev is None:
|
|
235
|
+
os.environ.pop("CLUXION_PREPROCESS_IN_COMPRESS", None)
|
|
236
|
+
else:
|
|
237
|
+
os.environ["CLUXION_PREPROCESS_IN_COMPRESS"] = prev
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def _parse_summary_json(stdout: str) -> Mapping[str, object] | None:
|
|
241
|
+
text = stdout.strip()
|
|
242
|
+
if not text:
|
|
243
|
+
return None
|
|
244
|
+
fence = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL)
|
|
245
|
+
if fence:
|
|
246
|
+
text = fence.group(1)
|
|
247
|
+
else:
|
|
248
|
+
start = text.find("{")
|
|
249
|
+
end = text.rfind("}")
|
|
250
|
+
if start >= 0 and end > start:
|
|
251
|
+
text = text[start : end + 1]
|
|
252
|
+
try:
|
|
253
|
+
parsed = json.loads(text)
|
|
254
|
+
except json.JSONDecodeError:
|
|
255
|
+
return None
|
|
256
|
+
if not isinstance(parsed, dict):
|
|
257
|
+
return None
|
|
258
|
+
return parsed
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
__all__ = ["DEFAULT_TIMEOUT_S", "hermes_available", "summarize_messages"]
|
|
@@ -179,6 +179,116 @@ def test_summarize_messages_returns_none_on_bad_json(monkeypatch) -> None:
|
|
|
179
179
|
assert llm_compress.summarize_messages([type("M", (), {"role": "user", "content": "hi"})()], [0]) is None
|
|
180
180
|
|
|
181
181
|
|
|
182
|
+
def _msg(content: str):
|
|
183
|
+
return type("M", (), {"role": "user", "content": content})()
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def test_hallucination_guard_strips_fabricated_port(monkeypatch) -> None:
|
|
187
|
+
from cluxion_runtime.core import llm_compress
|
|
188
|
+
|
|
189
|
+
source = "Connect to Redis on port 5433 for caching. File: recon_v4.py"
|
|
190
|
+
llm_json = (
|
|
191
|
+
'{"0": "Redis caching on port 5433. recon_v4.py. Hot: Redis:6390"}'
|
|
192
|
+
)
|
|
193
|
+
monkeypatch.setattr(llm_compress, "hermes_available", lambda: True)
|
|
194
|
+
monkeypatch.setattr(llm_compress, "_call_hermes_oneshot", lambda *a, **k: llm_json)
|
|
195
|
+
|
|
196
|
+
result = llm_compress.summarize_messages([_msg(source)], [0])
|
|
197
|
+
assert result is not None
|
|
198
|
+
assert "6390" not in result[0]
|
|
199
|
+
assert "Hot:" not in result[0]
|
|
200
|
+
assert "5433" in result[0]
|
|
201
|
+
assert "recon_v4.py" in result[0]
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def test_hallucination_guard_keeps_normalized_number(monkeypatch) -> None:
|
|
205
|
+
from cluxion_runtime.core import llm_compress
|
|
206
|
+
|
|
207
|
+
source = "Daily traffic is 482,000 requests with peak at 14 months uptime."
|
|
208
|
+
llm_json = '{"0": "Traffic 482k requests, 14mo uptime."}'
|
|
209
|
+
monkeypatch.setattr(llm_compress, "hermes_available", lambda: True)
|
|
210
|
+
monkeypatch.setattr(llm_compress, "_call_hermes_oneshot", lambda *a, **k: llm_json)
|
|
211
|
+
|
|
212
|
+
result = llm_compress.summarize_messages([_msg(source)], [0])
|
|
213
|
+
assert result is not None
|
|
214
|
+
assert "482k" in result[0]
|
|
215
|
+
assert "14mo" in result[0]
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def test_hallucination_guard_keeps_korean_normalized_number(monkeypatch) -> None:
    """Same check as the English case, with a Korean source message: "482k" and "5433" stay."""
    from cluxion_runtime.core import llm_compress

    original_text = "일평균 482,000건 처리, Redis 포트 5433 사용."
    fake_reply = '{"0": "일평균 482k/day, Redis 5433."}'
    # Pretend hermes is installed and answer with the canned JSON instead of calling it.
    monkeypatch.setattr(llm_compress, "hermes_available", lambda: True)
    monkeypatch.setattr(llm_compress, "_call_hermes_oneshot", lambda *a, **k: fake_reply)

    summaries = llm_compress.summarize_messages([_msg(original_text)], [0])
    assert summaries is not None
    summary = summaries[0]
    assert "482k" in summary
    assert "5433" in summary
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def test_hallucination_guard_all_fabricated_returns_none(monkeypatch) -> None:
    """A reply made only of content absent from the source yields ``None`` overall."""
    from cluxion_runtime.core import llm_compress

    original_text = "Discuss caching strategy for the API layer."
    fake_reply = '{"0": "Hot: Redis:6390"}'
    # Pretend hermes is installed and answer with the canned JSON instead of calling it.
    monkeypatch.setattr(llm_compress, "hermes_available", lambda: True)
    monkeypatch.setattr(llm_compress, "_call_hermes_oneshot", lambda *a, **k: fake_reply)

    outcome = llm_compress.summarize_messages([_msg(original_text)], [0])
    assert outcome is None
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def test_pinned_recent_last_resort_brings_under_target(monkeypatch) -> None:
    """Five huge messages with keep_recent_turns=4: intent prefix survives and tokens end <= 30% of the limit."""
    # Force the non-LLM path regardless of what is installed on the machine.
    monkeypatch.setattr(context_compress, "hermes_available", lambda: False)
    intent = "TASK_INTENT: implement pinned-overflow guard"

    opening_turn = {"role": "user", "content": intent + _long(160_000)}
    following_turns = [
        {"role": role, "content": _long(160_000)}
        for role in ("assistant", "tool", "assistant", "user")
    ]
    payload = {
        "messages": [opening_turn, *following_turns],
        # 5 x ~40k tokens ~ 200k total -> usage 0.80 at this limit (live edge case).
        "context_limit_tokens": 250_000,
        "keep_recent_turns": 4,
        "enable_llm_summary": False,
        "enable_forget": True,
    }

    result = context_compress.compress(payload)
    target = int(0.30 * result["context_limit"])

    assert result["usage_before"] >= 0.70
    first_message = result["messages"][0]
    assert first_message["content"].startswith(intent)
    assert result["tokens_after"] <= target
    assert result.get("over_target_pinned_only") is not True
    assert "truncate_pinned_recent" in result["stages_applied"]
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def test_lone_giant_intent_forced_over_target(monkeypatch) -> None:
    """Intent message alone is bigger than the target: it is kept, the rest is truncated, and the over-target flags are set."""
    # Force the non-LLM path regardless of what is installed on the machine.
    monkeypatch.setattr(context_compress, "hermes_available", lambda: False)
    intent = "GIANT_INTENT"

    conversation = [
        {"role": "user", "content": intent + _long(12_000)},
        {"role": "assistant", "content": _long(12_000)},
    ]
    payload = {
        "messages": conversation,
        "context_limit_tokens": 1000,
        "keep_recent_turns": 2,
        "enable_llm_summary": False,
        "enable_forget": True,
    }

    result = context_compress.compress(payload)

    assert result["messages"][0]["content"].startswith(intent)
    assert result.get("forced_over_target") is True
    assert result.get("over_target_pinned_only") is True
    # The second message must carry the truncation marker.
    assert "[...cluxion:" in result["messages"][1]["content"]
    target = int(0.30 * result["context_limit"])
    assert result["tokens_after"] > target
|
|
290
|
+
|
|
291
|
+
|
|
182
292
|
def test_korean_decision_survives_stage3() -> None:
|
|
183
293
|
body = _long(4000)
|
|
184
294
|
digest = f"[cluxion digest] tool: {body[:80]} [900 tokens elided]"
|
|
@@ -1,138 +0,0 @@
|
|
|
1
|
-
"""LLM-backed message summarization for context compression stage 2.
|
|
2
|
-
|
|
3
|
-
Calls the main model via ``hermes -z`` (or ``cluxion_hermes_call`` when available).
|
|
4
|
-
Stage 2 is Python-only; the Rust ``context.rs`` mirror intentionally does not
|
|
5
|
-
replicate LLM calls.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
from __future__ import annotations
|
|
9
|
-
|
|
10
|
-
import json
|
|
11
|
-
import os
|
|
12
|
-
import re
|
|
13
|
-
import shutil
|
|
14
|
-
import subprocess
|
|
15
|
-
from typing import TYPE_CHECKING, Protocol
|
|
16
|
-
|
|
17
|
-
if TYPE_CHECKING:
|
|
18
|
-
from collections.abc import Mapping, Sequence
|
|
19
|
-
|
|
20
|
-
DEFAULT_TIMEOUT_S = 120.0
|
|
21
|
-
_HERMES_BIN = "hermes"
|
|
22
|
-
|
|
23
|
-
_SUMMARY_INSTRUCTIONS = (
|
|
24
|
-
"Summarize each message by importance. PRESERVE ABOVE ALL: the user's intent and "
|
|
25
|
-
"direction, decisions made, unresolved items, file paths / identifiers / commands — "
|
|
26
|
-
"regardless of language (Korean or English). "
|
|
27
|
-
"Compress everything else. Each summary < 10% of the original. "
|
|
28
|
-
'Output STRICT JSON: {"<index>": "<summary>", ...} only.'
|
|
29
|
-
)
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
class _MessageLike(Protocol):
|
|
33
|
-
role: str
|
|
34
|
-
content: str
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def hermes_available() -> bool:
    """Return True when the hermes executable can be resolved on PATH."""
    resolved_path = shutil.which(_HERMES_BIN)
    return resolved_path is not None
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
def summarize_messages(
    messages: Sequence[_MessageLike],
    indices: Sequence[int],
    instructions: str | None = None,
    *,
    model: str | None = None,
    timeout_s: float = DEFAULT_TIMEOUT_S,
) -> dict[int, str] | None:
    """Ask hermes for a summary of each selected message.

    Args:
        messages: Full message list; only the entries named by ``indices`` are sent.
        indices: Positions in ``messages`` to summarize.
        instructions: Prompt preamble; the module default is used when falsy.
        model: Optional model name forwarded to hermes.
        timeout_s: Timeout forwarded to the hermes call.

    Returns:
        ``{}`` when ``indices`` is empty; otherwise a mapping from index to its
        stripped, non-empty summary, or ``None`` when hermes is unavailable, the
        call fails, the output is not a JSON object, or no usable summary came back.
    """
    if not indices:
        return {}
    if not hermes_available():
        return None

    prompt = _build_prompt(messages, indices, instructions or _SUMMARY_INSTRUCTIONS)
    try:
        raw_output = _call_hermes_oneshot(prompt, model=model, timeout_s=timeout_s)
    except (OSError, subprocess.TimeoutExpired, subprocess.CalledProcessError):
        return None

    payload = _parse_summary_json(raw_output)
    if payload is None:
        return None

    summaries: dict[int, str] = {}
    for idx in indices:
        # The model is asked to key its JSON object by the stringified index.
        candidate = payload.get(str(idx))
        if isinstance(candidate, str) and candidate.strip():
            summaries[idx] = candidate.strip()
    return summaries or None
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
def _build_prompt(
|
|
73
|
-
messages: Sequence[_MessageLike],
|
|
74
|
-
indices: Sequence[int],
|
|
75
|
-
instructions: str,
|
|
76
|
-
) -> str:
|
|
77
|
-
blocks: list[str] = [instructions, "", "Messages to summarize:"]
|
|
78
|
-
for idx in indices:
|
|
79
|
-
if idx < 0 or idx >= len(messages):
|
|
80
|
-
continue
|
|
81
|
-
msg = messages[idx]
|
|
82
|
-
blocks.append(f"--- message index {idx} ({msg.role}) ---")
|
|
83
|
-
blocks.append(msg.content)
|
|
84
|
-
blocks.append("")
|
|
85
|
-
return "\n".join(blocks)
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
def _call_hermes_oneshot(prompt: str, *, model: str | None, timeout_s: float) -> str:
    """Send ``prompt`` to hermes once and return its text output.

    Uses ``cluxion_hermes_call.core.hermes_oneshot`` when it can be imported and
    otherwise runs the hermes executable with ``-z``. While the call is in flight
    ``CLUXION_PREPROCESS_IN_COMPRESS`` is set to ``"1"`` in ``os.environ``; its
    previous state is restored afterwards, even on error.

    Raises:
        OSError, subprocess.TimeoutExpired, subprocess.CalledProcessError: from
            the subprocess fallback.
    """
    env_key = "CLUXION_PREPROCESS_IN_COMPRESS"
    saved_value = os.environ.get(env_key)
    os.environ[env_key] = "1"
    try:
        try:
            from cluxion_hermes_call.core import hermes_oneshot  # type: ignore[import-not-found]

            return str(hermes_oneshot(prompt, model=model, timeout_s=timeout_s))
        except ImportError:
            # Fall through to the command-line invocation below.
            pass

        model_args = ["-m", model] if model else []
        argv = [_HERMES_BIN, *model_args, "-z", prompt]
        finished = subprocess.run(
            argv,
            check=True,
            capture_output=True,
            text=True,
            timeout=timeout_s,
        )
        return finished.stdout
    finally:
        # Put the environment back exactly as it was found.
        if saved_value is None:
            os.environ.pop(env_key, None)
        else:
            os.environ[env_key] = saved_value
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
def _parse_summary_json(stdout: str) -> Mapping[str, object] | None:
|
|
118
|
-
text = stdout.strip()
|
|
119
|
-
if not text:
|
|
120
|
-
return None
|
|
121
|
-
fence = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL)
|
|
122
|
-
if fence:
|
|
123
|
-
text = fence.group(1)
|
|
124
|
-
else:
|
|
125
|
-
start = text.find("{")
|
|
126
|
-
end = text.rfind("}")
|
|
127
|
-
if start >= 0 and end > start:
|
|
128
|
-
text = text[start : end + 1]
|
|
129
|
-
try:
|
|
130
|
-
parsed = json.loads(text)
|
|
131
|
-
except json.JSONDecodeError:
|
|
132
|
-
return None
|
|
133
|
-
if not isinstance(parsed, dict):
|
|
134
|
-
return None
|
|
135
|
-
return parsed
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
__all__ = ["DEFAULT_TIMEOUT_S", "hermes_available", "summarize_messages"]
|
|
File without changes
|
{cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/.gitignore
RENAMED
|
File without changes
|
{cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/Docs/README.md
RENAMED
|
File without changes
|
{cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/LICENSE
RENAMED
|
File without changes
|
{cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/README.md
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|