cluxion-agentplugin-preprocessing 0.3.14__tar.gz → 0.3.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/PKG-INFO +1 -1
  2. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/pyproject.toml +1 -1
  3. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/context_compress.py +75 -13
  4. cluxion_agentplugin_preprocessing-0.3.16/src/cluxion_runtime/core/llm_compress.py +261 -0
  5. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_context_compress_llm_forget.py +110 -0
  6. cluxion_agentplugin_preprocessing-0.3.14/src/cluxion_runtime/core/llm_compress.py +0 -138
  7. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/.github/profile/README.md +0 -0
  8. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/.gitignore +0 -0
  9. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/Docs/README.md +0 -0
  10. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/LICENSE +0 -0
  11. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/README.md +0 -0
  12. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/adapters/claude/.claude-plugin/plugin.json +0 -0
  13. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/adapters/claude/skills/preprocess/SKILL.md +0 -0
  14. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/adapters/codex/config-snippet.toml +0 -0
  15. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/cluxion-Docs/README.md +0 -0
  16. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/cluxion-Docs/architecture.md +0 -0
  17. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/cluxion-Docs/harness-logic.md +0 -0
  18. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/cluxion-Docs/honesty-preprocessing.md +0 -0
  19. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/cluxion-Docs/install-and-operations.md +0 -0
  20. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/cluxion-Docs/security.md +0 -0
  21. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/rust/cluxion_queue/Cargo.lock +0 -0
  22. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/rust/cluxion_queue/Cargo.toml +0 -0
  23. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/rust/cluxion_queue/pyproject.toml +0 -0
  24. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/rust/cluxion_queue/src/context.rs +0 -0
  25. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/rust/cluxion_queue/src/dispatch.rs +0 -0
  26. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/rust/cluxion_queue/src/guard.rs +0 -0
  27. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/rust/cluxion_queue/src/lib.rs +0 -0
  28. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/rust/cluxion_queue/src/main.rs +0 -0
  29. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/rust/cluxion_queue/src/queue.rs +0 -0
  30. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/rust/cluxion_queue/src/types.rs +0 -0
  31. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/__init__.py +0 -0
  32. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/cli.py +0 -0
  33. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/doctor/__init__.py +0 -0
  34. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/doctor/catalog.json +0 -0
  35. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/doctor/framework.py +0 -0
  36. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/doctor/probes.py +0 -0
  37. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/guard_watch.py +0 -0
  38. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/hermes_config.py +0 -0
  39. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/plugin.py +0 -0
  40. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/plugin.yaml +0 -0
  41. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/runner.py +0 -0
  42. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_agentplugin_preprocessing/schemas.py +0 -0
  43. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/__init__.py +0 -0
  44. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/__main__.py +0 -0
  45. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/adapters/__init__.py +0 -0
  46. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/adapters/contract.py +0 -0
  47. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/adapters/grok_build.py +0 -0
  48. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/adapters/hermes.py +0 -0
  49. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/adapters/spec.py +0 -0
  50. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/bootstrap.py +0 -0
  51. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/cli.py +0 -0
  52. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/__init__.py +0 -0
  53. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/clarification.py +0 -0
  54. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/dispatch_store.py +0 -0
  55. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/harness.py +0 -0
  56. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/hybrid_forget.py +0 -0
  57. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/intent.py +0 -0
  58. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/ledger.py +0 -0
  59. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/ledger_codec.py +0 -0
  60. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/plan_codec.py +0 -0
  61. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/preprocess.py +0 -0
  62. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/types.py +0 -0
  63. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/core/work_queue.py +0 -0
  64. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/guard_daemon_host.py +0 -0
  65. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/models/__init__.py +0 -0
  66. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/models/supervisor.py +0 -0
  67. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/models/vllm_mlx.py +0 -0
  68. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/resources/__init__.py +0 -0
  69. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/resources/guard_bridge.py +0 -0
  70. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/resources/py_queue.py +0 -0
  71. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/resources/queue_bridge.py +0 -0
  72. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/resources/rust_bridge.py +0 -0
  73. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/web/__init__.py +0 -0
  74. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/src/cluxion_runtime/web/browser_bridge.py +0 -0
  75. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_auto_compress_middleware.py +0 -0
  76. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_browser_bridge.py +0 -0
  77. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_clarification.py +0 -0
  78. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_cluxion_runtime_spine.py +0 -0
  79. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_context_compress.py +0 -0
  80. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_contract.py +0 -0
  81. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_dispatch_store.py +0 -0
  82. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_guard.py +0 -0
  83. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_guard_daemon_host.py +0 -0
  84. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_ledger.py +0 -0
  85. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_py_queue_concurrency.py +0 -0
  86. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_queue_backends.py +0 -0
  87. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_runtime_adapter_cli.py +0 -0
  88. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_rust_queue.py +0 -0
  89. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/runtime/test_supervisor.py +0 -0
  90. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/test_bootstrap.py +0 -0
  91. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/test_doctor.py +0 -0
  92. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/test_guard_watch.py +0 -0
  93. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/test_hermes_config.py +0 -0
  94. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/test_packaging_policy.py +0 -0
  95. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/test_plugin.py +0 -0
  96. {cluxion_agentplugin_preprocessing-0.3.14 → cluxion_agentplugin_preprocessing-0.3.16}/tests/test_runner.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cluxion-agentplugin-preprocessing
3
- Version: 0.3.14
3
+ Version: 0.3.16
4
4
  Summary: Universal agent plugin for Cluxion preprocessing, honesty contracts, clarification, Rust work queue, and resource-aware harness handoff.
5
5
  Project-URL: Homepage, https://github.com/cluxion/cluxion-Agentplugin-preprocessing
6
6
  Project-URL: Repository, https://github.com/cluxion/cluxion-Agentplugin-preprocessing
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "cluxion-agentplugin-preprocessing"
7
- version = "0.3.14"
7
+ version = "0.3.16"
8
8
  description = "Universal agent plugin for Cluxion preprocessing, honesty contracts, clarification, Rust work queue, and resource-aware harness handoff."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -6,8 +6,10 @@ lockstep so the three backends produce identical Stage-1 output (parity-tested).
6
6
 
7
7
  Stages 2 (LLM summarization via ``hermes -z``) and 3 (hybrid forgetting) are
8
8
  Python-only; the Rust mirror intentionally does not replicate LLM or forgetforge
9
- calls. Disable them with ``enable_llm_summary`` / ``enable_forget`` for Stage-1
10
- parity.
9
+ calls. Stage 4 (last-resort truncation of pinned recent turns) is also
10
+ Python-only — it runs when every remaining message is pinned yet still exceeds
11
+ the target. Disable stages 2-4 with ``enable_llm_summary`` / ``enable_forget``
12
+ for Stage-1 parity.
11
13
 
12
14
  What stays untouched: pinned messages (explicit ``pinned``, the first
13
15
  user message = task intent, the most recent ``keep_recent`` turns).
@@ -152,13 +154,26 @@ def compress(payload: Mapping[str, object]) -> dict[str, object]:
152
154
  stages.append("forget")
153
155
  pinned = _pinned_indices(messages, keep_recent)
154
156
 
157
+ intent_idx = _first_user_index(messages)
158
+ if total > target_tokens and (
159
+ over_target_pinned_only or not any(idx not in pinned for idx in range(len(messages)))
160
+ ):
161
+ total, changed = _stage_truncate_pinned_recent(
162
+ messages, keep_recent, total, target_tokens, intent_idx=intent_idx
163
+ )
164
+ if changed:
165
+ stages.append("truncate_pinned_recent")
166
+ over_target_pinned_only = False
167
+
155
168
  if total > target_tokens:
156
169
  if summary_request is None:
157
170
  summary_request = _build_summary_request(messages, pinned, total, target_tokens)
158
- if over_target_pinned_only or not any(idx not in pinned for idx in range(len(messages))):
159
- over_target_pinned_only = True
160
- if total / context_limit > trigger_ratio:
171
+ intent_tokens = (
172
+ estimate_tokens(messages[intent_idx].content) if intent_idx is not None else 0
173
+ )
174
+ if intent_tokens > target_tokens:
161
175
  forced_over_target = True
176
+ over_target_pinned_only = True
162
177
 
163
178
  return _result_payload(
164
179
  messages,
@@ -208,6 +223,10 @@ def _bool_flag(payload: Mapping[str, object], key: str, default: bool) -> bool:
208
223
  return default
209
224
 
210
225
 
226
+ def _first_user_index(messages: list[_Msg]) -> int | None:
227
+ return next((idx for idx, msg in enumerate(messages) if msg.role == "user"), None)
228
+
229
+
211
230
  def _pinned_indices(messages: list[_Msg], keep_recent: int) -> list[int]:
212
231
  pinned = [idx for idx, msg in enumerate(messages) if msg.pinned]
213
232
  first_user = next((idx for idx, msg in enumerate(messages) if msg.role == "user"), None)
@@ -221,6 +240,17 @@ def _pinned_indices(messages: list[_Msg], keep_recent: int) -> list[int]:
221
240
  return pinned
222
241
 
223
242
 
243
+ def _apply_head_tail_truncate(content: str) -> str | None:
244
+ if estimate_tokens(content) <= TRUNCATE_MIN_TOKENS:
245
+ return None
246
+ if len(content) <= TRUNCATE_HEAD_CHARS + TRUNCATE_TAIL_CHARS:
247
+ return None
248
+ elided = len(content) - TRUNCATE_HEAD_CHARS - TRUNCATE_TAIL_CHARS
249
+ head = content[:TRUNCATE_HEAD_CHARS]
250
+ tail = content[len(content) - TRUNCATE_TAIL_CHARS :]
251
+ return f"{head}\n[...cluxion: {elided} chars elided...]\n{tail}"
252
+
253
+
224
254
  def _stage_truncate(messages: list[_Msg], pinned: list[int], total: int, target: int) -> tuple[int, bool]:
225
255
  changed = False
226
256
  for idx, msg in enumerate(messages):
@@ -228,21 +258,53 @@ def _stage_truncate(messages: list[_Msg], pinned: list[int], total: int, target:
228
258
  break
229
259
  if idx in pinned:
230
260
  continue
231
- tokens = estimate_tokens(msg.content)
232
- if tokens <= TRUNCATE_MIN_TOKENS:
261
+ replacement = _apply_head_tail_truncate(msg.content)
262
+ if replacement is None:
233
263
  continue
234
- if len(msg.content) <= TRUNCATE_HEAD_CHARS + TRUNCATE_TAIL_CHARS:
235
- continue
236
- elided = len(msg.content) - TRUNCATE_HEAD_CHARS - TRUNCATE_TAIL_CHARS
237
- head = msg.content[:TRUNCATE_HEAD_CHARS]
238
- tail = msg.content[len(msg.content) - TRUNCATE_TAIL_CHARS :]
239
- replacement = f"{head}\n[...cluxion: {elided} chars elided...]\n{tail}"
264
+ tokens = estimate_tokens(msg.content)
240
265
  total = total - tokens + estimate_tokens(replacement)
241
266
  msg.content = replacement
242
267
  changed = True
243
268
  return total, changed
244
269
 
245
270
 
271
+ def _pinned_recent_indices(messages: list[_Msg], keep_recent: int, intent_idx: int | None) -> list[int]:
272
+ recent_start = max(0, len(messages) - keep_recent)
273
+ return [idx for idx in range(recent_start, len(messages)) if idx != intent_idx]
274
+
275
+
276
+ def _stage_truncate_pinned_recent(
277
+ messages: list[_Msg],
278
+ keep_recent: int,
279
+ total: int,
280
+ target: int,
281
+ *,
282
+ intent_idx: int | None,
283
+ ) -> tuple[int, bool]:
284
+ """Last-resort: truncate pinned recent turns (never intent) until total <= target."""
285
+ if total <= target:
286
+ return total, False
287
+
288
+ candidates = _pinned_recent_indices(messages, keep_recent, intent_idx)
289
+ changed = False
290
+ while total > target:
291
+ progressed = False
292
+ for idx in candidates:
293
+ if total <= target:
294
+ break
295
+ replacement = _apply_head_tail_truncate(messages[idx].content)
296
+ if replacement is None:
297
+ continue
298
+ tokens = estimate_tokens(messages[idx].content)
299
+ total = total - tokens + estimate_tokens(replacement)
300
+ messages[idx].content = replacement
301
+ changed = True
302
+ progressed = True
303
+ if not progressed:
304
+ break
305
+ return total, changed
306
+
307
+
246
308
  def _stage_dedup(messages: list[_Msg], pinned: list[int], total: int, target: int) -> tuple[int, bool]:
247
309
  changed = False
248
310
  seen: dict[str, int] = {}
@@ -0,0 +1,261 @@
1
+ """LLM-backed message summarization for context compression stage 2.
2
+
3
+ Calls the main model via ``hermes -z`` (or ``cluxion_hermes_call`` when available).
4
+ Stage 2 is Python-only; the Rust ``context.rs`` mirror intentionally does not
5
+ replicate LLM calls.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import logging
12
+ import os
13
+ import re
14
+ import shutil
15
+ import subprocess
16
+ from typing import TYPE_CHECKING, Protocol
17
+
18
+ if TYPE_CHECKING:
19
+ from collections.abc import Mapping, Sequence
20
+
21
+ DEFAULT_TIMEOUT_S = 120.0
22
+ _HERMES_BIN = "hermes"
23
+ logger = logging.getLogger(__name__)
24
+
25
+ _SUMMARY_INSTRUCTIONS = (
26
+ "Summarize each message by importance. PRESERVE ABOVE ALL: the user's intent and "
27
+ "direction, decisions made, unresolved items, file paths / identifiers / commands — "
28
+ "regardless of language (Korean or English). "
29
+ "ONLY summarize content actually present in the message. NEVER invent, add, infer, "
30
+ "or fabricate any identifier, number, name, port, path, or fact that is not in the "
31
+ "original. If unsure whether something is in the source, OMIT it. "
32
+ "Compress everything else. Each summary < 10% of the original. "
33
+ 'Output STRICT JSON: {"<index>": "<summary>", ...} only.'
34
+ )
35
+
36
+ _HARD_TOKEN_RE = re.compile(
37
+ r"\b(?:"
38
+ r"\d+(?:\.\d+)?(?:k|m|만|억)?"
39
+ r"|[A-Za-z][\w.-]*\d[\w.-]*"
40
+ r"|\d[\w.-]+"
41
+ r")\b",
42
+ re.IGNORECASE,
43
+ )
44
+ _NUMERIC_SUFFIX_RE = re.compile(r"^(\d+(?:\.\d+)?)(k|m|만|억)?$", re.IGNORECASE)
45
+ _STRIP_LABEL_PREFIX_RE = re.compile(r"(?:\w+:\s*)+", re.IGNORECASE)
46
+ _SUFFIX_MULTIPLIERS = {"k": 1000, "m": 1_000_000, "만": 10_000, "억": 100_000_000}
47
+
48
+
49
+ class _MessageLike(Protocol):
50
+ role: str
51
+ content: str
52
+
53
+
54
+ def hermes_available() -> bool:
55
+ return shutil.which(_HERMES_BIN) is not None
56
+
57
+
58
+ def summarize_messages(
59
+ messages: Sequence[_MessageLike],
60
+ indices: Sequence[int],
61
+ instructions: str | None = None,
62
+ *,
63
+ model: str | None = None,
64
+ timeout_s: float = DEFAULT_TIMEOUT_S,
65
+ ) -> dict[int, str] | None:
66
+ """Summarize selected messages via hermes -z. Returns None on any failure."""
67
+ if not indices:
68
+ return {}
69
+ if not hermes_available():
70
+ return None
71
+ prompt = _build_prompt(messages, indices, instructions or _SUMMARY_INSTRUCTIONS)
72
+ try:
73
+ stdout = _call_hermes_oneshot(prompt, model=model, timeout_s=timeout_s)
74
+ except (OSError, subprocess.TimeoutExpired, subprocess.CalledProcessError):
75
+ return None
76
+ parsed = _parse_summary_json(stdout)
77
+ if parsed is None:
78
+ return None
79
+ result: dict[int, str] = {}
80
+ hallucination_stripped = 0
81
+ for idx in indices:
82
+ key = str(idx)
83
+ if key not in parsed or not isinstance(parsed[key], str) or not parsed[key].strip():
84
+ continue
85
+ summary = parsed[key].strip()
86
+ if idx < 0 or idx >= len(messages):
87
+ result[idx] = summary
88
+ continue
89
+ try:
90
+ guarded, stripped = _apply_hallucination_guard(summary, messages[idx].content)
91
+ except Exception:
92
+ logger.exception("llm_compress: hallucination guard failed for message %s", idx)
93
+ return None
94
+ if guarded is None:
95
+ return None
96
+ hallucination_stripped += stripped
97
+ result[idx] = guarded
98
+ if not result:
99
+ return None
100
+ if hallucination_stripped > 0:
101
+ logger.info(
102
+ "llm_compress: stripped %d hallucinated token(s) from summaries",
103
+ hallucination_stripped,
104
+ )
105
+ return result
106
+
107
+
108
+ def _normalize_for_match(text: str) -> str:
109
+ return re.sub(r"[,\s]+", "", text.lower())
110
+
111
+
112
+ def _numeric_variants(token: str) -> set[str]:
113
+ norm = _normalize_for_match(token)
114
+ variants = {norm}
115
+ match = _NUMERIC_SUFFIX_RE.match(norm)
116
+ if not match:
117
+ return variants
118
+ base, suffix = match.group(1), match.group(2)
119
+ variants.add(_normalize_for_match(base))
120
+ if suffix:
121
+ multiplier = _SUFFIX_MULTIPLIERS.get(suffix.lower())
122
+ if multiplier is not None:
123
+ try:
124
+ expanded = str(int(float(base) * multiplier))
125
+ except ValueError:
126
+ expanded = None
127
+ if expanded:
128
+ variants.add(expanded)
129
+ return variants
130
+
131
+
132
+ def _token_traceable_in_source(token: str, source: str) -> bool:
133
+ norm_source = _normalize_for_match(source)
134
+ norm_token = _normalize_for_match(token)
135
+
136
+ if norm_token in norm_source:
137
+ return True
138
+
139
+ for variant in _numeric_variants(token):
140
+ if variant in norm_source:
141
+ return True
142
+
143
+ digit_groups = re.findall(r"\d+", norm_token)
144
+ if digit_groups:
145
+ all_digits_traceable = True
146
+ for digits in digit_groups:
147
+ if digits in norm_source:
148
+ continue
149
+ traceable = any(variant in norm_source for variant in _numeric_variants(digits))
150
+ if not traceable:
151
+ all_digits_traceable = False
152
+ break
153
+ if all_digits_traceable:
154
+ alpha_prefix = re.sub(r"[\d._-]+", "", norm_token)
155
+ if not alpha_prefix or alpha_prefix in norm_source:
156
+ return True
157
+
158
+ if "." in norm_token:
159
+ without_dots = norm_token.replace(".", "")
160
+ if without_dots in norm_source or without_dots in norm_source.replace(".", ""):
161
+ return True
162
+
163
+ return False
164
+
165
+
166
+ def _extract_hard_tokens(summary: str) -> list[str]:
167
+ return list(dict.fromkeys(_HARD_TOKEN_RE.findall(summary)))
168
+
169
+
170
+ def _strip_fabricated_token(summary: str, token: str) -> str | None:
171
+ escaped = re.escape(token)
172
+ pattern = rf"(?:{_STRIP_LABEL_PREFIX_RE.pattern})?{escaped}\b"
173
+ stripped = re.sub(pattern, "", summary, count=1, flags=re.IGNORECASE)
174
+ stripped = re.sub(r"\s+", " ", stripped).strip(" \t\n\r,;:-")
175
+ stripped = re.sub(r"[,;:\-]\s*$", "", stripped).strip()
176
+ if not stripped or not re.search(r"\w", stripped):
177
+ return None
178
+ return stripped
179
+
180
+
181
+ def _apply_hallucination_guard(summary: str, source: str) -> tuple[str | None, int]:
182
+ guarded = summary
183
+ stripped_count = 0
184
+ for token in _extract_hard_tokens(summary):
185
+ if _token_traceable_in_source(token, source):
186
+ continue
187
+ updated = _strip_fabricated_token(guarded, token)
188
+ if updated is None:
189
+ return None, stripped_count
190
+ guarded = updated
191
+ stripped_count += 1
192
+ return guarded, stripped_count
193
+
194
+
195
+ def _build_prompt(
196
+ messages: Sequence[_MessageLike],
197
+ indices: Sequence[int],
198
+ instructions: str,
199
+ ) -> str:
200
+ blocks: list[str] = [instructions, "", "Messages to summarize:"]
201
+ for idx in indices:
202
+ if idx < 0 or idx >= len(messages):
203
+ continue
204
+ msg = messages[idx]
205
+ blocks.append(f"--- message index {idx} ({msg.role}) ---")
206
+ blocks.append(msg.content)
207
+ blocks.append("")
208
+ return "\n".join(blocks)
209
+
210
+
211
+ def _call_hermes_oneshot(prompt: str, *, model: str | None, timeout_s: float) -> str:
212
+ prev = os.environ.get("CLUXION_PREPROCESS_IN_COMPRESS")
213
+ os.environ["CLUXION_PREPROCESS_IN_COMPRESS"] = "1"
214
+ try:
215
+ try:
216
+ from cluxion_hermes_call.core import hermes_oneshot # type: ignore[import-not-found]
217
+
218
+ return str(hermes_oneshot(prompt, model=model, timeout_s=timeout_s))
219
+ except ImportError:
220
+ pass
221
+
222
+ cmd = [_HERMES_BIN, "-z", prompt]
223
+ if model:
224
+ cmd[1:1] = ["-m", model]
225
+ completed = subprocess.run(
226
+ cmd,
227
+ check=True,
228
+ capture_output=True,
229
+ text=True,
230
+ timeout=timeout_s,
231
+ )
232
+ return completed.stdout
233
+ finally:
234
+ if prev is None:
235
+ os.environ.pop("CLUXION_PREPROCESS_IN_COMPRESS", None)
236
+ else:
237
+ os.environ["CLUXION_PREPROCESS_IN_COMPRESS"] = prev
238
+
239
+
240
+ def _parse_summary_json(stdout: str) -> Mapping[str, object] | None:
241
+ text = stdout.strip()
242
+ if not text:
243
+ return None
244
+ fence = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL)
245
+ if fence:
246
+ text = fence.group(1)
247
+ else:
248
+ start = text.find("{")
249
+ end = text.rfind("}")
250
+ if start >= 0 and end > start:
251
+ text = text[start : end + 1]
252
+ try:
253
+ parsed = json.loads(text)
254
+ except json.JSONDecodeError:
255
+ return None
256
+ if not isinstance(parsed, dict):
257
+ return None
258
+ return parsed
259
+
260
+
261
+ __all__ = ["DEFAULT_TIMEOUT_S", "hermes_available", "summarize_messages"]
@@ -179,6 +179,116 @@ def test_summarize_messages_returns_none_on_bad_json(monkeypatch) -> None:
179
179
  assert llm_compress.summarize_messages([type("M", (), {"role": "user", "content": "hi"})()], [0]) is None
180
180
 
181
181
 
182
+ def _msg(content: str):
183
+ return type("M", (), {"role": "user", "content": content})()
184
+
185
+
186
+ def test_hallucination_guard_strips_fabricated_port(monkeypatch) -> None:
187
+ from cluxion_runtime.core import llm_compress
188
+
189
+ source = "Connect to Redis on port 5433 for caching. File: recon_v4.py"
190
+ llm_json = (
191
+ '{"0": "Redis caching on port 5433. recon_v4.py. Hot: Redis:6390"}'
192
+ )
193
+ monkeypatch.setattr(llm_compress, "hermes_available", lambda: True)
194
+ monkeypatch.setattr(llm_compress, "_call_hermes_oneshot", lambda *a, **k: llm_json)
195
+
196
+ result = llm_compress.summarize_messages([_msg(source)], [0])
197
+ assert result is not None
198
+ assert "6390" not in result[0]
199
+ assert "Hot:" not in result[0]
200
+ assert "5433" in result[0]
201
+ assert "recon_v4.py" in result[0]
202
+
203
+
204
+ def test_hallucination_guard_keeps_normalized_number(monkeypatch) -> None:
205
+ from cluxion_runtime.core import llm_compress
206
+
207
+ source = "Daily traffic is 482,000 requests with peak at 14 months uptime."
208
+ llm_json = '{"0": "Traffic 482k requests, 14mo uptime."}'
209
+ monkeypatch.setattr(llm_compress, "hermes_available", lambda: True)
210
+ monkeypatch.setattr(llm_compress, "_call_hermes_oneshot", lambda *a, **k: llm_json)
211
+
212
+ result = llm_compress.summarize_messages([_msg(source)], [0])
213
+ assert result is not None
214
+ assert "482k" in result[0]
215
+ assert "14mo" in result[0]
216
+
217
+
218
+ def test_hallucination_guard_keeps_korean_normalized_number(monkeypatch) -> None:
219
+ from cluxion_runtime.core import llm_compress
220
+
221
+ source = "일평균 482,000건 처리, Redis 포트 5433 사용."
222
+ llm_json = '{"0": "일평균 482k/day, Redis 5433."}'
223
+ monkeypatch.setattr(llm_compress, "hermes_available", lambda: True)
224
+ monkeypatch.setattr(llm_compress, "_call_hermes_oneshot", lambda *a, **k: llm_json)
225
+
226
+ result = llm_compress.summarize_messages([_msg(source)], [0])
227
+ assert result is not None
228
+ assert "482k" in result[0]
229
+ assert "5433" in result[0]
230
+
231
+
232
+ def test_hallucination_guard_all_fabricated_returns_none(monkeypatch) -> None:
233
+ from cluxion_runtime.core import llm_compress
234
+
235
+ source = "Discuss caching strategy for the API layer."
236
+ llm_json = '{"0": "Hot: Redis:6390"}'
237
+ monkeypatch.setattr(llm_compress, "hermes_available", lambda: True)
238
+ monkeypatch.setattr(llm_compress, "_call_hermes_oneshot", lambda *a, **k: llm_json)
239
+
240
+ assert llm_compress.summarize_messages([_msg(source)], [0]) is None
241
+
242
+
243
+ def test_pinned_recent_last_resort_brings_under_target(monkeypatch) -> None:
244
+ """Live edge case: all messages pinned and huge — intent preserved, usage <= target."""
245
+ monkeypatch.setattr(context_compress, "hermes_available", lambda: False)
246
+ intent = "TASK_INTENT: implement pinned-overflow guard"
247
+ payload = {
248
+ "messages": [
249
+ {"role": "user", "content": intent + _long(160_000)},
250
+ {"role": "assistant", "content": _long(160_000)},
251
+ {"role": "tool", "content": _long(160_000)},
252
+ {"role": "assistant", "content": _long(160_000)},
253
+ {"role": "user", "content": _long(160_000)},
254
+ ],
255
+ # 5 x ~40k tokens ~ 200k total -> usage 0.80 at this limit (live edge case).
256
+ "context_limit_tokens": 250_000,
257
+ "keep_recent_turns": 4,
258
+ "enable_llm_summary": False,
259
+ "enable_forget": True,
260
+ }
261
+ result = context_compress.compress(payload)
262
+ target = int(0.30 * result["context_limit"])
263
+ assert result["usage_before"] >= 0.70
264
+ assert result["messages"][0]["content"].startswith(intent)
265
+ assert result["tokens_after"] <= target
266
+ assert result.get("over_target_pinned_only") is not True
267
+ assert "truncate_pinned_recent" in result["stages_applied"]
268
+
269
+
270
+ def test_lone_giant_intent_forced_over_target(monkeypatch) -> None:
271
+ """When intent alone exceeds target, truncate everything else and flag forced_over_target."""
272
+ monkeypatch.setattr(context_compress, "hermes_available", lambda: False)
273
+ intent = "GIANT_INTENT"
274
+ payload = {
275
+ "messages": [
276
+ {"role": "user", "content": intent + _long(12_000)},
277
+ {"role": "assistant", "content": _long(12_000)},
278
+ ],
279
+ "context_limit_tokens": 1000,
280
+ "keep_recent_turns": 2,
281
+ "enable_llm_summary": False,
282
+ "enable_forget": True,
283
+ }
284
+ result = context_compress.compress(payload)
285
+ assert result["messages"][0]["content"].startswith(intent)
286
+ assert result.get("forced_over_target") is True
287
+ assert result.get("over_target_pinned_only") is True
288
+ assert "[...cluxion:" in result["messages"][1]["content"]
289
+ assert result["tokens_after"] > int(0.30 * result["context_limit"])
290
+
291
+
182
292
  def test_korean_decision_survives_stage3() -> None:
183
293
  body = _long(4000)
184
294
  digest = f"[cluxion digest] tool: {body[:80]} [900 tokens elided]"
@@ -1,138 +0,0 @@
1
- """LLM-backed message summarization for context compression stage 2.
2
-
3
- Calls the main model via ``hermes -z`` (or ``cluxion_hermes_call`` when available).
4
- Stage 2 is Python-only; the Rust ``context.rs`` mirror intentionally does not
5
- replicate LLM calls.
6
- """
7
-
8
- from __future__ import annotations
9
-
10
- import json
11
- import os
12
- import re
13
- import shutil
14
- import subprocess
15
- from typing import TYPE_CHECKING, Protocol
16
-
17
- if TYPE_CHECKING:
18
- from collections.abc import Mapping, Sequence
19
-
20
# Default time budget, in seconds, for a single summarization call.
DEFAULT_TIMEOUT_S = 120.0

# Name of the executable that is looked up and invoked for one-shot calls.
_HERMES_BIN = "hermes"

# Default instruction text used when the caller supplies none. The pieces are
# split at sentence boundaries; the concatenated value is what matters.
_SUMMARY_INSTRUCTIONS = (
    "Summarize each message by importance. "
    "PRESERVE ABOVE ALL: the user's intent and direction, decisions made, "
    "unresolved items, file paths / identifiers / commands — regardless of "
    "language (Korean or English). "
    "Compress everything else. "
    "Each summary < 10% of the original. "
    'Output STRICT JSON: {"<index>": "<summary>", ...} only.'
)
30
-
31
-
32
class _MessageLike(Protocol):
    """Structural type for the message objects this module reads.

    Only two string attributes are required; no particular base class is
    needed for an object to match.
    """

    # Speaker label; it is echoed into the per-message header of the prompt.
    role: str
    # Message text that gets summarized.
    content: str
35
-
36
-
37
def hermes_available() -> bool:
    """Report whether the hermes executable can be resolved via ``shutil.which``."""
    resolved_path = shutil.which(_HERMES_BIN)
    return resolved_path is not None
39
-
40
-
41
def summarize_messages(
    messages: Sequence[_MessageLike],
    indices: Sequence[int],
    instructions: str | None = None,
    *,
    model: str | None = None,
    timeout_s: float = DEFAULT_TIMEOUT_S,
) -> dict[int, str] | None:
    """Request per-message summaries for the selected messages.

    Args:
        messages: Full message list; entries are addressed by position.
        indices: Positions in ``messages`` to summarize.
        instructions: Prompt preamble; the module default is used when this
            is ``None`` or empty.
        model: Optional model name forwarded to the hermes call.
        timeout_s: Timeout forwarded to the hermes call.

    Returns:
        ``{}`` when ``indices`` is empty. Otherwise a mapping from requested
        index to its stripped summary, or ``None`` when hermes is not
        available, the call raises ``OSError`` / ``TimeoutExpired`` /
        ``CalledProcessError``, the output cannot be parsed, or no requested
        index came back with a non-blank string summary.
    """
    if not indices:
        return {}
    if not hermes_available():
        return None

    prompt = _build_prompt(messages, indices, instructions or _SUMMARY_INSTRUCTIONS)
    try:
        raw_output = _call_hermes_oneshot(prompt, model=model, timeout_s=timeout_s)
    except (OSError, subprocess.TimeoutExpired, subprocess.CalledProcessError):
        return None

    parsed = _parse_summary_json(raw_output)
    if parsed is None:
        return None

    # The reply is keyed by the stringified index; keep only usable entries.
    summaries: dict[int, str] = {}
    for idx in indices:
        candidate = parsed.get(str(idx))
        if not isinstance(candidate, str):
            continue
        cleaned = candidate.strip()
        if cleaned:
            summaries[idx] = cleaned
    return summaries or None
70
-
71
-
72
def _build_prompt(
    messages: Sequence[_MessageLike],
    indices: Sequence[int],
    instructions: str,
) -> str:
    """Assemble the summarization prompt text.

    The prompt starts with ``instructions``, a blank line and a fixed heading.
    For every in-range index, in the order given, it then adds a header line
    naming the index and role, the message content, and a blank line.
    Negative or too-large indices are silently skipped.
    """
    total = len(messages)
    lines: list[str] = [instructions, "", "Messages to summarize:"]
    for idx in indices:
        if 0 <= idx < total:
            selected = messages[idx]
            lines.extend(
                (
                    f"--- message index {idx} ({selected.role}) ---",
                    selected.content,
                    "",
                )
            )
    return "\n".join(lines)
86
-
87
-
88
def _call_hermes_oneshot(prompt: str, *, model: str | None, timeout_s: float) -> str:
    """Run one hermes one-shot call and return its text output.

    The in-process ``cluxion_hermes_call`` helper is used when it can be
    imported; otherwise the hermes executable is run as a subprocess with the
    prompt passed to ``-z`` (and ``-m <model>`` placed before it when a model
    is given). ``CLUXION_PREPROCESS_IN_COMPRESS`` is set to ``"1"`` for the
    duration of the call and restored to its previous state afterwards.

    Raises:
        OSError, subprocess.TimeoutExpired, subprocess.CalledProcessError:
            from the subprocess path (``check=True`` with a timeout).
    """
    # NOTE(review): presumably this flag tells the callee it is running inside
    # compression so it does not re-enter it — confirm against the reader.
    flag = "CLUXION_PREPROCESS_IN_COMPRESS"
    saved = os.environ.get(flag)
    os.environ[flag] = "1"
    try:
        try:
            from cluxion_hermes_call.core import hermes_oneshot  # type: ignore[import-not-found]

            return str(hermes_oneshot(prompt, model=model, timeout_s=timeout_s))
        except ImportError:
            # Helper not usable: fall through to the command-line tool.
            pass

        if model:
            argv = [_HERMES_BIN, "-m", model, "-z", prompt]
        else:
            argv = [_HERMES_BIN, "-z", prompt]
        finished = subprocess.run(
            argv,
            check=True,
            capture_output=True,
            text=True,
            timeout=timeout_s,
        )
        return finished.stdout
    finally:
        # Put the environment back exactly as it was found.
        if saved is None:
            os.environ.pop(flag, None)
        else:
            os.environ[flag] = saved
115
-
116
-
117
def _parse_summary_json(stdout: str) -> Mapping[str, object] | None:
    """Extract the JSON object carrying the summaries from raw model output.

    Candidates are tried in order and the first one that decodes to a JSON
    object is returned:

    1. the body of a fenced block (three backticks, optionally tagged
       ``json``),
    2. the span from the first ``{`` to the last ``}``,
    3. a JSON object that starts at any ``{`` in the text, scanning left to
       right.

    Step 3 recovers replies where prose around the object contains braces of
    its own, or where a malformed fenced block is followed by valid JSON;
    in those cases the single-span candidates cannot be decoded.

    Args:
        stdout: Raw text produced by the model call.

    Returns:
        The decoded object, or ``None`` when the text is blank or contains no
        decodable JSON object.
    """
    text = stdout.strip()
    if not text:
        return None

    candidates: list[str] = []
    fence = re.search(r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL)
    if fence:
        candidates.append(fence.group(1))
    start = text.find("{")
    end = text.rfind("}")
    if start >= 0 and end > start:
        candidates.append(text[start : end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed

    # Fallback: try to decode an object starting at each "{" in turn.
    # raw_decode ignores whatever follows the object it manages to decode.
    decoder = json.JSONDecoder()
    pos = start
    while pos >= 0:
        try:
            parsed, _ = decoder.raw_decode(text, pos)
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, dict):
            return parsed
        pos = text.find("{", pos + 1)
    return None
136
-
137
-
138
# Names exported by a star import of this module.
__all__ = [
    "DEFAULT_TIMEOUT_S",
    "hermes_available",
    "summarize_messages",
]