dulus 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. agent.py +363 -0
  2. backend/__init__.py +63 -0
  3. backend/compressor.py +261 -0
  4. backend/context.py +329 -0
  5. backend/githook.py +166 -0
  6. backend/marketplace.py +141 -0
  7. backend/mempalace_bridge.py +182 -0
  8. backend/personas.py +297 -0
  9. backend/plugins.py +222 -0
  10. backend/server.py +411 -0
  11. backend/tasks.py +213 -0
  12. batch_api.py +307 -0
  13. checkpoint/__init__.py +27 -0
  14. checkpoint/hooks.py +90 -0
  15. checkpoint/store.py +314 -0
  16. checkpoint/types.py +80 -0
  17. claude_code_watcher.py +214 -0
  18. clipboard_utils.py +246 -0
  19. cloudsave.py +159 -0
  20. common.py +177 -0
  21. compaction.py +378 -0
  22. config.py +180 -0
  23. context.py +241 -0
  24. dulus-0.2.0.dist-info/METADATA +600 -0
  25. dulus-0.2.0.dist-info/RECORD +101 -0
  26. dulus-0.2.0.dist-info/WHEEL +5 -0
  27. dulus-0.2.0.dist-info/entry_points.txt +2 -0
  28. dulus-0.2.0.dist-info/licenses/LICENSE +674 -0
  29. dulus-0.2.0.dist-info/licenses/license_manager.py +187 -0
  30. dulus-0.2.0.dist-info/top_level.txt +36 -0
  31. dulus.py +8455 -0
  32. dulus_gui.py +331 -0
  33. dulus_mcp/__init__.py +43 -0
  34. dulus_mcp/client.py +546 -0
  35. dulus_mcp/config.py +133 -0
  36. dulus_mcp/tools.py +131 -0
  37. dulus_mcp/types.py +124 -0
  38. gui/__init__.py +18 -0
  39. gui/agent_bridge.py +283 -0
  40. gui/chat_widget.py +448 -0
  41. gui/main_window.py +485 -0
  42. gui/personas.py +230 -0
  43. gui/session_utils.py +189 -0
  44. gui/settings_dialog.py +146 -0
  45. gui/sidebar.py +515 -0
  46. gui/tasks_view.py +499 -0
  47. gui/themes.py +256 -0
  48. gui/tool_panel.py +94 -0
  49. input.py +1030 -0
  50. license_manager.py +187 -0
  51. memory/__init__.py +93 -0
  52. memory/audit.py +51 -0
  53. memory/consolidator.py +312 -0
  54. memory/context.py +270 -0
  55. memory/offload.py +148 -0
  56. memory/palace.py +127 -0
  57. memory/scan.py +146 -0
  58. memory/sessions.py +100 -0
  59. memory/store.py +395 -0
  60. memory/tools.py +408 -0
  61. memory/types.py +114 -0
  62. memory/vector_search.py +92 -0
  63. multi_agent/__init__.py +23 -0
  64. multi_agent/subagent.py +501 -0
  65. multi_agent/tools.py +393 -0
  66. offload_helper.py +183 -0
  67. plugin/__init__.py +22 -0
  68. plugin/autoadapter.py +1641 -0
  69. plugin/loader.py +156 -0
  70. plugin/recommend.py +211 -0
  71. plugin/store.py +387 -0
  72. plugin/types.py +147 -0
  73. providers.py +3750 -0
  74. skill/__init__.py +14 -0
  75. skill/builtin.py +100 -0
  76. skill/clawhub.py +270 -0
  77. skill/executor.py +66 -0
  78. skill/loader.py +199 -0
  79. skill/tools.py +110 -0
  80. skills.py +14 -0
  81. spinner.py +42 -0
  82. string_utils.py +42 -0
  83. subagent.py +11 -0
  84. task/__init__.py +12 -0
  85. task/store.py +199 -0
  86. task/tools.py +265 -0
  87. task/types.py +92 -0
  88. tmux_offloader.py +177 -0
  89. tmux_tools.py +410 -0
  90. tool_registry.py +214 -0
  91. tools.py +2694 -0
  92. ui/__init__.py +1 -0
  93. ui/input.py +464 -0
  94. ui/render.py +272 -0
  95. voice/__init__.py +56 -0
  96. voice/keyterms.py +179 -0
  97. voice/recorder.py +263 -0
  98. voice/stt.py +408 -0
  99. voice/tts.py +570 -0
  100. webchat.py +432 -0
  101. webchat_server.py +1761 -0
agent.py ADDED
@@ -0,0 +1,363 @@
1
+ """Core agent loop: neutral message format, multi-provider streaming."""
2
+ from __future__ import annotations
3
+
4
+ import os
5
+ import queue
6
+ import threading
7
+ import time
8
+ import uuid
9
+ from pathlib import Path
10
+ from dataclasses import dataclass, field
11
+ from typing import Generator
12
+
13
+ from tool_registry import get_tool_schemas, clear_last_output
14
+ from tools import execute_tool
15
+ import tools as _tools_init # ensure built-in tools are registered on import
16
+ from providers import stream, AssistantTurn, TextChunk, ThinkingChunk, detect_provider
17
+ from compaction import maybe_compact
18
+
19
+ _SENTINEL = object()
20
+
21
def _interruptible_stream(gen):
    """Iterate *gen* on a daemon thread and re-yield its items on the caller's thread.

    The caller only ever blocks on ``queue.get(timeout=0.1)``, so a
    KeyboardInterrupt (Ctrl+C) can be delivered promptly instead of being
    stuck behind whatever blocking call *gen* performs.

    Args:
        gen: any iterable/generator of events.

    Yields:
        Every item produced by *gen*, in order.

    Raises:
        Any ``Exception`` raised while iterating *gen* is re-raised here, on
        the consumer's thread. An item that is itself a ``BaseException``
        instance is raised as well (it is indistinguishable from a forwarded
        error).

    When the consumer stops early (the generator is closed, garbage
    collected, or interrupted), the producer is told to stop: it gives up
    its pending ``put`` instead of blocking forever on the bounded queue, and
    closes *gen* so the underlying stream can release its resources.
    """
    q: queue.Queue = queue.Queue(maxsize=64)
    stop = threading.Event()
    end_marker = object()  # private end-of-stream token for this call

    def _offer(item) -> bool:
        """Enqueue *item* unless the consumer has gone away; True on success."""
        while not stop.is_set():
            try:
                q.put(item, timeout=0.1)
                return True
            except queue.Full:
                continue
        return False

    def _producer():
        try:
            for event in gen:
                if not _offer(event):
                    break
        except Exception as exc:
            _offer(exc)
        finally:
            if stop.is_set():
                # Consumer abandoned us: close the source from the only
                # thread that iterates it. Nobody is left to report a
                # failure to, so this is deliberately best-effort.
                close = getattr(gen, "close", None)
                if close is not None:
                    try:
                        close()
                    except Exception:
                        pass
            _offer(end_marker)

    worker = threading.Thread(target=_producer, daemon=True)
    worker.start()
    try:
        while True:
            try:
                item = q.get(timeout=0.1)
            except queue.Empty:
                continue
            if item is end_marker:
                break
            if isinstance(item, BaseException):
                raise item
            yield item
    finally:
        # Runs on normal exhaustion, on error, and on GeneratorExit /
        # KeyboardInterrupt: always release the producer.
        stop.set()
49
+
50
+ # ── Re-export event types (used by dulus) ─────────────────────────────────
51
+ __all__ = [
52
+ "AgentState", "run",
53
+ "TextChunk", "ThinkingChunk",
54
+ "ToolStart", "ToolEnd", "TurnDone", "PermissionRequest",
55
+ ]
56
+
57
+
58
@dataclass
class AgentState:
    """Mutable per-session state shared across calls to ``run``.

    ``messages`` holds the conversation in the neutral,
    provider-independent format; the remaining fields are running totals.
    """

    # Conversation history (each instance gets its own list).
    messages: list = field(default_factory=list)

    # Token usage accumulated over every assistant turn.
    total_input_tokens: int = 0
    total_output_tokens: int = 0
    total_cache_read_tokens: int = 0
    total_cache_creation_tokens: int = 0

    # Number of agent-loop iterations performed so far.
    turn_count: int = 0
67
+
68
+
69
@dataclass
class ToolStart:
    """Event emitted just before a tool call is permission-checked and run."""

    name: str     # tool name as requested by the model
    inputs: dict  # arguments supplied for the call
73
+
74
@dataclass
class ToolEnd:
    """Event emitted once a tool call has finished or has been denied."""

    name: str               # tool name
    result: str             # tool output, or the denial message
    permitted: bool = True  # False when the call was blocked
79
+
80
@dataclass
class TurnDone:
    """Event carrying the token usage of one completed assistant turn."""

    input_tokens: int
    output_tokens: int
    cache_read_tokens: int = 0      # optional cache-read usage
    cache_creation_tokens: int = 0  # optional cache-creation usage
86
+
87
@dataclass
class PermissionRequest:
    """Event asking the consumer whether a tool call may proceed.

    The agent loop yields this object and reads ``granted`` back when it is
    resumed, so the consumer answers by mutating the instance in place.
    """

    description: str       # human-readable summary of the operation
    granted: bool = False  # consumer sets this to True to approve
91
+
92
+
93
+ # ── Agent loop ─────────────────────────────────────────────────────────────
94
+
95
# Tools whose output never triggers the "user did not see this" hint.
_HINT_EXEMPT_TOOLS = frozenset({
    "SearchLastOutput", "ReadJob", "TmuxOffload", "MemorySearch",
    "PrintToConsole", "AskUserQuestion", "Write", "Edit",
})


def _truncation_reminder(result: str) -> "dict | None":
    """Build the reminder message for a truncated tool result, if warranted.

    Compares the length of *result* with the size of
    ``~/.dulus/last_tool_output.txt``. Returns a neutral-format ``user``
    message when the saved file is larger than what the model saw, else
    ``None``. Filesystem problems are treated as "no reminder": this is a
    best-effort nudge and must never abort the agent loop.
    """
    try:
        saved = Path.home() / ".dulus" / "last_tool_output.txt"
        if not saved.exists():
            return None
        full_size = saved.stat().st_size
        seen_size = len(result)
        if full_size <= seen_size:
            return None
        with saved.open("rb") as fh:
            full_lines = sum(1 for _ in fh)
    except (OSError, RuntimeError):
        # OSError: unreadable file; RuntimeError: home directory unresolvable.
        return None
    return {
        "role": "user",
        "content": (
            "[SYSTEM REMINDER — TRUNCATED OUTPUT]\n"
            f"The previous tool result was TRUNCATED. You only saw "
            f"~{seen_size} characters out of {full_size} total "
            f"({full_lines} lines). The full output is saved.\n\n"
            "RULE: You CANNOT claim that any item, font, key, name, "
            "match, or piece of data is missing, absent, or does not "
            "exist based on what you just saw. You only have a fragment.\n\n"
            "BEFORE answering the user's question, you MUST call:\n"
            "  SearchLastOutput(pattern=\"<the thing the user asked about>\")\n"
            "to verify against the full saved output. If the user asked "
            "about a specific name, search for that exact name. If they "
            "asked for a count or a list, use SearchLastOutput() with no "
            "pattern to get the full summary.\n\n"
            "Do not answer from memory or guess. Search first."
        ),
    }


def run(
    user_message: str,
    state: AgentState,
    config: dict,
    system_prompt: str,
    depth: int = 0,
    cancel_check=None,
) -> Generator:
    """Multi-turn agent loop (generator).

    Appends *user_message* to ``state.messages``, then repeatedly streams an
    assistant turn, records it, executes any requested tools and records
    their results, until the assistant answers without tool calls, the
    provider reports an error, or *cancel_check* asks to stop.

    Yields:
        TextChunk | ThinkingChunk | ToolStart | ToolEnd |
        PermissionRequest | TurnDone

        A yielded ``PermissionRequest`` is a question to the consumer: set
        its ``granted`` attribute before resuming the generator.

    Args:
        user_message: text of the new user turn.
        state: session state; mutated in place (messages, token totals,
            turn counter).
        config: runtime configuration; mutated in place (``_pending_image``
            is consumed, ``_depth``, ``_system_prompt`` and ``_turn_count``
            are injected for tools).
        system_prompt: system prompt passed to the provider on every turn.
        depth: sub-agent nesting depth, 0 for top-level.
        cancel_check: callable returning True to abort the loop early; it is
            consulted once per loop iteration, before streaming.
    """
    from common import sanitize_text
    from tool_registry import is_display_only

    # Append user turn in neutral format (sanitize to kill Windows surrogates)
    user_msg = {"role": "user", "content": sanitize_text(user_message)}
    # Attach pending image from /image command if present
    pending_img = config.pop("_pending_image", None)
    if pending_img:
        user_msg["images"] = [pending_img]
    state.messages.append(user_msg)

    # Inject runtime metadata into config so tools (e.g. Agent) can access it
    config.update({"_depth": depth, "_system_prompt": system_prompt})

    while True:
        if cancel_check and cancel_check():
            return
        state.turn_count += 1
        assistant_turn = None

        # Compact context if approaching window limit
        maybe_compact(state, config)

        # Sanitize message contents before sending to API (surrogate safety).
        # Shallow copies keep the stored history untouched.
        safe_messages = []
        for msg in state.messages:
            copy = dict(msg)
            content = copy.get("content")
            if isinstance(content, str):
                copy["content"] = sanitize_text(content)
            safe_messages.append(copy)

        # Stream from provider — wrapped so Ctrl+C always fires
        for event in _interruptible_stream(stream(
            model=config["model"],
            system=system_prompt,
            messages=safe_messages,
            tool_schemas=get_tool_schemas(),
            config=config,
        )):
            if isinstance(event, (TextChunk, ThinkingChunk)):
                yield event
            elif isinstance(event, AssistantTurn):
                assistant_turn = event

        if assistant_turn is None:
            break

        if assistant_turn.error:
            # Rollback: remove the user message that caused the error to
            # prevent loops (e.g. an image sent to a model without vision).
            if state.messages and state.messages[-1]["role"] == "user":
                state.messages.pop()
            break

        # Record assistant turn in neutral format
        state.messages.append({
            "role": "assistant",
            "content": sanitize_text(assistant_turn.text),
            "thinking": sanitize_text(assistant_turn.thinking) if assistant_turn.thinking else "",
            "tool_calls": assistant_turn.tool_calls,
        })

        state.total_input_tokens += assistant_turn.in_tokens
        state.total_output_tokens += assistant_turn.out_tokens
        c_read = getattr(assistant_turn, "cache_read_tokens", 0)
        c_create = getattr(assistant_turn, "cache_creation_tokens", 0)
        state.total_cache_read_tokens += c_read
        state.total_cache_creation_tokens += c_create
        yield TurnDone(
            assistant_turn.in_tokens,
            assistant_turn.out_tokens,
            cache_read_tokens=c_read,
            cache_creation_tokens=c_create,
        )

        if not assistant_turn.tool_calls:
            break  # No tools → conversation turn complete

        # ── Execute tools ────────────────────────────────────────────────
        # Truncation reminders are "user" messages; they are collected here
        # and appended only after every tool result of this turn has been
        # recorded, so the tool results stay contiguous in the history.
        pending_reminders = []
        plan_mode = config.get("permission_mode") == "plan"

        for tc in assistant_turn.tool_calls:
            yield ToolStart(tc["name"], tc["input"])

            # Permission gate. Plan mode denies silently; other modes ask
            # the consumer through a PermissionRequest.
            permitted = _check_permission(tc, config)
            if not permitted and not plan_mode:
                req = PermissionRequest(description=_permission_desc(tc))
                yield req
                permitted = req.granted

            if permitted:
                config["_turn_count"] = state.turn_count
                result = execute_tool(
                    tc["name"], tc["input"],
                    permission_mode="accept-all",  # already gate-checked above
                    config=config,
                )
            elif plan_mode:
                plan_file = config.get("_plan_file", "")
                result = (
                    f"[Plan mode] Write operations are blocked except to the plan file: {plan_file}\n"
                    "Finish your analysis and write the plan to the plan file. "
                    "The user will run /plan done to exit plan mode and begin implementation."
                )
            else:
                result = "Denied: user rejected this operation"

            yield ToolEnd(tc["name"], result, permitted)

            # Determine what the USER actually saw rendered, based on tool
            # type + auto_show + verbose (must match dulus.py print_tool_end):
            #   display tool → user saw full output IF auto_show ON
            #   other tool   → user saw 500-char preview IF verbose ON
            display = is_display_only(tc["name"])
            auto_show_on = config.get("auto_show", True)
            verbose_on = config.get("verbose", False)
            user_saw = auto_show_on if display else verbose_on

            if display and user_saw:
                # Display-only tool the user already saw: replace with a
                # placeholder to save tokens.
                result_summary = f"[Display output shown to user: {len(result)} characters]"
            else:
                result_summary = result

            # Inject the hint when (a) user did not see the content, (b) it
            # is not a purely internal tool, and (c) the call did not error.
            if (not user_saw
                    and tc["name"] not in _HINT_EXEMPT_TOOLS
                    and not result.startswith(("Error", "Denied"))):
                state_desc = []
                if not auto_show_on:
                    state_desc.append("auto_show OFF")
                if not verbose_on:
                    state_desc.append("verbose OFF")
                state_str = " + ".join(state_desc) or "user-display suppressed"
                result_summary = (
                    f"{result_summary}\n\n"
                    f"[SYSTEM HINT — {state_str}]\n"
                    "The user did NOT see this output rendered (only a brief [OK] line). "
                    "If this content is meant for the user (Bash output they asked for, file "
                    "they wanted to read, ASCII art they requested), call "
                    "PrintToConsole(content=...) or PrintToConsole(file_path=...) NOW to "
                    "show it. If this was just internal investigation, ignore this hint."
                )

            # Record tool result in neutral format
            state.messages.append({
                "role": "tool",
                "tool_call_id": tc["id"],
                "name": tc["name"],
                "content": sanitize_text(result_summary),
            })

            # ── Truncation Awareness Reminder ────────────────────────────
            # If the tool output was truncated, the model only saw a
            # fragment. Queue a hard reminder so it cannot claim "X is
            # missing" without first using SearchLastOutput. Skipped for
            # SearchLastOutput itself to avoid loops.
            if tc["name"] != "SearchLastOutput" and "[TRUNCATED" in result:
                reminder = _truncation_reminder(result)
                if reminder is not None:
                    pending_reminders.append(reminder)

        state.messages.extend(pending_reminders)
314
+
315
+
316
+ # ── Helpers ───────────────────────────────────────────────────────────────
317
+
318
def _check_permission(tc: dict, config: dict) -> bool:
    """Return True if the tool call is auto-approved (no need to ask the user).

    Args:
        tc: tool call with at least ``"name"`` and ``"input"`` keys.
        config: runtime configuration; ``permission_mode`` (default
            ``"auto"``) and, in plan mode, ``_plan_file`` are consulted.

    Modes:
        accept-all: everything is approved.
        manual: nothing is approved (always ask), except the plan-mode
            toggles ``EnterPlanMode`` / ``ExitPlanMode``.
        plan: ``Write`` / ``Edit`` are approved only when they target the
            plan file, ``NotebookEdit`` never, ``Bash`` only when
            ``_is_safe_bash`` accepts the command, everything else is
            approved.
        auto (and any other value): ``Read``, ``Glob``, ``Grep``,
            ``WebFetch``, ``WebSearch`` are approved, ``Bash`` only when
            safe, the rest is not.
    """
    perm_mode = config.get("permission_mode", "auto")
    name = tc["name"]

    # Plan mode tools are always auto-approved
    if name in ("EnterPlanMode", "ExitPlanMode"):
        return True

    if perm_mode == "accept-all":
        return True
    if perm_mode == "manual":
        return False  # always ask

    if perm_mode == "plan":
        # Allow writes ONLY to the plan file
        if name in ("Write", "Edit"):
            plan_file = config.get("_plan_file", "")
            target = tc["input"].get("file_path", "")
            if not (plan_file and target):
                return False

            def _canonical(path: str) -> str:
                # Resolve relative segments and symlinks, then fold case on
                # case-insensitive platforms, so "plan.md" and its absolute
                # form compare equal.
                return os.path.normcase(os.path.realpath(path))

            return _canonical(target) == _canonical(plan_file)
        if name == "NotebookEdit":
            return False
        if name == "Bash":
            from tools import _is_safe_bash
            return _is_safe_bash(tc["input"].get("command", ""))
        return True  # reads are fine

    # "auto" mode: only ask for writes and non-safe bash
    if name in ("Read", "Glob", "Grep", "WebFetch", "WebSearch"):
        return True
    if name == "Bash":
        from tools import _is_safe_bash
        return _is_safe_bash(tc["input"].get("command", ""))
    return False  # Write, Edit → ask
355
+
356
+
357
def _permission_desc(tc: dict) -> str:
    """Return a short human-readable description of a tool call.

    ``Bash``, ``Write`` and ``Edit`` get a dedicated phrasing built from
    their main argument; any other tool is rendered as its name followed by
    a list holding its first input value (or an empty list).
    """
    tool = tc["name"]
    args = tc["input"]
    # tool name -> (prefix, key of the argument to show)
    phrasing = {
        "Bash": ("Run: ", "command"),
        "Write": ("Write to: ", "file_path"),
        "Edit": ("Edit: ", "file_path"),
    }
    if tool in phrasing:
        prefix, key = phrasing[tool]
        return f"{prefix}{args.get(key, '')}"
    first_value = list(args.values())[:1]
    return f"{tool}({first_value})"
backend/__init__.py ADDED
@@ -0,0 +1,63 @@
1
+ """Dulus — Backend + Smart Context + Plugins + Personas + MemPalace."""
2
+ __version__ = "0.2.0"
3
+
4
+ # Public API exports
5
+ from backend.context import build_context, build_smart_context, get_compact_context
6
+ from backend.tasks import create_task, load_tasks, update_task
7
+ from backend.personas import (
8
+ get_active_persona,
9
+ get_all_personas,
10
+ get_persona,
11
+ get_personas_for_context,
12
+ set_active_persona,
13
+ create_persona,
14
+ update_persona,
15
+ delete_persona,
16
+ )
17
+ from backend.mempalace_bridge import (
18
+ load_cache,
19
+ refresh_cache,
20
+ get_memories,
21
+ get_mempalace_compact_text,
22
+ get_mempalace_context_block,
23
+ )
24
+ from backend.compressor import compress, compress_compact_context, summarize_memory
25
+ from backend.plugins import load_all_plugins, get_plugin_info, create_example_plugin
26
+ from backend.marketplace import load_registry, get_stats
27
+ __all__ = [
28
+ "__version__",
29
+ # Context
30
+ "build_context",
31
+ "build_smart_context",
32
+ "get_compact_context",
33
+ # Tasks
34
+ "create_task",
35
+ "load_tasks",
36
+ "update_task",
37
+ # Personas
38
+ "get_active_persona",
39
+ "get_all_personas",
40
+ "get_persona",
41
+ "get_personas_for_context",
42
+ "set_active_persona",
43
+ "create_persona",
44
+ "update_persona",
45
+ "delete_persona",
46
+ # MemPalace
47
+ "load_cache",
48
+ "refresh_cache",
49
+ "get_memories",
50
+ "get_mempalace_compact_text",
51
+ "get_mempalace_context_block",
52
+ # Compressor
53
+ "compress",
54
+ "compress_compact_context",
55
+ "summarize_memory",
56
+ # Plugins
57
+ "load_all_plugins",
58
+ "get_plugin_info",
59
+ "create_example_plugin",
60
+ # Marketplace
61
+ "load_registry",
62
+ "get_stats",
63
+ ]
backend/compressor.py ADDED
@@ -0,0 +1,261 @@
1
+ """Hybrid Context Compressor (#29) — qwen2.5:3b via Ollama + rule-based fallback.
2
+
3
+ Zero mandatory dependencies. Uses urllib (stdlib) to probe Ollama.
4
+ If Ollama is unavailable, falls back to intelligent rule-based compression.
5
+ """
6
+ import json
7
+ import re
8
+ import textwrap
9
+ import urllib.request
10
+ from typing import Any
11
+
12
+ OLLAMA_HOST = "http://localhost:11434"
13
+ QWEN_MODEL = "qwen2.5:3b"
14
+ SUMMARIZE_PROMPT = """You are a memory summarizer. Summarize the following user memory into 1-2 sentences that capture the essential meaning. Be concise but preserve all critical facts, names, and relationships.
15
+
16
+ Memory:
17
+ ---
18
+ {text}
19
+ ---
20
+
21
+ Summary:"""
22
+
23
+
24
def _ollama_available(timeout: float = 2.0) -> bool:
    """Report whether the Ollama server answers ``GET /api/tags`` with HTTP 200.

    Any failure (connection refused, timeout, malformed URL, ...) is
    reported as ``False`` rather than raised.
    """
    tags_url = f"{OLLAMA_HOST}/api/tags"
    try:
        probe = urllib.request.Request(
            tags_url,
            headers={"Accept": "application/json"},
            method="GET",
        )
        with urllib.request.urlopen(probe, timeout=timeout) as resp:
            reachable = resp.status == 200
    except Exception:
        return False
    return reachable
36
+
37
+
38
def _qwen_loaded(timeout: float = 3.0) -> bool:
    """Report whether a model whose name contains ``QWEN_MODEL`` is listed by Ollama.

    Queries ``GET /api/tags`` and scans the ``models`` array of the JSON
    reply. Any failure (network, decoding, unexpected shape) yields
    ``False``.
    """
    tags_url = f"{OLLAMA_HOST}/api/tags"
    try:
        probe = urllib.request.Request(
            tags_url,
            headers={"Accept": "application/json"},
            method="GET",
        )
        with urllib.request.urlopen(probe, timeout=timeout) as resp:
            listing = json.loads(resp.read().decode("utf-8"))
        for entry in listing.get("models", []):
            if QWEN_MODEL in entry.get("name", ""):
                return True
        return False
    except Exception:
        return False
52
+
53
+
54
def summarize_with_qwen(text: str, max_tokens: int = 100) -> str:
    """Ask Ollama's ``QWEN_MODEL`` for a short summary of *text*.

    Args:
        text: memory or text block; only the first 2000 characters are sent.
        max_tokens: generation cap passed as ``num_predict``.

    Returns:
        The stripped ``response`` field of the reply ("" when absent).

    Raises:
        Whatever ``urllib`` / ``json`` raise on network or decoding
        failures; nothing is caught here.
    """
    generation_options = {
        "num_predict": max_tokens,
        "temperature": 0.2,
        "top_p": 0.7,
    }
    request_body = json.dumps({
        "model": QWEN_MODEL,
        "prompt": SUMMARIZE_PROMPT.format(text=text[:2000]),  # Cap input
        "stream": False,
        "options": generation_options,
    }).encode("utf-8")

    request = urllib.request.Request(
        f"{OLLAMA_HOST}/api/generate",
        data=request_body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=30) as resp:
        reply = json.loads(resp.read().decode("utf-8"))
        return reply.get("response", "").strip()
76
+
77
+
78
+ # ─────────── Rule-based Fallback ───────────
79
+
80
+ # Light stopwords — only remove true filler, never technical terms
81
+ STOPWORDS = {
82
+ "the", "a", "an", "is", "are", "was", "were", "be", "been",
83
+ "being", "have", "has", "had", "do", "does", "did",
84
+ "will", "would", "could", "should", "may", "might",
85
+ "to", "of", "in", "for", "on", "with", "at", "by", "from",
86
+ "as", "into", "through", "during", "before", "after",
87
+ "above", "below", "between", "under", "again", "further",
88
+ "then", "once", "here", "there", "when", "where", "why",
89
+ "how", "all", "each", "few", "more", "most", "other",
90
+ "some", "such", "no", "nor", "not", "only", "own", "same",
91
+ "so", "than", "too", "very", "just", "and", "but", "if",
92
+ "or", "because", "until", "while", "this", "that", "these",
93
+ "those",
94
+ }
95
+
96
+
97
def _remove_redundant_whitespace(text: str) -> str:
    """Collapse every whitespace run (newlines included) to one space and trim the ends."""
    whitespace_run = re.compile(r"\s+")
    squeezed = whitespace_run.sub(" ", text)
    return squeezed.strip()
99
+
100
+
101
def _collapse_lists(text: str) -> str:
    """Fold each run of consecutive bullet lines into a single line.

    A bullet line starts with optional whitespace, one of ``•``, ``-`` or
    ``*``, and a whitespace character. Runs of up to three bullets become
    ``"• a | b | c"``; longer runs keep only the first item followed by the
    item count. Non-bullet lines pass through unchanged.
    """
    bullet_prefix = re.compile(r"^\s*[•\-\*]\s")
    folded = []
    run_items = []

    def _flush_run() -> None:
        # Emit the pending bullet run (if any) as one line.
        if not run_items:
            return
        if len(run_items) <= 3:
            folded.append("• " + " | ".join(run_items))
        else:
            folded.append("• " + run_items[0] + " | ... (" + str(len(run_items)) + " items)")
        run_items.clear()

    for raw in text.split("\n"):
        if bullet_prefix.match(raw):
            run_items.append(bullet_prefix.sub("", raw).strip())
        else:
            _flush_run()
            folded.append(raw)
    _flush_run()
    return "\n".join(folded)
122
+
123
+
124
def _strip_stopwords(text: str) -> str:
    """Drop stopwords from *text*, keeping any word that starts with an uppercase letter.

    Words are whitespace-separated tokens; surrounding punctuation is
    ignored when looking a token up in ``STOPWORDS``. The result is the
    surviving tokens joined by single spaces.
    """
    punctuation = ".,;:!?()[]{}"
    survivors = [
        token
        for token in text.split()
        if token[0].isupper() or token.lower().strip(punctuation) not in STOPWORDS
    ]
    return " ".join(survivors)
133
+
134
+
135
def _abbreviate_status(text: str) -> str:
    """Shorten known status words, but only when they appear as ``[status]``.

    Matching is case-insensitive; the replacement is always the lowercase
    short form. Text outside square brackets is never touched, so ordinary
    words and names stay intact.
    """
    short_forms = (
        ("in_progress", "in_prog"),
        ("completed", "done"),
        ("cancelled", "canc"),
        ("deleted", "del"),
    )
    for full, short in short_forms:
        bracketed = re.compile(rf"\[{full}\]", flags=re.IGNORECASE)
        text = bracketed.sub(f"[{short}]", text)
    return text
147
+
148
+
149
def _deduplicate_lines(text: str) -> str:
    """Drop repeated lines, comparing them with surrounding whitespace ignored.

    The first occurrence of each non-blank line is kept verbatim; blank
    lines are always kept.
    """
    already_seen = set()
    kept = []
    for raw in text.split("\n"):
        fingerprint = raw.strip()
        if not fingerprint:
            kept.append(raw)  # blank lines are never deduplicated
            continue
        if fingerprint in already_seen:
            continue
        already_seen.add(fingerprint)
        kept.append(raw)
    return "\n".join(kept)
161
+
162
+
163
def compress_with_rules(text: str, target_tokens: int = 200) -> str:
    """Intelligent rule-based compression — no LLM required.

    Strategy: preserve all IDs, names, and statuses. Only remove fluff.
    Token counts are estimated as ``len(text) // 4``.

    Args:
        text: context block to compress.
        target_tokens: estimated token budget for the result.

    Returns:
        The compressed text. Line breaks between non-empty lines are
        preserved; when the budget is still exceeded the text is cut
        (preferably at a line break) and ``"\\n[...truncated]"`` is appended.
    """
    # Phase 1: structural compression (collapse long lists)
    text = _collapse_lists(text)
    text = _deduplicate_lines(text)

    # Phase 2: clean whitespace *within* each line and drop empty lines.
    # Line breaks are kept on purpose: collapsing them as well would flatten
    # the whole context onto one line and leave Phase 4 no newline to cut at.
    cleaned = (_remove_redundant_whitespace(line) for line in text.split("\n"))
    text = "\n".join(line for line in cleaned if line)

    # Phase 3: mild abbreviation only if severely over budget
    if max(1, len(text) // 4) > target_tokens * 2:
        text = _abbreviate_status(text)

    # Phase 4: truncate with indicator if still over
    if max(1, len(text) // 4) > target_tokens:
        max_chars = max(0, target_tokens) * 4  # a negative budget means "nothing"
        truncated = text[:max_chars]
        # Prefer a clean cut at a line break, unless that would throw away
        # more than ~30% of the allowed characters.
        last_nl = truncated.rfind("\n")
        if last_nl > max_chars * 0.7:
            truncated = truncated[:last_nl]
        text = truncated + "\n[...truncated]"

    return text
192
+
193
+
194
+ # ─────────── Public API ───────────
195
+
196
def compress(text: str, max_tokens: int = 200) -> dict[str, Any]:
    """Compress *text* with the rule-based compressor and report the savings.

    The LLM path (``summarize_with_qwen``) is intentionally not used here:
    it is reserved for memory summarization because full-context
    compression with it is too destructive.

    Returns:
        A dict with the keys
        - compressed: str, the compressed text
        - method: always "rules"
        - before_tokens: int, estimated tokens of the input
        - after_tokens: int, estimated tokens of the output
        - saved_tokens: int, ``before_tokens - after_tokens``

        Token counts are estimated as ``max(1, len(s) // 4)``.
    """
    def _estimate_tokens(s: str) -> int:
        return max(1, len(s) // 4)

    tokens_before = _estimate_tokens(text)
    compressed = compress_with_rules(text, max_tokens)
    tokens_after = _estimate_tokens(compressed)

    report = {
        "compressed": compressed,
        "method": "rules",
        "before_tokens": tokens_before,
        "after_tokens": tokens_after,
        "saved_tokens": tokens_before - tokens_after,
    }
    return report
219
+
220
+
221
def compress_compact_context(text: str, max_tokens: int = 200) -> str:
    """Convenience wrapper around ``compress`` that returns only the compressed text."""
    report = compress(text, max_tokens)
    return report["compressed"]
224
+
225
+
226
+ # Public API alias used by dulus.__init__
227
+ compact = compress_compact_context
228
+
229
+
230
def summarize_memory(name: str, body: str) -> str:
    """Summarize one memory body with qwen2.5:3b when Ollama can serve it.

    Falls back to the body itself, cut to 120 characters plus ``"..."``
    when longer, if Ollama or the model is unavailable, if the call fails,
    or if the returned summary is 10 characters or shorter.
    """
    def _truncated_body() -> str:
        # Computed lazily so the fallback is only evaluated when needed.
        return body[:120] + "..." if len(body) > 120 else body

    model_ready = _ollama_available() and _qwen_loaded()
    if not model_ready:
        return _truncated_body()

    try:
        summary = summarize_with_qwen(f"Memory '{name}':\n{body}", max_tokens=60)
        if summary and len(summary) > 10:
            return summary
    except Exception:
        # Summarization is optional; any failure degrades to truncation.
        pass
    return _truncated_body()
242
+
243
+
244
if __name__ == "__main__":
    # Manual smoke test: compress a representative context block and print it.
    demo_context = (
        "[DULUS CONTEXT]\n"
        "Session: proactive | Agent: Dulus | User: KevRojo\n"
        "Project: Dulus Command Center | Files: 11 | Lines: 2014\n"
        "Active Tasks:\n"
        "  • T-002 [in_progress] Smart Context Manager (#23) (Dulus, Core)\n"
        "  • T-003 [pending] Plugin System (Dulus, Extensibility)\n"
        "  • T-007 [pending] MemPalace Integration (Dulus, Integration)\n"
        "Agents:\n"
        "  • Dulus (primary) - active\n"
        "  • kimi-code (coder) - idle\n"
        "  • kimi-code3 (coder) - idle\n"
        "Recent Commits:\n"
        "  • 865c915 Add Dulus Task Dashboard at docs/dashboard/index.html by Nano Agent\n"
    )
    demo_report = compress(demo_context)
    print("Rule-based compression:")
    print(demo_report["compressed"])