clm-plugin 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- clm_plugin-0.1.0/.gitignore +43 -0
- clm_plugin-0.1.0/CHANGELOG.md +43 -0
- clm_plugin-0.1.0/CONTRIBUTING.md +43 -0
- clm_plugin-0.1.0/PKG-INFO +301 -0
- clm_plugin-0.1.0/README.md +275 -0
- clm_plugin-0.1.0/clm/__init__.py +59 -0
- clm_plugin-0.1.0/clm/adapters/__init__.py +11 -0
- clm_plugin-0.1.0/clm/adapters/langchain_adapter.py +121 -0
- clm_plugin-0.1.0/clm/adapters/loop_adapter.py +135 -0
- clm_plugin-0.1.0/clm/adapters/openai_adapter.py +94 -0
- clm_plugin-0.1.0/clm/cognitive_load_manager.py +411 -0
- clm_plugin-0.1.0/clm/core/__init__.py +1 -0
- clm_plugin-0.1.0/clm/core/action_dispatcher.py +292 -0
- clm_plugin-0.1.0/clm/core/chunking_engine.py +277 -0
- clm_plugin-0.1.0/clm/core/config.py +72 -0
- clm_plugin-0.1.0/clm/core/models.py +149 -0
- clm_plugin-0.1.0/clm/core/scorer.py +72 -0
- clm_plugin-0.1.0/clm/core/signal_collector.py +264 -0
- clm_plugin-0.1.0/clm/exceptions.py +66 -0
- clm_plugin-0.1.0/clm/storage/__init__.py +5 -0
- clm_plugin-0.1.0/clm/storage/sidecar_store.py +290 -0
- clm_plugin-0.1.0/clm/utils/__init__.py +1 -0
- clm_plugin-0.1.0/clm/utils/auto_state.py +117 -0
- clm_plugin-0.1.0/clm/utils/embeddings.py +123 -0
- clm_plugin-0.1.0/conftest.py +25 -0
- clm_plugin-0.1.0/examples/basic_agent_loop.py +222 -0
- clm_plugin-0.1.0/examples/langchain_example.py +16 -0
- clm_plugin-0.1.0/examples/quickstart.py +23 -0
- clm_plugin-0.1.0/pyproject.toml +36 -0
- clm_plugin-0.1.0/pytest.ini +3 -0
- clm_plugin-0.1.0/requirements-dev.txt +4 -0
- clm_plugin-0.1.0/requirements.txt +2 -0
- clm_plugin-0.1.0/tests/__init__.py +1 -0
- clm_plugin-0.1.0/tests/fixtures/__init__.py +1 -0
- clm_plugin-0.1.0/tests/fixtures/strategies.py +238 -0
- clm_plugin-0.1.0/tests/integration/__init__.py +1 -0
- clm_plugin-0.1.0/tests/integration/test_end_to_end.py +516 -0
- clm_plugin-0.1.0/tests/property/__init__.py +1 -0
- clm_plugin-0.1.0/tests/unit/__init__.py +1 -0
- clm_plugin-0.1.0/tests/unit/test_action_dispatcher_counters.py +176 -0
- clm_plugin-0.1.0/tests/unit/test_chunking_engine.py +356 -0
- clm_plugin-0.1.0/tests/unit/test_cognitive_load_manager.py +223 -0
- clm_plugin-0.1.0/tests/unit/test_config.py +121 -0
- clm_plugin-0.1.0/tests/unit/test_models.py +420 -0
- clm_plugin-0.1.0/tests/unit/test_scorer.py +151 -0
- clm_plugin-0.1.0/tests/unit/test_sidecar_store.py +177 -0
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Python bytecode
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.pyo
|
|
6
|
+
|
|
7
|
+
# Distribution / packaging
|
|
8
|
+
dist/
|
|
9
|
+
build/
|
|
10
|
+
*.egg-info/
|
|
11
|
+
*.egg
|
|
12
|
+
.eggs/
|
|
13
|
+
|
|
14
|
+
# Virtual environments
|
|
15
|
+
.venv/
|
|
16
|
+
venv/
|
|
17
|
+
env/
|
|
18
|
+
.env
|
|
19
|
+
|
|
20
|
+
# Testing & coverage
|
|
21
|
+
.coverage
|
|
22
|
+
.coverage.*
|
|
23
|
+
htmlcov/
|
|
24
|
+
.pytest_cache/
|
|
25
|
+
.hypothesis/
|
|
26
|
+
|
|
27
|
+
# IDE & editor
|
|
28
|
+
.idea/
|
|
29
|
+
.vscode/
|
|
30
|
+
*.swp
|
|
31
|
+
*.swo
|
|
32
|
+
.DS_Store
|
|
33
|
+
|
|
34
|
+
# Kiro AI tool specs (internal only)
|
|
35
|
+
.kiro/
|
|
36
|
+
|
|
37
|
+
# CLM runtime artifacts
|
|
38
|
+
clm_sidecar.db
|
|
39
|
+
clm.db
|
|
40
|
+
*.db
|
|
41
|
+
|
|
42
|
+
# Logs
|
|
43
|
+
*.log
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
## [0.1.0] — 2025-03-31
|
|
6
|
+
|
|
7
|
+
### First public release
|
|
8
|
+
|
|
9
|
+
**Core architecture**
|
|
10
|
+
- 5-layer cognitive load management: Signal Collector, CLM Scorer, Chunking Engine, Action Dispatcher, Sidecar Store
|
|
11
|
+
- 4 cognitive load signals: branching factor, repetition rate, uncertainty density, goal distance
|
|
12
|
+
- 3-zone intervention system: Green (pass), Amber (compress), Red (interrupt)
|
|
13
|
+
- Abort action for structurally unresolvable tasks (5 consecutive Red triggers)
|
|
14
|
+
- Amber escalation protection (3 consecutive Amber triggers → Red)
|
|
15
|
+
|
|
16
|
+
**Integration**
|
|
17
|
+
- `CLM()` — zero-argument instantiation with sensible defaults
|
|
18
|
+
- `observe_raw()` — single-line integration, no TaskState construction required
|
|
19
|
+
- `AutoStateBuilder` — automatic task tree inference from LLM outputs
|
|
20
|
+
- LangChain adapter: `CLMCallbackHandler`
|
|
21
|
+
- OpenAI Agents SDK adapter: `CLMOpenAIHook`
|
|
22
|
+
- Generic loop adapter: `CLMLoop` with decorator and context manager support
|
|
23
|
+
|
|
24
|
+
**Observability**
|
|
25
|
+
- `verbose=True` — real-time step-by-step output
|
|
26
|
+
- `get_history()` — full intervention log
|
|
27
|
+
- `summary()` — session aggregate stats
|
|
28
|
+
- `get_score()`, `get_zone()`, `get_sidecar_stats()`
|
|
29
|
+
|
|
30
|
+
**Configuration**
|
|
31
|
+
- `no_embed=True` — keyword-based fallback, zero model download, works offline
|
|
32
|
+
- Fully tunable weights, thresholds, and zone boundaries
|
|
33
|
+
- Domain-specific configuration examples: medical, legal, voice
|
|
34
|
+
|
|
35
|
+
**Storage**
|
|
36
|
+
- SQLite sidecar store, auto-created on first use
|
|
37
|
+
- In-memory mode (default) for ephemeral sessions
|
|
38
|
+
|
|
39
|
+
**Known limitations**
|
|
40
|
+
- Default weights `[0.30, 0.25, 0.25, 0.20]` are heuristic, not empirically validated
|
|
41
|
+
- AutoStateBuilder uses regex heuristics for task tree inference
|
|
42
|
+
- `response.context` replaces task plan section only, not full conversation history
|
|
43
|
+
- Embedding model requires ~90MB download on first use (avoidable with `no_embed=True`)
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
# Contributing to CLM
|
|
2
|
+
|
|
3
|
+
CLM is an open research project. Contributions, bug reports, and real-world usage reports are the most valuable things you can give right now.
|
|
4
|
+
|
|
5
|
+
## What we need most
|
|
6
|
+
|
|
7
|
+
1. **Real-world usage reports** — Did CLM fire when it shouldn't? Did it miss an overload? Open an issue with your domain, agent type, and what happened.
|
|
8
|
+
2. **Domain-specific weight tuning** — Found weights that work well for medical, legal, financial, or other domains? Submit a PR with a config example.
|
|
9
|
+
3. **Bug reports** — Especially integration issues with specific agent frameworks.
|
|
10
|
+
4. **Adapter contributions** — CrewAI, AutoGen, LlamaIndex, smolagents adapters welcome.
|
|
11
|
+
|
|
12
|
+
## Getting started
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
git clone https://github.com/ragul-rofi/CognitiveLoadManager
|
|
16
|
+
cd CognitiveLoadManager
|
|
17
|
+
pip install -e ".[dev]"
|
|
18
|
+
pytest tests/unit/ -q # fast, no internet needed
|
|
19
|
+
pytest tests/integration/ -q # also offline-safe
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Running tests
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pytest tests/unit/ # unit tests — always offline
|
|
26
|
+
pytest tests/integration/ # integration tests — offline-safe (uses no_embed=True)
|
|
27
|
+
pytest tests/ --cov=clm # with coverage
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Principles
|
|
31
|
+
|
|
32
|
+
- CLM must never crash an agent loop. Every failure path must return a valid InterventionResponse.
|
|
33
|
+
- Every new public method needs a docstring.
|
|
34
|
+
- New signals or intervention types go through an issue first — discuss before building.
|
|
35
|
+
- The core scoring formula is intentionally tunable. Don't hardcode domain assumptions.
|
|
36
|
+
|
|
37
|
+
## Roadmap (planned for v0.2)
|
|
38
|
+
|
|
39
|
+
- Empirically validated default weights from real agent failure data
|
|
40
|
+
- Explicit `clm.expand(task_id)` API for agent-initiated context recovery
|
|
41
|
+
- Async `aobserve()` and `aobserve_raw()` for async agent frameworks
|
|
42
|
+
- CrewAI and AutoGen adapters
|
|
43
|
+
- Compression cooldown to prevent amber loop
|
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: clm-plugin
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Cognitive Load Manager — real-time metacognitive middleware for LLM agents
|
|
5
|
+
Project-URL: Homepage, https://github.com/ragul-rofi/CognitiveLoadManager
|
|
6
|
+
Project-URL: Documentation, https://github.com/ragul-rofi/CognitiveLoadManager#readme
|
|
7
|
+
Project-URL: Issues, https://github.com/ragul-rofi/CognitiveLoadManager/issues
|
|
8
|
+
License: MIT
|
|
9
|
+
Keywords: agent,ai,cognitive,langchain,llm,memory,metacognition,middleware,openai
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Requires-Dist: numpy>=1.24.0
|
|
18
|
+
Requires-Dist: sentence-transformers>=2.2.0
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: hypothesis>=6.90.0; extra == 'dev'
|
|
21
|
+
Requires-Dist: pytest-cov; extra == 'dev'
|
|
22
|
+
Requires-Dist: pytest>=7.4.0; extra == 'dev'
|
|
23
|
+
Provides-Extra: langchain
|
|
24
|
+
Requires-Dist: langchain>=0.1.0; extra == 'langchain'
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# CLM — Cognitive Load Manager
|
|
28
|
+
|
|
29
|
+
Real-time metacognitive middleware for LLM agents. Detects when your agent is cognitively overloaded and intervenes before it hallucinates, drifts, or crashes.
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install clm-plugin
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Quickstart — 3 lines
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from clm import CLM
|
|
39
|
+
|
|
40
|
+
clm = CLM(verbose=True)
|
|
41
|
+
|
|
42
|
+
# In your agent loop, replace nothing — just add one line:
|
|
43
|
+
result = clm.observe_raw(llm_output)
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
That's it. CLM automatically builds its own internal task tree from your agent's outputs.
|
|
47
|
+
|
|
48
|
+
## What it does
|
|
49
|
+
|
|
50
|
+
CLM wraps your agent loop and monitors 4 cognitive signals after every LLM call:
|
|
51
|
+
|
|
52
|
+
- **Branching** — how many tasks are in flight simultaneously
|
|
53
|
+
- **Repetition** — is the agent going in circles
|
|
54
|
+
- **Uncertainty** — is the agent hedging and guessing
|
|
55
|
+
- **Goal drift** — has the agent wandered from the original intent
|
|
56
|
+
|
|
57
|
+
It combines these into a single CLM score (0–100) and acts:
|
|
58
|
+
|
|
59
|
+
| Zone | Score | Action |
|
|
60
|
+
|------|-------|--------|
|
|
61
|
+
| Green | 0–40 | Pass through — no intervention |
|
|
62
|
+
| Amber | 40–70 | Compress deep task branches, patch context |
|
|
63
|
+
| Red | 70–100 | Full compression + goal re-anchor + clarification request |
|
|
64
|
+
|
|
65
|
+
## Integration patterns
|
|
66
|
+
|
|
67
|
+
### Important: Context Patch Behavior
|
|
68
|
+
|
|
69
|
+
⚠️ **response.context replaces only your task plan section, not your full conversation history**
|
|
70
|
+
|
|
71
|
+
When CLM returns `action="patch"`, the `response.context` field contains a compressed representation of your task tree. You should inject this into the task planning portion of your prompt, not replace your entire conversation history.
|
|
72
|
+
|
|
73
|
+
**Correct usage:**
|
|
74
|
+
```python
|
|
75
|
+
# Maintain conversation history, update only task section
|
|
76
|
+
conversation_history = [...] # Your full conversation
|
|
77
|
+
task_section = result.context if result.action == "patch" else current_task_plan
|
|
78
|
+
|
|
79
|
+
prompt = f"""
|
|
80
|
+
Conversation so far:
|
|
81
|
+
{conversation_history}
|
|
82
|
+
|
|
83
|
+
Current task structure:
|
|
84
|
+
{task_section}
|
|
85
|
+
|
|
86
|
+
Continue working on the task.
|
|
87
|
+
"""
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
**Incorrect usage:**
|
|
91
|
+
```python
|
|
92
|
+
# DON'T DO THIS - overwrites entire conversation
|
|
93
|
+
if result.action == "patch":
|
|
94
|
+
prompt = result.context # ❌ Loses all conversation history
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### Pattern 1 — Minimal (observe_raw)
|
|
98
|
+
No task state construction needed. Just feed outputs.
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
from clm import CLM
|
|
102
|
+
|
|
103
|
+
clm = CLM(verbose=True)
|
|
104
|
+
|
|
105
|
+
while not done:
|
|
106
|
+
output = call_llm(prompt)
|
|
107
|
+
result = clm.observe_raw(output)
|
|
108
|
+
|
|
109
|
+
if result.action == "interrupt":
|
|
110
|
+
prompt = f"Clarification needed: {result.clarification}"
|
|
111
|
+
elif result.action == "patch":
|
|
112
|
+
context = result.context # use compressed context in next call
|
|
113
|
+
|
|
114
|
+
print(clm.summary())
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Pattern 2 — LangChain (one line)
|
|
118
|
+
|
|
119
|
+
```python
|
|
120
|
+
from clm.adapters import CLMCallbackHandler
|
|
121
|
+
|
|
122
|
+
handler = CLMCallbackHandler(verbose=True)
|
|
123
|
+
agent.run("your task", callbacks=[handler])
|
|
124
|
+
|
|
125
|
+
print(handler.clm.summary())
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Pattern 3 — Decorator (raw loop)
|
|
129
|
+
|
|
130
|
+
```python
|
|
131
|
+
from clm.adapters import CLMLoop
|
|
132
|
+
|
|
133
|
+
loop = CLMLoop(verbose=True)
|
|
134
|
+
|
|
135
|
+
@loop
|
|
136
|
+
def agent_step(prompt: str) -> str:
|
|
137
|
+
return openai_client.chat(prompt)
|
|
138
|
+
|
|
139
|
+
# Call normally — CLM wraps every call
|
|
140
|
+
for i in range(max_steps):
|
|
141
|
+
output = agent_step(current_prompt)
|
|
142
|
+
if loop.clm.get_zone() == "Red":
|
|
143
|
+
break
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### Pattern 4 — Full control (manual TaskState)
|
|
147
|
+
|
|
148
|
+
```python
|
|
149
|
+
from clm import CLM, CLMConfig
|
|
150
|
+
from clm.core.models import TaskState, TaskTree, TaskNode
|
|
151
|
+
|
|
152
|
+
clm = CLM(CLMConfig(verbose_signals=True), verbose=True)
|
|
153
|
+
|
|
154
|
+
task_state = TaskState(task_tree=your_tree, ...)
|
|
155
|
+
result = clm.observe(llm_output, task_state)
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
## Observability
|
|
159
|
+
|
|
160
|
+
```python
|
|
161
|
+
clm.get_score() # current CLM score (0–100)
|
|
162
|
+
clm.get_zone() # "Green" | "Amber" | "Red"
|
|
163
|
+
clm.get_history() # full step-by-step intervention log
|
|
164
|
+
clm.summary() # aggregate stats for the session
|
|
165
|
+
clm.get_sidecar_stats() # compressed task storage stats
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
## No internet? No GPU? Use no_embed mode
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
from clm import CLM, CLMConfig
|
|
172
|
+
|
|
173
|
+
clm = CLM(CLMConfig(no_embed=True)) # keyword-based signals, no model download
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
## Configuration
|
|
177
|
+
|
|
178
|
+
```python
|
|
179
|
+
from clm import CLMConfig
|
|
180
|
+
|
|
181
|
+
config = CLMConfig(
|
|
182
|
+
branching_threshold=7, # active tasks before normalising to 1.0
|
|
183
|
+
repetition_threshold=0.85,
|
|
184
|
+
uncertainty_threshold=0.15,
|
|
185
|
+
weights=[0.30, 0.25, 0.25, 0.20], # must sum to 1.0
|
|
186
|
+
green_max=40.0,
|
|
187
|
+
amber_max=70.0,
|
|
188
|
+
no_embed=False, # set True to skip model download
|
|
189
|
+
storage_type="sqlite",
|
|
190
|
+
storage_params={"db_path": "clm.db"}, # omit for in-memory
|
|
191
|
+
)
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
### Tuning Weights for Your Domain
|
|
195
|
+
|
|
196
|
+
⚠️ **Default weights are informed heuristics, not empirically validated. Tune them for your domain using `CLMConfig(weights=[...])`.**
|
|
197
|
+
|
|
198
|
+
The default weights `[0.30, 0.25, 0.25, 0.20]` (branching, repetition, uncertainty, goal_distance) are based on reasoning about general agent behavior, not empirical validation across domains.
|
|
199
|
+
|
|
200
|
+
**How to tune:**
|
|
201
|
+
|
|
202
|
+
1. **Start with defaults** and observe your agent's behavior with `verbose=True`
|
|
203
|
+
2. **Identify false positives**: If CLM interrupts when your agent is working correctly, reduce the weight of the signal that triggered the intervention
|
|
204
|
+
3. **Identify false negatives**: If CLM doesn't intervene when your agent is struggling, increase the weight of the signal that should have triggered intervention
|
|
205
|
+
|
|
206
|
+
**Example: Reducing false positives from branching**
|
|
207
|
+
|
|
208
|
+
If your agent legitimately needs to track many parallel tasks (e.g., data pipeline orchestration), but CLM keeps interrupting:
|
|
209
|
+
|
|
210
|
+
```python
|
|
211
|
+
# Default: branching weight = 0.30
|
|
212
|
+
config = CLMConfig(weights=[0.15, 0.30, 0.30, 0.25]) # Reduce branching to 0.15
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
**Example: Increasing sensitivity to goal drift**
|
|
216
|
+
|
|
217
|
+
If your agent frequently wanders off-task but CLM doesn't catch it:
|
|
218
|
+
|
|
219
|
+
```python
|
|
220
|
+
# Default: goal_distance weight = 0.20
|
|
221
|
+
config = CLMConfig(weights=[0.25, 0.20, 0.20, 0.35]) # Increase goal_distance to 0.35
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
Remember: weights must sum to 1.0.
|
|
225
|
+
|
|
226
|
+
## Domain-Specific Configuration
|
|
227
|
+
|
|
228
|
+
CLM's default weights are tuned for general-purpose agent tasks. Different domains benefit from different signal priorities:
|
|
229
|
+
|
|
230
|
+
### Medical Diagnosis Assistant
|
|
231
|
+
|
|
232
|
+
Prioritize goal distance (staying on diagnostic protocol) and minimize false interruptions:
|
|
233
|
+
|
|
234
|
+
```python
|
|
235
|
+
from clm import CLM, CLMConfig
|
|
236
|
+
|
|
237
|
+
config = CLMConfig(
|
|
238
|
+
weights=[0.20, 0.20, 0.15, 0.45], # Heavy weight on goal_distance
|
|
239
|
+
green_max=50.0, # Higher tolerance before intervention
|
|
240
|
+
amber_max=75.0,
|
|
241
|
+
branching_threshold=5, # Medical protocols are often sequential
|
|
242
|
+
)
|
|
243
|
+
|
|
244
|
+
clm = CLM(config, verbose=True)
|
|
245
|
+
```
|
|
246
|
+
|
|
247
|
+
**Rationale:** Medical diagnosis requires strict adherence to diagnostic protocols. Goal drift is the most critical signal, while branching is less concerning since medical workflows are often linear.
|
|
248
|
+
|
|
249
|
+
### Legal Document Analysis
|
|
250
|
+
|
|
251
|
+
Prioritize repetition detection (circular reasoning) and uncertainty (hedging language):
|
|
252
|
+
|
|
253
|
+
```python
|
|
254
|
+
config = CLMConfig(
|
|
255
|
+
weights=[0.15, 0.35, 0.35, 0.15], # Heavy weight on repetition and uncertainty
|
|
256
|
+
repetition_threshold=0.75, # Lower threshold for detecting circular reasoning
|
|
257
|
+
uncertainty_threshold=0.20, # Higher tolerance for legal hedging language
|
|
258
|
+
)
|
|
259
|
+
|
|
260
|
+
clm = CLM(config, verbose=True)
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
**Rationale:** Legal analysis must avoid circular reasoning and excessive hedging. Repetition and uncertainty are critical signals, while branching (considering multiple legal precedents) is expected behavior.
|
|
264
|
+
|
|
265
|
+
### Voice Assistant
|
|
266
|
+
|
|
267
|
+
Prioritize branching (context switching) and goal distance (staying on user intent):
|
|
268
|
+
|
|
269
|
+
```python
|
|
270
|
+
config = CLMConfig(
|
|
271
|
+
weights=[0.40, 0.15, 0.15, 0.30], # Heavy weight on branching and goal_distance
|
|
272
|
+
branching_threshold=3, # Voice interactions should stay focused
|
|
273
|
+
green_max=35.0, # Lower tolerance for intervention
|
|
274
|
+
)
|
|
275
|
+
|
|
276
|
+
clm = CLM(config, verbose=True)
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
**Rationale:** Voice assistants must maintain tight focus on user intent and avoid context switching. Branching and goal distance are critical, while repetition and uncertainty are less concerning in conversational contexts.
|
|
280
|
+
|
|
281
|
+
## Architecture
|
|
282
|
+
|
|
283
|
+
5 layers, each independently testable:
|
|
284
|
+
|
|
285
|
+
```
|
|
286
|
+
Agent loop output
|
|
287
|
+
↓
|
|
288
|
+
Signal Collector — extracts 4 cognitive signals
|
|
289
|
+
↓
|
|
290
|
+
CLM Scorer — weighted score → zone classification
|
|
291
|
+
↓
|
|
292
|
+
Action Dispatcher — routes to Green / Amber / Red handler
|
|
293
|
+
↓
|
|
294
|
+
Chunking Engine — compress · anchor · expand
|
|
295
|
+
↓
|
|
296
|
+
Sidecar Store — SQLite persistence for compressed tasks
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
## License
|
|
300
|
+
|
|
301
|
+
MIT
|
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
# CLM — Cognitive Load Manager
|
|
2
|
+
|
|
3
|
+
Real-time metacognitive middleware for LLM agents. Detects when your agent is cognitively overloaded and intervenes before it hallucinates, drifts, or crashes.
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
pip install clm-plugin
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
## Quickstart — 3 lines
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
from clm import CLM
|
|
13
|
+
|
|
14
|
+
clm = CLM(verbose=True)
|
|
15
|
+
|
|
16
|
+
# In your agent loop, replace nothing — just add one line:
|
|
17
|
+
result = clm.observe_raw(llm_output)
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
That's it. CLM automatically builds its own internal task tree from your agent's outputs.
|
|
21
|
+
|
|
22
|
+
## What it does
|
|
23
|
+
|
|
24
|
+
CLM wraps your agent loop and monitors 4 cognitive signals after every LLM call:
|
|
25
|
+
|
|
26
|
+
- **Branching** — how many tasks are in flight simultaneously
|
|
27
|
+
- **Repetition** — is the agent going in circles
|
|
28
|
+
- **Uncertainty** — is the agent hedging and guessing
|
|
29
|
+
- **Goal drift** — has the agent wandered from the original intent
|
|
30
|
+
|
|
31
|
+
It combines these into a single CLM score (0–100) and acts:
|
|
32
|
+
|
|
33
|
+
| Zone | Score | Action |
|
|
34
|
+
|------|-------|--------|
|
|
35
|
+
| Green | 0–40 | Pass through — no intervention |
|
|
36
|
+
| Amber | 40–70 | Compress deep task branches, patch context |
|
|
37
|
+
| Red | 70–100 | Full compression + goal re-anchor + clarification request |
|
|
38
|
+
|
|
39
|
+
## Integration patterns
|
|
40
|
+
|
|
41
|
+
### Important: Context Patch Behavior
|
|
42
|
+
|
|
43
|
+
⚠️ **response.context replaces only your task plan section, not your full conversation history**
|
|
44
|
+
|
|
45
|
+
When CLM returns `action="patch"`, the `response.context` field contains a compressed representation of your task tree. You should inject this into the task planning portion of your prompt, not replace your entire conversation history.
|
|
46
|
+
|
|
47
|
+
**Correct usage:**
|
|
48
|
+
```python
|
|
49
|
+
# Maintain conversation history, update only task section
|
|
50
|
+
conversation_history = [...] # Your full conversation
|
|
51
|
+
task_section = result.context if result.action == "patch" else current_task_plan
|
|
52
|
+
|
|
53
|
+
prompt = f"""
|
|
54
|
+
Conversation so far:
|
|
55
|
+
{conversation_history}
|
|
56
|
+
|
|
57
|
+
Current task structure:
|
|
58
|
+
{task_section}
|
|
59
|
+
|
|
60
|
+
Continue working on the task.
|
|
61
|
+
"""
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
**Incorrect usage:**
|
|
65
|
+
```python
|
|
66
|
+
# DON'T DO THIS - overwrites entire conversation
|
|
67
|
+
if result.action == "patch":
|
|
68
|
+
prompt = result.context # ❌ Loses all conversation history
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### Pattern 1 — Minimal (observe_raw)
|
|
72
|
+
No task state construction needed. Just feed outputs.
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from clm import CLM
|
|
76
|
+
|
|
77
|
+
clm = CLM(verbose=True)
|
|
78
|
+
|
|
79
|
+
while not done:
|
|
80
|
+
output = call_llm(prompt)
|
|
81
|
+
result = clm.observe_raw(output)
|
|
82
|
+
|
|
83
|
+
if result.action == "interrupt":
|
|
84
|
+
prompt = f"Clarification needed: {result.clarification}"
|
|
85
|
+
elif result.action == "patch":
|
|
86
|
+
context = result.context # use compressed context in next call
|
|
87
|
+
|
|
88
|
+
print(clm.summary())
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Pattern 2 — LangChain (one line)
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from clm.adapters import CLMCallbackHandler
|
|
95
|
+
|
|
96
|
+
handler = CLMCallbackHandler(verbose=True)
|
|
97
|
+
agent.run("your task", callbacks=[handler])
|
|
98
|
+
|
|
99
|
+
print(handler.clm.summary())
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Pattern 3 — Decorator (raw loop)
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
from clm.adapters import CLMLoop
|
|
106
|
+
|
|
107
|
+
loop = CLMLoop(verbose=True)
|
|
108
|
+
|
|
109
|
+
@loop
|
|
110
|
+
def agent_step(prompt: str) -> str:
|
|
111
|
+
return openai_client.chat(prompt)
|
|
112
|
+
|
|
113
|
+
# Call normally — CLM wraps every call
|
|
114
|
+
for i in range(max_steps):
|
|
115
|
+
output = agent_step(current_prompt)
|
|
116
|
+
if loop.clm.get_zone() == "Red":
|
|
117
|
+
break
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Pattern 4 — Full control (manual TaskState)
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
from clm import CLM, CLMConfig
|
|
124
|
+
from clm.core.models import TaskState, TaskTree, TaskNode
|
|
125
|
+
|
|
126
|
+
clm = CLM(CLMConfig(verbose_signals=True), verbose=True)
|
|
127
|
+
|
|
128
|
+
task_state = TaskState(task_tree=your_tree, ...)
|
|
129
|
+
result = clm.observe(llm_output, task_state)
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## Observability
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
clm.get_score() # current CLM score (0–100)
|
|
136
|
+
clm.get_zone() # "Green" | "Amber" | "Red"
|
|
137
|
+
clm.get_history() # full step-by-step intervention log
|
|
138
|
+
clm.summary() # aggregate stats for the session
|
|
139
|
+
clm.get_sidecar_stats() # compressed task storage stats
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
## No internet? No GPU? Use no_embed mode
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
from clm import CLM, CLMConfig
|
|
146
|
+
|
|
147
|
+
clm = CLM(CLMConfig(no_embed=True)) # keyword-based signals, no model download
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
## Configuration
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
from clm import CLMConfig
|
|
154
|
+
|
|
155
|
+
config = CLMConfig(
|
|
156
|
+
branching_threshold=7, # active tasks before normalising to 1.0
|
|
157
|
+
repetition_threshold=0.85,
|
|
158
|
+
uncertainty_threshold=0.15,
|
|
159
|
+
weights=[0.30, 0.25, 0.25, 0.20], # must sum to 1.0
|
|
160
|
+
green_max=40.0,
|
|
161
|
+
amber_max=70.0,
|
|
162
|
+
no_embed=False, # set True to skip model download
|
|
163
|
+
storage_type="sqlite",
|
|
164
|
+
storage_params={"db_path": "clm.db"}, # omit for in-memory
|
|
165
|
+
)
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### Tuning Weights for Your Domain
|
|
169
|
+
|
|
170
|
+
⚠️ **Default weights are informed heuristics, not empirically validated. Tune them for your domain using `CLMConfig(weights=[...])`.**
|
|
171
|
+
|
|
172
|
+
The default weights `[0.30, 0.25, 0.25, 0.20]` (branching, repetition, uncertainty, goal_distance) are based on reasoning about general agent behavior, not empirical validation across domains.
|
|
173
|
+
|
|
174
|
+
**How to tune:**
|
|
175
|
+
|
|
176
|
+
1. **Start with defaults** and observe your agent's behavior with `verbose=True`
|
|
177
|
+
2. **Identify false positives**: If CLM interrupts when your agent is working correctly, reduce the weight of the signal that triggered the intervention
|
|
178
|
+
3. **Identify false negatives**: If CLM doesn't intervene when your agent is struggling, increase the weight of the signal that should have triggered intervention
|
|
179
|
+
|
|
180
|
+
**Example: Reducing false positives from branching**
|
|
181
|
+
|
|
182
|
+
If your agent legitimately needs to track many parallel tasks (e.g., data pipeline orchestration), but CLM keeps interrupting:
|
|
183
|
+
|
|
184
|
+
```python
|
|
185
|
+
# Default: branching weight = 0.30
|
|
186
|
+
config = CLMConfig(weights=[0.15, 0.30, 0.30, 0.25]) # Reduce branching to 0.15
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
**Example: Increasing sensitivity to goal drift**
|
|
190
|
+
|
|
191
|
+
If your agent frequently wanders off-task but CLM doesn't catch it:
|
|
192
|
+
|
|
193
|
+
```python
|
|
194
|
+
# Default: goal_distance weight = 0.20
|
|
195
|
+
config = CLMConfig(weights=[0.25, 0.20, 0.20, 0.35]) # Increase goal_distance to 0.35
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
Remember: weights must sum to 1.0.
|
|
199
|
+
|
|
200
|
+
## Domain-Specific Configuration
|
|
201
|
+
|
|
202
|
+
CLM's default weights are tuned for general-purpose agent tasks. Different domains benefit from different signal priorities:
|
|
203
|
+
|
|
204
|
+
### Medical Diagnosis Assistant
|
|
205
|
+
|
|
206
|
+
Prioritize goal distance (staying on diagnostic protocol) and minimize false interruptions:
|
|
207
|
+
|
|
208
|
+
```python
|
|
209
|
+
from clm import CLM, CLMConfig
|
|
210
|
+
|
|
211
|
+
config = CLMConfig(
|
|
212
|
+
weights=[0.20, 0.20, 0.15, 0.45], # Heavy weight on goal_distance
|
|
213
|
+
green_max=50.0, # Higher tolerance before intervention
|
|
214
|
+
amber_max=75.0,
|
|
215
|
+
branching_threshold=5, # Medical protocols are often sequential
|
|
216
|
+
)
|
|
217
|
+
|
|
218
|
+
clm = CLM(config, verbose=True)
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
**Rationale:** Medical diagnosis requires strict adherence to diagnostic protocols. Goal drift is the most critical signal, while branching is less concerning since medical workflows are often linear.
|
|
222
|
+
|
|
223
|
+
### Legal Document Analysis
|
|
224
|
+
|
|
225
|
+
Prioritize repetition detection (circular reasoning) and uncertainty (hedging language):
|
|
226
|
+
|
|
227
|
+
```python
|
|
228
|
+
config = CLMConfig(
|
|
229
|
+
weights=[0.15, 0.35, 0.35, 0.15], # Heavy weight on repetition and uncertainty
|
|
230
|
+
repetition_threshold=0.75, # Lower threshold for detecting circular reasoning
|
|
231
|
+
uncertainty_threshold=0.20, # Higher tolerance for legal hedging language
|
|
232
|
+
)
|
|
233
|
+
|
|
234
|
+
clm = CLM(config, verbose=True)
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
**Rationale:** Legal analysis must avoid circular reasoning and excessive hedging. Repetition and uncertainty are critical signals, while branching (considering multiple legal precedents) is expected behavior.
|
|
238
|
+
|
|
239
|
+
### Voice Assistant
|
|
240
|
+
|
|
241
|
+
Prioritize branching (context switching) and goal distance (staying on user intent):
|
|
242
|
+
|
|
243
|
+
```python
|
|
244
|
+
config = CLMConfig(
|
|
245
|
+
weights=[0.40, 0.15, 0.15, 0.30], # Heavy weight on branching and goal_distance
|
|
246
|
+
branching_threshold=3, # Voice interactions should stay focused
|
|
247
|
+
green_max=35.0, # Lower tolerance for intervention
|
|
248
|
+
)
|
|
249
|
+
|
|
250
|
+
clm = CLM(config, verbose=True)
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
**Rationale:** Voice assistants must maintain tight focus on user intent and avoid context switching. Branching and goal distance are critical, while repetition and uncertainty are less concerning in conversational contexts.
|
|
254
|
+
|
|
255
|
+
## Architecture
|
|
256
|
+
|
|
257
|
+
5 layers, each independently testable:
|
|
258
|
+
|
|
259
|
+
```
|
|
260
|
+
Agent loop output
|
|
261
|
+
↓
|
|
262
|
+
Signal Collector — extracts 4 cognitive signals
|
|
263
|
+
↓
|
|
264
|
+
CLM Scorer — weighted score → zone classification
|
|
265
|
+
↓
|
|
266
|
+
Action Dispatcher — routes to Green / Amber / Red handler
|
|
267
|
+
↓
|
|
268
|
+
Chunking Engine — compress · anchor · expand
|
|
269
|
+
↓
|
|
270
|
+
Sidecar Store — SQLite persistence for compressed tasks
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
## License
|
|
274
|
+
|
|
275
|
+
MIT
|