memctrl 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
memctrl/rules.py ADDED
@@ -0,0 +1,330 @@
1
+ """MemCtrl — .memoryrc parser and rule engine.
2
+
3
+ Uses TOML for configuration (native tomllib in Python 3.11+).
4
+ Implements hot-reload via watchdog and trigger execution.
5
+
6
+ Research: TOML is the best format for .memoryrc — native Python support,
7
+ clean syntax for rules, no external parser needed on 3.11+.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import copy
13
+ import os
14
+ import re
15
+ import sys
16
+ from dataclasses import dataclass, field
17
+ from datetime import datetime, timedelta
18
+ from pathlib import Path
19
+ from typing import Any, Dict, List, Optional
20
+
21
+ # ---------------------------------------------------------------------------
22
+ # TOML parser compatibility
23
+ # ---------------------------------------------------------------------------
24
+
25
+ if sys.version_info >= (3, 11):
26
+ import tomllib
27
+ else:
28
+ try:
29
+ import tomli as tomllib
30
+ except ImportError:
31
+ tomllib = None
32
+
33
+ # ---------------------------------------------------------------------------
34
+ # Data model
35
+ # ---------------------------------------------------------------------------
36
+
37
@dataclass
class Rules:
    """In-memory form of a .memoryrc configuration.

    Every field defaults to a fresh empty container, so a bare ``Rules()``
    carries no layers, triggers, forget rules or confidence scores.
    """

    # Layer name -> free-text description of the layer.
    layers: Dict[str, str] = field(default_factory=dict)
    # Trigger pattern -> action string (e.g. "consolidate session -> project").
    triggers: Dict[str, str] = field(default_factory=dict)
    # Patterns listed under the "never" forget rule.
    forget_never: List[str] = field(default_factory=list)
    # Layer name -> time-to-live in days.
    forget_after_days: Dict[str, int] = field(default_factory=dict)
    # Confidence label -> numeric score.
    confidence: Dict[str, float] = field(default_factory=dict)

    def get_ttl_days(self, layer: str) -> Optional[int]:
        """Return the TTL in days configured for *layer*, or None if unset."""
        try:
            return self.forget_after_days[layer]
        except KeyError:
            return None
49
+
50
+
51
+ # ---------------------------------------------------------------------------
52
+ # Default rules (used when .memoryrc does not exist)
53
+ # ---------------------------------------------------------------------------
54
+
55
# Baseline configuration; RuleEngine deep-copies it rather than mutating it.
DEFAULT_RULES = Rules(
    layers=dict(
        project="architecture decisions, tech stack, ADRs, why we chose X",
        session="current task, WIP, what was done this session",
        user="preferences, working style, patterns, personal rules",
    ),
    # Keys are trigger patterns, values are action strings.
    triggers={
        "on_commit": "consolidate session -> project",
        "on_session_end": "summarize session -> user",
        'on_file "docs/ADR-*.md"': "extract -> project",
        'on_file "*.md"': "extract -> project if contains decision",
    },
    forget_never=[
        "passwords",
        "keys",
        "PII",
        "secrets",
        "api_key",
        "token",
        "secret",
        "password",
    ],
    # Layers absent here (e.g. "project") have no TTL.
    forget_after_days=dict(session=7, user=90),
    confidence=dict(explicit=1.0, inferred=0.7, mentioned=0.5),
)
72
+
73
+
74
+ # ---------------------------------------------------------------------------
75
+ # Rule engine
76
+ # ---------------------------------------------------------------------------
77
+
78
class RuleEngine:
    """Parse .memoryrc (TOML), validate, and execute rules.

    Supports hot-reload via watchdog (optional dependency). The engine always
    holds a usable ``rules`` object: it starts from a deep copy of
    ``DEFAULT_RULES`` and only replaces it after a successful ``load()``.
    """

    # Heuristic tiers checked in order by ``_heuristic_confidence``. Each entry
    # is (confidence key, fallback score, compiled case-insensitive patterns).
    # Compiled once here instead of being rebuilt for every scanned line.
    _MARKER_TIERS = tuple(
        (key, default, tuple(re.compile(p, re.I) for p in patterns))
        for key, default, patterns in (
            ("explicit", 1.0, (
                r"we\s+(use|use[d]|chose|decided|migrated|switched|implemented)",
                r"adr[-\s]?\d+",
                r"decided\s+to\s+",
                r"architecture\s+decision",
                r"tech\s+stack",
            )),
            ("inferred", 0.7, (
                r"import\s+\w+",
                r"from\s+\w+\s+import",
                r"uses?\s+\w+",
                r"built\s+(with|on)\s+",
            )),
            ("mentioned", 0.5, (
                r"(consider|considering|evaluating|looking at|might|maybe)",
                r"(suggested|proposed|idea)",
            )),
        )
    )

    def __init__(self, rc_path: str = ".memoryrc"):
        """Create an engine for the config file at *rc_path* (not read yet)."""
        self.rc_path = Path(rc_path)
        self.rules: Rules = copy.deepcopy(DEFAULT_RULES)
        self._watching = False
        # Set up front so stop_watch() is safe even if watch() never ran.
        self._observer = None
        self._handler = None

    # --- Loading ---

    @staticmethod
    def _section(data: Dict[str, Any], name: str) -> Dict[str, Any]:
        """Return TOML table *name* from *data*, or ``{}`` when it is absent.

        Raises:
            ValueError: if the key exists but is not a table.
        """
        if name not in data:
            return {}
        section = data[name]
        if not isinstance(section, dict):
            raise ValueError(
                f"[{name}] must be a table, got {type(section).__name__}"
            )
        return section

    def load(self) -> Rules:
        """Parse .memoryrc TOML. Return Rules (falls back to defaults).

        Values from the file are layered on top of a copy of ``DEFAULT_RULES``:
        ``[layers]`` and ``[triggers]`` are merged into the defaults, while
        ``forget.never``, ``forget.after_days`` and ``extract.confidence``
        replace the default value entirely.

        Raises:
            RuntimeError: if the file exists but no TOML parser is available.
            ValueError: if the file cannot be read or parsed, or a known
                section is not a table.
        """
        if not self.rc_path.exists():
            self.rules = copy.deepcopy(DEFAULT_RULES)
            return self.rules

        if tomllib is None:
            raise RuntimeError(
                "TOML parsing requires Python 3.11+ or 'tomli' package. "
                "Install: pip install tomli"
            )

        try:
            with open(self.rc_path, "rb") as f:
                data = tomllib.load(f)
        except Exception as exc:
            raise ValueError(f"Failed to parse {self.rc_path}: {exc}") from exc

        rules = copy.deepcopy(DEFAULT_RULES)

        # [layers]
        rules.layers.update(self._section(data, "layers"))

        # [triggers] — handle both formats:
        #   compact: 'on_file "*.md"' = "extract -> project"
        #   nested:  [triggers.on_file] with "*.md" = "extract -> project"
        for key, val in self._section(data, "triggers").items():
            if isinstance(val, str):
                rules.triggers[key] = val
            elif isinstance(val, dict):
                for sub_key, sub_val in val.items():
                    # Non-string actions are skipped, like non-string
                    # top-level values; they would crash _parse_action later.
                    if isinstance(sub_val, str):
                        rules.triggers[f'{key} "{sub_key}"'] = sub_val

        # [forget]
        forget = self._section(data, "forget")
        if "never" in forget:
            never = forget["never"]
            # A bare string would otherwise be iterated character by
            # character in should_forget(); treat it as a single pattern.
            rules.forget_never = [never] if isinstance(never, str) else list(never)
        if "after_days" in forget:
            rules.forget_after_days = dict(forget["after_days"])

        # [extract]
        extract = self._section(data, "extract")
        if "confidence" in extract:
            rules.confidence = {
                k: float(v) for k, v in extract["confidence"].items()
            }

        self.rules = rules
        return rules

    def reload(self) -> Rules:
        """Reload rules from disk."""
        return self.load()

    # --- Hot reload ---

    def watch(self) -> None:
        """Start watchdog to auto-reload .memoryrc on change.

        Does nothing if already watching or if watchdog is not installed.
        A reload that fails (e.g. the file is caught half-written) emits a
        RuntimeWarning and keeps the previously loaded rules.
        """
        if self._watching:
            return
        try:
            from watchdog.observers import Observer
            from watchdog.events import FileSystemEventHandler, FileModifiedEvent
        except ImportError:
            return  # watchdog not installed — skip silently

        import warnings

        class _Handler(FileSystemEventHandler):
            def __init__(self, engine: RuleEngine):
                super().__init__()
                self.engine = engine

            def on_modified(self, event) -> None:
                if not isinstance(event, FileModifiedEvent):
                    return
                # src_path may be bytes; fsdecode accepts both str and bytes.
                changed = Path(os.fsdecode(event.src_path))
                if changed.name != self.engine.rc_path.name:
                    return
                try:
                    self.engine.reload()
                except (ValueError, RuntimeError) as exc:
                    # Letting this propagate would terminate the observer
                    # thread and silently end hot reload. load() only assigns
                    # self.rules on success, so the old rules remain active.
                    warnings.warn(
                        f"Ignoring invalid {self.engine.rc_path}: {exc}",
                        RuntimeWarning,
                    )

        self._handler = _Handler(self)
        self._observer = Observer()
        # Watch the config's own directory whenever it exists, even if the
        # file itself has not been created yet; otherwise fall back to cwd.
        parent = self.rc_path.parent
        watch_dir = parent if parent.is_dir() else Path.cwd()
        self._observer.schedule(self._handler, str(watch_dir), recursive=False)
        self._observer.start()
        self._watching = True

    def stop_watch(self) -> None:
        """Stop the observer started by ``watch()``; safe to call at any time."""
        # getattr keeps this safe on instances created without __init__.
        observer = getattr(self, "_observer", None)
        if observer is not None:
            observer.stop()
            observer.join()
            self._observer = None
        self._watching = False

    # --- Trigger execution ---

    def fire_trigger(self, event: str, context: dict, store) -> List[str]:
        """Execute matching trigger rules. Return affected memory IDs.

        A trigger matches when *event* is a case-insensitive substring of its
        pattern, so ``"on_file"`` matches every ``on_file "<glob>"`` trigger.
        Each executed trigger is reported via
        ``store.log_trigger(event, action, ids)``.

        Parse actions like 'consolidate session -> project'.
        """
        matched_ids: List[str] = []

        # "" is a substring of every pattern; a blank event must fire nothing.
        if not event.strip():
            return matched_ids

        needle = event.lower()
        for pattern, action in self.rules.triggers.items():
            if needle not in pattern.lower():
                continue
            parsed = self._parse_action(action)
            if not parsed:
                continue
            ids = self._execute_action(parsed, context, store)
            matched_ids.extend(ids)
            # Log trigger execution
            store.log_trigger(event, action, ids)

        return matched_ids

    def _parse_action(self, action: str) -> Optional[Dict[str, str]]:
        """Parse action string into structured dict.

        Recognized forms (case-insensitive, matched from the start):
          * ``consolidate X -> Y`` / ``summarize X -> Y``
            -> ``{"verb", "from", "to"}``
          * ``extract -> Y`` -> ``{"verb": "extract", "to"}``
          * ``extract -> Y if COND`` -> additionally ``"condition": COND``
        Anything else yields ``{"verb": "unknown", "raw": action}``.
        """
        action = action.strip()

        # consolidate X -> Y / summarize X -> Y
        m = re.match(r"(consolidate|summarize)\s+(\w+)\s*[-]+>\s*(\w+)", action, re.I)
        if m:
            return {"verb": m.group(1).lower(), "from": m.group(2), "to": m.group(3)}

        # extract -> layer [if condition]
        # The condition is an optional tail of the same pattern; checking the
        # bare form first would always match and drop the condition.
        m = re.match(r"extract\s*[-]+>\s*(\w+)(?:\s+if\s+(.+))?", action, re.I)
        if m:
            parsed = {"verb": "extract", "to": m.group(1)}
            if m.group(2) is not None:
                parsed["condition"] = m.group(2)
            return parsed

        return {"verb": "unknown", "raw": action}

    def _execute_action(self, parsed: dict, context: dict, store) -> List[str]:
        """Run a parsed action against *store* and return what it returns.

        Only consolidate/summarize do work here; extract and unknown verbs
        return an empty list. *context* is currently unused.
        """
        verb = parsed.get("verb", "")
        if verb in ("consolidate", "summarize"):
            # For now summarize behaves exactly like consolidate.
            return store.consolidate(parsed["from"], parsed["to"])
        # Extract is handled by extractor module; unknown verbs are no-ops.
        return []

    # --- Forget rules ---

    def should_forget(self, memory, rules: Optional[Rules] = None) -> bool:
        """Check if a memory should be forgotten based on rules.

        Returns False when the content contains any ``forget_never`` pattern
        (case-insensitive substring), when the memory's layer has no TTL, or
        when ``memory.expires_at`` is None. Otherwise returns whether
        ``datetime.now()`` is past ``memory.expires_at``.
        """
        r = rules or self.rules

        # Never forget items matching forget.never
        content_lower = memory.content.lower()
        if any(pattern.lower() in content_lower for pattern in r.forget_never):
            return False

        # Check TTL
        if r.get_ttl_days(memory.layer) is None:
            return False
        if memory.expires_at is None:
            return False

        # NOTE(review): naive local time, matching compute_expiry(); assumes
        # expires_at is naive as well — confirm against the store.
        return datetime.now() > memory.expires_at

    # --- Extraction helpers ---

    def extract_memories(self, text: str, layer: str, rules: Rules,
                         *, min_confidence: float = 0.5) -> List[dict]:
        """Baseline text extraction using heuristics.

        Each stripped line of at least 10 characters is scored with
        ``_heuristic_confidence``; lines scoring ``min_confidence`` or more
        are returned.

        Returns list of dicts with content, confidence, source, tags.
        For full LLM extraction, use MemoryExtractor.
        """
        results = []
        for raw_line in text.split("\n"):
            line = raw_line.strip()
            if len(line) < 10:
                continue

            confidence = self._heuristic_confidence(line, rules)
            if confidence >= min_confidence:
                results.append({
                    "content": line,
                    "confidence": confidence,
                    "source": "heuristic",
                    "tags": [layer, "auto-extracted"],
                })

        return results

    def _heuristic_confidence(self, line: str, rules: Rules) -> float:
        """Score a line's confidence based on explicit indicators.

        Tiers are tried in order (explicit, inferred, mentioned). The first
        tier with a matching pattern decides the score, taken from
        ``rules.confidence`` with a fallback of 1.0 / 0.7 / 0.5. Lines that
        match nothing score 0.0.
        """
        for key, default, patterns in self._MARKER_TIERS:
            if any(p.search(line) for p in patterns):
                return rules.confidence.get(key, default)
        return 0.0

    def compute_expiry(self, layer: str) -> Optional[datetime]:
        """Compute expiry datetime for a given layer.

        Returns None when the layer has no TTL, else naive ``datetime.now()``
        plus that many days.
        """
        days = self.rules.get_ttl_days(layer)
        if days is None:
            return None
        return datetime.now() + timedelta(days=days)