superlocalmemory 3.2.3 → 3.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. package/CHANGELOG.md +43 -1
  2. package/README.md +106 -71
  3. package/package.json +1 -2
  4. package/pyproject.toml +16 -1
  5. package/src/superlocalmemory/cli/commands.py +419 -15
  6. package/src/superlocalmemory/cli/main.py +44 -0
  7. package/src/superlocalmemory/core/config.py +276 -4
  8. package/src/superlocalmemory/core/consolidation_engine.py +37 -0
  9. package/src/superlocalmemory/core/engine.py +21 -0
  10. package/src/superlocalmemory/core/engine_wiring.py +58 -8
  11. package/src/superlocalmemory/dynamics/activation_guided_quantization.py +374 -0
  12. package/src/superlocalmemory/dynamics/eap_scheduler.py +276 -0
  13. package/src/superlocalmemory/dynamics/ebbinghaus_langevin_coupling.py +171 -0
  14. package/src/superlocalmemory/encoding/cognitive_consolidator.py +804 -0
  15. package/src/superlocalmemory/hooks/auto_invoker.py +46 -8
  16. package/src/superlocalmemory/hooks/auto_parameterize.py +147 -0
  17. package/src/superlocalmemory/infra/heartbeat_monitor.py +140 -0
  18. package/src/superlocalmemory/infra/pid_manager.py +193 -0
  19. package/src/superlocalmemory/infra/process_reaper.py +572 -0
  20. package/src/superlocalmemory/learning/consolidation_quantization_worker.py +115 -0
  21. package/src/superlocalmemory/learning/forgetting_scheduler.py +263 -0
  22. package/src/superlocalmemory/learning/quantization_scheduler.py +320 -0
  23. package/src/superlocalmemory/math/ebbinghaus.py +309 -0
  24. package/src/superlocalmemory/math/fisher_quantized.py +251 -0
  25. package/src/superlocalmemory/math/hopfield.py +279 -0
  26. package/src/superlocalmemory/math/polar_quant.py +379 -0
  27. package/src/superlocalmemory/math/qjl.py +115 -0
  28. package/src/superlocalmemory/mcp/server.py +2 -0
  29. package/src/superlocalmemory/mcp/tools_v3.py +10 -0
  30. package/src/superlocalmemory/mcp/tools_v33.py +351 -0
  31. package/src/superlocalmemory/parameterization/__init__.py +47 -0
  32. package/src/superlocalmemory/parameterization/pattern_extractor.py +534 -0
  33. package/src/superlocalmemory/parameterization/pii_filter.py +106 -0
  34. package/src/superlocalmemory/parameterization/prompt_injector.py +216 -0
  35. package/src/superlocalmemory/parameterization/prompt_lifecycle.py +275 -0
  36. package/src/superlocalmemory/parameterization/soft_prompt_generator.py +425 -0
  37. package/src/superlocalmemory/retrieval/engine.py +21 -3
  38. package/src/superlocalmemory/retrieval/forgetting_filter.py +145 -0
  39. package/src/superlocalmemory/retrieval/hopfield_channel.py +335 -0
  40. package/src/superlocalmemory/retrieval/quantization_aware_search.py +133 -0
  41. package/src/superlocalmemory/retrieval/strategy.py +16 -6
  42. package/src/superlocalmemory/server/routes/agents.py +68 -8
  43. package/src/superlocalmemory/server/routes/learning.py +18 -1
  44. package/src/superlocalmemory/server/routes/lifecycle.py +36 -17
  45. package/src/superlocalmemory/server/routes/v3_api.py +503 -1
  46. package/src/superlocalmemory/storage/database.py +206 -0
  47. package/src/superlocalmemory/storage/embedding_migrator.py +178 -0
  48. package/src/superlocalmemory/storage/migration_v33.py +140 -0
  49. package/src/superlocalmemory/storage/quantized_store.py +261 -0
  50. package/src/superlocalmemory/storage/schema_v32.py +137 -0
  51. package/conftest.py +0 -5
@@ -0,0 +1,534 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3.3
4
+
5
+ """PatternExtractor — Mine patterns from 4 sources for soft prompt generation.
6
+
7
+ Sources: core memory blocks, behavioral patterns, cross-project preferences,
8
+ workflow sequences. Includes deduplication (independence assumption) and
9
+ contradiction resolution (temporal ordering + close-confidence alternate marking).
10
+
11
+ [AUDIT FIX F-1] Sheaf removed — heuristic key-value comparison for contradictions.
12
+ [AUDIT FIX F-4] source_ids as tuple for true immutability.
13
+
14
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import json
20
+ import logging
21
+ from dataclasses import dataclass
22
+ from datetime import datetime, timezone
23
+ from enum import Enum
24
+ from typing import TYPE_CHECKING
25
+
26
+ if TYPE_CHECKING:
27
+ from superlocalmemory.storage.database import DatabaseManager
28
+ from superlocalmemory.learning.behavioral import BehavioralPatternStore
29
+ from superlocalmemory.learning.cross_project import CrossProjectAggregator
30
+ from superlocalmemory.learning.workflows import WorkflowMiner
31
+ from superlocalmemory.core.config import ParameterizationConfig
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+ # ---------------------------------------------------------------------------
36
+ # Data models
37
+ # ---------------------------------------------------------------------------
38
+
39
# Keyword sets for classification heuristics.
# PatternExtractor._classify_text intersects each set with the lower-cased,
# whitespace-split tokens of an assertion; the first set that matches (checked
# in the order avoidance, decision, identity, tech, style) picks the category.

# Tokens that mark an assertion as describing who the user is (role/title).
_IDENTITY_KEYWORDS = frozenset({
    "role", "title", "senior", "architect", "engineer", "manager",
    "developer", "designer", "expertise", "lead", "director", "analyst",
})
# Tokens naming languages, frameworks, infrastructure or tooling.
_TECH_KEYWORDS = frozenset({
    "typescript", "python", "react", "vue", "angular", "node", "java",
    "rust", "go", "framework", "library", "language", "database",
    "postgres", "mongodb", "redis", "aws", "azure", "gcp", "docker",
    "kubernetes", "tool", "sdk", "api",
})
# Tokens describing how the user likes responses to be written.
_STYLE_KEYWORDS = frozenset({
    "prefer", "style", "tone", "format", "concise", "verbose",
    "detailed", "brief", "formal", "casual",
})
# Tokens signalling something the user wants to stay away from.
# NOTE(review): "not use" is a two-word phrase, unlike every other entry,
# which is a single whitespace-free token.
_AVOIDANCE_KEYWORDS = frozenset({
    "avoid", "never", "don't", "dont", "stop", "hate", "dislike",
    "not use", "refuse",
})
# Tokens signalling a recorded choice or change of direction.
_DECISION_KEYWORDS = frozenset({
    "decided", "chose", "selected", "picked", "switched", "migrated",
    "adopted", "dropped",
})

# Behavioral pattern type -> PatternCategory mapping
# (values are PatternCategory values; unmapped types fall back to "custom"
# at the lookup site).
_BEHAVIORAL_TYPE_MAP: dict[str, str] = {
    "entity_pref": "tech_preference",
    "query_type": "workflow_pattern",
    "time_of_day": "workflow_pattern",
    "refinement": "communication_style",
    "interest": "tech_preference",
    "archival": "avoidance",
}

# Cross-project key -> PatternCategory mapping
# (every listed key currently maps to "tech_preference"; unmapped keys fall
# back to "custom" at the lookup site).
_CROSS_PROJECT_KEY_MAP: dict[str, str] = {
    "frontend_framework": "tech_preference",
    "backend_framework": "tech_preference",
    "language": "tech_preference",
    "database": "tech_preference",
    "cloud": "tech_preference",
    "testing": "tech_preference",
}
82
+
83
+
84
class PatternCategory(str, Enum):
    """Categories for extracted pattern assertions.

    Members are also ``str`` instances, so each member compares equal to its
    plain string value and ``PatternCategory("tech_preference")`` looks a
    member up by that value.
    """

    # Who the user is (role, title, seniority).
    IDENTITY = "identity"
    # Preferred languages, frameworks, tools.
    TECH_PREFERENCE = "tech_preference"
    # Preferred tone / format of responses.
    COMMUNICATION_STYLE = "communication_style"
    # Recurring action sequences or usage habits.
    WORKFLOW_PATTERN = "workflow_pattern"
    # NOTE(review): not produced by any extractor visible in this module.
    PROJECT_CONTEXT = "project_context"
    # Past choices ("decided", "switched", ...).
    DECISION_HISTORY = "decision_history"
    # Things the user wants to avoid.
    AVOIDANCE = "avoidance"
    # Fallback when no other category applies.
    CUSTOM = "custom"
95
+
96
+
97
@dataclass(frozen=True)
class PatternAssertion:
    """Single extracted pattern assertion with provenance.

    Instances are immutable (``frozen=True``); a modified copy has to be
    built as a new object. Assertions sharing ``(category, key)`` are treated
    as statements about the same thing by the extractor's merge step.
    """

    # Category the assertion was classified into.
    category: PatternCategory
    # Grouping key within the category.
    key: str
    # The asserted content itself.
    value: str
    # Confidence score reported by (or derived from) the source.
    confidence: float
    # Number of supporting observations reported by the source.
    evidence_count: int
    source: str  # "core_memory" | "behavioral" | "cross_project" | "workflow"
    # Identifiers of the source records; a tuple so the field is immutable too.
    source_ids: tuple[str, ...] = ()
    # True when the assertion came from (or was merged with) a cross-project one.
    cross_project_validated: bool = False
    # Timestamp string; the extractors in this module fill it with an
    # ISO-8601 UTC timestamp taken at extraction time.
    created_at: str = ""
110
+
111
+
112
+ # ---------------------------------------------------------------------------
113
+ # PatternExtractor class
114
+ # ---------------------------------------------------------------------------
115
+
116
class PatternExtractor:
    """Extract patterns from 4 SLM sources for soft prompt generation.

    Sources:
        1. Core Memory blocks (user_profile, behavioral_patterns, learned_preferences)
        2. Behavioral pattern store (entity_pref, query_type, etc.)
        3. Cross-project aggregator (transferable preferences)
        4. Workflow miner (action sequences)

    The public entry point is :meth:`extract`; everything else is a private
    pipeline stage or helper.
    """

    def __init__(
        self,
        db: DatabaseManager,
        behavioral_store: BehavioralPatternStore,
        cross_project: CrossProjectAggregator,
        workflow_miner: WorkflowMiner,
        config: ParameterizationConfig,
    ) -> None:
        """Store the collaborators and validate the configuration.

        Args:
            db: Database handle used to read core memory blocks.
            behavioral_store: Source of behavioral patterns.
            cross_project: Source of cross-project preferences.
            workflow_miner: Source of mined workflow sequences.
            config: Thresholds (``min_confidence``, ``min_evidence``,
                ``cross_project_boost``) read by the extractors.

        Raises:
            ValueError: If ``config.min_confidence`` is outside [0.3, 1.0].
        """
        if not (0.3 <= config.min_confidence <= 1.0):
            raise ValueError(
                f"min_confidence must be in [0.3, 1.0], got {config.min_confidence}"
            )
        self._db = db
        self._behavioral_store = behavioral_store
        self._cross_project = cross_project
        self._workflow_miner = workflow_miner
        self._config = config

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def extract(self, profile_id: str) -> list[PatternAssertion]:
        """Master extraction pipeline across all 4 sources.

        Args:
            profile_id: Profile to extract patterns for.

        Returns:
            Deduplicated, contradiction-resolved list sorted by confidence DESC.
        """
        all_patterns: list[PatternAssertion] = []
        all_patterns.extend(self._extract_from_core_memory(profile_id))
        all_patterns.extend(self._extract_from_behavioral(profile_id))
        all_patterns.extend(self._extract_from_cross_project())
        all_patterns.extend(self._extract_from_workflows(profile_id))

        if not all_patterns:
            return []

        # NOTE(review): _deduplicate collapses every (category, key) group to
        # one assertion, so _check_contradictions never receives two entries
        # with the same category and key from this call path. Confirm whether
        # differing values were meant to survive until contradiction checks.
        deduped = self._deduplicate(all_patterns)
        resolved = self._check_contradictions(deduped, profile_id)
        resolved.sort(key=lambda p: p.confidence, reverse=True)
        return resolved

    # ------------------------------------------------------------------
    # Source extractors
    # ------------------------------------------------------------------

    def _extract_from_core_memory(
        self, profile_id: str,
    ) -> list[PatternAssertion]:
        """Extract patterns from core memory blocks.

        Each block's confidence is ``min(evidence / 10, 1.0)`` where evidence
        is the number of entries in its ``source_fact_ids`` JSON list. Blocks
        below ``min_confidence`` are skipped; the remaining blocks are split
        into assertions that all share the block's confidence.
        """
        rows = self._db.execute(
            "SELECT block_id, block_type, content, source_fact_ids "
            "FROM core_memory_blocks "
            "WHERE profile_id = ? AND block_type IN "
            "('user_profile', 'behavioral_patterns', 'learned_preferences')",
            (profile_id,),
        )
        patterns: list[PatternAssertion] = []
        for row in rows:
            raw_ids = row["source_fact_ids"]
            try:
                fact_ids = json.loads(raw_ids) if raw_ids else []
            except (json.JSONDecodeError, TypeError):
                fact_ids = []
            # Only a JSON array is a meaningful id list. A number would crash
            # len(), and a string would count characters as evidence.
            if not isinstance(fact_ids, list):
                fact_ids = []

            evidence = len(fact_ids)
            # Confidence depends only on the block, so compute it once and
            # skip the whole block when it cannot pass the threshold.
            confidence = min(evidence / 10.0, 1.0)
            if confidence < self._config.min_confidence:
                continue

            block_id = row["block_id"]
            for text in self._split_assertions(row["content"]):
                patterns.append(PatternAssertion(
                    category=self._classify_text(text),
                    key=self._extract_key(text),
                    value=text.strip()[:200],
                    confidence=confidence,
                    evidence_count=evidence,
                    source="core_memory",
                    source_ids=(block_id,),
                    created_at=datetime.now(timezone.utc).isoformat(),
                ))
        return patterns

    @staticmethod
    def _behavioral_field(bp: object, name: str, default: object) -> object:
        """Read ``name`` from a behavioral pattern given as dict or object."""
        if isinstance(bp, dict):
            return bp.get(name, default)
        return getattr(bp, name, default)

    def _extract_from_behavioral(
        self, profile_id: str,
    ) -> list[PatternAssertion]:
        """Extract patterns from behavioral pattern store.

        The store may return plain dicts or pattern objects; both shapes are
        read through :meth:`_behavioral_field`. Entries with fewer than
        ``min_evidence`` observations are dropped.
        """
        raw_patterns = self._behavioral_store.get_patterns(
            profile_id, min_confidence=self._config.min_confidence,
        )
        field = self._behavioral_field
        patterns: list[PatternAssertion] = []
        for bp in raw_patterns:
            ev_count = field(bp, "evidence_count", 0)
            if ev_count < self._config.min_evidence:
                continue
            p_type = field(bp, "pattern_type", "")
            p_key = field(bp, "pattern_key", "")
            patterns.append(PatternAssertion(
                category=PatternCategory(
                    _BEHAVIORAL_TYPE_MAP.get(p_type, "custom"),
                ),
                key=p_key,
                # The value falls back to the key when the store has none.
                value=field(bp, "pattern_value", p_key),
                confidence=field(bp, "confidence", 0.0),
                evidence_count=ev_count,
                source="behavioral",
                source_ids=(str(field(bp, "pattern_id", "")),),
                created_at=datetime.now(timezone.utc).isoformat(),
            ))
        return patterns

    def _extract_from_cross_project(self) -> list[PatternAssertion]:
        """Extract patterns from cross-project aggregator.

        Confidence is multiplied by ``cross_project_boost`` and capped at 1.0;
        every resulting assertion is flagged ``cross_project_validated``.
        """
        preferences = self._cross_project.get_preferences(
            min_confidence=self._config.min_confidence,
        )
        patterns: list[PatternAssertion] = []
        for key, data in preferences.items():
            raw_conf = data.get("confidence", 0.0)
            patterns.append(PatternAssertion(
                category=PatternCategory(
                    _CROSS_PROJECT_KEY_MAP.get(key, "custom"),
                ),
                key=key,
                value=data.get("value", ""),
                confidence=min(1.0, raw_conf * self._config.cross_project_boost),
                evidence_count=data.get("evidence_count", 0),
                source="cross_project",
                source_ids=(),
                cross_project_validated=True,
                created_at=datetime.now(timezone.utc).isoformat(),
            ))
        return patterns

    def _extract_from_workflows(
        self, profile_id: str,
    ) -> list[PatternAssertion]:
        """Extract patterns from workflow miner.

        A mined sequence becomes one assertion whose value joins the steps
        with " -> " and whose confidence is the sequence's support. Sequences
        below ``min_evidence`` or ``min_confidence`` are dropped.
        """
        raw = self._workflow_miner.mine(profile_id, min_support=0.3)
        patterns: list[PatternAssertion] = []
        for wp in raw:
            count = wp.get("count", 0)
            if count < self._config.min_evidence:
                continue
            sequence = wp.get("sequence", [])
            value = " -> ".join(sequence)
            confidence = wp.get("support", 0.0)
            if confidence < self._config.min_confidence:
                continue
            patterns.append(PatternAssertion(
                category=PatternCategory.WORKFLOW_PATTERN,
                key=f"workflow_{len(sequence)}gram",
                value=value,
                confidence=confidence,
                evidence_count=count,
                source="workflow",
                created_at=datetime.now(timezone.utc).isoformat(),
            ))
        return patterns

    # ------------------------------------------------------------------
    # Deduplication & contradiction resolution
    # ------------------------------------------------------------------

    def _deduplicate(
        self, patterns: list[PatternAssertion],
    ) -> list[PatternAssertion]:
        """Merge patterns sharing (category, key) using independence assumption.

        Merged confidence: c_merged = 1 - product(1 - c_i)

        The merged assertion takes its value and source from the
        highest-confidence member, sums the evidence counts, concatenates all
        source ids, and keeps the greatest non-empty ``created_at`` string.
        """
        groups: dict[tuple[str, str], list[PatternAssertion]] = {}
        for p in patterns:
            groups.setdefault((p.category.value, p.key), []).append(p)

        result: list[PatternAssertion] = []
        for group in groups.values():
            if len(group) == 1:
                result.append(group[0])
                continue

            # Independence-assumption merge
            c_complement = 1.0
            for p in group:
                c_complement *= (1.0 - p.confidence)

            best = max(group, key=lambda p: p.confidence)
            timestamps = [p.created_at for p in group if p.created_at]

            result.append(PatternAssertion(
                category=best.category,
                key=best.key,
                value=best.value,
                confidence=1.0 - c_complement,
                evidence_count=sum(p.evidence_count for p in group),
                source=best.source,
                source_ids=tuple(
                    sid for p in group for sid in p.source_ids
                ),
                cross_project_validated=any(
                    p.cross_project_validated for p in group
                ),
                created_at=max(timestamps) if timestamps else "",
            ))
        return result

    def _check_contradictions(
        self,
        patterns: list[PatternAssertion],
        profile_id: str,
    ) -> list[PatternAssertion]:
        """Resolve contradictions within same category+key.

        Resolution: temporal ordering (newer wins). For tech_preference with
        close confidence (both >= 0.8, diff <= 0.1), keep both with _alternate.

        Args:
            patterns: Assertions to check.
            profile_id: Accepted for interface compatibility; not read here.

        Returns:
            Surviving assertions, grouped by category and then key in
            first-seen order.
        """
        # Group by category, then by key, keeping first-seen order at both
        # levels so the output order is deterministic.
        cat_groups: dict[str, dict[str, list[PatternAssertion]]] = {}
        for p in patterns:
            by_key = cat_groups.setdefault(p.category.value, {})
            by_key.setdefault(p.key, []).append(p)

        resolved: list[PatternAssertion] = []
        for cat_value, key_groups in cat_groups.items():
            for key, key_patterns in key_groups.items():
                if len(key_patterns) == 1:
                    resolved.append(key_patterns[0])
                else:
                    resolved.extend(
                        self._resolve_key_group(cat_value, key, key_patterns),
                    )
        return resolved

    @staticmethod
    def _resolve_key_group(
        cat_value: str,
        key: str,
        key_patterns: list[PatternAssertion],
    ) -> list[PatternAssertion]:
        """Pairwise-resolve conflicting values among one (category, key) group."""
        from dataclasses import replace

        surviving = list(key_patterns)
        to_remove: set[int] = set()

        # Index loops are deliberate: entries are replaced in place and
        # losers are tracked by position.
        for i in range(len(surviving)):
            for j in range(i + 1, len(surviving)):
                if i in to_remove or j in to_remove:
                    continue
                p_a = surviving[i]
                p_b = surviving[j]

                if p_a.value == p_b.value:
                    continue  # Same value = not a contradiction

                close_tech_conflict = (
                    cat_value == "tech_preference"
                    and p_a.confidence >= 0.8
                    and p_b.confidence >= 0.8
                    and abs(p_a.confidence - p_b.confidence) <= 0.1
                )
                if close_tech_conflict:
                    # Keep both — re-key the lower-confidence one as alternate
                    if p_a.confidence >= p_b.confidence:
                        surviving[j] = replace(p_b, key=f"{p_b.key}_alternate")
                    else:
                        surviving[i] = replace(p_a, key=f"{p_a.key}_alternate")
                    logger.warning(
                        "Close confidence conflict in %s: '%s' vs '%s' "
                        "for key '%s'. Both kept as alternate.",
                        cat_value, p_a.value, p_b.value, key,
                    )
                    continue

                if (
                    p_a.created_at
                    and p_b.created_at
                    and p_a.created_at != p_b.created_at
                ):
                    # Temporal resolution: the older assertion loses.
                    loser = j if p_a.created_at > p_b.created_at else i
                    resolution = "temporal (newer wins)"
                else:
                    # Missing or identical timestamps — keep higher confidence
                    loser = j if p_a.confidence >= p_b.confidence else i
                    resolution = "confidence (higher wins)"
                to_remove.add(loser)

                logger.warning(
                    "Contradiction in %s: '%s' vs '%s' for key '%s'. "
                    "Resolved by %s.",
                    cat_value, p_a.value, p_b.value, key, resolution,
                )

        return [p for idx, p in enumerate(surviving) if idx not in to_remove]

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _split_assertions(content: str) -> list[str]:
        """Split block content into atomic assertions.

        Content is split at newlines followed by a bullet marker (``-``,
        ``*`` or ``.``) and whitespace. A bullet marker left at the start of
        a part is removed only when whitespace (or nothing) follows it, so
        text such as ".NET developer" or "-v flag" is kept intact.

        Returns:
            Non-empty assertion strings; ``[]`` for empty or ``None`` content.
        """
        import re

        if not content:
            return []
        result = []
        for part in re.split(r"\n[-*\.]\s+", content):
            stripped = part.strip()
            if not stripped:
                continue
            # Remove a leading bullet marker (typically on the first element)
            stripped = re.sub(r"^[-*\.](?:\s+|$)", "", stripped)
            if stripped:
                result.append(stripped)
        return result

    @staticmethod
    def _classify_text(text: str) -> PatternCategory:
        """Classify assertion text into a PatternCategory using keywords.

        Tokens are matched both raw and with surrounding punctuation removed,
        so "never," still counts as "never". Keywords containing a space
        (e.g. "not use") are matched as whole-word phrases. Categories are
        tried in a fixed precedence order; the first hit wins.
        """
        raw_tokens = text.lower().split()
        clean_tokens = [tok.strip(".,;:!?()[]{}\"'`") for tok in raw_tokens]
        words = set(raw_tokens) | set(clean_tokens)
        # Space-padded so phrase lookups only match on word boundaries.
        padded = f" {' '.join(clean_tokens)} "

        def matches(keywords: frozenset[str]) -> bool:
            if words & keywords:
                return True
            return any(
                " " in kw and f" {kw} " in padded for kw in keywords
            )

        precedence = (
            (_AVOIDANCE_KEYWORDS, PatternCategory.AVOIDANCE),
            (_DECISION_KEYWORDS, PatternCategory.DECISION_HISTORY),
            (_IDENTITY_KEYWORDS, PatternCategory.IDENTITY),
            (_TECH_KEYWORDS, PatternCategory.TECH_PREFERENCE),
            (_STYLE_KEYWORDS, PatternCategory.COMMUNICATION_STYLE),
        )
        for keywords, category in precedence:
            if matches(keywords):
                return category
        return PatternCategory.CUSTOM

    @staticmethod
    def _extract_key(text: str) -> str:
        """Extract a key from assertion text (first significant phrase).

        The first four whitespace-separated words are lower-cased, stripped
        of surrounding ``,.;:`` and joined with underscores. Returns
        "unknown" when the text has no words.
        """
        words = text.strip().split()[:4]
        key = "_".join(w.lower().strip(",.;:") for w in words if w)
        return key or "unknown"
@@ -0,0 +1,106 @@
1
+ # Copyright (c) 2026 Varun Pratap Bhardwaj / Qualixar
2
+ # Licensed under the MIT License - see LICENSE file
3
+ # Part of SuperLocalMemory V3.3
4
+
5
+ """PII Filter — Stateless PII detection and redaction for soft prompts.
6
+
7
+ Ensures no personally identifiable information leaks into generated prompts.
8
+ Patterns cover: email, phone, SSN, credit card, IP address, API keys.
9
+
10
+ Part of Qualixar | Author: Varun Pratap Bhardwaj
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import re
16
+ import logging
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ # ---------------------------------------------------------------------------
21
+ # PII regex patterns
22
+ # ---------------------------------------------------------------------------
23
+
24
# Order matters: patterns are applied in dict order, so longer/more-specific
# patterns come first to prevent partial matches.
# Credit card (16 digits) must come before phone (7-12 digits).
# SSN (XXX-XX-XXXX) must come before phone to avoid partial match.
PII_PATTERNS: dict[str, re.Pattern[str]] = {
    "api_key": re.compile(
        r"\b(?:sk-|pk-|api[_-]?key[_-]?)[A-Za-z0-9_-]{20,}\b", re.IGNORECASE
    ),
    # The TLD class is letters only: a "|" inside a character class is a
    # literal pipe, not alternation. There is no upper length bound, so long
    # TLDs (e.g. ".technology") are still caught.
    "email": re.compile(
        r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"
    ),
    "credit_card": re.compile(
        r"\b(?:\d{4}[-\s]?){3}\d{4}\b"
    ),
    "ssn": re.compile(
        r"\b\d{3}-\d{2}-\d{4}\b"
    ),
    "ip_address": re.compile(
        r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b"
    ),
    "phone": re.compile(
        r"\b(?:\+?\d{1,3}[-.\s]?)?\(?\d{2,4}\)?[-.\s]?\d{3,4}[-.\s]?\d{3,4}\b"
    ),
}
47
+
48
+
49
+ # ---------------------------------------------------------------------------
50
+ # PIIFilter class
51
+ # ---------------------------------------------------------------------------
52
+
53
class PIIFilter:
    """Detect and redact PII using the module-level ``PII_PATTERNS`` table.

    The filter holds no per-instance state; every method works purely from
    its argument and the shared pattern table.

    Usage:
        pii = PIIFilter()
        clean = pii.filter_text("email user@test.com")
        # -> "email [REDACTED:email]"
    """

    def __init__(self) -> None:
        """Nothing to set up: the filter keeps no state."""

    def filter_text(self, text: str) -> str:
        """Return ``text`` with every PII match replaced by a marker.

        Patterns are applied one after another in table order, each on the
        output of the previous one.

        Args:
            text: Input text potentially containing PII.

        Returns:
            Text in which each match is replaced by ``[REDACTED:<type>]``.
        """
        redacted = text
        for label in PII_PATTERNS:
            marker = f"[REDACTED:{label}]"
            redacted = PII_PATTERNS[label].sub(marker, redacted)
        return redacted

    def has_pii(self, text: str) -> bool:
        """Report whether at least one PII pattern matches ``text``.

        Args:
            text: Input text to scan.

        Returns:
            True if any PII pattern matches, otherwise False.
        """
        return any(
            regex.search(text) is not None for regex in PII_PATTERNS.values()
        )

    def detect_pii_types(self, text: str) -> list[str]:
        """List the PII types whose pattern matches ``text``.

        Args:
            text: Input text to scan.

        Returns:
            PII type names in table order (e.g., ["email", "phone"]).
        """
        return [
            label
            for label, regex in PII_PATTERNS.items()
            if regex.search(text)
        ]