memlint 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
memlint/__init__.py ADDED
@@ -0,0 +1,19 @@
1
"""Public API of the memlint package."""
from memlint.core import StaleDetector
from memlint.models import (
    MemoryFact,
    StalenessResult,
    DetectionReport,
    FactCategory,
    StalenessLevel,
)
# Both classifier entry points are re-exported so synchronous callers do not
# have to reach into ``memlint.classifier`` while async callers do not.
from memlint.classifier import classify_fact, classify_fact_async

__all__ = [
    "StaleDetector",
    "MemoryFact",
    "StalenessResult",
    "DetectionReport",
    "FactCategory",
    "StalenessLevel",
    "classify_fact",
    "classify_fact_async",
]
File without changes
@@ -0,0 +1,8 @@
1
+ from datetime import datetime
2
+ from dateutil import parser as dateutil_parser
3
+
4
+
5
+ def parse_dt(value: str | None) -> datetime | None:
6
+ if value is None:
7
+ return None
8
+ return dateutil_parser.parse(value).replace(tzinfo=None)
@@ -0,0 +1,28 @@
1
+ import json
2
+ from memlint.adapters._utils import parse_dt
3
+ from memlint.models import MemoryFact
4
+
5
+
6
def load_from_json(filepath: str) -> list[MemoryFact]:
    """Load memory facts from a JSON file in memlint's default format.

    The file must contain a JSON array of objects. Each object needs the
    keys ``id``, ``content`` and ``created_at``; ``last_confirmed_at``,
    ``confirmation_count``, ``source`` and ``metadata`` are optional.

    Args:
        filepath: Path of the UTF-8 encoded JSON file.

    Returns:
        One ``MemoryFact`` per array entry, in file order.

    Raises:
        ValueError: If the root is not an array, an entry is not a JSON
            object, or an entry lacks a required key.
        OSError: If the file cannot be opened.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)

    if not isinstance(data, list):
        raise ValueError(f"Expected a JSON array at root, got {type(data).__name__}")

    facts = []
    for i, entry in enumerate(data):
        # Without this check a number entry fails with a bare TypeError and a
        # string entry can pass the key test below by substring match.
        if not isinstance(entry, dict):
            raise ValueError(
                f"Entry {i} must be a JSON object, got {type(entry).__name__}"
            )
        for required in ("id", "content", "created_at"):
            if required not in entry:
                raise ValueError(f"Entry {i} missing required field '{required}'")

        facts.append(MemoryFact(
            id=entry["id"],
            content=entry["content"],
            created_at=parse_dt(entry["created_at"]),
            last_confirmed_at=parse_dt(entry.get("last_confirmed_at")),
            confirmation_count=entry.get("confirmation_count", 0),
            source=entry.get("source", "user"),
            metadata=entry.get("metadata", {}),
        ))
    return facts
@@ -0,0 +1,25 @@
1
+ import json
2
+ from memlint.models import MemoryFact
3
+
4
try:
    from langchain_core.tools import tool
    LANGCHAIN_AVAILABLE = True
except ImportError:
    # langchain-core is only installed with the optional ``llm`` extra; when
    # it is missing the tools below are simply not defined.
    LANGCHAIN_AVAILABLE = False

if LANGCHAIN_AVAILABLE:
    from memlint.core import StaleDetector

    @tool
    def check_memory_staleness(fact_json: str) -> str:
        """Check if a single memory fact is stale before injecting it into context."""
        # fact_json: JSON object with MemoryFact fields; returns the
        # StalenessResult serialised as JSON.
        fact = MemoryFact.model_validate(json.loads(fact_json))
        result = StaleDetector().check_one(fact)
        return result.model_dump_json()

    @tool
    def filter_stale_memories(facts_json: str) -> str:
        """Filter out stale and expired memory facts from a list, returning only safe-to-use facts."""
        # facts_json: JSON array of MemoryFact objects; returns a JSON array
        # holding only the facts StaleDetector.filter_safe keeps.
        facts = [MemoryFact.model_validate(d) for d in json.loads(facts_json)]
        safe = StaleDetector().filter_safe(facts)
        # mode="json" lets pydantic emit JSON-native values (ISO 8601
        # datetimes), matching model_dump_json() in the tool above. The
        # previous ``default=str`` fell back to str(), which writes datetimes
        # as "YYYY-MM-DD HH:MM:SS" and would silently stringify anything else.
        return json.dumps([f.model_dump(mode="json") for f in safe])
@@ -0,0 +1,28 @@
1
+ import json
2
+ from memlint.adapters._utils import parse_dt
3
+ from memlint.models import MemoryFact
4
+
5
+
6
def load_from_mem0(filepath: str) -> list[MemoryFact]:
    """Load memory facts from a Mem0-style JSON export.

    The file must contain a JSON array of objects. Each object needs the
    keys ``id``, ``memory`` and ``created_at``. ``memory`` is mapped to
    ``MemoryFact.content`` and the optional ``updated_at`` to
    ``last_confirmed_at``; ``confirmation_count``, ``source`` and
    ``metadata`` are optional.

    Args:
        filepath: Path of the UTF-8 encoded JSON file.

    Returns:
        One ``MemoryFact`` per array entry, in file order.

    Raises:
        ValueError: If the root is not an array, an entry is not a JSON
            object, or an entry lacks a required key.
        OSError: If the file cannot be opened.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        data = json.load(f)

    if not isinstance(data, list):
        raise ValueError(f"Expected a JSON array at root, got {type(data).__name__}")

    facts = []
    for i, entry in enumerate(data):
        # Without this check a number entry fails with a bare TypeError and a
        # string entry can pass the key test below by substring match.
        if not isinstance(entry, dict):
            raise ValueError(
                f"Entry {i} must be a JSON object, got {type(entry).__name__}"
            )
        for required in ("id", "memory", "created_at"):
            if required not in entry:
                raise ValueError(f"Entry {i} missing required field '{required}'")

        facts.append(MemoryFact(
            id=entry["id"],
            content=entry["memory"],
            created_at=parse_dt(entry["created_at"]),
            last_confirmed_at=parse_dt(entry.get("updated_at")),
            confirmation_count=entry.get("confirmation_count", 0),
            source=entry.get("source", "user"),
            metadata=entry.get("metadata", {}),
        ))
    return facts
memlint/classifier.py ADDED
@@ -0,0 +1,155 @@
1
+ from memlint.models import FactCategory
2
+
3
# Lower-case keywords and phrases associated with each fact category.
# The rule-based classifier counts how many of a category's entries occur in
# the lower-cased fact text, and the scorer uses the same lists to decide
# whether two facts talk about the same thing.
# FactCategory.UNKNOWN has no entry here.
# NOTE: "manager" appears under both EMPLOYMENT and RELATIONSHIP.
CATEGORY_KEYWORDS: dict[FactCategory, list[str]] = {
    FactCategory.LOCATION: [
        "lives", "located", "based in", "address", "city", "country",
        "office", "moved to", "residing", "hometown", "location",
    ],
    FactCategory.EMPLOYMENT: [
        "works at", "employed", "job", "role", "position", "company",
        "organization", "joined", "hired", "manager", "team", "department",
        "title", "consultant", "engineer", "analyst", "intern",
    ],
    FactCategory.PROJECT: [
        "project", "building", "repo", "codebase", "app", "tool",
        "working on", "developing", "implementing", "stack", "framework",
        "library", "version", "api", "endpoint", "deployed", "launched",
    ],
    FactCategory.PREFERENCE: [
        "prefers", "likes", "favorite", "enjoys", "uses", "dislikes",
        "wants", "chooses", "opts for", "theme", "mode", "setting",
        "style", "approach",
    ],
    FactCategory.RELATIONSHIP: [
        "friend", "colleague", "manager", "reports to", "partner",
        "teammate", "mentor", "client", "collaborator", "family",
    ],
    FactCategory.IDENTITY: [
        "name is", "called", "age", "born", "nationality", "speaks",
        "gender", "education", "degree", "graduated", "alumni",
    ],
    FactCategory.EPISODIC: [
        "today", "yesterday", "last week", "this morning", "just",
        "recently", "earlier", "said that", "mentioned", "asked about",
        "discussed", "fixed", "resolved", "debugging",
    ],
    FactCategory.SYSTEM_FACT: [
        "python version", "node version", "npm", "pip", "docker",
        "os", "operating system", "machine", "cpu", "ram", "disk",
        "installed", "configured", "environment", "env", ".env",
    ],
}
42
+
43
# Prompt template sent to the LLM classifier. It is filled in with
# ``str.format(fact=...)``; the category names listed here are the string
# values of FactCategory, which is what the reply is parsed into.
CLASSIFY_PROMPT = """You are classifying a memory fact into exactly one category.

Categories:
- location: where someone lives, works, or is based
- employment: job, company, role, title, team
- project: software projects, tools being built, tech stack
- preference: likes, dislikes, habits, settings
- relationship: people the user knows or works with
- identity: name, age, education, nationality, languages spoken
- episodic: time-specific events, recent actions, things that happened
- system_fact: software versions, OS, environment config
- unknown: does not fit any category

Memory fact: "{fact}"

Respond with ONLY the category name, nothing else. Example: "employment"
"""
60
+
61
+
62
+ def _rule_based_classify(content: str) -> FactCategory:
63
+ lower = content.lower()
64
+ scores: dict[FactCategory, int] = {}
65
+ for category, keywords in CATEGORY_KEYWORDS.items():
66
+ hits = sum(1 for kw in keywords if kw in lower)
67
+ if hits > 0:
68
+ scores[category] = hits
69
+ if not scores:
70
+ return FactCategory.UNKNOWN
71
+ return max(scores, key=lambda c: scores[c])
72
+
73
+
74
+ def _llm_classify(content: str, llm_provider: str, model: str, llm=None) -> FactCategory:
75
+ if llm is None:
76
+ import os
77
+ from langchain_openai import ChatOpenAI
78
+ api_key = (
79
+ os.getenv("OPENAI_API_KEY") if llm_provider == "openai"
80
+ else os.getenv("ANTHROPIC_API_KEY")
81
+ )
82
+ if not api_key:
83
+ raise ValueError(f"No API key found for provider {llm_provider!r}")
84
+ llm = ChatOpenAI(model=model, temperature=0, api_key=api_key)
85
+
86
+ prompt = CLASSIFY_PROMPT.format(fact=content)
87
+ try:
88
+ from langchain_core.messages import HumanMessage
89
+ messages = [HumanMessage(content=prompt)]
90
+ except ImportError:
91
+ messages = [{"role": "user", "content": prompt}]
92
+
93
+ response = llm.invoke(messages)
94
+ raw = response.content.strip().lower()
95
+ try:
96
+ return FactCategory(raw)
97
+ except ValueError:
98
+ raise ValueError(f"LLM returned unrecognized category: {raw!r}")
99
+
100
+
101
+ def classify_fact(
102
+ content: str,
103
+ use_llm: bool = False,
104
+ llm_provider: str = "openai",
105
+ model: str = "gpt-4o-mini",
106
+ llm=None,
107
+ ) -> FactCategory:
108
+ if use_llm:
109
+ try:
110
+ return _llm_classify(content, llm_provider, model, llm=llm)
111
+ except Exception:
112
+ pass
113
+ return _rule_based_classify(content)
114
+
115
+
116
+ async def _async_llm_classify(content: str, llm_provider: str, model: str, llm=None) -> FactCategory:
117
+ if llm is None:
118
+ import os
119
+ from langchain_openai import ChatOpenAI
120
+ api_key = (
121
+ os.getenv("OPENAI_API_KEY") if llm_provider == "openai"
122
+ else os.getenv("ANTHROPIC_API_KEY")
123
+ )
124
+ if not api_key:
125
+ raise ValueError(f"No API key found for provider {llm_provider!r}")
126
+ llm = ChatOpenAI(model=model, temperature=0, api_key=api_key)
127
+
128
+ prompt = CLASSIFY_PROMPT.format(fact=content)
129
+ try:
130
+ from langchain_core.messages import HumanMessage
131
+ messages = [HumanMessage(content=prompt)]
132
+ except ImportError:
133
+ messages = [{"role": "user", "content": prompt}]
134
+
135
+ response = await llm.ainvoke(messages)
136
+ raw = response.content.strip().lower()
137
+ try:
138
+ return FactCategory(raw)
139
+ except ValueError:
140
+ raise ValueError(f"LLM returned unrecognized category: {raw!r}")
141
+
142
+
143
+ async def classify_fact_async(
144
+ content: str,
145
+ use_llm: bool = False,
146
+ llm_provider: str = "openai",
147
+ model: str = "gpt-4o-mini",
148
+ llm=None,
149
+ ) -> FactCategory:
150
+ if use_llm:
151
+ try:
152
+ return await _async_llm_classify(content, llm_provider, model, llm=llm)
153
+ except Exception:
154
+ pass
155
+ return _rule_based_classify(content)
memlint/cli.py ADDED
@@ -0,0 +1,79 @@
1
+ import sys
2
+
3
+ import click
4
+ from rich.console import Console
5
+ from rich.table import Table
6
+ from rich import box
7
+ from rich.text import Text
8
+
9
+ from memlint.core import StaleDetector
10
+ from memlint.adapters.json_adapter import load_from_json
11
+ from memlint.adapters.mem0_adapter import load_from_mem0
12
+ from memlint.models import StalenessLevel
13
+
14
# Shared rich console used for the table and the summary line.
console = Console()

# rich style applied to the "Level" cell for each staleness level.
_LEVEL_STYLES = {
    StalenessLevel.FRESH: "green",
    StalenessLevel.AGING: "yellow",
    StalenessLevel.STALE: "red",
    StalenessLevel.EXPIRED: "bold red",
}
22
+
23
+
24
@click.group()
def main():
    # Root click command group; subcommands attach via @main.command().
    # The wheel's entry_points.txt maps the `memlint` script to this function.
    pass
27
+
28
+
29
@main.command()
@click.argument("file", type=click.Path(exists=True))
@click.option("--only-flagged", is_flag=True, help="Show only STALE and EXPIRED facts.")
@click.option("--json", "output_json", is_flag=True, help="Output raw JSON to stdout.")
@click.option("--format", "fmt", default="default",
              type=click.Choice(["default", "mem0"]), help="Input format.")
def check(file: str, only_flagged: bool, output_json: bool, fmt: str):
    """Check memory facts for staleness."""
    # file: path to the memory export; fmt selects the adapter used to read it.
    loader = load_from_mem0 if fmt == "mem0" else load_from_json
    try:
        facts = loader(file)
    except (OSError, ValueError) as exc:
        # Malformed input is a user error, not a crash: report it as a click
        # error instead of a traceback. json.JSONDecodeError and the
        # adapters' own checks are ValueErrors.
        raise click.ClickException(f"Could not load {file}: {exc}") from exc

    if not facts:
        click.echo("No facts found in file.")
        sys.exit(0)

    report = StaleDetector().check(facts)

    if output_json:
        click.echo(report.model_dump_json(indent=2))
        return

    rows = report.flagged if only_flagged else report.results

    table = Table(box=box.ROUNDED, show_header=True, header_style="bold cyan")
    table.add_column("ID", style="dim", max_width=12)
    table.add_column("Content", max_width=50)
    table.add_column("Category", max_width=12)
    table.add_column("Age", justify="right", max_width=6)
    table.add_column("Score", justify="right", max_width=6)
    table.add_column("Level", max_width=8)
    table.add_column("Action", max_width=8)

    for r in rows:
        # Long contents are cut to 47 characters plus an ellipsis.
        content = r.content[:47] + "..." if len(r.content) > 50 else r.content
        table.add_row(
            # Fact IDs and contents are arbitrary user text. Plain strings are
            # parsed as rich console markup, so "[bold]" would be swallowed
            # and "[/x]" would raise; Text renders them literally.
            Text(r.fact_id),
            Text(content),
            r.category.value,
            str(r.age_days),
            f"{r.staleness_score:.2f}",
            Text(r.staleness_level.value.upper(), style=_LEVEL_STYLES[r.staleness_level]),
            r.recommendation,
        )

    console.print(table)
    console.print(
        f"\n[dim]Checked {report.total_facts} facts: "
        f"[green]{report.fresh_count} fresh[/], "
        f"[yellow]{report.aging_count} aging[/], "
        f"[red]{report.stale_count} stale[/], "
        f"[bold red]{report.expired_count} expired[/][/dim]"
    )
memlint/core.py ADDED
@@ -0,0 +1,165 @@
1
+ from datetime import datetime
2
+ from memlint.models import (
3
+ FactCategory, MemoryFact, StalenessResult, DetectionReport, StalenessLevel,
4
+ )
5
+ from memlint.classifier import classify_fact, classify_fact_async
6
+ from memlint.scorer import (
7
+ compute_staleness_score, determine_level, build_reason, build_recommendation,
8
+ )
9
+
10
+
11
class StaleDetector:
    """Score memory facts for staleness and summarise the results.

    Facts are classified (unless they already carry a category), scored by
    ``compute_staleness_score`` and wrapped in ``StalenessResult`` objects.
    Every method has a synchronous and an ``*_async`` variant; the two differ
    only in how classification is awaited.

    Timestamps are treated as UTC. Timezone-aware datetimes on facts or in
    ``now`` are converted to naive UTC before any arithmetic, so aware and
    naive values can be mixed without a ``TypeError``.
    """

    def __init__(
        self,
        use_llm: bool = False,
        llm_provider: str = "openai",
        model: str = "gpt-4o-mini",
        llm=None,
    ):
        """Store the classification settings forwarded to the classifier.

        Args:
            use_llm: Ask an LLM to classify facts that have no category.
            llm_provider: Provider name forwarded to the classifier.
            model: Model name forwarded to the classifier.
            llm: Optional pre-built chat model forwarded to the classifier.
        """
        self._use_llm = use_llm
        self._llm_provider = llm_provider
        self._model = model
        self._llm = llm

    # ------------------------------------------------------------------
    # Time helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _to_naive_utc(value: datetime | None) -> datetime | None:
        """Return *value* as naive UTC; naive values and ``None`` pass through."""
        if value is None or value.tzinfo is None:
            return value
        from datetime import timezone

        return value.astimezone(timezone.utc).replace(tzinfo=None)

    @staticmethod
    def _current_time() -> datetime:
        """Current time as naive UTC (replacement for deprecated ``utcnow``)."""
        from datetime import timezone

        return datetime.now(timezone.utc).replace(tzinfo=None)

    @classmethod
    def _normalize_fact(cls, fact: MemoryFact) -> MemoryFact:
        """Return *fact* with naive-UTC timestamps, copying only when needed."""
        created = cls._to_naive_utc(fact.created_at)
        confirmed = cls._to_naive_utc(fact.last_confirmed_at)
        if created is fact.created_at and confirmed is fact.last_confirmed_at:
            return fact
        return fact.model_copy(
            update={"created_at": created, "last_confirmed_at": confirmed}
        )

    # ------------------------------------------------------------------
    # Shared building blocks for the sync and async paths
    # ------------------------------------------------------------------

    def _score(
        self,
        fact: MemoryFact,
        category: FactCategory,
        all_facts: list[MemoryFact],
        now: datetime,
    ) -> StalenessResult:
        """Build the result for one fact; all timestamps must be naive UTC."""
        score, has_contradiction, contradicted_by = compute_staleness_score(
            fact, category, all_facts, now
        )
        level = determine_level(score)
        # Age counts from the last confirmation when there is one.
        reference_time = fact.last_confirmed_at or fact.created_at
        age_days = max((now - reference_time).days, 0)
        return StalenessResult(
            fact_id=fact.id,
            content=fact.content,
            category=category,
            staleness_score=round(score, 4),
            staleness_level=level,
            age_days=age_days,
            reason=build_reason(age_days, category, fact.confirmation_count, has_contradiction, score),
            recommendation=build_recommendation(level),
            has_contradiction=has_contradiction,
            contradicted_by=contradicted_by,
        )

    @staticmethod
    def _build_report(checked_at: datetime, results: list[StalenessResult]) -> DetectionReport:
        """Tally results per staleness level into a ``DetectionReport``."""
        counts: dict[StalenessLevel, int] = {level: 0 for level in StalenessLevel}
        for r in results:
            counts[r.staleness_level] += 1
        return DetectionReport(
            checked_at=checked_at,
            total_facts=len(results),
            fresh_count=counts[StalenessLevel.FRESH],
            aging_count=counts[StalenessLevel.AGING],
            stale_count=counts[StalenessLevel.STALE],
            expired_count=counts[StalenessLevel.EXPIRED],
            results=results,
        )

    @staticmethod
    def _select_safe(facts: list[MemoryFact], report: DetectionReport) -> list[MemoryFact]:
        """Return the facts whose own result is in ``report.safe``.

        Facts and results are paired by position, not by ID, so an unsafe
        fact is not kept merely because another fact shares its ID.
        """
        safe_results = {id(r) for r in report.safe}
        return [f for f, r in zip(facts, report.results) if id(r) in safe_results]

    # ------------------------------------------------------------------
    # Synchronous API
    # ------------------------------------------------------------------

    def _classify(self, fact: MemoryFact) -> FactCategory:
        """Return the fact's own category, or classify its content."""
        if fact.category is not None:
            return fact.category
        return classify_fact(
            fact.content,
            use_llm=self._use_llm,
            llm_provider=self._llm_provider,
            model=self._model,
            llm=self._llm,
        )

    def check_one(
        self,
        fact: MemoryFact,
        context_facts: list[MemoryFact] | None = None,
        now: datetime | None = None,
    ) -> StalenessResult:
        """Score a single fact.

        Args:
            fact: The fact to score.
            context_facts: Facts searched for contradictions; defaults to
                just *fact*.
            now: Reference time; defaults to the current UTC time.
        """
        if now is None:
            now = self._current_time()
        target = self._normalize_fact(fact)
        if context_facts is None:
            all_facts = [target]
        else:
            all_facts = [self._normalize_fact(f) for f in context_facts]
        return self._score(target, self._classify(target), all_facts, self._to_naive_utc(now))

    def check(
        self,
        facts: list[MemoryFact],
        now: datetime | None = None,
    ) -> DetectionReport:
        """Score every fact against the whole list and return a report.

        ``report.results`` is in the same order as *facts*, and
        ``report.checked_at`` is *now* as given (or the default UTC time).
        """
        if now is None:
            now = self._current_time()
        clock = self._to_naive_utc(now)
        # Normalise once up front instead of once per scored fact.
        normalized = [self._normalize_fact(f) for f in facts]
        results = [self._score(f, self._classify(f), normalized, clock) for f in normalized]
        return self._build_report(now, results)

    def filter_safe(
        self,
        facts: list[MemoryFact],
        now: datetime | None = None,
    ) -> list[MemoryFact]:
        """Return the facts rated FRESH or AGING, in their original order."""
        return self._select_safe(facts, self.check(facts, now))

    # ------------------------------------------------------------------
    # Asynchronous API
    # ------------------------------------------------------------------

    async def _classify_async(self, fact: MemoryFact) -> FactCategory:
        """Async twin of ``_classify``."""
        if fact.category is not None:
            return fact.category
        return await classify_fact_async(
            fact.content,
            use_llm=self._use_llm,
            llm_provider=self._llm_provider,
            model=self._model,
            llm=self._llm,
        )

    async def check_one_async(
        self,
        fact: MemoryFact,
        context_facts: list[MemoryFact] | None = None,
        now: datetime | None = None,
    ) -> StalenessResult:
        """Async twin of ``check_one``."""
        if now is None:
            now = self._current_time()
        target = self._normalize_fact(fact)
        if context_facts is None:
            all_facts = [target]
        else:
            all_facts = [self._normalize_fact(f) for f in context_facts]
        category = await self._classify_async(target)
        return self._score(target, category, all_facts, self._to_naive_utc(now))

    async def check_async(
        self,
        facts: list[MemoryFact],
        now: datetime | None = None,
    ) -> DetectionReport:
        """Async twin of ``check``; facts are classified concurrently."""
        import asyncio

        if now is None:
            now = self._current_time()
        clock = self._to_naive_utc(now)
        normalized = [self._normalize_fact(f) for f in facts]

        async def score_one(f: MemoryFact) -> StalenessResult:
            return self._score(f, await self._classify_async(f), normalized, clock)

        # gather() returns results in argument order, matching *facts*.
        results = await asyncio.gather(*[score_one(f) for f in normalized])
        return self._build_report(now, list(results))

    async def filter_safe_async(
        self,
        facts: list[MemoryFact],
        now: datetime | None = None,
    ) -> list[MemoryFact]:
        """Async twin of ``filter_safe``."""
        return self._select_safe(facts, await self.check_async(facts, now))
memlint/models.py ADDED
@@ -0,0 +1,73 @@
1
+ from pydantic import BaseModel, Field, ConfigDict
2
+ from datetime import datetime
3
+ from typing import Optional
4
+ from enum import Enum
5
+
6
+
7
# Categories a memory fact can belong to. The string values are the names
# the LLM classification prompt asks for, and each category has its own
# decay rate in the scorer.
class FactCategory(str, Enum):
    LOCATION = "location"
    EMPLOYMENT = "employment"
    PROJECT = "project"
    PREFERENCE = "preference"
    RELATIONSHIP = "relationship"
    IDENTITY = "identity"
    EPISODIC = "episodic"
    SYSTEM_FACT = "system_fact"
    UNKNOWN = "unknown"  # returned when no keyword matches
17
+
18
+
19
# Buckets a staleness score falls into, from least to most stale.
# DetectionReport treats FRESH/AGING as "safe" and STALE/EXPIRED as "flagged".
class StalenessLevel(str, Enum):
    FRESH = "fresh"
    AGING = "aging"
    STALE = "stale"
    EXPIRED = "expired"
24
+
25
+
26
# One fact held in an agent's memory store; the input to staleness scoring.
class MemoryFact(BaseModel):
    # use_enum_values=False (pydantic's default): enum fields hold enum
    # members rather than their raw string values.
    model_config = ConfigDict(use_enum_values=False)

    id: str
    content: str  # the fact text that gets classified and keyword-matched
    created_at: datetime
    # When set, age is measured from here instead of from created_at.
    last_confirmed_at: Optional[datetime] = None
    # Each confirmation lowers the staleness score (capped in the scorer).
    confirmation_count: int = 0
    # When set, classification is skipped and this category is used as-is.
    category: Optional[FactCategory] = None
    # The scorer raises the score of facts whose source is "agent_inferred".
    source: str = "user"
    metadata: dict = Field(default_factory=dict)
37
+
38
+
39
# Outcome of scoring a single MemoryFact.
class StalenessResult(BaseModel):
    model_config = ConfigDict(use_enum_values=False)

    fact_id: str
    content: str
    category: FactCategory  # the fact's own category or the classifier's pick
    staleness_score: float  # clamped to 0.0-1.0 by the scorer, rounded to 4 places
    staleness_level: StalenessLevel
    age_days: int  # whole days since last confirmation (or creation), never negative
    reason: str  # human-readable explanation of the score
    recommendation: str  # "use", "verify", "flag" or "discard"
    has_contradiction: bool = False
    contradicted_by: Optional[str] = None  # id of the conflicting fact, if any
52
+
53
+
54
# Aggregate outcome of checking a list of facts: per-level counts plus the
# individual results.
class DetectionReport(BaseModel):
    model_config = ConfigDict(use_enum_values=False)

    checked_at: datetime
    total_facts: int
    fresh_count: int
    aging_count: int
    stale_count: int
    expired_count: int
    results: list[StalenessResult]

    @property
    def flagged(self) -> list[StalenessResult]:
        """Results rated STALE or EXPIRED, in original order."""
        unsafe_levels = (StalenessLevel.STALE, StalenessLevel.EXPIRED)
        selected = []
        for item in self.results:
            if item.staleness_level in unsafe_levels:
                selected.append(item)
        return selected

    @property
    def safe(self) -> list[StalenessResult]:
        """Results rated FRESH or AGING, in original order."""
        safe_levels = (StalenessLevel.FRESH, StalenessLevel.AGING)
        selected = []
        for item in self.results:
            if item.staleness_level in safe_levels:
                selected.append(item)
        return selected
memlint/scorer.py ADDED
@@ -0,0 +1,103 @@
1
+ from datetime import datetime, timedelta
2
+ from memlint.models import FactCategory, MemoryFact, StalenessLevel
3
+ from memlint.classifier import CATEGORY_KEYWORDS
4
+
5
# Staleness added per day of age for each category. The score is
# age_days * rate before adjustments, so 1 / rate is the number of days
# after which age alone brings the score to 1.0.
DECAY_RATES: dict[FactCategory, float] = {
    FactCategory.LOCATION: 0.0020,
    FactCategory.EMPLOYMENT: 0.0025,
    FactCategory.PROJECT: 0.0060,
    FactCategory.PREFERENCE: 0.0030,
    FactCategory.RELATIONSHIP: 0.0025,
    FactCategory.IDENTITY: 0.0005,
    FactCategory.EPISODIC: 0.0500,
    FactCategory.SYSTEM_FACT: 0.0100,
    FactCategory.UNKNOWN: 0.0030,
}
16
+
17
+
18
def determine_level(score: float) -> StalenessLevel:
    """Map a staleness score to its level.

    Scores below 0.30 are FRESH, below 0.60 AGING, below 0.80 STALE and
    everything else is EXPIRED.
    """
    # (exclusive upper bound, level) pairs in ascending order.
    bands = (
        (0.30, StalenessLevel.FRESH),
        (0.60, StalenessLevel.AGING),
        (0.80, StalenessLevel.STALE),
    )
    for upper_bound, level in bands:
        if score < upper_bound:
            return level
    return StalenessLevel.EXPIRED
26
+
27
+
28
def build_reason(
    age_days: int,
    category: FactCategory,
    confirmation_count: int,
    has_contradiction: bool,
    score: float,
) -> str:
    """Compose the human-readable explanation attached to a result.

    The text always states the age and the category's decay horizon
    (``int(1 / rate)`` days), then mentions confirmations and a conflicting
    fact when present, joined with "; ". ``score`` is accepted but not used.
    """
    horizon = int(1 / DECAY_RATES[category])
    reason = f"{age_days} days old ({category.value} facts decay in ~{horizon} days)"
    if confirmation_count > 0:
        reason = "; ".join((reason, f"confirmed {confirmation_count} time(s)"))
    if has_contradiction:
        reason = "; ".join((reason, "a newer conflicting fact exists"))
    return reason
42
+
43
+
44
def build_recommendation(level: StalenessLevel) -> str:
    """Return the action keyword for a staleness level.

    Raises:
        KeyError: If *level* is not a ``StalenessLevel`` member.
    """
    levels = (
        StalenessLevel.FRESH,
        StalenessLevel.AGING,
        StalenessLevel.STALE,
        StalenessLevel.EXPIRED,
    )
    actions = ("use", "verify", "flag", "discard")
    action_for = dict(zip(levels, actions))
    return action_for[level]
51
+
52
+
53
+ def _are_contradictory(
54
+ fact_a: MemoryFact,
55
+ fact_b: MemoryFact,
56
+ category: FactCategory,
57
+ ) -> bool:
58
+ if fact_a.category != fact_b.category and not (
59
+ fact_a.category == category or fact_b.category == category
60
+ ):
61
+ return False
62
+
63
+ time_diff = abs(fact_a.created_at - fact_b.created_at)
64
+ if time_diff < timedelta(days=1):
65
+ return False
66
+
67
+ keywords = CATEGORY_KEYWORDS.get(category, [])
68
+ a_lower = fact_a.content.lower()
69
+ b_lower = fact_b.content.lower()
70
+ return any(kw in a_lower and kw in b_lower for kw in keywords)
71
+
72
+
73
def compute_staleness_score(
    fact: MemoryFact,
    category: FactCategory,
    all_facts: list[MemoryFact],
    now: datetime,
) -> tuple[float, bool, str | None]:
    """Returns (score, has_contradiction, contradicted_by_id).

    The score starts at ``age_days * DECAY_RATES[category]``, where age is
    measured from the last confirmation (or creation). It is then adjusted:

    * minus 0.08 per confirmation, capped at 0.40;
    * times 1.3 for facts whose source is ``"agent_inferred"``;
    * plus 0.40 when a *newer* fact in *all_facts* conflicts with this one.

    The result is clamped to the range 0.0-1.0.

    Args:
        fact: The fact being scored.
        category: Category used for the decay rate and conflict keywords.
        all_facts: Facts searched for conflicts; *fact* itself is skipped by ID.
        now: Reference time; must be comparable with the fact's timestamps.
    """
    reference_time = fact.last_confirmed_at or fact.created_at
    age_days = max((now - reference_time).days, 0)

    score = age_days * DECAY_RATES[category]
    score -= min(fact.confirmation_count * 0.08, 0.40)

    # The multiplier is a penalty, so apply it only to a positive score:
    # scaling a negative score would make inferred facts look *fresher*.
    if fact.source == "agent_inferred" and score > 0:
        score *= 1.3

    has_contradiction = False
    contradicted_by: str | None = None
    for other in all_facts:
        if other.id == fact.id:
            continue
        # Only a newer fact can supersede this one. Without this check the
        # most recent fact is penalised for conflicting with older ones,
        # which contradicts the reason text "a newer conflicting fact exists".
        if other.created_at <= fact.created_at:
            continue
        if _are_contradictory(fact, other, category):
            has_contradiction = True
            contradicted_by = other.id
            score += 0.40
            break

    return min(max(score, 0.0), 1.0), has_contradiction, contradicted_by
@@ -0,0 +1,152 @@
1
+ Metadata-Version: 2.4
2
+ Name: memlint
3
+ Version: 0.1.0
4
+ Summary: Detect stale facts in LLM agent memory stores
5
+ Project-URL: Homepage, https://github.com/Bhavye2003Developer/memlint
6
+ Project-URL: Issues, https://github.com/Bhavye2003Developer/memlint/issues
7
+ Author-email: Bhavye <bhavyedevelopment2003@gmail.com>
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Requires-Python: >=3.11
11
+ Requires-Dist: click>=8.0
12
+ Requires-Dist: pydantic>=2.0
13
+ Requires-Dist: python-dateutil>=2.8
14
+ Requires-Dist: python-dotenv>=1.0
15
+ Requires-Dist: rich>=13.0
16
+ Provides-Extra: dev
17
+ Requires-Dist: pytest-cov; extra == 'dev'
18
+ Requires-Dist: pytest>=8.0; extra == 'dev'
19
+ Provides-Extra: llm
20
+ Requires-Dist: langchain-core>=0.2; extra == 'llm'
21
+ Requires-Dist: langchain-openai>=0.1; extra == 'llm'
22
+ Description-Content-Type: text/markdown
23
+
24
+ # memlint
25
+
26
+ **Lint your LLM agent's memory before it lies to you.**
27
+
28
+ `memlint` detects stale facts in an LLM agent's memory store before they are injected into the context window. It scores each fact by age, confirmation history, and contradiction signals, then recommends whether to use, verify, flag, or discard each one.
29
+
30
+ ## The problem
31
+
32
+ LLM agents that work across sessions store facts about the user and world - where they live, where they work, what they're building. These facts go stale when the real world changes but the memory doesn't. A fact like `"User works at xyz"` stays in memory after a job change. The agent retrieves it, injects it, and answers confidently with wrong information.
33
+
34
+ `memlint` catches this before it happens.
35
+
36
+ ## Installation
37
+
38
+ ```bash
39
+ pip install memlint
40
+ ```
41
+
42
+ With optional LLM-assisted classification:
43
+
44
+ ```bash
45
+ pip install memlint[llm]
46
+ ```
47
+
48
+ ## Quick Start
49
+
50
+ ```python
51
+ from memlint import StaleDetector
52
+ from memlint.adapters.json_adapter import load_from_json
53
+
54
+ facts = load_from_json("sample_memories.json")
55
+ detector = StaleDetector()
56
+ report = detector.check(facts)
57
+
58
+ print(f"Total: {report.total_facts} | Flagged: {len(report.flagged)}")
59
+ for result in report.flagged:
60
+ print(f" [{result.staleness_level.value.upper()}] {result.content}")
61
+ print(f" Reason: {result.reason}")
62
+ print(f" Action: {result.recommendation}")
63
+ ```
64
+
65
+ ## CLI Usage
66
+
67
+ Check all facts:
68
+ ```bash
69
+ memlint check memories.json
70
+ ```
71
+
72
+ Show only stale and expired:
73
+ ```bash
74
+ memlint check memories.json --only-flagged
75
+ ```
76
+
77
+ Output raw JSON:
78
+ ```bash
79
+ memlint check memories.json --json
80
+ ```
81
+
82
+ Parse Mem0 format:
83
+ ```bash
84
+ memlint check memories.json --format mem0
85
+ ```
86
+
87
+ Sample output:
88
+ ```
89
+ ╭──────────┬────────────────────────────────────────┬────────────┬─────┬───────┬─────────┬─────────╮
90
+ │ ID │ Content │ Category │ Age │ Score │ Level │ Action │
91
+ ├──────────┼────────────────────────────────────────┼────────────┼─────┼───────┼─────────┼─────────┤
92
+ │ mem_004 │ User works at XYZ as a senior cons... │ employment │ 279 │ 0.70 │ STALE │ flag │
93
+ │ mem_006 │ User debugged a LangGraph memory is... │ episodic │ 29 │ 1.00 │ EXPIRED │ discard │
94
+ ╰──────────┴────────────────────────────────────────┴────────────┴─────┴───────┴─────────┴─────────╯
95
+
96
+ Checked 8 facts: 1 fresh, 2 aging, 3 stale, 2 expired
97
+ ```
98
+
99
+ ## Staleness Score Explained
100
+
101
+ Each fact is assigned a category with a natural lifespan:
102
+
103
+ | Category | Examples | Typical Valid Window |
104
+ |--------------|---------------------------------------|----------------------|
105
+ | `location` | "lives in Delhi", "office in Sector 5"| 6–24 months |
106
+ | `employment` | "works at xyz", "role is consultant" | 6–18 months |
107
+ | `project` | "building pract-agents", "using Pinecone" | 1–6 months |
108
+ | `preference` | "prefers Python", "uses dark mode" | 3–12 months |
109
+ | `relationship`| "manager is X", "team has 5 people" | 3–12 months |
110
+ | `identity` | "name is X", "speaks Hindi" | Very long/permanent |
111
+ | `episodic` | "debugged a LangGraph issue today" | Days to weeks |
112
+ | `system_fact`| "Python version is 3.10", "npm v9" | 1–3 months |
113
+
114
+ Score thresholds:
115
+ - `0.0 – 0.29` → **FRESH** (safe to use)
116
+ - `0.30 – 0.59` → **AGING** (use with caution)
117
+ - `0.60 – 0.79` → **STALE** (flag before injecting)
118
+ - `0.80 – 1.0` → **EXPIRED** (do not inject without reconfirmation)
119
+
120
+ ## Adapters
121
+
122
+ **JSON** (the default format):
123
+ ```python
124
+ from memlint.adapters.json_adapter import load_from_json
125
+ facts = load_from_json("memories.json")
126
+ ```
127
+
128
+ **Mem0**: maps `memory` to `content`, `updated_at` to `last_confirmed_at`:
129
+ ```python
130
+ from memlint.adapters.mem0_adapter import load_from_mem0
131
+ facts = load_from_mem0("mem0_export.json")
132
+ ```
133
+
134
+ **LangChain**: provides two tools, `check_memory_staleness` and `filter_stale_memories` (see below).
135
+
136
+ ## LangChain / LangGraph Integration
137
+
138
+ ```python
139
+ from memlint.adapters.langchain_tool import (
140
+ check_memory_staleness,
141
+ filter_stale_memories,
142
+ )
143
+
144
+ # In a LangGraph node: filter before injecting memories into the LLM
145
+ safe_facts_json = filter_stale_memories.invoke({"facts_json": memories_json_string})
146
+ ```
147
+
148
+ Requires `pip install memlint[llm]`.
149
+
150
+ ## Contributing
151
+
152
+ Open an issue or pull request at the project repository.
@@ -0,0 +1,16 @@
1
+ memlint/__init__.py,sha256=mLVWLRdehPWe0imt5Tk97LwuXUOd7PhdgL69vxJcSGQ,386
2
+ memlint/classifier.py,sha256=gM1Ib7EjVFKcNvu4THZ2QeTyW9O96zWAqOf0xCQGLxw,5533
3
+ memlint/cli.py,sha256=TXbH1YO0lih4VFXpJhRRjbRlh5hj0wPZrVX9uxLez8E,2668
4
+ memlint/core.py,sha256=kanV4fSlsuL3GPUtWSMslm6jcq9zjBCNHlo4cGJEOHs,5865
5
+ memlint/models.py,sha256=kQMCE_Q9t63VsYkaBWQWvkV_FyGGfKnkCLXHM5sFt7A,1856
6
+ memlint/scorer.py,sha256=paoRrYQQNVcUXhV4qI5wziqaMZpOgoK2nCQwc7hTXvY,3133
7
+ memlint/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ memlint/adapters/_utils.py,sha256=7Dy_6SPb_NAe9HNtRpKEhbC4Q-XuI4RdKHYdPvYt9Tk,234
9
+ memlint/adapters/json_adapter.py,sha256=h-ZSfzUHu1CqAe0rQ0XPK-I9o8XTXA55QxZ9Q9UYNFA,1003
10
+ memlint/adapters/langchain_tool.py,sha256=x7C_UNjXow5hOdy1ajF7NDAo9goS28mxNY1xtdL-PMw,924
11
+ memlint/adapters/mem0_adapter.py,sha256=HD6DNaXjWibfa3bmSjkus-CYwUfzvp9FR0bZ0zKyPc0,994
12
+ memlint-0.1.0.dist-info/METADATA,sha256=WFSz6abUdWJOzgec5FiYqdz8gxn97YGjmakZOkv6cZs,5787
13
+ memlint-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
14
+ memlint-0.1.0.dist-info/entry_points.txt,sha256=H3PyyUzwn9zU5t2yB_XCQyPVUI93qtuuBUMYaarny5o,45
15
+ memlint-0.1.0.dist-info/licenses/LICENSE,sha256=8lSgQm0jIJfk-yY3ATXdgRzUo10X0hLT9CIQqig0Ids,1091
16
+ memlint-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ memlint = memlint.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 MatrixEscaper
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.