mcal-ai 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mcal/core/models.py ADDED
@@ -0,0 +1,445 @@
1
+ """
2
+ MCAL Core Data Models
3
+
4
+ Defines the fundamental data structures for:
5
+ - Intent Graphs (goal hierarchies)
6
+ - Decision Trails (reasoning preservation)
7
+ - Memory objects (context storage)
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from datetime import datetime, timezone
13
+ from enum import Enum
14
+ from typing import Optional
15
+ from uuid import uuid4
16
+
17
+ from pydantic import BaseModel, Field
18
+
19
+
20
+ def _utc_now() -> datetime:
21
+ """Return current UTC time (timezone-aware)."""
22
+ return datetime.now(timezone.utc)
23
+
24
+
25
+ # =============================================================================
26
+ # Enums
27
+ # =============================================================================
28
+
29
class IntentType(str, Enum):
    """Kinds of node that may appear in the intent hierarchy.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    # Overarching objective (rarely changes)
    MISSION = "mission"
    # Major sub-objectives (session-level)
    GOAL = "goal"
    # Concrete actions (turn-level)
    TASK = "task"
    # Choices made with rationale
    DECISION = "decision"
35
+
36
+
37
class IntentStatus(str, Enum):
    """Lifecycle states an intent node can be in.

    Members are also ``str`` instances, so they compare equal to their
    lowercase string values.
    """

    ACTIVE = "active"
    COMPLETED = "completed"
    ABANDONED = "abandoned"
    PENDING = "pending"
    BLOCKED = "blocked"
44
+
45
+
46
class EdgeRelation(str, Enum):
    """Relationship kinds that an edge between two intent nodes can carry."""

    # Child derives from parent
    DERIVES_FROM = "derives_from"
    # Completing this enables another
    ENABLES = "enables"
    # Mutually exclusive
    CONFLICTS_WITH = "conflicts_with"
    # Replaces a previous intent
    SUPERSEDES = "supersedes"
    # Requires another to be completed first
    DEPENDS_ON = "depends_on"
53
+
54
+
55
class EvidenceSource(str, Enum):
    """Where a piece of evidence backing a decision came from."""

    # Explicitly stated by user
    USER_STATED = "user_stated"
    # Inferred from context
    INFERRED = "inferred"
    # From external source (web, docs)
    EXTERNAL = "external"
    # System-generated
    SYSTEM = "system"
61
+
62
+
63
+ # =============================================================================
64
+ # Intent Graph Models
65
+ # =============================================================================
66
+
67
class IntentNode(BaseModel):
    """
    One vertex of the intent graph: a mission, goal, task, or decision.

    ``type`` and ``content`` are required; every other field has a default.
    ``confidence`` is constrained to the closed interval [0.0, 1.0].

    Example:
        IntentNode(
            type=IntentType.GOAL,
            content="Build fraud detection ML pipeline",
            status=IntentStatus.ACTIVE,
            confidence=0.9,
            evidence=["turn_5", "turn_8"]
        )
    """
    # First 8 hex characters of a random UUID4.
    id: str = Field(default_factory=lambda: uuid4().hex[:8])
    type: IntentType
    content: str
    status: IntentStatus = IntentStatus.ACTIVE
    confidence: float = Field(default=0.8, ge=0.0, le=1.0)
    # Turn IDs supporting this
    evidence: list[str] = Field(default_factory=list)
    created_at: datetime = Field(default_factory=_utc_now)
    updated_at: datetime = Field(default_factory=_utc_now)
    metadata: dict = Field(default_factory=dict)

    def update_status(self, new_status: IntentStatus) -> None:
        """Assign *new_status* to ``status`` and refresh ``updated_at``."""
        self.status = new_status
        self.updated_at = _utc_now()
94
+
95
+
96
class IntentEdge(BaseModel):
    """
    A directed, typed link between two intent nodes, referenced by node ID.

    ``strength`` is constrained to [0.0, 1.0] and defaults to 1.0.

    Example:
        IntentEdge(
            source="goal_123",
            target="task_456",
            relation=EdgeRelation.DERIVES_FROM
        )
    """
    # First 8 hex characters of a random UUID4.
    id: str = Field(default_factory=lambda: uuid4().hex[:8])
    # Source node ID
    source: str
    # Target node ID
    target: str
    relation: EdgeRelation
    strength: float = Field(default=1.0, ge=0.0, le=1.0)
    created_at: datetime = Field(default_factory=_utc_now)
113
+
114
+
115
class IntentGraph(BaseModel):
    """
    Hierarchical representation of user goals and their relationships.

    Nodes are stored by ID in ``nodes``; relationships are kept as an ordered
    list in ``edges``. A DERIVES_FROM edge is read as parent (``source``) to
    child (``target``) by ``get_children`` and ``get_node_path``.

    The graph captures:
    - What the user is trying to achieve (goals)
    - How goals break down into tasks
    - Dependencies and conflicts between goals
    - Progress status of each node
    """
    # First 8 hex characters of a random UUID4.
    id: str = Field(default_factory=lambda: uuid4().hex[:8])
    session_id: Optional[str] = None
    nodes: dict[str, IntentNode] = Field(default_factory=dict)
    edges: list[IntentEdge] = Field(default_factory=list)
    root_node_id: Optional[str] = None  # Mission-level node
    created_at: datetime = Field(default_factory=_utc_now)
    updated_at: datetime = Field(default_factory=_utc_now)

    def add_node(self, node: IntentNode) -> str:
        """Store *node* under its ID and return that ID.

        A node already stored under the same ID is replaced. The first
        MISSION node added while ``root_node_id`` is unset becomes the root.
        """
        self.nodes[node.id] = node
        self.updated_at = _utc_now()
        if node.type == IntentType.MISSION and self.root_node_id is None:
            self.root_node_id = node.id
        return node.id

    def add_edge(self, edge: IntentEdge) -> str:
        """Append *edge* to the graph and return its ID.

        The edge's endpoints are not checked against ``nodes``.
        """
        self.edges.append(edge)
        self.updated_at = _utc_now()
        return edge.id

    def get_active_goals(self) -> list[IntentNode]:
        """Return all currently active goals, missions, and tasks.

        A node is included when its status is ACTIVE or PENDING and its type
        is MISSION, GOAL, or TASK (DECISION nodes are excluded). Results
        follow the iteration order of ``nodes``.
        """
        # Tuples (not sets) on purpose: membership is decided by ``==``.
        active_statuses = (IntentStatus.ACTIVE, IntentStatus.PENDING)
        active_types = (IntentType.MISSION, IntentType.GOAL, IntentType.TASK)

        return [
            node for node in self.nodes.values()
            if node.status in active_statuses
            and node.type in active_types
        ]

    def get_children(self, node_id: str) -> list[IntentNode]:
        """Return the nodes targeted by DERIVES_FROM edges whose source is *node_id*.

        Results follow edge order; targets that are not present in ``nodes``
        are skipped.
        """
        children: list[IntentNode] = []
        for edge in self.edges:
            if edge.source == node_id and edge.relation == EdgeRelation.DERIVES_FROM:
                if edge.target in self.nodes:
                    children.append(self.nodes[edge.target])
        return children

    def get_node_path(self, node_id: str) -> list[IntentNode]:
        """Return the chain of nodes from the topmost ancestor down to *node_id*.

        Parents are found by following DERIVES_FROM edges from target to
        source. When several such edges point at the same target, the one
        that appears first in ``edges`` is used. IDs that are not present in
        ``nodes`` contribute no entry, but the walk still continues through
        their parent edge. A visited set stops the walk on cycles.
        """
        # Build the child -> parent index once instead of rescanning every
        # edge for each ancestor; setdefault keeps the first matching edge.
        parent_of: dict[str, str] = {}
        for edge in self.edges:
            if edge.relation == EdgeRelation.DERIVES_FROM:
                parent_of.setdefault(edge.target, edge.source)

        lineage: list[IntentNode] = []
        seen: set[str] = set()
        current_id: Optional[str] = node_id

        while current_id and current_id not in seen:
            seen.add(current_id)
            if current_id in self.nodes:
                lineage.append(self.nodes[current_id])
            current_id = parent_of.get(current_id)

        # Collected leaf-first; callers expect root-first.
        lineage.reverse()
        return lineage
196
+
197
+
198
+ # =============================================================================
199
+ # Decision Trail Models
200
+ # =============================================================================
201
+
202
class Alternative(BaseModel):
    """An option that was weighed during a decision but not selected.

    Only ``option`` is required; ``pros`` and ``cons`` default to empty
    lists and ``rejection_reason`` to ``None``.
    """
    option: str
    pros: list[str] = Field(default_factory=list)
    cons: list[str] = Field(default_factory=list)
    rejection_reason: Optional[str] = None
208
+
209
+
210
class Evidence(BaseModel):
    """A claim, together with its origin, that backs a decision.

    ``confidence`` is constrained to [0.0, 1.0]; ``turn_id`` and
    ``external_url`` are optional and default to ``None``.
    """
    claim: str
    source: EvidenceSource
    confidence: float = Field(default=0.8, ge=0.0, le=1.0)
    turn_id: Optional[str] = None
    external_url: Optional[str] = None
217
+
218
+
219
class TradeOff(BaseModel):
    """What a decision gained, what it gave up, and optionally why."""
    gained: str
    sacrificed: str
    justification: Optional[str] = None
224
+
225
+
226
class DecisionTrail(BaseModel):
    """
    Record of a decision together with the reasoning behind it.

    Rather than storing only the outcome (e.g. "User chose PostgreSQL"),
    a trail keeps:
    - The decision itself
    - What alternatives were considered
    - Why this option was chosen
    - What evidence supported it
    - What trade-offs were made

    ``decision``, ``context`` and ``rationale`` are required.

    Example:
        DecisionTrail(
            decision="Use PostgreSQL for the database",
            context="Discussing data storage for ML pipeline",
            alternatives=[
                Alternative(option="MongoDB", rejection_reason="Need ACID compliance"),
                Alternative(option="SQLite", rejection_reason="Won't scale")
            ],
            rationale="PostgreSQL offers ACID + pgvector for embeddings",
            evidence=[Evidence(claim="pgvector supports vector similarity", source=EvidenceSource.EXTERNAL)]
        )
    """
    # First 8 hex characters of a random UUID4.
    id: str = Field(default_factory=lambda: uuid4().hex[:8])
    # What was decided
    decision: str
    # Situation when decision was made
    context: str

    # --- The "WHY" components ---
    alternatives: list[Alternative] = Field(default_factory=list)
    # Why this option was chosen
    rationale: str
    evidence: list[Evidence] = Field(default_factory=list)
    trade_offs: list[TradeOff] = Field(default_factory=list)
    confidence: float = Field(default=0.8, ge=0.0, le=1.0)

    # --- Linkage ---
    # Goal IDs this serves
    related_goals: list[str] = Field(default_factory=list)
    # Prior decisions this builds on
    dependencies: list[str] = Field(default_factory=list)
    # If superseded, what replaced it
    invalidated_by: Optional[str] = None

    # --- Metadata ---
    turn_id: Optional[str] = None
    created_at: datetime = Field(default_factory=_utc_now)
    updated_at: datetime = Field(default_factory=_utc_now)

    def invalidate(self, replacement_id: str) -> None:
        """Record *replacement_id* as the superseding decision and refresh ``updated_at``."""
        self.invalidated_by = replacement_id
        self.updated_at = _utc_now()

    @property
    def is_valid(self) -> bool:
        """``True`` while no superseding decision has been recorded."""
        return self.invalidated_by is None
280
+
281
+
282
+ # =============================================================================
283
+ # Memory Models
284
+ # =============================================================================
285
+
286
class MemoryType(str, Enum):
    """Categories a stored memory item can belong to."""

    # Simple factual information
    FACT = "fact"
    # User preference
    PREFERENCE = "preference"
    # Goal/intent (links to IntentNode)
    INTENT = "intent"
    # Decision (links to DecisionTrail)
    DECISION = "decision"
    # Episodic memory (conversation summary)
    EPISODE = "episode"
293
+
294
+
295
class Memory(BaseModel):
    """
    The base unit of storage and retrieval.

    A memory can represent:
    - Simple facts (like Mem0)
    - Intent graph nodes
    - Decision trails
    - Episodic summaries

    ``type`` and ``content`` are required; ``importance`` is constrained
    to [0.0, 1.0].
    """
    # First 8 hex characters of a random UUID4.
    id: str = Field(default_factory=lambda: uuid4().hex[:8])
    type: MemoryType
    content: str

    # Embedding for semantic search
    embedding: Optional[list[float]] = None

    # --- Relevance factors ---
    importance: float = Field(default=0.5, ge=0.0, le=1.0)
    # Explicitly marked important by user
    user_marked: bool = False
    # How often this has been referenced
    reference_count: int = 0

    # --- Linkage to structured data ---
    intent_node_id: Optional[str] = None
    decision_trail_id: Optional[str] = None

    # --- Metadata ---
    session_id: Optional[str] = None
    turn_id: Optional[str] = None
    created_at: datetime = Field(default_factory=_utc_now)
    last_accessed: datetime = Field(default_factory=_utc_now)

    def access(self) -> None:
        """Bump ``reference_count`` by one and set ``last_accessed`` to now."""
        self.reference_count = self.reference_count + 1
        self.last_accessed = _utc_now()
331
+
332
+
333
+ # =============================================================================
334
+ # Conversation Models
335
+ # =============================================================================
336
+
337
class Turn(BaseModel):
    """One message exchanged within a conversation.

    ``role`` and ``content`` are required. ``role`` is a free-form string;
    no validation of its value is performed here.
    """
    # First 8 hex characters of a random UUID4.
    id: str = Field(default_factory=lambda: uuid4().hex[:8])
    role: str  # "user" or "assistant"
    content: str
    timestamp: datetime = Field(default_factory=_utc_now)
    session_id: Optional[str] = None
    metadata: dict = Field(default_factory=dict)
345
+
346
+
347
class Session(BaseModel):
    """A conversation: its turns, optional intent graph, and decisions."""
    # First 8 hex characters of a random UUID4.
    id: str = Field(default_factory=lambda: uuid4().hex[:8])
    turns: list[Turn] = Field(default_factory=list)
    intent_graph: Optional[IntentGraph] = None
    decisions: list[DecisionTrail] = Field(default_factory=list)
    created_at: datetime = Field(default_factory=_utc_now)
    updated_at: datetime = Field(default_factory=_utc_now)

    def add_turn(self, turn: Turn) -> str:
        """Append *turn* to this session and return the turn's ID.

        The turn's ``session_id`` is overwritten with this session's ID,
        and ``updated_at`` is refreshed.
        """
        turn.session_id = self.id
        self.turns.append(turn)
        self.updated_at = _utc_now()
        return turn.id
362
+
363
+
364
+ # =============================================================================
365
+ # Retrieval Models
366
+ # =============================================================================
367
+
368
class RetrievalResult(BaseModel):
    """A retrieved memory paired with its relevance score."""
    memory: Memory
    # Combined relevance score
    score: float
    # Component scores
    score_breakdown: dict[str, float] = Field(default_factory=dict)
373
+
374
+
375
class RetrievalConfig(BaseModel):
    """Tunable settings for a retrieval operation.

    The five default weights add up to 1.0; no constraint is declared on
    the weights here.
    """
    max_results: int = 10
    min_score: float = 0.0

    # --- Weight factors for multi-factor scoring ---
    semantic_weight: float = 0.4
    goal_alignment_weight: float = 0.3
    recency_weight: float = 0.1
    reference_weight: float = 0.1
    decision_impact_weight: float = 0.1

    # --- Filters ---
    memory_types: Optional[list[MemoryType]] = None
    include_invalidated_decisions: bool = False
390
+
391
+
392
+ # =============================================================================
393
+ # Decision Carry-Forward Models (for Issue #1 fix)
394
+ # =============================================================================
395
+
396
class VerifiedDecision(BaseModel):
    """A prior decision, referenced by ID, that was checked and confirmed.

    ``still_valid`` defaults to ``True``; ``confidence`` is constrained to
    [0.0, 1.0].
    """
    decision_id: str
    still_valid: bool = True
    confidence: float = Field(default=0.8, ge=0.0, le=1.0)
    supporting_evidence: Optional[str] = None
402
+
403
+
404
class ModifiedDecision(BaseModel):
    """A prior decision that was changed, with the old and new wording.

    ``modification_type`` is a free-form string defaulting to "refined";
    ``confidence`` is constrained to [0.0, 1.0].
    """
    original_decision_id: str
    original_summary: str
    new_decision: str
    # refined, updated, partially_changed
    modification_type: str = "refined"
    reason: str
    confidence: float = Field(default=0.8, ge=0.0, le=1.0)
412
+
413
+
414
class InvalidatedDecision(BaseModel):
    """A prior decision, referenced by ID, that was reversed or abandoned, and why."""
    decision_id: str
    reason: str
418
+
419
+
420
class NewDecision(BaseModel):
    """A decision newly extracted from the current session.

    ``alternatives``, ``evidence`` and ``trade_offs`` are held as plain
    dicts rather than typed models; ``confidence`` is constrained to
    [0.0, 1.0].
    """
    decision: str
    context: str
    rationale: str
    confidence: float = Field(default=0.8, ge=0.0, le=1.0)
    related_goal: Optional[str] = None
    alternatives: list[dict] = Field(default_factory=list)
    evidence: list[dict] = Field(default_factory=list)
    trade_offs: list[dict] = Field(default_factory=list)
430
+
431
+
432
class DecisionReconciliation(BaseModel):
    """
    Outcome of reconciling prior decisions against the current session.

    Four buckets, each defaulting to an empty list:
    - verified: Prior decisions confirmed as still valid
    - modified: Prior decisions that were updated
    - new: New decisions from current session
    - invalidated: Prior decisions that were reversed
    """
    verified: list[VerifiedDecision] = Field(default_factory=list)
    modified: list[ModifiedDecision] = Field(default_factory=list)
    new: list[NewDecision] = Field(default_factory=list)
    invalidated: list[InvalidatedDecision] = Field(default_factory=list)