mcal-ai 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,734 @@
+ """
+ Intent Tracker
+ 
+ Extracts and maintains hierarchical intent graphs from conversations.
+ This is Pillar 1 of MCAL: Intent Graph Preservation.
+ 
+ Key capabilities:
+ - Extract intent structures from conversation turns
+ - Incrementally update the intent graph as the conversation progresses
+ - Track goal status (active, completed, abandoned)
+ - Detect intent drift and evolution
+ """
+ 
+ from __future__ import annotations
+ 
+ import json
+ import logging
+ from typing import Optional, Protocol
+ 
+ from .models import (
+     EdgeRelation,
+     IntentEdge,
+     IntentGraph,
+     IntentNode,
+     IntentStatus,
+     IntentType,
+     Turn,
+ )
+ 
+ logger = logging.getLogger(__name__)
+ 
+ 
+ # Issue #1: Map invalid intent types from the LLM to valid enum values
+ INTENT_TYPE_MAPPING = {
+     "evidence": IntentType.TASK,
+     "consideration": IntentType.TASK,
+     "analysis": IntentType.TASK,
+     "objective": IntentType.GOAL,
+     "sub-goal": IntentType.TASK,
+     "question": IntentType.TASK,
+     "constraint": IntentType.TASK,
+     "preference": IntentType.TASK,
+ }
+ 
+ # Issue #4: Map invalid intent statuses from the LLM to valid enum values
+ INTENT_STATUS_MAPPING = {
+     "reopened": IntentStatus.ACTIVE,
+     "paused": IntentStatus.BLOCKED,
+     "in_progress": IntentStatus.ACTIVE,
+     "in-progress": IntentStatus.ACTIVE,
+     "done": IntentStatus.COMPLETED,
+     "cancelled": IntentStatus.ABANDONED,
+     "canceled": IntentStatus.ABANDONED,
+ }
+ 
+ # Issue #5: Map invalid edge relations from the LLM to valid enum values
+ EDGE_RELATION_MAPPING = {
+     "results_in": EdgeRelation.ENABLES,
+     "leads_to": EdgeRelation.ENABLES,
+     "requires": EdgeRelation.DEPENDS_ON,
+     "needs": EdgeRelation.DEPENDS_ON,
+     "parent_of": EdgeRelation.DERIVES_FROM,
+     "child_of": EdgeRelation.DERIVES_FROM,
+     "replaces": EdgeRelation.SUPERSEDES,
+     "blocks": EdgeRelation.CONFLICTS_WITH,
+ }
+ 
+ 
+ def normalize_intent_type(type_str: str) -> IntentType:
+     """
+     Normalize an intent type string to a valid IntentType enum.
+ 
+     Handles invalid types from the LLM by mapping them to the closest valid type.
+     Fixes Issue #1: Invalid IntentType values from LLM extraction.
+ 
+     Args:
+         type_str: Raw type string from the LLM
+ 
+     Returns:
+         Valid IntentType enum value
+     """
+     type_lower = type_str.lower().strip()
+ 
+     # Try direct enum conversion first
+     try:
+         return IntentType(type_lower)
+     except ValueError:
+         pass
+ 
+     # Try the mapping table
+     if type_lower in INTENT_TYPE_MAPPING:
+         logger.warning(f"Mapped invalid intent type '{type_str}' to {INTENT_TYPE_MAPPING[type_lower]}")
+         return INTENT_TYPE_MAPPING[type_lower]
+ 
+     # Fallback to TASK for unknown types
+     logger.warning(f"Unknown intent type '{type_str}', falling back to TASK")
+     return IntentType.TASK
+ 
+ 
+ def normalize_intent_status(status_str: str) -> IntentStatus:
+     """
+     Normalize an intent status string to a valid IntentStatus enum.
+ 
+     Fixes Issue #4: Invalid IntentStatus values from the LLM.
+     """
+     status_lower = status_str.lower().strip()
+ 
+     try:
+         return IntentStatus(status_lower)
+     except ValueError:
+         pass
+ 
+     if status_lower in INTENT_STATUS_MAPPING:
+         logger.warning(f"Mapped invalid intent status '{status_str}' to {INTENT_STATUS_MAPPING[status_lower]}")
+         return INTENT_STATUS_MAPPING[status_lower]
+ 
+     logger.warning(f"Unknown intent status '{status_str}', falling back to ACTIVE")
+     return IntentStatus.ACTIVE
+ 
+ 
+ def normalize_edge_relation(relation_str: str) -> EdgeRelation:
+     """
+     Normalize an edge relation string to a valid EdgeRelation enum.
+ 
+     Fixes Issue #5: Invalid EdgeRelation values from the LLM.
+     """
+     relation_lower = relation_str.lower().strip()
+ 
+     try:
+         return EdgeRelation(relation_lower)
+     except ValueError:
+         pass
+ 
+     if relation_lower in EDGE_RELATION_MAPPING:
+         logger.warning(f"Mapped invalid edge relation '{relation_str}' to {EDGE_RELATION_MAPPING[relation_lower]}")
+         return EDGE_RELATION_MAPPING[relation_lower]
+ 
+     logger.warning(f"Unknown edge relation '{relation_str}', falling back to DERIVES_FROM")
+     return EdgeRelation.DERIVES_FROM
+ 
+ 
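A quick illustration of the three-step fallback above (an editorial sketch, not part of the package file; it assumes IntentType defines GOAL and TASK members whose values are the lowercase strings "goal" and "task", since the models module is not shown in this diff):

assert normalize_intent_type("goal") is IntentType.GOAL        # direct enum hit
assert normalize_intent_type("Objective") is IntentType.GOAL   # mapping table
assert normalize_intent_type("musing") is IntentType.TASK      # unknown value, falls back to TASK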
+ # =============================================================================
+ # LLM Client Protocol
+ # =============================================================================
+ 
+ class LLMClient(Protocol):
+     """Protocol for LLM client implementations."""
+ 
+     async def complete(self, prompt: str, system: Optional[str] = None) -> str:
+         """Generate a completion for the given prompt."""
+         ...
+ 
+ 
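Any object exposing a matching async complete method satisfies this protocol. The following is a minimal, hypothetical stand-in (not part of the package) that can drive the tracker in tests or offline experiments without calling a real LLM:

class CannedLLMClient:
    """Hypothetical test double: always returns the same canned response."""

    def __init__(self, canned_response: str) -> None:
        self.canned_response = canned_response

    async def complete(self, prompt: str, system: Optional[str] = None) -> str:
        # The prompt and system message are ignored entirely.
        return self.canned_response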
+ # =============================================================================
+ # Prompts
+ # =============================================================================
+ 
+ INTENT_EXTRACTION_SYSTEM = """You are an expert at analyzing conversations to extract user intents and goals.
+ 
+ Your task is to identify the hierarchical structure of what the user is trying to achieve:
+ - MISSION: The overarching objective (if discernible)
+ - GOALS: Major sub-objectives the user wants to accomplish
+ - TASKS: Specific actions or steps within goals
+ - DECISIONS: Choices the user has made or needs to make
+ 
+ For each intent, assess:
+ - Status: active, completed, abandoned, pending, or blocked
+ - Confidence: How certain you are that this is a real intent (0.0-1.0)
+ - Evidence: Which parts of the conversation support this intent
+ 
+ Output your analysis as valid JSON."""
+ 
+ INTENT_EXTRACTION_PROMPT = """Analyze this conversation and extract the user's intent hierarchy.
+ 
+ CONVERSATION:
+ {conversation}
+ 
+ Extract intents as JSON with this structure:
+ {{
+   "mission": {{
+     "content": "string or null if not clear",
+     "confidence": 0.0-1.0
+   }},
+   "goals": [
+     {{
+       "id": "g1",
+       "content": "goal description",
+       "status": "active|completed|abandoned|pending|blocked",
+       "confidence": 0.0-1.0,
+       "evidence": ["turn_1", "turn_3"],
+       "parent": "mission or null"
+     }}
+   ],
+   "tasks": [
+     {{
+       "id": "t1",
+       "content": "task description",
+       "status": "active|completed|abandoned|pending|blocked",
+       "confidence": 0.0-1.0,
+       "evidence": ["turn_2"],
+       "parent": "g1"
+     }}
+   ],
+   "decisions": [
+     {{
+       "id": "d1",
+       "content": "decision description",
+       "status": "active|completed",
+       "confidence": 0.0-1.0,
+       "evidence": ["turn_4"],
+       "parent": "t1 or g1"
+     }}
+   ]
+ }}
+ 
+ Be thorough, but don't invent intents that aren't supported by the conversation.
+ Output ONLY valid JSON, no explanation."""
+ 
+ INTENT_UPDATE_PROMPT = """Given the existing intent graph and a new conversation turn, update the intent structure.
+ 
+ EXISTING INTENT GRAPH:
+ {intent_graph}
+ 
+ NEW TURN:
+ {turn}
+ 
+ Determine what changes are needed:
+ 1. New intents to add?
+ 2. Status changes for existing intents?
+ 3. New relationships between intents?
+ 
+ Output as JSON:
+ {{
+   "new_nodes": [
+     {{
+       "id": "unique_id",
+       "type": "mission|goal|task|decision",
+       "content": "Description of the intent",
+       "status": "active|completed|pending|abandoned|blocked",
+       "confidence": 0.0-1.0,
+       "parent": "parent_node_id or null"
+     }}
+   ],
+   "status_updates": [
+     {{"id": "g1", "new_status": "completed"}}
+   ],
+   "new_edges": [
+     {{"source": "g1", "target": "t2", "relation": "derives_from"}}
+   ]
+ }}
+ 
+ IMPORTANT: Each new_node MUST have a "content" field with a description.
+ Output ONLY valid JSON, no explanation."""
+ 
+ 
+ # =============================================================================
+ # Intent Tracker
+ # =============================================================================
+ 
+ class IntentTracker:
+     """
+     Extracts and maintains intent graphs from conversations.
+ 
+     Usage:
+         tracker = IntentTracker(llm_client)
+ 
+         # Extract from full conversation
+         graph = await tracker.extract_intents(turns)
+ 
+         # Or incrementally update
+         graph = await tracker.update_intent(new_turn, existing_graph)
+ 
+         # Query active goals
+         active = tracker.get_active_goals(graph)
+     """
+ 
+     def __init__(self, llm_client: LLMClient):
+         """
+         Initialize the intent tracker.
+ 
+         Args:
+             llm_client: LLM client for extraction (Anthropic, OpenAI, etc.)
+         """
+         self.llm = llm_client
+ 
+     async def extract_intents(
+         self,
+         turns: list[Turn],
+         session_id: Optional[str] = None
+     ) -> IntentGraph:
+         """
+         Extract intent graph from a conversation.
+ 
+         Args:
+             turns: List of conversation turns
+             session_id: Optional session identifier
+ 
+         Returns:
+             IntentGraph with extracted intents
+         """
+         # Format conversation for prompt (use smart formatting for long conversations)
+         conversation = await self._format_conversation_smart(turns)
+ 
+         # Call LLM for extraction
+         prompt = INTENT_EXTRACTION_PROMPT.format(conversation=conversation)
+         response = await self.llm.complete(prompt, system=INTENT_EXTRACTION_SYSTEM)
+ 
+         # Parse response
+         try:
+             data = json.loads(self._clean_json_response(response))
+         except json.JSONDecodeError as e:
+             logger.error(f"Failed to parse intent extraction response: {e}")
+             logger.debug(f"Response was: {response}")
+             return IntentGraph(session_id=session_id)
+ 
+         # Build graph
+         graph = IntentGraph(session_id=session_id)
+         node_id_map: dict[str, str] = {}  # Map from LLM IDs to our IDs
+ 
+         # Add mission if present
+         mission_data = data.get("mission", {})
+         mission_content = self._extract_node_content(mission_data) if mission_data else None
+         if mission_content:
+             mission_node = IntentNode(
+                 type=IntentType.MISSION,
+                 content=mission_content,
+                 status=IntentStatus.ACTIVE,
+                 confidence=mission_data.get("confidence", 0.8)
+             )
+             graph.add_node(mission_node)
+             node_id_map["mission"] = mission_node.id
+ 
+         # Add goals
+         for goal_data in data.get("goals", []):
+             content = self._extract_node_content(goal_data)
+             if not content:
+                 continue
+             node = IntentNode(
+                 type=IntentType.GOAL,
+                 content=content,
+                 status=normalize_intent_status(goal_data.get("status", "active")),
+                 confidence=goal_data.get("confidence", 0.8),
+                 evidence=goal_data.get("evidence", [])
+             )
+             graph.add_node(node)
+             node_id_map[goal_data["id"]] = node.id
+ 
+             # Add edge to parent
+             parent_key = goal_data.get("parent")
+             if parent_key and parent_key in node_id_map:
+                 edge = IntentEdge(
+                     source=node_id_map[parent_key],
+                     target=node.id,
+                     relation=EdgeRelation.DERIVES_FROM
+                 )
+                 graph.add_edge(edge)
+ 
+         # Add tasks
+         for task_data in data.get("tasks", []):
+             content = self._extract_node_content(task_data)
+             if not content:
+                 continue
+             node = IntentNode(
+                 type=IntentType.TASK,
+                 content=content,
+                 status=normalize_intent_status(task_data.get("status", "active")),
+                 confidence=task_data.get("confidence", 0.8),
+                 evidence=task_data.get("evidence", [])
+             )
+             graph.add_node(node)
+             node_id_map[task_data["id"]] = node.id
+ 
+             # Add edge to parent
+             parent_key = task_data.get("parent")
+             if parent_key and parent_key in node_id_map:
+                 edge = IntentEdge(
+                     source=node_id_map[parent_key],
+                     target=node.id,
+                     relation=EdgeRelation.DERIVES_FROM
+                 )
+                 graph.add_edge(edge)
+ 
+         # Add decisions
+         for decision_data in data.get("decisions", []):
+             content = self._extract_node_content(decision_data)
+             if not content:
+                 continue
+             node = IntentNode(
+                 type=IntentType.DECISION,
+                 content=content,
+                 status=normalize_intent_status(decision_data.get("status", "active")),
+                 confidence=decision_data.get("confidence", 0.8),
+                 evidence=decision_data.get("evidence", [])
+             )
+             graph.add_node(node)
+             node_id_map[decision_data["id"]] = node.id
+ 
+             # Add edge to parent
+             parent_key = decision_data.get("parent")
+             if parent_key and parent_key in node_id_map:
+                 edge = IntentEdge(
+                     source=node_id_map[parent_key],
+                     target=node.id,
+                     relation=EdgeRelation.DERIVES_FROM
+                 )
+                 graph.add_edge(edge)
+ 
+         logger.info(
+             f"Extracted intent graph with {len(graph.nodes)} nodes "
+             f"and {len(graph.edges)} edges"
+         )
+ 
+         return graph
+ 
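For orientation, a hedged end-to-end sketch of extraction (editorial, not part of the package). It reuses the hypothetical CannedLLMClient above and assumes Turn can be constructed with role and content keyword arguments; the models module defining Turn is not shown in this diff:

import asyncio

canned = '{"mission": {"content": "Launch the beta", "confidence": 0.9}, "goals": [], "tasks": [], "decisions": []}'
tracker = IntentTracker(CannedLLMClient(canned))
turns = [Turn(role="user", content="Let's plan the beta launch.")]
graph = asyncio.run(tracker.extract_intents(turns, session_id="demo"))
print(len(graph.nodes))  # 1: just the mission node extracted from the canned reply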
+     async def update_intent(
+         self,
+         turn: Turn,
+         current_graph: IntentGraph
+     ) -> IntentGraph:
+         """
+         Incrementally update the intent graph with a new turn.
+ 
+         Args:
+             turn: New conversation turn
+             current_graph: Existing intent graph
+ 
+         Returns:
+             Updated IntentGraph
+         """
+         # Format current graph for prompt
+         graph_summary = self._format_graph_summary(current_graph)
+         turn_text = f"[{turn.role}]: {turn.content}"
+ 
+         # Call LLM for update
+         prompt = INTENT_UPDATE_PROMPT.format(
+             intent_graph=graph_summary,
+             turn=turn_text
+         )
+         response = await self.llm.complete(prompt, system=INTENT_EXTRACTION_SYSTEM)
+ 
+         # Parse response
+         try:
+             data = json.loads(self._clean_json_response(response))
+         except json.JSONDecodeError as e:
+             logger.error(f"Failed to parse intent update response: {e}")
+             return current_graph
+ 
+         # Apply status updates
+         for update in data.get("status_updates", []):
+             node_id = update.get("id")
+             new_status = update.get("new_status")
+             if node_id in current_graph.nodes and new_status:
+                 current_graph.nodes[node_id].update_status(normalize_intent_status(new_status))
+ 
+         # Add new nodes
+         for node_data in data.get("new_nodes", []):
+             # Issue #2 + P4: Robust content extraction from various field names
+             content = self._extract_node_content(node_data)
+             if not content:
+                 logger.warning(f"Skipping node without content: {node_data}")
+                 continue
+ 
+             # Issue #1: Use normalize_intent_type instead of direct IntentType()
+             node = IntentNode(
+                 type=normalize_intent_type(node_data.get("type", "task")),
+                 content=content,
+                 status=normalize_intent_status(node_data.get("status", "active")),
+                 confidence=node_data.get("confidence", 0.8),
+                 evidence=[turn.id]
+             )
+             current_graph.add_node(node)
+ 
+             # Handle parent relationship
+             parent_id = node_data.get("parent")
+             if parent_id and parent_id in current_graph.nodes:
+                 edge = IntentEdge(
+                     source=parent_id,
+                     target=node.id,
+                     relation=EdgeRelation.DERIVES_FROM
+                 )
+                 current_graph.add_edge(edge)
+ 
+         # Add new edges
+         for edge_data in data.get("new_edges", []):
+             source = edge_data.get("source")
+             target = edge_data.get("target")
+             relation = edge_data.get("relation", "derives_from")
+ 
+             if source in current_graph.nodes and target in current_graph.nodes:
+                 edge = IntentEdge(
+                     source=source,
+                     target=target,
+                     relation=normalize_edge_relation(relation)
+                 )
+                 current_graph.add_edge(edge)
+ 
+         return current_graph
+ 
+     def get_active_goals(self, graph: IntentGraph) -> list[IntentNode]:
+         """Get all currently active goals and tasks."""
+         return graph.get_active_goals()
+ 
+     def get_goal_hierarchy(self, graph: IntentGraph, node_id: str) -> list[IntentNode]:
+         """Get the full hierarchy path to a specific goal."""
+         return graph.get_node_path(node_id)
+ 
+     def _format_conversation(self, turns: list[Turn]) -> str:
+         """Format turns for the prompt."""
+         lines = []
+         for i, turn in enumerate(turns):
+             lines.append(f"[Turn {i+1}] [{turn.role}]: {turn.content}")
+         return "\n\n".join(lines)
+ 
+     def _estimate_tokens(self, text: str) -> int:
+         """
+         Estimate the token count for a piece of text.
+ 
+         Uses a rough approximation of ~4 characters per token for English text.
+         This is conservative to avoid context overflow.
+ 
+         Args:
+             text: Text to estimate tokens for
+ 
+         Returns:
+             Estimated token count
+         """
+         return len(text) // 4
+ 
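As a rough worked example of this heuristic (editorial only): the 15,000-token default budget used by _format_conversation_smart below corresponds to roughly 60,000 characters of formatted conversation text.

assert 60_000 // 4 == 15_000  # ~60k characters fill the default 15k-token budget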
+     async def _summarize_turns(self, turns: list[Turn]) -> str:
+         """
+         Summarize a batch of conversation turns for intent tracking.
+ 
+         Uses the LLM to create a concise summary preserving key goals,
+         tasks, and intent changes without all the verbose back-and-forth.
+ 
+         Args:
+             turns: List of turns to summarize
+ 
+         Returns:
+             Condensed summary string
+         """
+         if not turns:
+             return ""
+ 
+         # Format turns for summarization
+         turn_text = []
+         for turn in turns:
+             turn_text.append(f"[{turn.role}]: {turn.content[:500]}")  # Truncate very long turns
+ 
+         summarization_prompt = f"""Summarize the following conversation segment concisely.
+ Focus on:
+ 1. Goals mentioned or established
+ 2. Tasks discussed or completed
+ 3. Changes in priorities or direction
+ 4. Key topics and their resolution status
+ 
+ Keep the summary under 500 words. Be factual and specific.
+ 
+ CONVERSATION SEGMENT:
+ {chr(10).join(turn_text)}
+ 
+ SUMMARY:"""
+ 
+         try:
+             summary = await self.llm.complete(summarization_prompt)
+             return f"[SUMMARY OF MIDDLE TURNS]: {summary.strip()}"
+         except Exception as e:
+             logger.warning(f"Failed to summarize turns: {e}")
+             # Fallback: just note what was skipped
+             return f"[SUMMARY: {len(turns)} turns omitted from middle of conversation]"
+ 
+     async def _format_conversation_smart(
+         self,
+         turns: list[Turn],
+         max_tokens: int = 15000,
+         first_n: int = 10,
+         last_n: int = 30
+     ) -> str:
+         """
+         Format a conversation with smart chunking for long conversations.
+ 
+         Strategy: Sliding Window with Summary
+         - Keep the first N turns (establishes context, initial goals)
+         - Summarize the middle turns (preserve key information compactly)
+         - Keep the last N turns (recent state, current goals)
+ 
+         This handles conversations of any length while preserving
+         the most important context for intent extraction.
+ 
+         Args:
+             turns: All conversation turns
+             max_tokens: Maximum token budget for conversation text
+             first_n: Number of initial turns to keep verbatim
+             last_n: Number of recent turns to keep verbatim
+ 
+         Returns:
+             Formatted conversation string within the token budget
+         """
+         if not turns:
+             return ""
+ 
+         total_turns = len(turns)
+ 
+         # For short conversations, use simple formatting
+         if total_turns <= (first_n + last_n):
+             return self._format_conversation(turns)
+ 
+         # Check if simple formatting fits within the budget
+         simple_format = self._format_conversation(turns)
+         if self._estimate_tokens(simple_format) <= max_tokens:
+             return simple_format
+ 
+         logger.info(f"Long conversation detected ({total_turns} turns), applying sliding window")
+ 
+         # Split into three segments
+         first_turns = turns[:first_n]
+         middle_turns = turns[first_n:-last_n] if last_n > 0 else turns[first_n:]
+         last_turns = turns[-last_n:] if last_n > 0 else []
+ 
+         # Format first and last turns verbatim
+         first_formatted = []
+         for i, turn in enumerate(first_turns):
+             first_formatted.append(f"[Turn {i + 1}] [{turn.role}]: {turn.content}")
+ 
+         last_formatted = []
+         start_idx = len(turns) - len(last_turns)
+         for i, turn in enumerate(last_turns):
+             last_formatted.append(f"[Turn {start_idx + i + 1}] [{turn.role}]: {turn.content}")
+ 
+         # Summarize the middle section
+         middle_summary = await self._summarize_turns(middle_turns)
+ 
+         # Combine all sections
+         sections = [
+             "=== CONVERSATION START ===",
+             "\n\n".join(first_formatted),
+             "",
+             "=== MIDDLE SECTION (SUMMARIZED) ===",
+             middle_summary,
+             "",
+             "=== RECENT CONVERSATION ===",
+             "\n\n".join(last_formatted)
+         ]
+ 
+         result = "\n\n".join(sections)
+ 
+         # Log token savings
+         original_tokens = self._estimate_tokens(simple_format)
+         final_tokens = self._estimate_tokens(result)
+         logger.info(f"Conversation chunking: {original_tokens} → {final_tokens} tokens "
+                     f"(saved {original_tokens - final_tokens} tokens)")
+ 
+         return result
+ 
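To make the window arithmetic concrete, a small editorial sketch of how the default parameters split a hypothetical 100-turn conversation (turns 1-10 and 71-100 kept verbatim, turns 11-70 summarized):

turns = list(range(1, 101))          # stand-in for 100 Turn objects
first_n, last_n = 10, 30
first = turns[:first_n]              # turns 1-10, kept verbatim
middle = turns[first_n:-last_n]      # turns 11-70, summarized
last = turns[-last_n:]               # turns 71-100, kept verbatim
assert (len(first), len(middle), len(last)) == (10, 60, 30)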
+     def _format_graph_summary(self, graph: IntentGraph) -> str:
+         """Format graph for update prompt."""
+         lines = []
+ 
+         for node_id, node in graph.nodes.items():
+             status_emoji = {
+                 IntentStatus.ACTIVE: "🔵",
+                 IntentStatus.COMPLETED: "✅",
+                 IntentStatus.ABANDONED: "❌",
+                 IntentStatus.PENDING: "⏳",
+                 IntentStatus.BLOCKED: "🚫"
+             }.get(node.status, "")
+ 
+             lines.append(
+                 f"- [{node.type.value}] {node_id}: {node.content} "
+                 f"{status_emoji} (confidence: {node.confidence:.2f})"
+             )
+ 
+         return "\n".join(lines) if lines else "No existing intents"
+ 
+     def _clean_json_response(self, response: str) -> str:
+         """Clean LLM response to extract JSON."""
+         # Remove markdown code blocks if present
+         response = response.strip()
+         if response.startswith("```json"):
+             response = response[7:]
+         elif response.startswith("```"):
+             response = response[3:]
+         if response.endswith("```"):
+             response = response[:-3]
+         return response.strip()
+ 
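A short editorial sketch of what this cleaning step does, reusing the hypothetical CannedLLMClient stand-in from earlier to build a tracker:

tracker = IntentTracker(CannedLLMClient("{}"))
raw = "```json\n{\"goals\": []}\n```"
assert tracker._clean_json_response(raw) == '{"goals": []}'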
+     def _extract_node_content(self, node_data: dict) -> str:
+         """
+         Extract content from node data with robust field mapping.
+ 
+         P4 fix: handle the various field names the LLM might use,
+         checked in priority order.
+ 
+         Args:
+             node_data: Dictionary from the LLM response
+ 
+         Returns:
+             Extracted content string, or an empty string if not found
+         """
+         # Priority order for the content field
+         content_fields = [
+             "content",      # Standard field
+             "description",  # Common alternative
+             "label",        # Sometimes used by the LLM
+             "name",         # Another alternative
+             "title",        # Sometimes used for goals
+             "text",         # Generic text field
+             "summary",      # Occasionally used
+             "goal",         # Type-specific
+             "task",         # Type-specific
+             "decision",     # Type-specific
+             "objective",    # Another alternative
+         ]
+ 
+         for field in content_fields:
+             value = node_data.get(field)
+             if value and isinstance(value, str) and value.strip():
+                 return value.strip()
+ 
+         # Fallback: find the longest string value that's not an ID or status
+         excluded_fields = {"id", "type", "status", "parent", "confidence"}
+         string_values = [
+             (k, v) for k, v in node_data.items()
+             if isinstance(v, str)
+             and k not in excluded_fields
+             and len(v) > 10  # Skip short values like IDs
+         ]
+ 
+         if string_values:
+             # Return the longest string
+             _, longest = max(string_values, key=lambda x: len(x[1]))
+             return longest.strip()
+ 
+         return ""
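Finally, an editorial sketch of the field-fallback behavior, again using the hypothetical CannedLLMClient from above to build a tracker:

tracker = IntentTracker(CannedLLMClient("{}"))
assert tracker._extract_node_content({"content": "Ship the beta"}) == "Ship the beta"
assert tracker._extract_node_content({"description": "Ship the beta"}) == "Ship the beta"
# No known field present: the longest non-ID string value wins.
assert tracker._extract_node_content({"id": "g1", "note": "Refactor the auth module"}) == "Refactor the auth module"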