abstractcore 2.5.0__py3-none-any.whl → 2.5.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. abstractcore/__init__.py +12 -0
  2. abstractcore/apps/__main__.py +8 -1
  3. abstractcore/apps/deepsearch.py +644 -0
  4. abstractcore/apps/intent.py +614 -0
  5. abstractcore/architectures/detection.py +250 -4
  6. abstractcore/assets/architecture_formats.json +14 -1
  7. abstractcore/assets/model_capabilities.json +583 -44
  8. abstractcore/compression/__init__.py +29 -0
  9. abstractcore/compression/analytics.py +420 -0
  10. abstractcore/compression/cache.py +250 -0
  11. abstractcore/compression/config.py +279 -0
  12. abstractcore/compression/exceptions.py +30 -0
  13. abstractcore/compression/glyph_processor.py +381 -0
  14. abstractcore/compression/optimizer.py +388 -0
  15. abstractcore/compression/orchestrator.py +380 -0
  16. abstractcore/compression/pil_text_renderer.py +818 -0
  17. abstractcore/compression/quality.py +226 -0
  18. abstractcore/compression/text_formatter.py +666 -0
  19. abstractcore/compression/vision_compressor.py +371 -0
  20. abstractcore/config/main.py +66 -1
  21. abstractcore/config/manager.py +111 -5
  22. abstractcore/core/session.py +105 -5
  23. abstractcore/events/__init__.py +1 -1
  24. abstractcore/media/auto_handler.py +312 -18
  25. abstractcore/media/handlers/local_handler.py +14 -2
  26. abstractcore/media/handlers/openai_handler.py +62 -3
  27. abstractcore/media/processors/__init__.py +11 -1
  28. abstractcore/media/processors/direct_pdf_processor.py +210 -0
  29. abstractcore/media/processors/glyph_pdf_processor.py +227 -0
  30. abstractcore/media/processors/image_processor.py +7 -1
  31. abstractcore/media/processors/text_processor.py +18 -3
  32. abstractcore/media/types.py +164 -7
  33. abstractcore/processing/__init__.py +5 -1
  34. abstractcore/processing/basic_deepsearch.py +2173 -0
  35. abstractcore/processing/basic_intent.py +690 -0
  36. abstractcore/providers/__init__.py +18 -0
  37. abstractcore/providers/anthropic_provider.py +29 -2
  38. abstractcore/providers/base.py +279 -6
  39. abstractcore/providers/huggingface_provider.py +658 -27
  40. abstractcore/providers/lmstudio_provider.py +52 -2
  41. abstractcore/providers/mlx_provider.py +103 -4
  42. abstractcore/providers/model_capabilities.py +352 -0
  43. abstractcore/providers/ollama_provider.py +44 -6
  44. abstractcore/providers/openai_provider.py +29 -2
  45. abstractcore/providers/registry.py +91 -19
  46. abstractcore/server/app.py +91 -81
  47. abstractcore/structured/handler.py +161 -1
  48. abstractcore/tools/common_tools.py +98 -3
  49. abstractcore/utils/__init__.py +4 -1
  50. abstractcore/utils/cli.py +114 -1
  51. abstractcore/utils/trace_export.py +287 -0
  52. abstractcore/utils/version.py +1 -1
  53. abstractcore/utils/vlm_token_calculator.py +655 -0
  54. {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/METADATA +140 -23
  55. abstractcore-2.5.3.dist-info/RECORD +107 -0
  56. {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/entry_points.txt +4 -0
  57. abstractcore-2.5.0.dist-info/RECORD +0 -86
  58. {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/WHEEL +0 -0
  59. {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/licenses/LICENSE +0 -0
  60. {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,690 @@
1
+ """
2
+ Basic Intent Analyzer - Identify and analyze intents behind text
3
+
4
+ Demonstrates how to use AbstractCore's infrastructure to create sophisticated
5
+ intent analysis capabilities with minimal complexity.
6
+ """
7
+
8
+ from enum import Enum
9
+ from typing import List, Optional, Dict, Any
10
+ from pydantic import BaseModel, Field
11
+
12
+ from ..core.interface import AbstractCoreInterface
13
+ from ..core.factory import create_llm
14
+ from ..structured.retry import FeedbackRetry
15
+ from ..utils.structured_logging import get_logger
16
+
17
+ logger = get_logger(__name__)
18
+
19
+
20
class IntentType(Enum):
    """Primary intent categories based on psychological research.

    Values are lowercase snake_case strings so they round-trip cleanly
    through the JSON emitted by structured LLM output.
    """
    INFORMATION_SEEKING = "information_seeking"  # Asking questions, requesting data
    INFORMATION_SHARING = "information_sharing"  # Providing facts, explanations
    PROBLEM_SOLVING = "problem_solving"  # Seeking or offering solutions
    DECISION_MAKING = "decision_making"  # Evaluating options, making choices
    PERSUASION = "persuasion"  # Convincing, influencing opinions
    CLARIFICATION = "clarification"  # Seeking or providing clarity
    EMOTIONAL_EXPRESSION = "emotional_expression"  # Expressing feelings, reactions
    RELATIONSHIP_BUILDING = "relationship_building"  # Social connection, rapport
    INSTRUCTION_GIVING = "instruction_giving"  # Teaching, directing actions
    VALIDATION_SEEKING = "validation_seeking"  # Seeking approval, confirmation
    # Additional intent types drawn from psychological research on
    # self-presentation and interpersonal dynamics.
    FACE_SAVING = "face_saving"  # Protecting self-image, avoiding embarrassment
    BLAME_DEFLECTION = "blame_deflection"  # Redirecting responsibility to external factors
    POWER_ASSERTION = "power_assertion"  # Establishing dominance or authority
    EMPATHY_SEEKING = "empathy_seeking"  # Seeking understanding and emotional support
    CONFLICT_AVOIDANCE = "conflict_avoidance"  # Preventing or minimizing confrontation
    TRUST_BUILDING = "trust_building"  # Establishing or maintaining credibility
    DECEPTION = "deception"  # Intentional misdirection or false information
40
+
41
+
42
class IntentDepth(Enum):
    """How deeply the analyzer should probe for motivations.

    Selects one of the depth-specific prompt instructions in
    ``_build_prompt`` / ``_build_final_combination_prompt``.
    """
    SURFACE = "surface"  # Obvious, stated intentions
    UNDERLYING = "underlying"  # Hidden motivations and goals
    COMPREHENSIVE = "comprehensive"  # Full analysis including subconscious drivers
47
+
48
+
49
class IntentContext(Enum):
    """Context type for intent analysis.

    Selects the context-specific framing sentence used in the prompts.
    """
    STANDALONE = "standalone"  # Single message/text analysis
    CONVERSATIONAL = "conversational"  # Part of ongoing dialogue
    DOCUMENT = "document"  # Formal document or article
    INTERACTIVE = "interactive"  # Real-time interaction context
55
+
56
+
57
class DeceptionIndicators(BaseModel):
    """Deception analysis indicators based on psychological research.

    Field descriptions double as schema documentation sent to the LLM for
    structured output, so they are part of the runtime contract.
    All float scores are constrained to the closed interval [0, 1].
    """
    deception_likelihood: float = Field(description="Likelihood of deceptive intent (0-1)", ge=0, le=1)
    narrative_consistency: float = Field(description="Internal consistency of the narrative (0-1)", ge=0, le=1)
    linguistic_markers: List[str] = Field(description="Specific linguistic indicators of potential deception", max_length=5)
    temporal_coherence: float = Field(description="Logical flow and timing consistency (0-1)", ge=0, le=1)
    emotional_congruence: float = Field(description="Alignment between stated emotions and content (0-1)", ge=0, le=1)
    deception_evidence: List[str] = Field(description="Evidence indicating potential deception (contradictions, deflection, inconsistencies)", max_length=3)
    authenticity_evidence: List[str] = Field(description="Evidence indicating authenticity (consistency, accountability, directness)", max_length=3)
66
+
67
+
68
class IdentifiedIntent(BaseModel):
    """Single identified intent with details.

    Embedded in both the primary and secondary slots of the analysis
    outputs; always carries a nested deception assessment.
    """
    intent_type: IntentType = Field(description="Primary intent category")
    confidence: float = Field(description="Confidence in this intent identification (0-1)", ge=0, le=1)
    description: str = Field(description="Human-readable description of the intent")
    underlying_goal: str = Field(description="What the person ultimately wants to achieve")
    emotional_undertone: str = Field(description="Emotional context or undertone")
    urgency_level: float = Field(description="How urgent or pressing this intent is (0-1)", ge=0, le=1)
    deception_analysis: DeceptionIndicators = Field(description="Deception evaluation based on psychological markers - always included in intent analysis")
77
+
78
+
79
class LLMIntentOutput(BaseModel):
    """LLM-generated intent analysis output.

    This is the schema the model is asked to fill; client-side metadata
    (word count, depth, context) is added afterwards to build
    ``IntentAnalysisOutput``.
    """
    primary_intent: IdentifiedIntent = Field(description="The most prominent intent")
    secondary_intents: List[IdentifiedIntent] = Field(description="Additional intents present", max_length=3)
    intent_complexity: float = Field(description="How complex/layered the intentions are (0-1)", ge=0, le=1)
    contextual_factors: List[str] = Field(description="Important contextual elements affecting intent", max_length=5)
    suggested_response_approach: str = Field(description="How one should respond to these intents")
    overall_confidence: float = Field(description="Overall confidence in the analysis (0-1)", ge=0, le=1)
87
+
88
+
89
class IntentAnalysisOutput(BaseModel):
    """Complete intent analysis output with computed metadata.

    Mirrors every field of ``LLMIntentOutput`` and appends metadata that is
    computed client-side rather than trusted from the model (word count,
    analysis depth, context type).
    """
    primary_intent: IdentifiedIntent = Field(description="The most prominent intent")
    secondary_intents: List[IdentifiedIntent] = Field(description="Additional intents present", max_length=3)
    intent_complexity: float = Field(description="How complex/layered the intentions are (0-1)", ge=0, le=1)
    contextual_factors: List[str] = Field(description="Important contextual elements affecting intent", max_length=5)
    suggested_response_approach: str = Field(description="How one should respond to these intents")
    overall_confidence: float = Field(description="Overall confidence in the analysis (0-1)", ge=0, le=1)
    word_count_analyzed: int = Field(description="Word count of analyzed text (computed client-side)")
    analysis_depth: IntentDepth = Field(description="Depth of analysis performed")
    context_type: IntentContext = Field(description="Context type used for analysis")
100
+
101
+
102
class BasicIntentAnalyzer:
    """
    Basic Intent Analyzer using zero-shot structured prompting.

    Demonstrates AbstractCore best practices:
    - Structured output with Pydantic validation
    - Clean parameter design
    - Automatic chunking for long documents (map-reduce)
    - Provider-agnostic implementation
    - Built-in retry and error handling (inherited from AbstractCore)

    Optimized defaults (no setup required):
        analyzer = BasicIntentAnalyzer()  # Uses gemma3:1b-it-qat, 32k context, 8k chunks

    Custom setup for different needs:
        llm = create_llm("openai", model="gpt-4o-mini", max_tokens=32000)
        analyzer = BasicIntentAnalyzer(llm, max_chunk_size=15000)

    Performance considerations:
    - gemma3:1b-it-qat: Fast, cost-effective for basic intent analysis
    - qwen3-coder:30b: Premium quality for complex psychological analysis
    - GPT-4o-mini: Excellent for nuanced intent understanding
    """
125
+
126
+ def __init__(
127
+ self,
128
+ llm: Optional[AbstractCoreInterface] = None,
129
+ max_chunk_size: int = 8000,
130
+ max_tokens: int = 32000,
131
+ max_output_tokens: int = 8000,
132
+ timeout: Optional[float] = None,
133
+ debug: bool = False
134
+ ):
135
+ """
136
+ Initialize the intent analyzer
137
+
138
+ Args:
139
+ llm: AbstractCore instance (any provider). If None, attempts to create ollama gemma3:1b-it-qat
140
+ max_chunk_size: Maximum characters per chunk for long documents (default 8000)
141
+ max_tokens: Maximum total tokens for LLM context (default 32000)
142
+ max_output_tokens: Maximum tokens for LLM output generation (default 8000)
143
+ timeout: HTTP request timeout in seconds. None for unlimited timeout (default None)
144
+ debug: Enable debug output including raw LLM responses (default False)
145
+ """
146
+ if llm is None:
147
+ try:
148
+ # Default to gemma3:1b-it-qat with configurable token limits
149
+ self.llm = create_llm("ollama", model="gemma3:1b-it-qat", max_tokens=max_tokens, max_output_tokens=max_output_tokens, timeout=timeout)
150
+ except Exception as e:
151
+ error_msg = (
152
+ f"❌ Failed to initialize default Ollama model 'gemma3:1b-it-qat': {e}\n\n"
153
+ "💡 To use the default model, please:\n"
154
+ " 1. Install Ollama from: https://ollama.com/\n"
155
+ " 2. Download the model: ollama pull gemma3:1b-it-qat\n"
156
+ " 3. Start Ollama service\n\n"
157
+ "🔧 Alternatively, provide a custom LLM instance:\n"
158
+ " from abstractcore import create_llm\n"
159
+ " from abstractcore.processing import BasicIntentAnalyzer\n"
160
+ " \n"
161
+ " # Using OpenAI\n"
162
+ " llm = create_llm('openai', model='gpt-4o-mini')\n"
163
+ " analyzer = BasicIntentAnalyzer(llm)\n"
164
+ " \n"
165
+ " # Using Anthropic\n"
166
+ " llm = create_llm('anthropic', model='claude-3-5-haiku-latest')\n"
167
+ " analyzer = BasicIntentAnalyzer(llm)\n"
168
+ " \n"
169
+ " # Using different Ollama model\n"
170
+ " llm = create_llm('ollama', model='llama3.2:3b')\n"
171
+ " analyzer = BasicIntentAnalyzer(llm)"
172
+ )
173
+ raise RuntimeError(error_msg) from e
174
+ else:
175
+ self.llm = llm
176
+ self.max_chunk_size = max_chunk_size
177
+ self.debug = debug
178
+
179
+ # Default retry strategy with 3 attempts
180
+ self.retry_strategy = FeedbackRetry(max_attempts=3)
181
+
182
+ def analyze_intent(
183
+ self,
184
+ text: str,
185
+ context_type: IntentContext = IntentContext.STANDALONE,
186
+ depth: IntentDepth = IntentDepth.UNDERLYING,
187
+ focus: Optional[str] = None,
188
+ ) -> IntentAnalysisOutput:
189
+ """
190
+ Analyze the intents behind the given text
191
+
192
+ Args:
193
+ text: Text to analyze for intents
194
+ context_type: Type of context for the analysis
195
+ depth: Depth of intent analysis to perform
196
+ focus: Optional specific aspect to focus on (e.g., "business motivations", "emotional drivers")
197
+
198
+ Returns:
199
+ IntentAnalysisOutput: Structured intent analysis with metadata including deception assessment
200
+
201
+ Example:
202
+ >>> from abstractcore import create_llm
203
+ >>> from abstractcore.processing import BasicIntentAnalyzer, IntentContext, IntentDepth
204
+ >>>
205
+ >>> llm = create_llm("openai", model="gpt-4o-mini")
206
+ >>> analyzer = BasicIntentAnalyzer(llm)
207
+ >>>
208
+ >>> result = analyzer.analyze_intent(
209
+ ... "I was wondering if you could help me understand how to improve our team's productivity?",
210
+ ... context_type=IntentContext.CONVERSATIONAL,
211
+ ... depth=IntentDepth.UNDERLYING,
212
+ ... focus="management concerns"
213
+ ... )
214
+ >>> print(result.primary_intent.intent_type)
215
+ >>> print(f"Confidence: {result.overall_confidence:.2f}")
216
+ """
217
+ # Handle long documents through chunking
218
+ if self._should_chunk_by_tokens(text):
219
+ logger.info("Using chunked intent analysis for long document",
220
+ text_length=len(text),
221
+ context_type=context_type.value,
222
+ depth=depth.value)
223
+ return self._analyze_long_document(text, context_type, depth, focus)
224
+ else:
225
+ logger.info("Using single-chunk intent analysis",
226
+ text_length=len(text),
227
+ context_type=context_type.value,
228
+ depth=depth.value)
229
+ return self._analyze_single_chunk(text, context_type, depth, focus)
230
+
231
    def _analyze_single_chunk(
        self,
        text: str,
        context_type: IntentContext,
        depth: IntentDepth,
        focus: Optional[str]
    ) -> IntentAnalysisOutput:
        """Analyze intent for a single chunk of text.

        Builds the full analysis prompt, requests structured output
        (``LLMIntentOutput``) with the configured retry strategy, then
        wraps the result with client-side metadata.

        Raises:
            ValueError: If the LLM response cannot be interpreted as
                structured output.
        """

        # Build the prompt based on parameters (deception analysis always included)
        prompt = self._build_prompt(text, context_type, depth, focus)

        if self.debug:
            print(f"\n🔧 DEBUG: Prompt sent to LLM:")
            print("=" * 80)
            print(prompt)
            print("=" * 80)

        # Use AbstractCore's structured output with retry strategy
        try:
            response = self.llm.generate(prompt, response_model=LLMIntentOutput, retry_strategy=self.retry_strategy)
        except Exception as e:
            if self.debug:
                print(f"\n❌ DEBUG: LLM generation failed with error: {e}")
                # Try to get the raw response if available — a second,
                # unconstrained generate call purely for diagnostics.
                try:
                    raw_response = self.llm.generate(prompt)
                    print(f"\n🔧 DEBUG: Raw LLM response (without structured output):")
                    print("=" * 80)
                    print(raw_response.content if hasattr(raw_response, 'content') else str(raw_response))
                    print("=" * 80)
                except Exception as raw_e:
                    print(f"❌ DEBUG: Could not get raw response: {raw_e}")
            # Always propagate the original failure after optional debugging.
            raise

        # Extract the structured output
        llm_result = None
        if isinstance(response, LLMIntentOutput):
            # When structured output succeeds, response is the LLMIntentOutput object directly
            llm_result = response
        elif hasattr(response, 'structured_output') and response.structured_output:
            # Fallback: check for structured_output attribute
            llm_result = response.structured_output
        else:
            # Debug information for troubleshooting
            error_msg = f"Failed to generate structured intent analysis output. Response type: {type(response)}"
            if hasattr(response, 'content'):
                error_msg += f", Content: {response.content[:200]}..."
            if hasattr(response, 'structured_output'):
                error_msg += f", Structured output: {response.structured_output}"
            raise ValueError(error_msg)

        # Compute word count (reliable, client-side calculation rather than
        # trusting the model to count).
        actual_word_count = len(text.split())

        # Create complete result with computed metadata
        return IntentAnalysisOutput(
            primary_intent=llm_result.primary_intent,
            secondary_intents=llm_result.secondary_intents,
            intent_complexity=llm_result.intent_complexity,
            contextual_factors=llm_result.contextual_factors,
            suggested_response_approach=llm_result.suggested_response_approach,
            overall_confidence=llm_result.overall_confidence,
            word_count_analyzed=actual_word_count,
            analysis_depth=depth,
            context_type=context_type
        )
298
+
299
    def _analyze_long_document(
        self,
        text: str,
        context_type: IntentContext,
        depth: IntentDepth,
        focus: Optional[str]
    ) -> IntentAnalysisOutput:
        """
        Handle long documents using a map-reduce approach.

        1. Split into chunks with overlap
        2. Analyze intent for each chunk (map)
        3. Combine chunk analyses into a final intent analysis (reduce)

        Raises:
            ValueError: If the final combination step does not yield
                structured output. Per-chunk failures do NOT raise; they
                produce low-confidence fallback entries instead.
        """

        # Split text into overlapping chunks
        chunks = self._split_text_into_chunks(text)

        logger.debug("Split document into chunks",
                     chunk_count=len(chunks),
                     avg_chunk_size=sum(len(c) for c in chunks) // len(chunks))

        # Degenerate case: one chunk means no reduce phase is needed.
        if len(chunks) == 1:
            return self._analyze_single_chunk(chunks[0], context_type, depth, focus)

        # Step 1: Analyze each chunk (Map phase)
        chunk_analyses = []
        for i, chunk in enumerate(chunks):
            chunk_prompt = self._build_chunk_prompt(chunk, context_type, focus, i + 1, len(chunks))

            # Use a simplified output model for chunks.
            # NOTE(review): this class is re-declared on every iteration;
            # hoisting it above the loop would avoid redundant model builds.
            class ChunkIntentAnalysis(BaseModel):
                primary_intent_type: str
                intent_description: str
                underlying_goal: str
                confidence: float = Field(ge=0, le=1)

            response = self.llm.generate(chunk_prompt, response_model=ChunkIntentAnalysis, retry_strategy=self.retry_strategy)
            if isinstance(response, ChunkIntentAnalysis):
                # When structured output succeeds, response is the ChunkIntentAnalysis object directly
                chunk_analyses.append(response)
            elif hasattr(response, 'structured_output') and response.structured_output:
                # Fallback: check for structured_output attribute
                chunk_analyses.append(response.structured_output)
            else:
                # If chunk processing fails, create a low-confidence fallback
                # entry so the reduce phase still sees every section.
                logger.warning("Chunk intent analysis failed, creating fallback",
                               chunk_number=i+1,
                               total_chunks=len(chunks))
                chunk_analyses.append(ChunkIntentAnalysis(
                    primary_intent_type="information_sharing",
                    intent_description=f"Section {i+1} intent analysis unavailable",
                    underlying_goal="Content processing failed",
                    confidence=0.1
                ))

        # Step 2: Combine chunk analyses (Reduce phase) into a plain-text
        # summary that the final prompt can reason over.
        combined_analysis = "\n\n".join([
            f"Section {i+1}:\nIntent Type: {ca.primary_intent_type}\nDescription: {ca.intent_description}\nUnderlying Goal: {ca.underlying_goal}\nConfidence: {ca.confidence:.2f}"
            for i, ca in enumerate(chunk_analyses)
        ])

        # Generate final intent analysis from combined analyses
        final_prompt = self._build_final_combination_prompt(combined_analysis, context_type, depth, focus, len(text))

        response = self.llm.generate(final_prompt, response_model=LLMIntentOutput, retry_strategy=self.retry_strategy)

        # Extract the structured output
        llm_result = None
        if isinstance(response, LLMIntentOutput):
            # When structured output succeeds, response is the LLMIntentOutput object directly
            llm_result = response
        elif hasattr(response, 'structured_output') and response.structured_output:
            # Fallback: check for structured_output attribute
            llm_result = response.structured_output
        else:
            # Debug information for troubleshooting
            error_msg = f"Failed to generate final structured intent analysis output. Response type: {type(response)}"
            if hasattr(response, 'content'):
                error_msg += f", Content: {response.content[:200]}..."
            if hasattr(response, 'structured_output'):
                error_msg += f", Structured output: {response.structured_output}"
            raise ValueError(error_msg)

        # Compute word count (reliable, client-side calculation)
        actual_word_count = len(text.split())

        # Create complete result with computed metadata
        return IntentAnalysisOutput(
            primary_intent=llm_result.primary_intent,
            secondary_intents=llm_result.secondary_intents,
            intent_complexity=llm_result.intent_complexity,
            contextual_factors=llm_result.contextual_factors,
            suggested_response_approach=llm_result.suggested_response_approach,
            overall_confidence=llm_result.overall_confidence,
            word_count_analyzed=actual_word_count,
            analysis_depth=depth,
            context_type=context_type
        )
398
+
399
+ def _should_chunk_by_tokens(self, text: str) -> bool:
400
+ """
401
+ Determine if text should be chunked based on token count.
402
+
403
+ Uses centralized TokenUtils for accurate token estimation.
404
+ Falls back to character count if model information unavailable.
405
+ """
406
+ from ..utils.token_utils import TokenUtils
407
+
408
+ # Get model name from LLM if available
409
+ model_name = None
410
+ if self.llm and hasattr(self.llm, 'model'):
411
+ model_name = self.llm.model
412
+
413
+ # Estimate tokens using centralized utility
414
+ estimated_tokens = TokenUtils.estimate_tokens(text, model_name)
415
+
416
+ # Use a conservative token limit (leaving room for prompt overhead)
417
+ # Most models have 32k+ context nowadays, so 8k tokens for input text is safe
418
+ token_limit = 8000
419
+
420
+ if estimated_tokens > token_limit:
421
+ return True
422
+
423
+ # Fallback to character-based check for very long texts
424
+ return len(text) > self.max_chunk_size
425
+
426
+ def _split_text_into_chunks(self, text: str, overlap: int = 200) -> List[str]:
427
+ """Split text into overlapping chunks"""
428
+ chunks = []
429
+ start = 0
430
+
431
+ while start < len(text):
432
+ # Calculate end position
433
+ end = start + self.max_chunk_size
434
+
435
+ # If this isn't the last chunk, try to break at a sentence boundary
436
+ if end < len(text):
437
+ # Look for sentence endings near the chunk boundary
438
+ sentence_end = text.rfind('. ', start + self.max_chunk_size - 500, end)
439
+ if sentence_end != -1 and sentence_end > start:
440
+ end = sentence_end + 2 # Include the period and space
441
+
442
+ chunks.append(text[start:end].strip())
443
+
444
+ # Move start position with overlap
445
+ if end >= len(text):
446
+ break
447
+ start = max(start + self.max_chunk_size - overlap, end - overlap)
448
+
449
+ return chunks
450
+
451
    def _build_prompt(
        self,
        text: str,
        context_type: IntentContext,
        depth: IntentDepth,
        focus: Optional[str]
    ) -> str:
        """Build the main intent analysis prompt.

        Assembles context- and depth-specific instructions, an optional
        focus directive, and an always-on deception-analysis section around
        the text to analyze. The trailing JSON-format requirements steer
        models toward valid structured output. The exact wording below is
        part of the runtime behavior — do not edit casually.
        """

        # Context instructions — one framing sentence per IntentContext member.
        context_instructions = {
            IntentContext.STANDALONE: "Analyze this text as an independent piece of communication.",
            IntentContext.CONVERSATIONAL: "Analyze this text as part of an ongoing conversation or dialogue.",
            IntentContext.DOCUMENT: "Analyze this text as part of a formal document or structured content.",
            IntentContext.INTERACTIVE: "Analyze this text as part of a real-time interactive communication."
        }

        # Depth instructions — one directive per IntentDepth member.
        depth_instructions = {
            IntentDepth.SURFACE: "Focus on the obvious, explicitly stated intentions and goals.",
            IntentDepth.UNDERLYING: "Look beyond surface statements to identify hidden motivations, implicit goals, and underlying psychological drivers.",
            IntentDepth.COMPREHENSIVE: "Provide a complete analysis including conscious intentions, subconscious motivations, emotional drivers, and potential unstated goals."
        }

        # Build focus instruction (empty string when no focus was given).
        focus_instruction = ""
        if focus:
            focus_instruction = f"\nPay special attention to: {focus}\nEnsure the analysis addresses this focus area thoroughly."

        # Deception analysis is always integrated into intent analysis.
        deception_instruction = """

DECEPTION ANALYSIS: Always evaluate authenticity with healthy skepticism.

Key principles:
1. Check for contradictions within the conversation
2. Ask: "What does this person gain by lying here?"
3. Be suspicious of overly complex explanations for simple failures
4. Confident, detailed stories can mask deception - don't trust them blindly
5. Look for what consequences they're trying to avoid

Red flags:
- Timeline inconsistencies or contradictions
- Elaborate technical excuses for basic mistakes
- Shifting blame to multiple external factors
- Claims that can't be easily verified

For deception analysis, assess:
- Likelihood of deception (0-1 scale)
- What motive exists for lying in this context
- Whether the explanation is unnecessarily complex
- Evidence for/against authenticity

Note: If someone admits past deception to a third party, that admission itself is likely honest."""

        # Note: {{}} renders as literal {} in this f-string.
        prompt = f"""Analyze the following text to identify and understand the intents, motivations, and goals behind the communication.

{context_instructions[context_type]}
{depth_instructions[depth]}{focus_instruction}{deception_instruction}

Text to analyze:
{text}

Your task is to identify:
1. PRIMARY INTENT: The main purpose or goal behind this communication
2. SECONDARY INTENTS: Additional intentions that may be present (up to 3)
3. UNDERLYING GOALS: What the person ultimately wants to achieve
4. EMOTIONAL UNDERTONES: The emotional context affecting the communication
5. CONTEXTUAL FACTORS: Important situational elements that influence the intents
6. RESPONSE APPROACH: How someone should respond to address these intents effectively

For each intent, consider:
- What type of intent it is (information seeking, problem solving, persuasion, face-saving, deception, etc.)
- How confident you are in identifying this intent
- What emotional undertones are present
- How urgent or pressing this intent seems to be
- What the person's underlying goal really is

Requirements:
- Be precise and avoid speculation beyond what the text supports
- Consider both explicit and implicit intentions
- Evaluate the complexity and layering of multiple intents
- Provide confidence scores for your assessments
- Focus on actionable insights for responding appropriately
- When deception analysis is requested, provide evidence-based psychological assessment

Generate a comprehensive structured analysis of the intents behind this communication.

CRITICAL JSON FORMAT REQUIREMENTS:
- Respond with ONLY valid JSON - no other text before or after
- Use double quotes for all strings and keys

- All field names must match exactly: primary_intent, secondary_intents, intent_complexity, etc.
- Arrays must use square brackets [], objects must use curly braces {{}}
- No trailing commas, no comments, pure JSON only"""

        return prompt
548
+
549
    def _build_chunk_prompt(self, chunk: str, context_type: IntentContext, focus: Optional[str], chunk_num: int, total_chunks: int) -> str:
        """Build the prompt for one chunk during the map phase.

        Asks for the simplified per-section fields consumed by
        ``_analyze_long_document`` (intent type, description, underlying
        goal, confidence) rather than the full analysis schema.
        """

        context_instruction = f"This is part of a {context_type.value} communication."
        focus_instruction = f" Focus especially on {focus}." if focus else ""

        return f"""Analyze the intents in this section of a larger text (Part {chunk_num} of {total_chunks}).

{context_instruction}{focus_instruction}

Text section:
{chunk}

Identify:
- The primary intent type in this section
- A brief description of what the person wants
- The underlying goal they're trying to achieve
- Your confidence in this assessment

Keep the analysis focused on this section while considering it's part of a larger communication."""
569
+
570
    def _build_final_combination_prompt(
        self,
        combined_analyses: str,
        context_type: IntentContext,
        depth: IntentDepth,
        focus: Optional[str],
        original_length: int
    ) -> str:
        """Build the reduce-phase prompt that merges per-chunk analyses.

        Args:
            combined_analyses: Plain-text summary of all section analyses.
            context_type: Context framing for the synthesis.
            depth: Depth directive for the synthesis.
            focus: Optional focus area carried through from the caller.
            original_length: Character count of the original document,
                surfaced to the model as scale context.
        """

        # Reduce-phase framing, one sentence per IntentContext member.
        context_instructions = {
            IntentContext.STANDALONE: "Synthesize the intents from this independent communication.",
            IntentContext.CONVERSATIONAL: "Combine the intents from this conversational exchange.",
            IntentContext.DOCUMENT: "Analyze the overall intents from this formal document.",
            IntentContext.INTERACTIVE: "Synthesize the intents from this interactive communication."
        }

        # Reduce-phase depth directives, one per IntentDepth member.
        depth_instructions = {
            IntentDepth.SURFACE: "Focus on the most obvious intentions across all sections.",
            IntentDepth.UNDERLYING: "Identify the deeper motivations and hidden goals throughout the text.",
            IntentDepth.COMPREHENSIVE: "Provide a complete psychological analysis of all conscious and subconscious intentions."
        }

        focus_instruction = ""
        if focus:
            focus_instruction = f" Pay special attention to {focus} throughout the analysis."

        # Note: {{}} renders as literal {} in this f-string.
        return f"""Combine these section analyses into a comprehensive intent analysis of the complete communication.

{context_instructions[context_type]}
{depth_instructions[depth]}{focus_instruction}

Section analyses:
{combined_analyses}

Requirements:
- Synthesize information from all sections into a coherent understanding
- Identify the overarching primary intent and up to 3 secondary intents
- Determine the complexity and layering of intentions throughout
- Consider how different sections contribute to the overall goals
- The original text had approximately {original_length} characters
- Provide confidence scores and contextual factors
- Suggest an appropriate response approach

Create a unified intent analysis that captures the complete communication's purposes and motivations.

CRITICAL JSON FORMAT REQUIREMENTS:
- Respond with ONLY valid JSON - no other text before or after
- Use double quotes for all strings and keys

- All field names must match exactly: primary_intent, secondary_intents, intent_complexity, etc.
- Arrays must use square brackets [], objects must use curly braces {{}}
- No trailing commas, no comments, pure JSON only"""
623
+
624
+ def analyze_conversation_intents(
625
+ self,
626
+ messages: List[dict],
627
+ focus_participant: Optional[str] = None,
628
+ depth: IntentDepth = IntentDepth.UNDERLYING
629
+ ) -> Dict[str, IntentAnalysisOutput]:
630
+ """
631
+ Specialized method for analyzing intents in conversation history
632
+
633
+ Args:
634
+ messages: List of message dicts with 'role' and 'content' keys
635
+ focus_participant: Optional role to focus analysis on (e.g., "user", "assistant")
636
+ depth: Depth of intent analysis to perform
637
+
638
+ Returns:
639
+ Dict mapping participant roles to their intent analyses (including deception assessment)
640
+
641
+ Example:
642
+ >>> analyzer = BasicIntentAnalyzer()
643
+ >>> messages = [
644
+ ... {"role": "user", "content": "I'm having trouble with my code..."},
645
+ ... {"role": "assistant", "content": "I'd be happy to help..."},
646
+ ... {"role": "user", "content": "Actually, never mind, I figured it out."}
647
+ ... ]
648
+ >>> results = analyzer.analyze_conversation_intents(messages, focus_participant="user")
649
+ """
650
+ # Group messages by participant
651
+ participant_messages = {}
652
+ for msg in messages:
653
+ role = msg.get('role', 'unknown')
654
+ content = msg.get('content', '').strip()
655
+
656
+ if not content:
657
+ continue
658
+
659
+ if role not in participant_messages:
660
+ participant_messages[role] = []
661
+ participant_messages[role].append(content)
662
+
663
+ # Analyze intents for each participant (or just the focused one)
664
+ results = {}
665
+
666
+ participants_to_analyze = [focus_participant] if focus_participant else list(participant_messages.keys())
667
+
668
+ for role in participants_to_analyze:
669
+ if role not in participant_messages:
670
+ continue
671
+
672
+ # Combine all messages from this participant
673
+ combined_text = "\n\n".join(participant_messages[role])
674
+
675
+ logger.debug("Analyzing conversation intents for participant",
676
+ participant=role,
677
+ message_count=len(participant_messages[role]),
678
+ text_length=len(combined_text))
679
+
680
+ # Analyze with conversational context (deception analysis always included)
681
+ analysis = self.analyze_intent(
682
+ combined_text,
683
+ context_type=IntentContext.CONVERSATIONAL,
684
+ depth=depth,
685
+ focus=f"{role} intentions and goals in this conversation"
686
+ )
687
+
688
+ results[role] = analysis
689
+
690
+ return results