abstractcore 2.5.0__py3-none-any.whl → 2.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/apps/__main__.py +8 -1
- abstractcore/apps/deepsearch.py +644 -0
- abstractcore/apps/intent.py +614 -0
- abstractcore/assets/model_capabilities.json +50 -34
- abstractcore/config/main.py +2 -1
- abstractcore/config/manager.py +11 -0
- abstractcore/core/session.py +46 -1
- abstractcore/processing/__init__.py +5 -1
- abstractcore/processing/basic_deepsearch.py +2173 -0
- abstractcore/processing/basic_intent.py +690 -0
- abstractcore/providers/anthropic_provider.py +1 -0
- abstractcore/providers/base.py +1 -0
- abstractcore/providers/huggingface_provider.py +95 -4
- abstractcore/providers/lmstudio_provider.py +14 -0
- abstractcore/providers/mlx_provider.py +76 -2
- abstractcore/providers/ollama_provider.py +6 -2
- abstractcore/providers/openai_provider.py +1 -0
- abstractcore/providers/registry.py +6 -6
- abstractcore/structured/handler.py +161 -1
- abstractcore/tools/common_tools.py +98 -3
- abstractcore/utils/cli.py +114 -1
- abstractcore/utils/version.py +1 -1
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.2.dist-info}/METADATA +34 -18
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.2.dist-info}/RECORD +28 -24
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.2.dist-info}/entry_points.txt +4 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.2.dist-info}/WHEEL +0 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.2.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,690 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Basic Intent Analyzer - Identify and analyze intents behind text
|
|
3
|
+
|
|
4
|
+
Demonstrates how to use AbstractCore's infrastructure to create sophisticated
|
|
5
|
+
intent analysis capabilities with minimal complexity.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from enum import Enum
|
|
9
|
+
from typing import List, Optional, Dict, Any
|
|
10
|
+
from pydantic import BaseModel, Field
|
|
11
|
+
|
|
12
|
+
from ..core.interface import AbstractCoreInterface
|
|
13
|
+
from ..core.factory import create_llm
|
|
14
|
+
from ..structured.retry import FeedbackRetry
|
|
15
|
+
from ..utils.structured_logging import get_logger
|
|
16
|
+
|
|
17
|
+
logger = get_logger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class IntentType(Enum):
    """Primary intent categories based on psychological research.

    The first ten members cover everyday communicative goals; the last
    seven capture defensive and strategic motivations added from more
    recent psychological research.
    """

    # --- Everyday communicative goals ---
    INFORMATION_SEEKING = "information_seeking"      # asking questions, requesting data
    INFORMATION_SHARING = "information_sharing"      # providing facts, explanations
    PROBLEM_SOLVING = "problem_solving"              # seeking or offering solutions
    DECISION_MAKING = "decision_making"              # evaluating options, making choices
    PERSUASION = "persuasion"                        # convincing, influencing opinions
    CLARIFICATION = "clarification"                  # seeking or providing clarity
    EMOTIONAL_EXPRESSION = "emotional_expression"    # expressing feelings, reactions
    RELATIONSHIP_BUILDING = "relationship_building"  # social connection, rapport
    INSTRUCTION_GIVING = "instruction_giving"        # teaching, directing actions
    VALIDATION_SEEKING = "validation_seeking"        # seeking approval, confirmation

    # --- Defensive and strategic motivations ---
    FACE_SAVING = "face_saving"                      # protecting self-image, avoiding embarrassment
    BLAME_DEFLECTION = "blame_deflection"            # redirecting responsibility to external factors
    POWER_ASSERTION = "power_assertion"              # establishing dominance or authority
    EMPATHY_SEEKING = "empathy_seeking"              # seeking understanding and emotional support
    CONFLICT_AVOIDANCE = "conflict_avoidance"        # preventing or minimizing confrontation
    TRUST_BUILDING = "trust_building"                # establishing or maintaining credibility
    DECEPTION = "deception"                          # intentional misdirection or false information
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class IntentDepth(Enum):
    """How deeply the analyzer should probe for motivations."""

    SURFACE = "surface"              # obvious, stated intentions only
    UNDERLYING = "underlying"        # hidden motivations and goals
    COMPREHENSIVE = "comprehensive"  # full analysis including subconscious drivers
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class IntentContext(Enum):
    """The communicative setting assumed while analyzing intents."""

    STANDALONE = "standalone"          # single message/text analyzed in isolation
    CONVERSATIONAL = "conversational"  # part of an ongoing dialogue
    DOCUMENT = "document"              # formal document or article
    INTERACTIVE = "interactive"        # real-time interaction context
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class DeceptionIndicators(BaseModel):
    """Deception analysis indicators based on psychological research"""
    # All float scores are constrained to the [0, 1] range via ge/le so the
    # LLM cannot return out-of-range values; evidence lists are capped
    # (max_length) to keep the generated output focused and bounded.
    deception_likelihood: float = Field(description="Likelihood of deceptive intent (0-1)", ge=0, le=1)
    narrative_consistency: float = Field(description="Internal consistency of the narrative (0-1)", ge=0, le=1)
    linguistic_markers: List[str] = Field(description="Specific linguistic indicators of potential deception", max_length=5)
    temporal_coherence: float = Field(description="Logical flow and timing consistency (0-1)", ge=0, le=1)
    emotional_congruence: float = Field(description="Alignment between stated emotions and content (0-1)", ge=0, le=1)
    # The two evidence lists are intentionally symmetric: one argues for
    # deception, the other for authenticity, so downstream consumers can
    # weigh both sides.
    deception_evidence: List[str] = Field(description="Evidence indicating potential deception (contradictions, deflection, inconsistencies)", max_length=3)
    authenticity_evidence: List[str] = Field(description="Evidence indicating authenticity (consistency, accountability, directness)", max_length=3)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class IdentifiedIntent(BaseModel):
    """Single identified intent with details"""
    # intent_type is restricted to the IntentType enum, so the LLM must pick
    # one of the known categories.
    intent_type: IntentType = Field(description="Primary intent category")
    confidence: float = Field(description="Confidence in this intent identification (0-1)", ge=0, le=1)
    description: str = Field(description="Human-readable description of the intent")
    underlying_goal: str = Field(description="What the person ultimately wants to achieve")
    emotional_undertone: str = Field(description="Emotional context or undertone")
    urgency_level: float = Field(description="How urgent or pressing this intent is (0-1)", ge=0, le=1)
    # Deception analysis is a required field (no default), so every
    # identified intent carries a deception assessment.
    deception_analysis: DeceptionIndicators = Field(description="Deception evaluation based on psychological markers - always included in intent analysis")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class LLMIntentOutput(BaseModel):
    """LLM-generated intent analysis output"""
    # This is the raw schema the LLM is asked to fill. It deliberately
    # omits client-side metadata (word count, depth, context) — those are
    # added later when building IntentAnalysisOutput.
    primary_intent: IdentifiedIntent = Field(description="The most prominent intent")
    secondary_intents: List[IdentifiedIntent] = Field(description="Additional intents present", max_length=3)
    intent_complexity: float = Field(description="How complex/layered the intentions are (0-1)", ge=0, le=1)
    contextual_factors: List[str] = Field(description="Important contextual elements affecting intent", max_length=5)
    suggested_response_approach: str = Field(description="How one should respond to these intents")
    overall_confidence: float = Field(description="Overall confidence in the analysis (0-1)", ge=0, le=1)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class IntentAnalysisOutput(BaseModel):
    """Complete intent analysis output with computed metadata"""
    # Superset of LLMIntentOutput: the first six fields mirror the LLM
    # schema, the last three are computed/known client-side and never
    # requested from the model.
    primary_intent: IdentifiedIntent = Field(description="The most prominent intent")
    secondary_intents: List[IdentifiedIntent] = Field(description="Additional intents present", max_length=3)
    intent_complexity: float = Field(description="How complex/layered the intentions are (0-1)", ge=0, le=1)
    contextual_factors: List[str] = Field(description="Important contextual elements affecting intent", max_length=5)
    suggested_response_approach: str = Field(description="How one should respond to these intents")
    overall_confidence: float = Field(description="Overall confidence in the analysis (0-1)", ge=0, le=1)
    # Computed from the input text with len(text.split()) — not trusted
    # from the LLM.
    word_count_analyzed: int = Field(description="Word count of analyzed text (computed client-side)")
    analysis_depth: IntentDepth = Field(description="Depth of analysis performed")
    context_type: IntentContext = Field(description="Context type used for analysis")
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class BasicIntentAnalyzer:
|
|
103
|
+
"""
|
|
104
|
+
Basic Intent Analyzer using zero-shot structured prompting
|
|
105
|
+
|
|
106
|
+
Demonstrates AbstractCore best practices:
|
|
107
|
+
- Structured output with Pydantic validation
|
|
108
|
+
- Clean parameter design
|
|
109
|
+
- Automatic chunking for long documents
|
|
110
|
+
- Provider-agnostic implementation
|
|
111
|
+
- Built-in retry and error handling (inherited from AbstractCore)
|
|
112
|
+
|
|
113
|
+
Optimized defaults (no setup required):
|
|
114
|
+
analyzer = BasicIntentAnalyzer() # Uses gemma3:1b-it-qat, 32k context, 8k chunks
|
|
115
|
+
|
|
116
|
+
Custom setup for different needs:
|
|
117
|
+
llm = create_llm("openai", model="gpt-4o-mini", max_tokens=32000)
|
|
118
|
+
analyzer = BasicIntentAnalyzer(llm, max_chunk_size=15000)
|
|
119
|
+
|
|
120
|
+
Performance considerations:
|
|
121
|
+
- gemma3:1b-it-qat: Fast, cost-effective for basic intent analysis
|
|
122
|
+
- qwen3-coder:30b: Premium quality for complex psychological analysis
|
|
123
|
+
- GPT-4o-mini: Excellent for nuanced intent understanding
|
|
124
|
+
"""
|
|
125
|
+
|
|
126
|
+
def __init__(
|
|
127
|
+
self,
|
|
128
|
+
llm: Optional[AbstractCoreInterface] = None,
|
|
129
|
+
max_chunk_size: int = 8000,
|
|
130
|
+
max_tokens: int = 32000,
|
|
131
|
+
max_output_tokens: int = 8000,
|
|
132
|
+
timeout: Optional[float] = None,
|
|
133
|
+
debug: bool = False
|
|
134
|
+
):
|
|
135
|
+
"""
|
|
136
|
+
Initialize the intent analyzer
|
|
137
|
+
|
|
138
|
+
Args:
|
|
139
|
+
llm: AbstractCore instance (any provider). If None, attempts to create ollama gemma3:1b-it-qat
|
|
140
|
+
max_chunk_size: Maximum characters per chunk for long documents (default 8000)
|
|
141
|
+
max_tokens: Maximum total tokens for LLM context (default 32000)
|
|
142
|
+
max_output_tokens: Maximum tokens for LLM output generation (default 8000)
|
|
143
|
+
timeout: HTTP request timeout in seconds. None for unlimited timeout (default None)
|
|
144
|
+
debug: Enable debug output including raw LLM responses (default False)
|
|
145
|
+
"""
|
|
146
|
+
if llm is None:
|
|
147
|
+
try:
|
|
148
|
+
# Default to gemma3:1b-it-qat with configurable token limits
|
|
149
|
+
self.llm = create_llm("ollama", model="gemma3:1b-it-qat", max_tokens=max_tokens, max_output_tokens=max_output_tokens, timeout=timeout)
|
|
150
|
+
except Exception as e:
|
|
151
|
+
error_msg = (
|
|
152
|
+
f"❌ Failed to initialize default Ollama model 'gemma3:1b-it-qat': {e}\n\n"
|
|
153
|
+
"💡 To use the default model, please:\n"
|
|
154
|
+
" 1. Install Ollama from: https://ollama.com/\n"
|
|
155
|
+
" 2. Download the model: ollama pull gemma3:1b-it-qat\n"
|
|
156
|
+
" 3. Start Ollama service\n\n"
|
|
157
|
+
"🔧 Alternatively, provide a custom LLM instance:\n"
|
|
158
|
+
" from abstractcore import create_llm\n"
|
|
159
|
+
" from abstractcore.processing import BasicIntentAnalyzer\n"
|
|
160
|
+
" \n"
|
|
161
|
+
" # Using OpenAI\n"
|
|
162
|
+
" llm = create_llm('openai', model='gpt-4o-mini')\n"
|
|
163
|
+
" analyzer = BasicIntentAnalyzer(llm)\n"
|
|
164
|
+
" \n"
|
|
165
|
+
" # Using Anthropic\n"
|
|
166
|
+
" llm = create_llm('anthropic', model='claude-3-5-haiku-latest')\n"
|
|
167
|
+
" analyzer = BasicIntentAnalyzer(llm)\n"
|
|
168
|
+
" \n"
|
|
169
|
+
" # Using different Ollama model\n"
|
|
170
|
+
" llm = create_llm('ollama', model='llama3.2:3b')\n"
|
|
171
|
+
" analyzer = BasicIntentAnalyzer(llm)"
|
|
172
|
+
)
|
|
173
|
+
raise RuntimeError(error_msg) from e
|
|
174
|
+
else:
|
|
175
|
+
self.llm = llm
|
|
176
|
+
self.max_chunk_size = max_chunk_size
|
|
177
|
+
self.debug = debug
|
|
178
|
+
|
|
179
|
+
# Default retry strategy with 3 attempts
|
|
180
|
+
self.retry_strategy = FeedbackRetry(max_attempts=3)
|
|
181
|
+
|
|
182
|
+
def analyze_intent(
|
|
183
|
+
self,
|
|
184
|
+
text: str,
|
|
185
|
+
context_type: IntentContext = IntentContext.STANDALONE,
|
|
186
|
+
depth: IntentDepth = IntentDepth.UNDERLYING,
|
|
187
|
+
focus: Optional[str] = None,
|
|
188
|
+
) -> IntentAnalysisOutput:
|
|
189
|
+
"""
|
|
190
|
+
Analyze the intents behind the given text
|
|
191
|
+
|
|
192
|
+
Args:
|
|
193
|
+
text: Text to analyze for intents
|
|
194
|
+
context_type: Type of context for the analysis
|
|
195
|
+
depth: Depth of intent analysis to perform
|
|
196
|
+
focus: Optional specific aspect to focus on (e.g., "business motivations", "emotional drivers")
|
|
197
|
+
|
|
198
|
+
Returns:
|
|
199
|
+
IntentAnalysisOutput: Structured intent analysis with metadata including deception assessment
|
|
200
|
+
|
|
201
|
+
Example:
|
|
202
|
+
>>> from abstractcore import create_llm
|
|
203
|
+
>>> from abstractcore.processing import BasicIntentAnalyzer, IntentContext, IntentDepth
|
|
204
|
+
>>>
|
|
205
|
+
>>> llm = create_llm("openai", model="gpt-4o-mini")
|
|
206
|
+
>>> analyzer = BasicIntentAnalyzer(llm)
|
|
207
|
+
>>>
|
|
208
|
+
>>> result = analyzer.analyze_intent(
|
|
209
|
+
... "I was wondering if you could help me understand how to improve our team's productivity?",
|
|
210
|
+
... context_type=IntentContext.CONVERSATIONAL,
|
|
211
|
+
... depth=IntentDepth.UNDERLYING,
|
|
212
|
+
... focus="management concerns"
|
|
213
|
+
... )
|
|
214
|
+
>>> print(result.primary_intent.intent_type)
|
|
215
|
+
>>> print(f"Confidence: {result.overall_confidence:.2f}")
|
|
216
|
+
"""
|
|
217
|
+
# Handle long documents through chunking
|
|
218
|
+
if self._should_chunk_by_tokens(text):
|
|
219
|
+
logger.info("Using chunked intent analysis for long document",
|
|
220
|
+
text_length=len(text),
|
|
221
|
+
context_type=context_type.value,
|
|
222
|
+
depth=depth.value)
|
|
223
|
+
return self._analyze_long_document(text, context_type, depth, focus)
|
|
224
|
+
else:
|
|
225
|
+
logger.info("Using single-chunk intent analysis",
|
|
226
|
+
text_length=len(text),
|
|
227
|
+
context_type=context_type.value,
|
|
228
|
+
depth=depth.value)
|
|
229
|
+
return self._analyze_single_chunk(text, context_type, depth, focus)
|
|
230
|
+
|
|
231
|
+
def _analyze_single_chunk(
|
|
232
|
+
self,
|
|
233
|
+
text: str,
|
|
234
|
+
context_type: IntentContext,
|
|
235
|
+
depth: IntentDepth,
|
|
236
|
+
focus: Optional[str]
|
|
237
|
+
) -> IntentAnalysisOutput:
|
|
238
|
+
"""Analyze intent for a single chunk of text"""
|
|
239
|
+
|
|
240
|
+
# Build the prompt based on parameters (deception analysis always included)
|
|
241
|
+
prompt = self._build_prompt(text, context_type, depth, focus)
|
|
242
|
+
|
|
243
|
+
if self.debug:
|
|
244
|
+
print(f"\n🔧 DEBUG: Prompt sent to LLM:")
|
|
245
|
+
print("=" * 80)
|
|
246
|
+
print(prompt)
|
|
247
|
+
print("=" * 80)
|
|
248
|
+
|
|
249
|
+
# Use AbstractCore's structured output with retry strategy
|
|
250
|
+
try:
|
|
251
|
+
response = self.llm.generate(prompt, response_model=LLMIntentOutput, retry_strategy=self.retry_strategy)
|
|
252
|
+
except Exception as e:
|
|
253
|
+
if self.debug:
|
|
254
|
+
print(f"\n❌ DEBUG: LLM generation failed with error: {e}")
|
|
255
|
+
# Try to get the raw response if available
|
|
256
|
+
try:
|
|
257
|
+
raw_response = self.llm.generate(prompt)
|
|
258
|
+
print(f"\n🔧 DEBUG: Raw LLM response (without structured output):")
|
|
259
|
+
print("=" * 80)
|
|
260
|
+
print(raw_response.content if hasattr(raw_response, 'content') else str(raw_response))
|
|
261
|
+
print("=" * 80)
|
|
262
|
+
except Exception as raw_e:
|
|
263
|
+
print(f"❌ DEBUG: Could not get raw response: {raw_e}")
|
|
264
|
+
raise
|
|
265
|
+
|
|
266
|
+
# Extract the structured output
|
|
267
|
+
llm_result = None
|
|
268
|
+
if isinstance(response, LLMIntentOutput):
|
|
269
|
+
# When structured output succeeds, response is the LLMIntentOutput object directly
|
|
270
|
+
llm_result = response
|
|
271
|
+
elif hasattr(response, 'structured_output') and response.structured_output:
|
|
272
|
+
# Fallback: check for structured_output attribute
|
|
273
|
+
llm_result = response.structured_output
|
|
274
|
+
else:
|
|
275
|
+
# Debug information for troubleshooting
|
|
276
|
+
error_msg = f"Failed to generate structured intent analysis output. Response type: {type(response)}"
|
|
277
|
+
if hasattr(response, 'content'):
|
|
278
|
+
error_msg += f", Content: {response.content[:200]}..."
|
|
279
|
+
if hasattr(response, 'structured_output'):
|
|
280
|
+
error_msg += f", Structured output: {response.structured_output}"
|
|
281
|
+
raise ValueError(error_msg)
|
|
282
|
+
|
|
283
|
+
# Compute word count (reliable, client-side calculation)
|
|
284
|
+
actual_word_count = len(text.split())
|
|
285
|
+
|
|
286
|
+
# Create complete result with computed metadata
|
|
287
|
+
return IntentAnalysisOutput(
|
|
288
|
+
primary_intent=llm_result.primary_intent,
|
|
289
|
+
secondary_intents=llm_result.secondary_intents,
|
|
290
|
+
intent_complexity=llm_result.intent_complexity,
|
|
291
|
+
contextual_factors=llm_result.contextual_factors,
|
|
292
|
+
suggested_response_approach=llm_result.suggested_response_approach,
|
|
293
|
+
overall_confidence=llm_result.overall_confidence,
|
|
294
|
+
word_count_analyzed=actual_word_count,
|
|
295
|
+
analysis_depth=depth,
|
|
296
|
+
context_type=context_type
|
|
297
|
+
)
|
|
298
|
+
|
|
299
|
+
def _analyze_long_document(
|
|
300
|
+
self,
|
|
301
|
+
text: str,
|
|
302
|
+
context_type: IntentContext,
|
|
303
|
+
depth: IntentDepth,
|
|
304
|
+
focus: Optional[str]
|
|
305
|
+
) -> IntentAnalysisOutput:
|
|
306
|
+
"""
|
|
307
|
+
Handle long documents using map-reduce approach
|
|
308
|
+
|
|
309
|
+
1. Split into chunks with overlap
|
|
310
|
+
2. Analyze intent for each chunk
|
|
311
|
+
3. Combine chunk analyses into final intent analysis
|
|
312
|
+
"""
|
|
313
|
+
|
|
314
|
+
# Split text into overlapping chunks
|
|
315
|
+
chunks = self._split_text_into_chunks(text)
|
|
316
|
+
|
|
317
|
+
logger.debug("Split document into chunks",
|
|
318
|
+
chunk_count=len(chunks),
|
|
319
|
+
avg_chunk_size=sum(len(c) for c in chunks) // len(chunks))
|
|
320
|
+
|
|
321
|
+
if len(chunks) == 1:
|
|
322
|
+
return self._analyze_single_chunk(chunks[0], context_type, depth, focus)
|
|
323
|
+
|
|
324
|
+
# Step 1: Analyze each chunk (Map phase)
|
|
325
|
+
chunk_analyses = []
|
|
326
|
+
for i, chunk in enumerate(chunks):
|
|
327
|
+
chunk_prompt = self._build_chunk_prompt(chunk, context_type, focus, i + 1, len(chunks))
|
|
328
|
+
|
|
329
|
+
# Use a simplified output model for chunks
|
|
330
|
+
class ChunkIntentAnalysis(BaseModel):
|
|
331
|
+
primary_intent_type: str
|
|
332
|
+
intent_description: str
|
|
333
|
+
underlying_goal: str
|
|
334
|
+
confidence: float = Field(ge=0, le=1)
|
|
335
|
+
|
|
336
|
+
response = self.llm.generate(chunk_prompt, response_model=ChunkIntentAnalysis, retry_strategy=self.retry_strategy)
|
|
337
|
+
if isinstance(response, ChunkIntentAnalysis):
|
|
338
|
+
# When structured output succeeds, response is the ChunkIntentAnalysis object directly
|
|
339
|
+
chunk_analyses.append(response)
|
|
340
|
+
elif hasattr(response, 'structured_output') and response.structured_output:
|
|
341
|
+
# Fallback: check for structured_output attribute
|
|
342
|
+
chunk_analyses.append(response.structured_output)
|
|
343
|
+
else:
|
|
344
|
+
# If chunk processing fails, create a fallback analysis
|
|
345
|
+
logger.warning("Chunk intent analysis failed, creating fallback",
|
|
346
|
+
chunk_number=i+1,
|
|
347
|
+
total_chunks=len(chunks))
|
|
348
|
+
chunk_analyses.append(ChunkIntentAnalysis(
|
|
349
|
+
primary_intent_type="information_sharing",
|
|
350
|
+
intent_description=f"Section {i+1} intent analysis unavailable",
|
|
351
|
+
underlying_goal="Content processing failed",
|
|
352
|
+
confidence=0.1
|
|
353
|
+
))
|
|
354
|
+
|
|
355
|
+
# Step 2: Combine chunk analyses (Reduce phase)
|
|
356
|
+
combined_analysis = "\n\n".join([
|
|
357
|
+
f"Section {i+1}:\nIntent Type: {ca.primary_intent_type}\nDescription: {ca.intent_description}\nUnderlying Goal: {ca.underlying_goal}\nConfidence: {ca.confidence:.2f}"
|
|
358
|
+
for i, ca in enumerate(chunk_analyses)
|
|
359
|
+
])
|
|
360
|
+
|
|
361
|
+
# Generate final intent analysis from combined analyses
|
|
362
|
+
final_prompt = self._build_final_combination_prompt(combined_analysis, context_type, depth, focus, len(text))
|
|
363
|
+
|
|
364
|
+
response = self.llm.generate(final_prompt, response_model=LLMIntentOutput, retry_strategy=self.retry_strategy)
|
|
365
|
+
|
|
366
|
+
# Extract the structured output
|
|
367
|
+
llm_result = None
|
|
368
|
+
if isinstance(response, LLMIntentOutput):
|
|
369
|
+
# When structured output succeeds, response is the LLMIntentOutput object directly
|
|
370
|
+
llm_result = response
|
|
371
|
+
elif hasattr(response, 'structured_output') and response.structured_output:
|
|
372
|
+
# Fallback: check for structured_output attribute
|
|
373
|
+
llm_result = response.structured_output
|
|
374
|
+
else:
|
|
375
|
+
# Debug information for troubleshooting
|
|
376
|
+
error_msg = f"Failed to generate final structured intent analysis output. Response type: {type(response)}"
|
|
377
|
+
if hasattr(response, 'content'):
|
|
378
|
+
error_msg += f", Content: {response.content[:200]}..."
|
|
379
|
+
if hasattr(response, 'structured_output'):
|
|
380
|
+
error_msg += f", Structured output: {response.structured_output}"
|
|
381
|
+
raise ValueError(error_msg)
|
|
382
|
+
|
|
383
|
+
# Compute word count (reliable, client-side calculation)
|
|
384
|
+
actual_word_count = len(text.split())
|
|
385
|
+
|
|
386
|
+
# Create complete result with computed metadata
|
|
387
|
+
return IntentAnalysisOutput(
|
|
388
|
+
primary_intent=llm_result.primary_intent,
|
|
389
|
+
secondary_intents=llm_result.secondary_intents,
|
|
390
|
+
intent_complexity=llm_result.intent_complexity,
|
|
391
|
+
contextual_factors=llm_result.contextual_factors,
|
|
392
|
+
suggested_response_approach=llm_result.suggested_response_approach,
|
|
393
|
+
overall_confidence=llm_result.overall_confidence,
|
|
394
|
+
word_count_analyzed=actual_word_count,
|
|
395
|
+
analysis_depth=depth,
|
|
396
|
+
context_type=context_type
|
|
397
|
+
)
|
|
398
|
+
|
|
399
|
+
def _should_chunk_by_tokens(self, text: str) -> bool:
|
|
400
|
+
"""
|
|
401
|
+
Determine if text should be chunked based on token count.
|
|
402
|
+
|
|
403
|
+
Uses centralized TokenUtils for accurate token estimation.
|
|
404
|
+
Falls back to character count if model information unavailable.
|
|
405
|
+
"""
|
|
406
|
+
from ..utils.token_utils import TokenUtils
|
|
407
|
+
|
|
408
|
+
# Get model name from LLM if available
|
|
409
|
+
model_name = None
|
|
410
|
+
if self.llm and hasattr(self.llm, 'model'):
|
|
411
|
+
model_name = self.llm.model
|
|
412
|
+
|
|
413
|
+
# Estimate tokens using centralized utility
|
|
414
|
+
estimated_tokens = TokenUtils.estimate_tokens(text, model_name)
|
|
415
|
+
|
|
416
|
+
# Use a conservative token limit (leaving room for prompt overhead)
|
|
417
|
+
# Most models have 32k+ context nowadays, so 8k tokens for input text is safe
|
|
418
|
+
token_limit = 8000
|
|
419
|
+
|
|
420
|
+
if estimated_tokens > token_limit:
|
|
421
|
+
return True
|
|
422
|
+
|
|
423
|
+
# Fallback to character-based check for very long texts
|
|
424
|
+
return len(text) > self.max_chunk_size
|
|
425
|
+
|
|
426
|
+
def _split_text_into_chunks(self, text: str, overlap: int = 200) -> List[str]:
|
|
427
|
+
"""Split text into overlapping chunks"""
|
|
428
|
+
chunks = []
|
|
429
|
+
start = 0
|
|
430
|
+
|
|
431
|
+
while start < len(text):
|
|
432
|
+
# Calculate end position
|
|
433
|
+
end = start + self.max_chunk_size
|
|
434
|
+
|
|
435
|
+
# If this isn't the last chunk, try to break at a sentence boundary
|
|
436
|
+
if end < len(text):
|
|
437
|
+
# Look for sentence endings near the chunk boundary
|
|
438
|
+
sentence_end = text.rfind('. ', start + self.max_chunk_size - 500, end)
|
|
439
|
+
if sentence_end != -1 and sentence_end > start:
|
|
440
|
+
end = sentence_end + 2 # Include the period and space
|
|
441
|
+
|
|
442
|
+
chunks.append(text[start:end].strip())
|
|
443
|
+
|
|
444
|
+
# Move start position with overlap
|
|
445
|
+
if end >= len(text):
|
|
446
|
+
break
|
|
447
|
+
start = max(start + self.max_chunk_size - overlap, end - overlap)
|
|
448
|
+
|
|
449
|
+
return chunks
|
|
450
|
+
|
|
451
|
+
    def _build_prompt(
        self,
        text: str,
        context_type: IntentContext,
        depth: IntentDepth,
        focus: Optional[str]
    ) -> str:
        """Build the main intent analysis prompt.

        Combines a context instruction, a depth instruction, an optional
        focus directive, and an always-on deception-analysis directive
        around the text to analyze, ending with strict JSON formatting
        requirements for structured output.
        """

        # Context instructions — one framing sentence per IntentContext member.
        context_instructions = {
            IntentContext.STANDALONE: "Analyze this text as an independent piece of communication.",
            IntentContext.CONVERSATIONAL: "Analyze this text as part of an ongoing conversation or dialogue.",
            IntentContext.DOCUMENT: "Analyze this text as part of a formal document or structured content.",
            IntentContext.INTERACTIVE: "Analyze this text as part of a real-time interactive communication."
        }

        # Depth instructions — how far beyond the stated content to probe.
        depth_instructions = {
            IntentDepth.SURFACE: "Focus on the obvious, explicitly stated intentions and goals.",
            IntentDepth.UNDERLYING: "Look beyond surface statements to identify hidden motivations, implicit goals, and underlying psychological drivers.",
            IntentDepth.COMPREHENSIVE: "Provide a complete analysis including conscious intentions, subconscious motivations, emotional drivers, and potential unstated goals."
        }

        # Build focus instruction (empty string when no focus was requested).
        focus_instruction = ""
        if focus:
            focus_instruction = f"\nPay special attention to: {focus}\nEnsure the analysis addresses this focus area thoroughly."

        # Deception analysis is always integrated into intent analysis.
        deception_instruction = """

DECEPTION ANALYSIS: Always evaluate authenticity with healthy skepticism.

Key principles:
1. Check for contradictions within the conversation
2. Ask: "What does this person gain by lying here?"
3. Be suspicious of overly complex explanations for simple failures
4. Confident, detailed stories can mask deception - don't trust them blindly
5. Look for what consequences they're trying to avoid

Red flags:
- Timeline inconsistencies or contradictions
- Elaborate technical excuses for basic mistakes
- Shifting blame to multiple external factors
- Claims that can't be easily verified

For deception analysis, assess:
- Likelihood of deception (0-1 scale)
- What motive exists for lying in this context
- Whether the explanation is unnecessarily complex
- Evidence for/against authenticity

Note: If someone admits past deception to a third party, that admission itself is likely honest."""

        # Assemble the final prompt. The doubled braces {{}} render as
        # literal {} in the JSON-format instructions of this f-string.
        prompt = f"""Analyze the following text to identify and understand the intents, motivations, and goals behind the communication.

{context_instructions[context_type]}
{depth_instructions[depth]}{focus_instruction}{deception_instruction}

Text to analyze:
{text}

Your task is to identify:
1. PRIMARY INTENT: The main purpose or goal behind this communication
2. SECONDARY INTENTS: Additional intentions that may be present (up to 3)
3. UNDERLYING GOALS: What the person ultimately wants to achieve
4. EMOTIONAL UNDERTONES: The emotional context affecting the communication
5. CONTEXTUAL FACTORS: Important situational elements that influence the intents
6. RESPONSE APPROACH: How someone should respond to address these intents effectively

For each intent, consider:
- What type of intent it is (information seeking, problem solving, persuasion, face-saving, deception, etc.)
- How confident you are in identifying this intent
- What emotional undertones are present
- How urgent or pressing this intent seems to be
- What the person's underlying goal really is

Requirements:
- Be precise and avoid speculation beyond what the text supports
- Consider both explicit and implicit intentions
- Evaluate the complexity and layering of multiple intents
- Provide confidence scores for your assessments
- Focus on actionable insights for responding appropriately
- When deception analysis is requested, provide evidence-based psychological assessment

Generate a comprehensive structured analysis of the intents behind this communication.

CRITICAL JSON FORMAT REQUIREMENTS:
- Respond with ONLY valid JSON - no other text before or after
- Use double quotes for all strings and keys

- All field names must match exactly: primary_intent, secondary_intents, intent_complexity, etc.
- Arrays must use square brackets [], objects must use curly braces {{}}
- No trailing commas, no comments, pure JSON only"""

        return prompt
|
|
548
|
+
|
|
549
|
+
def _build_chunk_prompt(self, chunk: str, context_type: IntentContext, focus: Optional[str], chunk_num: int, total_chunks: int) -> str:
|
|
550
|
+
"""Build prompt for individual chunk processing"""
|
|
551
|
+
|
|
552
|
+
context_instruction = f"This is part of a {context_type.value} communication."
|
|
553
|
+
focus_instruction = f" Focus especially on {focus}." if focus else ""
|
|
554
|
+
|
|
555
|
+
return f"""Analyze the intents in this section of a larger text (Part {chunk_num} of {total_chunks}).
|
|
556
|
+
|
|
557
|
+
{context_instruction}{focus_instruction}
|
|
558
|
+
|
|
559
|
+
Text section:
|
|
560
|
+
{chunk}
|
|
561
|
+
|
|
562
|
+
Identify:
|
|
563
|
+
- The primary intent type in this section
|
|
564
|
+
- A brief description of what the person wants
|
|
565
|
+
- The underlying goal they're trying to achieve
|
|
566
|
+
- Your confidence in this assessment
|
|
567
|
+
|
|
568
|
+
Keep the analysis focused on this section while considering it's part of a larger communication."""
|
|
569
|
+
|
|
570
|
+
def _build_final_combination_prompt(
    self,
    combined_analyses: str,
    context_type: IntentContext,
    depth: IntentDepth,
    focus: Optional[str],
    original_length: int
) -> str:
    """Compose the prompt that merges per-chunk analyses into one result.

    Args:
        combined_analyses: Concatenated text of the per-section analyses.
        context_type: Communication context; selects the synthesis instruction.
        depth: Analysis depth; selects how deep the synthesis should go.
        focus: Optional aspect to emphasize; omitted from the prompt if None.
        original_length: Character count of the original (pre-chunking) text,
            quoted in the prompt for scale.

    Returns:
        The fully rendered combination prompt.

    Raises:
        KeyError: If ``context_type`` or ``depth`` is not a recognized enum member.
    """
    focus_note = f" Pay special attention to {focus} throughout the analysis." if focus else ""

    # Direct indexing (not .get) so an unrecognized enum member fails loudly.
    context_line = {
        IntentContext.STANDALONE: "Synthesize the intents from this independent communication.",
        IntentContext.CONVERSATIONAL: "Combine the intents from this conversational exchange.",
        IntentContext.DOCUMENT: "Analyze the overall intents from this formal document.",
        IntentContext.INTERACTIVE: "Synthesize the intents from this interactive communication."
    }[context_type]

    depth_line = {
        IntentDepth.SURFACE: "Focus on the most obvious intentions across all sections.",
        IntentDepth.UNDERLYING: "Identify the deeper motivations and hidden goals throughout the text.",
        IntentDepth.COMPREHENSIVE: "Provide a complete psychological analysis of all conscious and subconscious intentions."
    }[depth]

    return f"""Combine these section analyses into a comprehensive intent analysis of the complete communication.

{context_line}
{depth_line}{focus_note}

Section analyses:
{combined_analyses}

Requirements:
- Synthesize information from all sections into a coherent understanding
- Identify the overarching primary intent and up to 3 secondary intents
- Determine the complexity and layering of intentions throughout
- Consider how different sections contribute to the overall goals
- The original text had approximately {original_length} characters
- Provide confidence scores and contextual factors
- Suggest an appropriate response approach

Create a unified intent analysis that captures the complete communication's purposes and motivations.

CRITICAL JSON FORMAT REQUIREMENTS:
- Respond with ONLY valid JSON - no other text before or after
- Use double quotes for all strings and keys

- All field names must match exactly: primary_intent, secondary_intents, intent_complexity, etc.
- Arrays must use square brackets [], objects must use curly braces {{}}
- No trailing commas, no comments, pure JSON only"""
def analyze_conversation_intents(
    self,
    messages: List[dict],
    focus_participant: Optional[str] = None,
    depth: IntentDepth = IntentDepth.UNDERLYING
) -> Dict[str, IntentAnalysisOutput]:
    """
    Specialized method for analyzing intents in conversation history

    Groups messages by participant role, concatenates each participant's
    contributions, and runs a conversational-context intent analysis per
    participant via analyze_intent().

    Args:
        messages: List of message dicts with 'role' and 'content' keys.
            Messages with missing, None, empty, or whitespace-only content
            are skipped.
        focus_participant: Optional role to focus analysis on (e.g., "user",
            "assistant"). When set, only that role is analyzed; if that role
            has no messages, the result is empty.
        depth: Depth of intent analysis to perform

    Returns:
        Dict mapping participant roles to their intent analyses (including deception assessment)

    Example:
        >>> analyzer = BasicIntentAnalyzer()
        >>> messages = [
        ...     {"role": "user", "content": "I'm having trouble with my code..."},
        ...     {"role": "assistant", "content": "I'd be happy to help..."},
        ...     {"role": "user", "content": "Actually, never mind, I figured it out."}
        ... ]
        >>> results = analyzer.analyze_conversation_intents(messages, focus_participant="user")
    """
    # Group non-empty message contents by participant role.
    participant_messages: Dict[str, List[str]] = {}
    for msg in messages:
        role = msg.get('role', 'unknown')
        # `or ''` guards against a present-but-None 'content' value, which
        # bypasses .get()'s default and would crash on .strip().
        content = (msg.get('content') or '').strip()

        if not content:
            continue

        participant_messages.setdefault(role, []).append(content)

    # Analyze intents for each participant (or just the focused one)
    results: Dict[str, IntentAnalysisOutput] = {}

    participants_to_analyze = [focus_participant] if focus_participant else list(participant_messages.keys())

    for role in participants_to_analyze:
        # Skip roles with no (non-empty) messages, including an absent
        # focus_participant.
        if role not in participant_messages:
            continue

        # Combine all messages from this participant
        combined_text = "\n\n".join(participant_messages[role])

        logger.debug("Analyzing conversation intents for participant",
                    participant=role,
                    message_count=len(participant_messages[role]),
                    text_length=len(combined_text))

        # Analyze with conversational context (deception analysis always included)
        analysis = self.analyze_intent(
            combined_text,
            context_type=IntentContext.CONVERSATIONAL,
            depth=depth,
            focus=f"{role} intentions and goals in this conversation"
        )

        results[role] = analysis

    return results