abstractcore 2.5.0__py3-none-any.whl → 2.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/__init__.py +12 -0
- abstractcore/apps/__main__.py +8 -1
- abstractcore/apps/deepsearch.py +644 -0
- abstractcore/apps/intent.py +614 -0
- abstractcore/architectures/detection.py +250 -4
- abstractcore/assets/architecture_formats.json +14 -1
- abstractcore/assets/model_capabilities.json +583 -44
- abstractcore/compression/__init__.py +29 -0
- abstractcore/compression/analytics.py +420 -0
- abstractcore/compression/cache.py +250 -0
- abstractcore/compression/config.py +279 -0
- abstractcore/compression/exceptions.py +30 -0
- abstractcore/compression/glyph_processor.py +381 -0
- abstractcore/compression/optimizer.py +388 -0
- abstractcore/compression/orchestrator.py +380 -0
- abstractcore/compression/pil_text_renderer.py +818 -0
- abstractcore/compression/quality.py +226 -0
- abstractcore/compression/text_formatter.py +666 -0
- abstractcore/compression/vision_compressor.py +371 -0
- abstractcore/config/main.py +66 -1
- abstractcore/config/manager.py +111 -5
- abstractcore/core/session.py +105 -5
- abstractcore/events/__init__.py +1 -1
- abstractcore/media/auto_handler.py +312 -18
- abstractcore/media/handlers/local_handler.py +14 -2
- abstractcore/media/handlers/openai_handler.py +62 -3
- abstractcore/media/processors/__init__.py +11 -1
- abstractcore/media/processors/direct_pdf_processor.py +210 -0
- abstractcore/media/processors/glyph_pdf_processor.py +227 -0
- abstractcore/media/processors/image_processor.py +7 -1
- abstractcore/media/processors/text_processor.py +18 -3
- abstractcore/media/types.py +164 -7
- abstractcore/processing/__init__.py +5 -1
- abstractcore/processing/basic_deepsearch.py +2173 -0
- abstractcore/processing/basic_intent.py +690 -0
- abstractcore/providers/__init__.py +18 -0
- abstractcore/providers/anthropic_provider.py +29 -2
- abstractcore/providers/base.py +279 -6
- abstractcore/providers/huggingface_provider.py +658 -27
- abstractcore/providers/lmstudio_provider.py +52 -2
- abstractcore/providers/mlx_provider.py +103 -4
- abstractcore/providers/model_capabilities.py +352 -0
- abstractcore/providers/ollama_provider.py +44 -6
- abstractcore/providers/openai_provider.py +29 -2
- abstractcore/providers/registry.py +91 -19
- abstractcore/server/app.py +91 -81
- abstractcore/structured/handler.py +161 -1
- abstractcore/tools/common_tools.py +98 -3
- abstractcore/utils/__init__.py +4 -1
- abstractcore/utils/cli.py +114 -1
- abstractcore/utils/trace_export.py +287 -0
- abstractcore/utils/version.py +1 -1
- abstractcore/utils/vlm_token_calculator.py +655 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/METADATA +140 -23
- abstractcore-2.5.3.dist-info/RECORD +107 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/entry_points.txt +4 -0
- abstractcore-2.5.0.dist-info/RECORD +0 -86
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/WHEEL +0 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,614 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
AbstractCore Intent Analyzer CLI Application
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
python -m abstractcore.apps.intent <file_path_or_text> [options]
|
|
7
|
+
|
|
8
|
+
Options:
|
|
9
|
+
--context <context> Context type (standalone, conversational, document, interactive, default: standalone)
|
|
10
|
+
--depth <depth> Analysis depth (surface, underlying, comprehensive, default: underlying)
|
|
11
|
+
--focus <focus> Specific focus area for intent analysis (e.g., "business motivations", "emotional drivers")
|
|
12
|
+
--format <format> Output format (json, yaml, plain, default: json)
|
|
13
|
+
--output <output> Output file path (optional, prints to console if not provided)
|
|
14
|
+
--chunk-size <size> Chunk size in characters (default: 8000, max: 32000)
|
|
15
|
+
--provider <provider> LLM provider (requires --model)
|
|
16
|
+
--model <model> LLM model (requires --provider)
|
|
17
|
+
--max-tokens <tokens> Maximum total tokens for LLM context (default: 32000)
|
|
18
|
+
--max-output-tokens <tokens> Maximum tokens for LLM output generation (default: 8000)
|
|
19
|
+
--conversation-mode Analyze as conversation (expects multiple messages)
|
|
20
|
+
--focus-participant <role> In conversation mode, focus on specific participant (user, assistant, etc.)
|
|
21
|
+
--verbose Show detailed progress information
|
|
22
|
+
--timeout <seconds> HTTP timeout for LLM providers (default: 300)
|
|
23
|
+
--help Show this help message
|
|
24
|
+
|
|
25
|
+
Note: Deception analysis based on psychological markers is always included in intent analysis.
|
|
26
|
+
|
|
27
|
+
Examples:
|
|
28
|
+
# Single text analysis
|
|
29
|
+
python -m abstractcore.apps.intent "I was wondering if you could help me understand this concept?"
|
|
30
|
+
python -m abstractcore.apps.intent document.txt --depth comprehensive --verbose
|
|
31
|
+
|
|
32
|
+
# Conversation analysis
|
|
33
|
+
python -m abstractcore.apps.intent conversation.txt --conversation-mode --focus-participant user
|
|
34
|
+
|
|
35
|
+
# Advanced options
|
|
36
|
+
python -m abstractcore.apps.intent email.txt --context document --focus "business objectives" --output analysis.json
|
|
37
|
+
python -m abstractcore.apps.intent chat.txt --context conversational --depth surface --format plain
|
|
38
|
+
python -m abstractcore.apps.intent query.txt --provider openai --model gpt-4o-mini --depth comprehensive
|
|
39
|
+
|
|
40
|
+
# Comprehensive psychological analysis (includes deception assessment)
|
|
41
|
+
python -m abstractcore.apps.intent suspicious_message.txt --depth comprehensive
|
|
42
|
+
"""
|
|
43
|
+
|
|
44
|
+
import argparse
|
|
45
|
+
import sys
|
|
46
|
+
import time
|
|
47
|
+
import json
|
|
48
|
+
import yaml
|
|
49
|
+
from pathlib import Path
|
|
50
|
+
from typing import Optional, Dict, Any
|
|
51
|
+
|
|
52
|
+
from ..processing import BasicIntentAnalyzer, IntentContext, IntentDepth
|
|
53
|
+
from ..core.factory import create_llm
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def get_app_defaults(app_name: str) -> tuple[str, str]:
    """Return the (provider, model) pair configured for *app_name*.

    Prefers the user's configuration manager; if the config subsystem is
    unavailable for any reason, falls back to a built-in default.
    """
    fallback = ('huggingface', 'unsloth/Qwen3-4B-Instruct-2507-GGUF')
    try:
        from ..config import get_config_manager
        return get_config_manager().get_app_default(app_name)
    except Exception:
        # Config layer missing or broken. Every known app (summarizer,
        # extractor, judge, intent, cli) currently shares the same built-in
        # default, and unknown app names receive it too — identical to the
        # previous per-app dict lookup with a .get() default.
        return fallback
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def read_session_file(file_path: str) -> list[dict]:
    """
    Read and parse a BasicSession JSON file.

    Handles both the new archive format (top-level ``schema_version`` plus
    ``messages``) and the legacy format (a plain ``messages`` key, which may
    be missing entirely, yielding an empty list).

    Args:
        file_path: Path to the session JSON file

    Returns:
        List of ``{"role": ..., "content": ...}`` message dictionaries.

    Raises:
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If a message entry lacks "role" or "content" — callers
            rely on this to detect JSON files that are not session files.
    """
    # json is imported at module level; the previous function-local
    # `import json` was redundant and has been removed.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # New archive format is identified by an explicit schema_version marker;
    # anything else is treated as legacy (or raw) message data.
    if "schema_version" in data and "messages" in data:
        messages_data = data["messages"]
    else:
        messages_data = data.get("messages", [])

    # Normalize to the simple role/content shape the intent analyzer expects.
    # Strict indexing (not .get) is intentional: a malformed entry raises
    # KeyError, which the CLI catches to fall back to plain-text handling.
    return [
        {"role": msg["role"], "content": msg["content"]}
        for msg in messages_data
    ]
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def read_file_content(file_path: str) -> str:
    """
    Read content from various file types.

    Attempts a strict UTF-8 decode first, then a short list of legacy
    single-byte encodings; as a last resort, decodes the raw bytes as UTF-8
    with undecodable bytes dropped.

    NOTE(review): 'latin1' maps every byte value, so it cannot raise
    UnicodeDecodeError — the cp1252/iso-8859-1 attempts and the binary
    fallback are effectively unreachable. Preserved as-is for safety.

    Args:
        file_path: Path to the file to read

    Returns:
        File content as string

    Raises:
        FileNotFoundError: If file doesn't exist
        ValueError: If the path exists but is not a regular file
        Exception: If file cannot be read
    """
    path = Path(file_path)

    if not path.exists():
        raise FileNotFoundError(f"File not found: {file_path}")
    if not path.is_file():
        raise ValueError(f"Path is not a file: {file_path}")

    # Strict text decodes, in order of preference.
    for encoding in ('utf-8', 'latin1', 'cp1252', 'iso-8859-1'):
        try:
            with open(path, 'r', encoding=encoding) as handle:
                return handle.read()
        except UnicodeDecodeError:
            continue

    # Last resort: lossy decode of the raw bytes.
    try:
        with open(path, 'rb') as handle:
            return handle.read().decode('utf-8', errors='ignore')
    except Exception as e:
        raise Exception(f"Cannot read file {file_path}: {e}")
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def parse_conversation_text(text: str) -> list[dict]:
    """
    Parse conversation text into message format.

    Accepted role markers (matched case-insensitively):
    - "USER: message\nASSISTANT: response\n..."
    - "user: message\nassistant: response\n..."
    - "[USER]: message\n[ASSISTANT]: response\n..."

    Lines without a marker continue the current message; any text before
    the first marker is discarded.
    """
    # Matching is case-insensitive, so a single uppercase spelling per
    # marker covers all case variants.
    role_prefixes = (
        'USER:', 'ASSISTANT:', 'SYSTEM:',
        '[USER]:', '[ASSISTANT]:', '[SYSTEM]:',
    )

    messages: list[dict] = []
    active_role = None
    buffer: list[str] = []

    def flush() -> None:
        # Emit the accumulated message, if any content was collected.
        if active_role and buffer:
            messages.append({
                'role': active_role,
                'content': '\n'.join(buffer).strip(),
            })

    for raw_line in text.strip().split('\n'):
        stripped = raw_line.strip()
        if not stripped:
            continue

        # First marker (in declaration order) that this line starts with.
        matched = next(
            (p for p in role_prefixes if stripped.upper().startswith(p)),
            None,
        )

        if matched is not None:
            flush()
            active_role = (
                matched.replace(':', '').replace('[', '').replace(']', '').lower()
            )
            remainder = stripped[len(matched):].strip()
            buffer = [remainder] if remainder else []
        elif active_role:
            # Continuation line of the message in progress.
            buffer.append(stripped)

    flush()
    return messages
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def format_intent_output(result, format_type: str, conversation_mode: bool = False, analysis_time: float = None) -> str:
    """Format intent analysis output for display.

    Args:
        result: A single analysis object (anything with an optional
            ``.dict()`` method, e.g. a pydantic model) or, in conversation
            mode, a dict mapping participant name -> analysis.
        format_type: One of "json", "yaml" or "plain".
        conversation_mode: True when ``result`` maps participants to analyses.
        analysis_time: Optional wall-clock duration in seconds, surfaced only
            by the "plain" format.

    Returns:
        The formatted output as a single string.

    Raises:
        ValueError: If ``format_type`` is not one of the supported formats.
    """

    def _as_data(obj):
        # Pydantic models expose .dict(); plain dicts/values pass through.
        return obj.dict() if hasattr(obj, 'dict') else obj

    def _yaml_safe(obj):
        # BUGFIX: yaml.dump has no json-style `default=` hook — passing
        # default=str raised TypeError on every --format yaml run. Coerce
        # non-serializable values (enums, datetimes, models) to strings via
        # a JSON round-trip instead.
        return json.loads(json.dumps(obj, default=str))

    if format_type == "json":
        if conversation_mode and isinstance(result, dict):
            # Multiple participants
            return json.dumps(result, indent=2, default=str)
        # Single analysis
        return json.dumps(_as_data(result), indent=2, default=str)

    elif format_type == "yaml":
        if conversation_mode and isinstance(result, dict):
            # Multiple participants
            return yaml.dump(_yaml_safe(result), default_flow_style=False)
        # Single analysis
        return yaml.dump(_yaml_safe(_as_data(result)), default_flow_style=False)

    elif format_type == "plain":
        output_lines = []

        if conversation_mode and isinstance(result, dict):
            # One section per participant.
            output_lines.append("🎯 CONVERSATION INTENT ANALYSIS")
            output_lines.append("=" * 50)
            for participant, analysis in result.items():
                output_lines.append(f"\n👤 PARTICIPANT: {participant.upper()}")
                output_lines.append("-" * 30)
                output_lines.extend(_format_single_analysis_plain(analysis, analysis_time))
        else:
            # Single analysis
            output_lines.append("🎯 INTENT ANALYSIS")
            output_lines.append("=" * 40)
            output_lines.extend(_format_single_analysis_plain(result, analysis_time))

        return "\n".join(output_lines)

    else:
        raise ValueError(f"Unknown format: {format_type}")
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def _format_single_analysis_plain(analysis, analysis_time: Optional[float] = None) -> list[str]:
    """Format a single intent analysis in plain text.

    Args:
        analysis: An intent-analysis result exposing ``primary_intent``,
            ``secondary_intents``, complexity/confidence scores, enum-valued
            ``analysis_depth``/``context_type``, ``contextual_factors`` and
            ``suggested_response_approach`` (assumed to be the object
            produced by BasicIntentAnalyzer — TODO confirm).
        analysis_time: Optional wall-clock duration in seconds; appended to
            the metadata section only when provided.

    Returns:
        The formatted report as a list of text lines (no trailing newlines);
        callers join them with "\\n".
    """
    lines = []

    # Primary Intent — enum values like "information_seeking" render as
    # "Information Seeking".
    lines.append(f"\n🎯 PRIMARY INTENT: {analysis.primary_intent.intent_type.value.replace('_', ' ').title()}")
    lines.append(f" Description: {analysis.primary_intent.description}")
    lines.append(f" Underlying Goal: {analysis.primary_intent.underlying_goal}")
    lines.append(f" Emotional Undertone: {analysis.primary_intent.emotional_undertone}")
    lines.append(f" Confidence: {analysis.primary_intent.confidence:.2f}")
    lines.append(f" Urgency Level: {analysis.primary_intent.urgency_level:.2f}")

    # Deception Analysis for Primary Intent (always included when present)
    if analysis.primary_intent.deception_analysis:
        deception = analysis.primary_intent.deception_analysis
        lines.append(f"\n🔍 DECEPTION ANALYSIS:")
        lines.append(f" Deception Likelihood: {deception.deception_likelihood:.2f}")
        lines.append(f" Narrative Consistency: {deception.narrative_consistency:.2f}")
        lines.append(f" Temporal Coherence: {deception.temporal_coherence:.2f}")
        lines.append(f" Emotional Congruence: {deception.emotional_congruence:.2f}")

        if deception.linguistic_markers:
            lines.append(f" Linguistic Markers: {', '.join(deception.linguistic_markers)}")

        if deception.deception_evidence:
            lines.append(f" Evidence Indicating Deception:")
            for evidence in deception.deception_evidence:
                lines.append(f" • {evidence}")

        if deception.authenticity_evidence:
            lines.append(f" Evidence Indicating Authenticity:")
            for evidence in deception.authenticity_evidence:
                lines.append(f" • {evidence}")

    # Secondary Intents — condensed: goal, confidence, and an abbreviated
    # deception summary per intent.
    if analysis.secondary_intents:
        lines.append(f"\n🔄 SECONDARY INTENTS ({len(analysis.secondary_intents)}):")
        for i, intent in enumerate(analysis.secondary_intents, 1):
            lines.append(f" {i}. {intent.intent_type.value.replace('_', ' ').title()}")
            lines.append(f" Goal: {intent.underlying_goal}")
            lines.append(f" Confidence: {intent.confidence:.2f}")

            # Deception analysis for secondary intents (always included)
            if intent.deception_analysis:
                deception = intent.deception_analysis
                lines.append(f" Deception Likelihood: {deception.deception_likelihood:.2f}")
                if deception.linguistic_markers:
                    lines.append(f" Linguistic Markers: {', '.join(deception.linguistic_markers[:2])}")  # Limit for brevity

    # Analysis Metadata
    lines.append(f"\n📊 ANALYSIS METADATA:")
    lines.append(f" Intent Complexity: {analysis.intent_complexity:.2f}")
    lines.append(f" Overall Confidence: {analysis.overall_confidence:.2f}")
    lines.append(f" Words Analyzed: {analysis.word_count_analyzed:,}")
    lines.append(f" Analysis Depth: {analysis.analysis_depth.value.title()}")
    lines.append(f" Context Type: {analysis.context_type.value.title()}")
    if analysis_time is not None:
        lines.append(f" Analysis Time: {analysis_time:.1f}s")

    # Contextual Factors
    if analysis.contextual_factors:
        lines.append(f"\n🌍 CONTEXTUAL FACTORS:")
        for factor in analysis.contextual_factors:
            lines.append(f" • {factor}")

    # Response Approach
    lines.append(f"\n💡 SUGGESTED RESPONSE APPROACH:")
    lines.append(f" {analysis.suggested_response_approach}")

    return lines
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def main() -> None:
    """Command-line entry point for the intent analyzer.

    Flow: parse arguments → validate → load input (direct text, text file,
    or session JSON, which auto-enables conversation mode) → resolve LLM
    provider/model (explicit flags or app defaults) → run single-text or
    conversation analysis → format and write the result to stdout or
    ``--output``.

    Exits with status 1 on invalid arguments, empty input, unparseable
    conversation text, user interrupt, or any analysis error.
    """
    parser = argparse.ArgumentParser(
        description="AbstractCore Intent Analyzer - Identify and analyze intents behind text",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s "I was wondering if you could help me understand this concept?"
  %(prog)s document.txt --depth comprehensive --verbose
  %(prog)s conversation.txt --conversation-mode --focus-participant user
  %(prog)s email.txt --context document --focus "business objectives" --output analysis.json
        """
    )

    # Required argument
    parser.add_argument(
        'input',
        help='Text to analyze or file path containing text'
    )

    # Analysis configuration
    parser.add_argument(
        '--context',
        choices=['standalone', 'conversational', 'document', 'interactive'],
        default='standalone',
        help='Context type for analysis (default: standalone)'
    )

    parser.add_argument(
        '--depth',
        choices=['surface', 'underlying', 'comprehensive'],
        default='underlying',
        help='Analysis depth (default: underlying)'
    )

    parser.add_argument(
        '--focus',
        type=str,
        help='Specific focus area for intent analysis (e.g., "business motivations", "emotional drivers")'
    )

    # Conversation mode
    parser.add_argument(
        '--conversation-mode',
        action='store_true',
        help='Analyze as conversation with multiple participants'
    )

    parser.add_argument(
        '--focus-participant',
        type=str,
        help='In conversation mode, focus on specific participant (user, assistant, etc.)'
    )

    # Output options
    parser.add_argument(
        '--format',
        choices=['json', 'yaml', 'plain'],
        default='json',
        help='Output format (default: json)'
    )

    parser.add_argument(
        '--output',
        type=str,
        help='Output file path (optional, prints to console if not provided)'
    )

    # Processing options
    parser.add_argument(
        '--chunk-size',
        type=int,
        default=8000,
        help='Chunk size in characters for long documents (default: 8000, max: 32000)'
    )

    # LLM configuration
    parser.add_argument(
        '--provider',
        type=str,
        help='LLM provider (requires --model)'
    )

    parser.add_argument(
        '--model',
        type=str,
        help='LLM model (requires --provider)'
    )

    parser.add_argument(
        '--max-tokens',
        type=int,
        default=32000,
        help='Maximum total tokens for LLM context (default: 32000)'
    )

    parser.add_argument(
        '--max-output-tokens',
        type=int,
        default=8000,
        help='Maximum tokens for LLM output generation (default: 8000)'
    )

    # Other options
    parser.add_argument(
        '--verbose',
        action='store_true',
        help='Show detailed progress information'
    )

    parser.add_argument(
        '--debug',
        action='store_true',
        help='Show debug information including raw LLM responses and JSON parsing details'
    )

    parser.add_argument(
        '--timeout',
        type=int,
        default=300,
        help='HTTP timeout for LLM providers in seconds (default: 300)'
    )

    args = parser.parse_args()

    # Validate arguments
    if args.chunk_size > 32000:
        print("❌ Error: chunk-size cannot exceed 32000 characters")
        sys.exit(1)

    # Provider and model must be given as a pair (or neither).
    if (args.provider and not args.model) or (args.model and not args.provider):
        print("❌ Error: Both --provider and --model must be specified together")
        sys.exit(1)

    # Note: We'll validate focus_participant after we potentially auto-enable conversation_mode for session files

    try:
        # Determine if input is a file or direct text
        input_text = ""
        messages_from_session = None

        if Path(args.input).exists():
            file_path = Path(args.input)
            if args.verbose:
                print(f"📖 Reading file: {args.input}")

            # Check if it's a session JSON file
            if file_path.suffix.lower() == '.json':
                try:
                    # Try to read as session file first
                    messages_from_session = read_session_file(args.input)
                    if args.verbose:
                        print(f"📋 Detected session file with {len(messages_from_session)} messages")
                        print("🔄 Automatically enabling conversation mode")
                    args.conversation_mode = True  # Auto-enable conversation mode for session files
                except (json.JSONDecodeError, KeyError):
                    # If it fails, fall back to reading as text
                    # (read_session_file raises KeyError for non-session JSON)
                    if args.verbose:
                        print("📄 JSON file doesn't appear to be a session file, reading as text")
                    input_text = read_file_content(args.input)
            else:
                # Regular text file
                input_text = read_file_content(args.input)
        else:
            # Treat as direct text input
            input_text = args.input

        # Validate we have content to analyze
        if not messages_from_session and not input_text.strip():
            print("❌ Error: No text content to analyze")
            sys.exit(1)

        if args.verbose and input_text:
            print(f"📝 Text length: {len(input_text)} characters")

        # Now validate focus_participant after potentially auto-enabling conversation_mode
        if args.focus_participant and not args.conversation_mode:
            print("❌ Error: --focus-participant requires --conversation-mode")
            sys.exit(1)

        # Get LLM configuration
        if args.provider and args.model:
            provider = args.provider
            model = args.model
        else:
            # Use app defaults
            provider, model = get_app_defaults('intent')

        if args.verbose:
            print(f"🤖 Using LLM: {provider}/{model}")

        # Create LLM instance
        llm = create_llm(
            provider=provider,
            model=model,
            max_tokens=args.max_tokens,
            max_output_tokens=args.max_output_tokens,
            timeout=args.timeout
        )

        # Create intent analyzer
        analyzer = BasicIntentAnalyzer(
            llm=llm,
            max_chunk_size=args.chunk_size,
            max_tokens=args.max_tokens,
            max_output_tokens=args.max_output_tokens,
            timeout=args.timeout,
            debug=args.debug
        )

        # Convert string enums
        # NOTE(review): context_type is only passed to analyze_intent below,
        # so --context has no effect in conversation mode.
        context_type = IntentContext(args.context)
        depth = IntentDepth(args.depth)

        # Perform analysis
        start_time = time.time()

        if args.conversation_mode:
            if args.verbose:
                print("🗣️ Analyzing conversation intents...")

            # Use messages from session file or parse from text
            if messages_from_session:
                messages = messages_from_session
            else:
                # Parse conversation from text
                messages = parse_conversation_text(input_text)

            if not messages:
                print("❌ Error: Could not parse conversation format. Expected format:")
                print("USER: message\\nASSISTANT: response\\n...")
                sys.exit(1)

            if args.verbose:
                print(f"📋 Parsed {len(messages)} messages")

            # Analyze conversation intents (deception analysis always included)
            result = analyzer.analyze_conversation_intents(
                messages=messages,
                focus_participant=args.focus_participant,
                depth=depth
            )
        else:
            if args.verbose:
                print("🎯 Analyzing text intents...")

            # Analyze single text (deception analysis always included)
            result = analyzer.analyze_intent(
                text=input_text,
                context_type=context_type,
                depth=depth,
                focus=args.focus
            )

        analysis_time = time.time() - start_time

        if args.verbose:
            print(f"✅ Analysis completed in {analysis_time:.1f} seconds")

        # Format output
        formatted_output = format_intent_output(result, args.format, args.conversation_mode, analysis_time)

        # Save or print result
        if args.output:
            output_path = Path(args.output)
            # Create parent directories so --output deep/path/file.json works.
            output_path.parent.mkdir(parents=True, exist_ok=True)

            with open(output_path, 'w', encoding='utf-8') as f:
                f.write(formatted_output)

            if args.verbose:
                print(f"💾 Results saved to: {args.output}")
        else:
            print(formatted_output)

    except KeyboardInterrupt:
        print("\n❌ Analysis interrupted by user")
        sys.exit(1)
    except Exception as e:
        # Top-level boundary: report and exit non-zero; full traceback only
        # when the user asked for --verbose.
        print(f"❌ Error during intent analysis: {e}")
        if args.verbose:
            import traceback
            traceback.print_exc()
        sys.exit(1)
|
|
611
|
+
|
|
612
|
+
|
|
613
|
+
# Script entry point: run the CLI when executed directly
# (e.g. ``python -m abstractcore.apps.intent``).
if __name__ == "__main__":
    main()
|