abstractcore-2.5.0-py3-none-any.whl → abstractcore-2.5.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/__init__.py +12 -0
- abstractcore/apps/__main__.py +8 -1
- abstractcore/apps/deepsearch.py +644 -0
- abstractcore/apps/intent.py +614 -0
- abstractcore/architectures/detection.py +250 -4
- abstractcore/assets/architecture_formats.json +14 -1
- abstractcore/assets/model_capabilities.json +583 -44
- abstractcore/compression/__init__.py +29 -0
- abstractcore/compression/analytics.py +420 -0
- abstractcore/compression/cache.py +250 -0
- abstractcore/compression/config.py +279 -0
- abstractcore/compression/exceptions.py +30 -0
- abstractcore/compression/glyph_processor.py +381 -0
- abstractcore/compression/optimizer.py +388 -0
- abstractcore/compression/orchestrator.py +380 -0
- abstractcore/compression/pil_text_renderer.py +818 -0
- abstractcore/compression/quality.py +226 -0
- abstractcore/compression/text_formatter.py +666 -0
- abstractcore/compression/vision_compressor.py +371 -0
- abstractcore/config/main.py +66 -1
- abstractcore/config/manager.py +111 -5
- abstractcore/core/session.py +105 -5
- abstractcore/events/__init__.py +1 -1
- abstractcore/media/auto_handler.py +312 -18
- abstractcore/media/handlers/local_handler.py +14 -2
- abstractcore/media/handlers/openai_handler.py +62 -3
- abstractcore/media/processors/__init__.py +11 -1
- abstractcore/media/processors/direct_pdf_processor.py +210 -0
- abstractcore/media/processors/glyph_pdf_processor.py +227 -0
- abstractcore/media/processors/image_processor.py +7 -1
- abstractcore/media/processors/text_processor.py +18 -3
- abstractcore/media/types.py +164 -7
- abstractcore/processing/__init__.py +5 -1
- abstractcore/processing/basic_deepsearch.py +2173 -0
- abstractcore/processing/basic_intent.py +690 -0
- abstractcore/providers/__init__.py +18 -0
- abstractcore/providers/anthropic_provider.py +29 -2
- abstractcore/providers/base.py +279 -6
- abstractcore/providers/huggingface_provider.py +658 -27
- abstractcore/providers/lmstudio_provider.py +52 -2
- abstractcore/providers/mlx_provider.py +103 -4
- abstractcore/providers/model_capabilities.py +352 -0
- abstractcore/providers/ollama_provider.py +44 -6
- abstractcore/providers/openai_provider.py +29 -2
- abstractcore/providers/registry.py +91 -19
- abstractcore/server/app.py +91 -81
- abstractcore/structured/handler.py +161 -1
- abstractcore/tools/common_tools.py +98 -3
- abstractcore/utils/__init__.py +4 -1
- abstractcore/utils/cli.py +114 -1
- abstractcore/utils/trace_export.py +287 -0
- abstractcore/utils/version.py +1 -1
- abstractcore/utils/vlm_token_calculator.py +655 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/METADATA +140 -23
- abstractcore-2.5.3.dist-info/RECORD +107 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/entry_points.txt +4 -0
- abstractcore-2.5.0.dist-info/RECORD +0 -86
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/WHEEL +0 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.5.0.dist-info → abstractcore-2.5.3.dist-info}/top_level.txt +0 -0
abstractcore/structured/handler.py
CHANGED

@@ -6,6 +6,7 @@ import json
 import re
 import time
 from typing import Type, Dict, Any, Optional
+from enum import Enum
 from pydantic import BaseModel, ValidationError

 from .retry import FeedbackRetry
@@ -69,6 +70,9 @@ class StructuredOutputHandler:
                                          max_retries=self.retry_strategy.max_attempts)

         try:
+            # Store provider for schema generation
+            self.current_provider = provider
+
             # Strategy 1: Use native support if available
             if self._has_native_support(provider):
                 self.logger.debug("Using native structured output support",
@@ -125,12 +129,44 @@ class StructuredOutputHandler:
         """
         Check if provider has native structured output support.

+        Checks both provider type (Ollama, LMStudio, HuggingFace, MLX with Outlines)
+        and model capabilities configuration as fallback.
+
         Args:
             provider: The LLM provider instance

         Returns:
             True if provider supports native structured outputs
         """
+        # Ollama and LMStudio always support native structured outputs
+        # via the format and response_format parameters respectively
+        provider_name = provider.__class__.__name__
+        if provider_name in ['OllamaProvider', 'LMStudioProvider']:
+            return True
+
+        # HuggingFaceProvider supports native via GGUF or Transformers+Outlines
+        if provider_name == 'HuggingFaceProvider':
+            # Check if it's a GGUF model - these use llama-cpp-python which supports native structured outputs
+            if hasattr(provider, 'model_type') and provider.model_type == 'gguf':
+                return True
+
+            # Check if it's a Transformers model with Outlines available
+            if hasattr(provider, 'model_type') and provider.model_type == 'transformers':
+                try:
+                    import outlines
+                    return True
+                except ImportError:
+                    return False
+
+        # MLXProvider supports native via Outlines
+        if provider_name == 'MLXProvider':
+            try:
+                import outlines
+                return True
+            except ImportError:
+                return False
+
+        # For other providers, check model capabilities
         capabilities = getattr(provider, 'model_capabilities', {})
         return capabilities.get("structured_output") == "native"
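A minimal sketch of the capability fallback at the end of this hunk, using a hypothetical DummyProvider that only mimics the attributes the handler inspects (it is not part of abstractcore):

# Hypothetical provider whose capabilities mark structured output as prompted, not native.
class DummyProvider:
    model_capabilities = {"structured_output": "prompted"}

provider = DummyProvider()
provider_name = provider.__class__.__name__          # 'DummyProvider'
is_native = provider_name in ['OllamaProvider', 'LMStudioProvider']
if not is_native:
    capabilities = getattr(provider, 'model_capabilities', {})
    is_native = capabilities.get("structured_output") == "native"
print(is_native)  # False -> handler falls back to prompted structured output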
@@ -242,6 +278,9 @@ class StructuredOutputHandler:
             # Try parsing the extracted JSON
             try:
                 data = json.loads(json_content)
+                # Preprocess enum responses if we have mappings
+                if hasattr(self, '_enum_mappings') and self._enum_mappings:
+                    data = self._preprocess_enum_response(data, self._enum_mappings)
                 result = response_model.model_validate(data)
             except (json.JSONDecodeError, ValidationError) as parse_error:
                 # Try to fix the JSON
@@ -254,6 +293,9 @@ class StructuredOutputHandler:
             if fixed_json:
                 try:
                     data = json.loads(fixed_json)
+                    # Preprocess enum responses if we have mappings
+                    if hasattr(self, '_enum_mappings') and self._enum_mappings:
+                        data = self._preprocess_enum_response(data, self._enum_mappings)
                     result = response_model.model_validate(data)
                     self.logger.info("JSON self-fix successful", attempt=attempt + 1)
                 except (json.JSONDecodeError, ValidationError) as fix_error:
@@ -350,6 +392,14 @@ class StructuredOutputHandler:
             Enhanced prompt with schema information
         """
         schema = response_model.model_json_schema()
+
+        # For prompted providers, simplify enum schemas to avoid LLM confusion
+        # Store original enum mappings for response preprocessing
+        if hasattr(self, 'current_provider') and not self._has_native_support(self.current_provider):
+            schema, self._enum_mappings = self._simplify_enum_schemas(schema)
+        else:
+            self._enum_mappings = {}
+
         model_name = response_model.__name__

         # Create example from schema
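For context on why this simplification exists: pydantic v2 places enum fields behind $refs into $defs, and prompted models sometimes echo the Python enum notation instead of the plain string value. A small standalone illustration (the Sentiment/Review models are hypothetical, and the exact schema shape varies by pydantic version):

from enum import Enum
from pydantic import BaseModel

class Sentiment(str, Enum):
    positive = "positive"
    negative = "negative"

class Review(BaseModel):
    sentiment: Sentiment

schema = Review.model_json_schema()
print(schema["$defs"]["Sentiment"]["enum"])   # ['positive', 'negative']
print(schema["properties"]["sentiment"])      # a $ref into $defs, which _simplify_enum_schemas inlines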
@@ -432,4 +482,114 @@ Important: Return ONLY the JSON object, no additional text or formatting."""
             return match.group(0)

         # If nothing found, try the original content
-        return content
+        return content
+
+    def _simplify_enum_schemas(self, schema: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Dict[str, str]]]:
+        """
+        Simplify enum schemas for prompted providers while preserving enum mappings.
+
+        Args:
+            schema: Original JSON schema
+
+        Returns:
+            Tuple of (simplified_schema, enum_mappings)
+            enum_mappings maps field_paths to {enum_notation: enum_value}
+        """
+        if '$defs' not in schema:
+            return schema, {}
+
+        # Find enum definitions and build mappings
+        enum_mappings = {}
+        enum_refs_to_simplify = {}
+
+        for def_name, def_schema in schema['$defs'].items():
+            if def_schema.get('type') == 'string' and 'enum' in def_schema:
+                ref_key = f"#/$defs/{def_name}"
+                enum_values = def_schema['enum']
+
+                # Build mapping from Python enum notation to actual values
+                enum_class_name = def_name
+                field_mappings = {}
+                for value in enum_values:
+                    # Map both "EnumClass.VALUE_NAME" and "<EnumClass.VALUE_NAME: 'value'>" patterns
+                    enum_notation = f"{enum_class_name}.{value.upper().replace(' ', '_')}"
+                    field_mappings[enum_notation] = value
+                    # Also handle the repr format
+                    repr_notation = f"<{enum_class_name}.{value.upper().replace(' ', '_')}: '{value}'>"
+                    field_mappings[repr_notation] = value
+
+                enum_refs_to_simplify[ref_key] = {
+                    'type': 'string',
+                    'description': f"Use one of: {', '.join(enum_values)}. IMPORTANT: Use the exact string values, not Python enum notation.",
+                    'enum': enum_values
+                }
+
+                # Store mappings by reference for later use
+                enum_mappings[ref_key] = field_mappings
+
+        # Create simplified schema by replacing enum references
+        def replace_enum_refs(obj, path=""):
+            if isinstance(obj, dict):
+                if '$ref' in obj and obj['$ref'] in enum_refs_to_simplify:
+                    # Store the field path for this enum reference
+                    if path:
+                        enum_mappings[path] = enum_mappings[obj['$ref']]
+                    return enum_refs_to_simplify[obj['$ref']]
+                return {k: replace_enum_refs(v, f"{path}.{k}" if path else k) for k, v in obj.items()}
+            elif isinstance(obj, list):
+                return [replace_enum_refs(item, path) for item in obj]
+            return obj
+
+        simplified_schema = replace_enum_refs(schema)
+
+        # Remove the $defs section since we've inlined the enum definitions
+        if '$defs' in simplified_schema:
+            # Only remove enum definitions, keep other definitions
+            remaining_defs = {k: v for k, v in simplified_schema['$defs'].items()
+                              if not (v.get('type') == 'string' and 'enum' in v)}
+            if remaining_defs:
+                simplified_schema['$defs'] = remaining_defs
+            else:
+                del simplified_schema['$defs']
+
+        return simplified_schema, enum_mappings
+
+    def _preprocess_enum_response(self, data: Dict[str, Any], enum_mappings: Dict[str, Dict[str, str]]) -> Dict[str, Any]:
+        """
+        Preprocess LLM response to convert Python enum notation back to valid enum values.
+
+        Args:
+            data: Parsed JSON data from LLM
+            enum_mappings: Mappings from field paths to enum notation conversions
+
+        Returns:
+            Preprocessed data with enum notations converted to valid values
+        """
+        if not enum_mappings:
+            return data
+
+        def convert_enum_values(obj, path=""):
+            if isinstance(obj, dict):
+                result = {}
+                for key, value in obj.items():
+                    field_path = f"{path}.{key}" if path else key
+
+                    # Check if this field has enum mappings
+                    field_mappings = None
+                    for enum_path, mappings in enum_mappings.items():
+                        if field_path in enum_path or enum_path in field_path:
+                            field_mappings = mappings
+                            break
+
+                    if field_mappings and isinstance(value, str):
+                        # Try to convert enum notation to actual value
+                        converted_value = field_mappings.get(value, value)
+                        result[key] = converted_value
+                    else:
+                        result[key] = convert_enum_values(value, field_path)
+                return result
+            elif isinstance(obj, list):
+                return [convert_enum_values(item, path) for item in obj]
+            return obj
+
+        return convert_enum_values(data)
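A quick standalone illustration of the notation repair these two methods perform, with a hypothetical SentimentType enum (the mapping below mirrors what _simplify_enum_schemas builds):

# Both the dotted and the repr-style notations map back to the plain value.
field_mappings = {
    "SentimentType.POSITIVE": "positive",
    "<SentimentType.POSITIVE: 'positive'>": "positive",
}

raw = "SentimentType.POSITIVE"           # what a prompted model sometimes returns
fixed = field_mappings.get(raw, raw)     # -> "positive"
assert fixed == "positive"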
abstractcore/tools/common_tools.py
CHANGED

@@ -1277,17 +1277,23 @@ def _parse_content_by_type(content_bytes: bytes, content_type: str, url: str, ex
         # Final fallback with error replacement
         text_content = content_bytes.decode('utf-8', errors='replace')

-    # Parse based on content type
+    # Parse based on content type with fallback content detection
     if main_type.startswith('text/html') or main_type.startswith('application/xhtml'):
         return _parse_html_content(text_content, url, extract_links)

     elif main_type == 'application/json':
         return _parse_json_content(text_content)

-    elif main_type in ['application/xml', 'text/xml', 'application/rss+xml', 'application/atom+xml']:
+    elif main_type in ['application/xml', 'text/xml', 'application/rss+xml', 'application/atom+xml', 'application/soap+xml']:
         return _parse_xml_content(text_content)

     elif main_type.startswith('text/'):
+        # For generic text types, check if it's actually XML or JSON
+        if text_content and text_content.strip():
+            if _is_xml_content(text_content):
+                return _parse_xml_content(text_content)
+            elif _is_json_content(text_content):
+                return _parse_json_content(text_content)
         return _parse_text_content(text_content, main_type)

     elif main_type.startswith('image/'):
@@ -1305,18 +1311,107 @@ def _parse_content_by_type(content_bytes: bytes, content_type: str, url: str, ex
             f"Content size: {len(content_bytes):,} bytes"


+def _is_xml_content(content: str) -> bool:
+    """Detect if content is XML rather than HTML."""
+    if not content:
+        return False
+
+    content_lower = content.lower().strip()
+
+    # Check for XML declaration
+    if content_lower.startswith('<?xml'):
+        return True
+
+    # Check for common XML root elements without HTML indicators
+    xml_indicators = ['<rss', '<feed', '<urlset', '<sitemap', '<soap:', '<xml']
+    html_indicators = ['<!doctype html', '<html', '<head>', '<body>', '<div', '<span', '<p>', '<a ']
+
+    # Look at the first 1000 characters for indicators
+    sample = content_lower[:1000]
+
+    # If we find HTML indicators, it's likely HTML
+    if any(indicator in sample for indicator in html_indicators):
+        return False
+
+    # If we find XML indicators without HTML indicators, it's likely XML
+    if any(indicator in sample for indicator in xml_indicators):
+        return True
+
+    # Check if it starts with a root element that looks like XML
+    import re
+    root_match = re.search(r'<([^?\s/>]+)', content)
+    if root_match:
+        root_element = root_match.group(1).lower()
+        # Common XML root elements that are not HTML
+        xml_roots = ['rss', 'feed', 'urlset', 'sitemap', 'configuration', 'data', 'response']
+        if root_element in xml_roots:
+            return True
+
+    return False
+
+
+def _is_json_content(content: str) -> bool:
+    """Detect if content is JSON."""
+    if not content:
+        return False
+
+    content_stripped = content.strip()
+
+    # Quick check for JSON structure
+    if (content_stripped.startswith('{') and content_stripped.endswith('}')) or \
+       (content_stripped.startswith('[') and content_stripped.endswith(']')):
+        try:
+            import json
+            json.loads(content_stripped)
+            return True
+        except (json.JSONDecodeError, ValueError):
+            pass
+
+    return False
+
+
+def _get_appropriate_parser(content: str) -> str:
+    """Get the appropriate BeautifulSoup parser for the content."""
+    if not BS4_AVAILABLE:
+        return None
+
+    # If lxml is available and content looks like XML, use xml parser
+    if 'lxml' in BS4_PARSER and _is_xml_content(content):
+        try:
+            import lxml
+            return 'xml'
+        except ImportError:
+            pass
+
+    # Default to the configured parser (lxml or html.parser)
+    return BS4_PARSER
+
+
 def _parse_html_content(html_content: str, url: str, extract_links: bool = True) -> str:
     """Parse HTML content and extract meaningful information."""
     if not html_content:
         return "❌ No HTML content to parse"

+    # Detect if content is actually XML (fallback detection)
+    if _is_xml_content(html_content):
+        return _parse_xml_content(html_content)
+
     result_parts = []
     result_parts.append("🌐 HTML Document Analysis")

     # Use BeautifulSoup if available for better parsing
     if BS4_AVAILABLE:
         try:
-
+            # Choose appropriate parser based on content analysis
+            parser = _get_appropriate_parser(html_content)
+
+            # Suppress XML parsing warnings when using HTML parser on XML content
+            import warnings
+            from bs4 import XMLParsedAsHTMLWarning
+
+            with warnings.catch_warnings():
+                warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)
+                soup = BeautifulSoup(html_content, parser)

             # Extract title
             title = soup.find('title')
abstractcore/utils/__init__.py
CHANGED

@@ -13,6 +13,7 @@ from .token_utils import (
     ContentType
 )
 from .message_preprocessor import MessagePreprocessor, parse_files, has_files
+from .trace_export import export_traces, summarize_traces

 __all__ = [
     'configure_logging',
@@ -27,5 +28,7 @@ __all__ = [
     'ContentType',
     'MessagePreprocessor',
     'parse_files',
-    'has_files'
+    'has_files',
+    'export_traces',
+    'summarize_traces'
 ]
abstractcore/utils/cli.py
CHANGED

@@ -39,7 +39,7 @@ except ImportError:

 from .. import create_llm, BasicSession
 from ..tools.common_tools import list_files, read_file, write_file, execute_command, search_files
-from ..processing import BasicExtractor, BasicJudge
+from ..processing import BasicExtractor, BasicJudge, BasicIntentAnalyzer


 class SimpleCLI:
@@ -193,6 +193,10 @@ class SimpleCLI:
         print("     • /facts            - Display in chat")
         print("     • /facts data       - Save as data.jsonld")
         print("   /judge                Evaluate conversation quality")
+        print("   /intent [participant] Analyze intents behind conversation")
+        print("     • /intent           - Analyze all participants")
+        print("     • /intent user      - Focus on user intents")
+        print("     • /intent assistant - Focus on assistant intents")

         print("\n⚙️ CONFIGURATION")
         print("─" * 50)
@@ -317,6 +321,17 @@ class SimpleCLI:
         elif cmd == 'judge':
             self.handle_judge()

+        elif cmd.startswith('intent'):
+            # Parse /intent [participant] command
+            parts = cmd.split()
+            if len(parts) == 1:
+                # No participant specified - analyze all
+                self.handle_intent(None)
+            else:
+                # Participant specified
+                participant = parts[1]
+                self.handle_intent(participant)
+
         elif cmd.startswith('system'):
             # Parse /system [prompt] command
             if cmd == 'system':
@@ -623,6 +638,103 @@ class SimpleCLI:
             import traceback
             traceback.print_exc()

+    def handle_intent(self, focus_participant: str = None):
+        """Handle /intent [participant] command - analyze intents behind conversation"""
+        messages = self.session.get_messages()
+
+        if len(messages) <= 1:  # Only system message
+            print("📝 No conversation history to analyze intents from")
+            return
+
+        try:
+            if focus_participant:
+                print(f"🎯 Analyzing {focus_participant} intents in conversation...")
+            else:
+                print("🎯 Analyzing conversation intents for all participants...")
+
+            # Create intent analyzer using current provider for consistency
+            analyzer = BasicIntentAnalyzer(self.provider)
+
+            # Convert session messages to the format expected by intent analyzer
+            conversation_messages = [msg for msg in messages if msg.role != 'system']
+            message_dicts = [{"role": msg.role, "content": msg.content} for msg in conversation_messages]
+
+            if not message_dicts:
+                print("📝 No substantive conversation content found")
+                return
+
+            print(f"   Processing {len(message_dicts)} messages...")
+
+            start_time = time.time()
+
+            # Analyze conversation intents
+            from ..processing.basic_intent import IntentDepth
+            results = analyzer.analyze_conversation_intents(
+                messages=message_dicts,
+                focus_participant=focus_participant,
+                depth=IntentDepth.UNDERLYING
+            )
+
+            duration = time.time() - start_time
+            print(f"✅ Intent analysis completed in {duration:.1f}s")
+
+            if not results:
+                print("❌ No intents could be analyzed from the conversation")
+                return
+
+            # Display results in a conversational format
+            print("\n🎯 CONVERSATION INTENT ANALYSIS")
+            print("=" * 60)
+
+            for participant, analysis in results.items():
+                print(f"\n👤 {participant.upper()} INTENTS:")
+                print("─" * 40)
+
+                # Primary Intent
+                primary = analysis.primary_intent
+                print(f"🎯 Primary Intent: {primary.intent_type.value.replace('_', ' ').title()}")
+                print(f"   Description: {primary.description}")
+                print(f"   Underlying Goal: {primary.underlying_goal}")
+                print(f"   Emotional Undertone: {primary.emotional_undertone}")
+                print(f"   Confidence: {primary.confidence:.2f} | Urgency: {primary.urgency_level:.2f}")
+
+                # Secondary Intents (show top 2 for brevity)
+                if analysis.secondary_intents:
+                    print(f"\n🔄 Secondary Intents:")
+                    for i, intent in enumerate(analysis.secondary_intents[:2], 1):
+                        print(f"   {i}. {intent.intent_type.value.replace('_', ' ').title()}")
+                        print(f"      Goal: {intent.underlying_goal}")
+                        print(f"      Confidence: {intent.confidence:.2f}")
+
+                # Key contextual factors (show top 3)
+                if analysis.contextual_factors:
+                    print(f"\n🌍 Key Context Factors:")
+                    for factor in analysis.contextual_factors[:3]:
+                        print(f"   • {factor}")
+
+                # Response approach
+                print(f"\n💡 Suggested Response Approach:")
+                # Truncate long response approaches for readability
+                response_approach = analysis.suggested_response_approach
+                if len(response_approach) > 200:
+                    response_approach = response_approach[:197] + "..."
+                print(f"   {response_approach}")
+
+                # Analysis metadata
+                print(f"\n📊 Analysis: {analysis.word_count_analyzed} words | "
+                      f"Complexity: {analysis.intent_complexity:.2f} | "
+                      f"Confidence: {analysis.overall_confidence:.2f} | "
+                      f"Time: {duration:.1f}s")
+
+            print("\n" + "=" * 60)
+            print("💡 Note: This analysis identifies underlying motivations and goals behind communication")
+
+        except Exception as e:
+            print(f"❌ Intent analysis failed: {e}")
+            if self.debug_mode:
+                import traceback
+                traceback.print_exc()
+
     def _format_conversation_for_extraction(self, messages):
         """Format conversation messages for fact extraction"""
         formatted_lines = []
@@ -1337,6 +1449,7 @@ Key Commands:
   /compact [focus]   Compress chat history with optional focus
   /facts [file]      Extract knowledge facts
   /judge             Evaluate conversation quality
+  /intent [participant]  Analyze conversation intents and motivations
   /system [prompt]   View/change system prompt

 Tools: list_files, search_files, read_file, write_file, execute_command
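For reference, the same analysis can presumably be driven programmatically, mirroring the CLI handler above (the create_llm provider/model arguments and the sample messages are illustrative assumptions, not from this diff):

from abstractcore import create_llm
from abstractcore.processing import BasicIntentAnalyzer
from abstractcore.processing.basic_intent import IntentDepth

provider = create_llm("ollama", model="llama3")  # hypothetical provider/model choice
analyzer = BasicIntentAnalyzer(provider)

results = analyzer.analyze_conversation_intents(
    messages=[
        {"role": "user", "content": "Can you help me debug this stack trace?"},
        {"role": "assistant", "content": "Sure - paste the trace and the code."},
    ],
    focus_participant="user",        # or None to analyze all participants
    depth=IntentDepth.UNDERLYING,
)

for participant, analysis in results.items():
    print(participant, analysis.primary_intent.intent_type.value)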