abstractcore-2.4.2-py3-none-any.whl → abstractcore-2.4.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. abstractcore/apps/app_config_utils.py +19 -0
  2. abstractcore/apps/summarizer.py +85 -56
  3. abstractcore/architectures/detection.py +15 -4
  4. abstractcore/assets/architecture_formats.json +1 -1
  5. abstractcore/assets/model_capabilities.json +420 -11
  6. abstractcore/core/interface.py +2 -0
  7. abstractcore/core/session.py +4 -0
  8. abstractcore/embeddings/manager.py +54 -16
  9. abstractcore/media/__init__.py +116 -148
  10. abstractcore/media/auto_handler.py +363 -0
  11. abstractcore/media/base.py +456 -0
  12. abstractcore/media/capabilities.py +335 -0
  13. abstractcore/media/types.py +300 -0
  14. abstractcore/media/vision_fallback.py +260 -0
  15. abstractcore/providers/anthropic_provider.py +18 -1
  16. abstractcore/providers/base.py +187 -0
  17. abstractcore/providers/huggingface_provider.py +111 -12
  18. abstractcore/providers/lmstudio_provider.py +88 -5
  19. abstractcore/providers/mlx_provider.py +33 -1
  20. abstractcore/providers/ollama_provider.py +37 -3
  21. abstractcore/providers/openai_provider.py +18 -1
  22. abstractcore/server/app.py +1390 -104
  23. abstractcore/tools/common_tools.py +12 -8
  24. abstractcore/utils/__init__.py +9 -5
  25. abstractcore/utils/cli.py +199 -17
  26. abstractcore/utils/message_preprocessor.py +182 -0
  27. abstractcore/utils/structured_logging.py +117 -16
  28. abstractcore/utils/version.py +1 -1
  29. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/METADATA +214 -20
  30. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/RECORD +34 -27
  31. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/entry_points.txt +1 -0
  32. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/WHEEL +0 -0
  33. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/licenses/LICENSE +0 -0
  34. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/top_level.txt +0 -0
abstractcore/server/app.py

@@ -1,31 +1,51 @@
 """
-AbstractCore Server - Clean Architecture with Universal Tool Call Syntax Support
+AbstractCore Server - Universal LLM Gateway with Media Processing
 
 A focused FastAPI server that provides OpenAI-compatible endpoints with support for
-multiple agent formats through the enhanced syntax rewriter.
+multiple agent formats, tool calling, and comprehensive media processing capabilities.
 
 Key Features:
 - Universal tool call syntax conversion (OpenAI, Codex, Qwen3, LLaMA3, custom)
 - Auto-detection of target agent format
+- Media processing for images, documents, and data files
+- OpenAI Vision API compatible format support
+- Streaming responses with media attachments
 - Clean delegation to AbstractCore
 - Proper ReAct loop support
 - Comprehensive model listing from AbstractCore providers
+
+Media Support:
+- Images: PNG, JPEG, GIF, WEBP, BMP, TIFF
+- Documents: PDF, DOCX, XLSX, PPTX
+- Data: CSV, TSV, JSON, XML, TXT, MD
+- Size limits: 10MB per file, 32MB total per request
+- Both base64 data URLs and HTTP URLs supported
 """
 
 import os
 import json
 import time
 import uuid
-from typing import List, Dict, Any, Optional, Literal, Union, Iterator
+import base64
+import tempfile
+import urllib.request
+import urllib.parse
+import argparse
+import sys
+import logging
+from typing import List, Dict, Any, Optional, Literal, Union, Iterator, Tuple, Annotated
 from enum import Enum
-from fastapi import FastAPI, HTTPException, Request, Query
-from fastapi.responses import StreamingResponse
+from fastapi import FastAPI, HTTPException, Request, Query, Body
+from fastapi.responses import StreamingResponse, JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel, Field
+from fastapi.exceptions import RequestValidationError
+from pydantic import BaseModel, Field, ValidationError
+from starlette.exceptions import HTTPException as StarletteHTTPException
 
 from ..core.factory import create_llm
 from ..utils.structured_logging import get_logger, configure_logging
 from ..utils.version import __version__
+from ..utils.message_preprocessor import MessagePreprocessor
 # Removed simple_model_discovery import - now using provider methods directly
 from ..tools.syntax_rewriter import (
     ToolCallSyntaxRewriter,
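The module docstring above summarizes the media pipeline this release adds. For orientation, here is a minimal client sketch exercising it — a hedged example, not part of the diff: it assumes a server already running on http://localhost:8000, an illustrative model name, and a placeholder `sales.csv` file.

```python
# Minimal sketch: send a CSV as a base64 data URL to the media-aware endpoint.
import base64
import json
import urllib.request

with open("sales.csv", "rb") as f:
    data_url = "data:text/csv;base64," + base64.b64encode(f.read()).decode()

payload = {
    "model": "ollama/qwen2.5vl:7b",  # illustrative; any provider/model pair works
    "messages": [{
        "role": "user",
        "content": [
            {"type": "text", "text": "Total the sales column in this CSV"},
            {"type": "image_url", "image_url": {"url": data_url}},
        ],
    }],
}

req = urllib.request.Request(
    "http://localhost:8000/v1/chat/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    body = json.loads(resp.read())
    print(body["choices"][0]["message"]["content"])
```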
@@ -40,21 +60,52 @@ from ..tools.syntax_rewriter import (
 # Configuration
 # ============================================================================
 
-# Configure structured logging
+# Initialize with default logging configuration (can be overridden later)
 debug_mode = os.getenv("ABSTRACTCORE_DEBUG", "false").lower() == "true"
+
+# Initial logging setup (will be reconfigured if --debug is used)
+# Check environment variable for debug mode
+initial_console_level = logging.DEBUG if debug_mode else logging.INFO
 configure_logging(
-    console_level="DEBUG" if debug_mode else "INFO",
-    file_level="DEBUG",
+    console_level=initial_console_level,
+    file_level=logging.DEBUG,
     log_dir="logs",
     verbatim_enabled=True,
     console_json=False,
     file_json=True
 )
 
-# Create FastAPI app
+# Get initial logger
+logger = get_logger("server")
+
+# Log initial startup with debug mode status
+logger.info("🚀 AbstractCore Server Initializing", version=__version__, debug_mode=debug_mode)
+
+def reconfigure_for_debug():
+    """Reconfigure logging for debug mode when --debug flag is used."""
+    global debug_mode, logger
+
+    debug_mode = True
+
+    # Reconfigure with debug levels
+    configure_logging(
+        console_level=logging.DEBUG,
+        file_level=logging.DEBUG,
+        log_dir="logs",
+        verbatim_enabled=True,
+        console_json=False,
+        file_json=True
+    )
+
+    # Update logger instance
+    logger = get_logger("server")
+
+    return logger
+
+# Create FastAPI app (will be initialized after argument parsing)
 app = FastAPI(
     title="AbstractCore Server",
-    description="Universal LLM Gateway with Multi-Agent Tool Call Syntax Support",
+    description="Universal LLM Gateway with Multi-Agent Tool Call Syntax Support and Media Processing",
     version=__version__
 )
 
@@ -66,9 +117,145 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-# Get logger
-logger = get_logger("server")
-logger.info("🚀 AbstractCore Server Starting", version=__version__, debug_mode=debug_mode)
+# ============================================================================
+# Enhanced Error Handling and Logging Middleware
+# ============================================================================
+
+@app.middleware("http")
+async def debug_logging_middleware(request: Request, call_next):
+    """Enhanced logging middleware for debug mode."""
+    start_time = time.time()
+
+    # Log request details in debug mode
+    if debug_mode:
+        logger.debug(
+            "📥 HTTP Request",
+            method=request.method,
+            url=str(request.url),
+            headers=dict(request.headers),
+            client=request.client.host if request.client else "unknown"
+        )
+
+    response = await call_next(request)
+
+    process_time = time.time() - start_time
+
+    # Log response details
+    log_data = {
+        "method": request.method,
+        "url": str(request.url),
+        "status_code": response.status_code,
+        "process_time_ms": round(process_time * 1000, 2)
+    }
+
+    if response.status_code >= 400:
+        logger.error("❌ HTTP Error Response", **log_data)
+    elif debug_mode:
+        logger.debug("📤 HTTP Response", **log_data)
+    else:
+        logger.info("✅ HTTP Request", **log_data)
+
+    return response
+
+@app.exception_handler(RequestValidationError)
+async def validation_exception_handler(request: Request, exc: RequestValidationError):
+    """Enhanced handler for 422 validation errors with detailed logging."""
+    error_details = []
+    for error in exc.errors():
+        error_details.append({
+            "field": " -> ".join(str(loc) for loc in error["loc"]),
+            "message": error["msg"],
+            "type": error["type"],
+            "input": error.get("input")
+        })
+
+    # Log detailed validation error information
+    logger.error(
+        "🔴 Request Validation Error (422)",
+        method=request.method,
+        url=str(request.url),
+        error_count=len(error_details),
+        errors=error_details,
+        client=request.client.host if request.client else "unknown"
+    )
+
+    # In debug mode, also try to log the request body if possible
+    if debug_mode:
+        try:
+            # Try to get the request body for debugging
+            body = await request.body()
+            if body:
+                try:
+                    import json
+                    body_json = json.loads(body)
+                    logger.debug(
+                        "📋 Request Body (Validation Error)",
+                        body=body_json
+                    )
+                except json.JSONDecodeError:
+                    logger.debug(
+                        "📋 Request Body (Validation Error)",
+                        body_text=body.decode('utf-8', errors='replace')[:1000]  # Limit to 1000 chars
+                    )
+        except Exception as e:
+            logger.debug(f"Could not read request body for debugging: {e}")
+
+    # Return detailed error response
+    return JSONResponse(
+        status_code=422,
+        content={
+            "error": {
+                "message": "Request validation failed",
+                "type": "validation_error",
+                "details": error_details
+            }
+        }
+    )
+
+@app.exception_handler(StarletteHTTPException)
+async def http_exception_handler(request: Request, exc: StarletteHTTPException):
+    """Enhanced handler for HTTP exceptions with detailed logging."""
+    logger.error(
+        "🔴 HTTP Exception",
+        method=request.method,
+        url=str(request.url),
+        status_code=exc.status_code,
+        detail=str(exc.detail),
+        client=request.client.host if request.client else "unknown"
+    )
+
+    return JSONResponse(
+        status_code=exc.status_code,
+        content={
+            "error": {
+                "message": str(exc.detail),
+                "type": "http_error"
+            }
+        }
+    )
+
+@app.exception_handler(Exception)
+async def general_exception_handler(request: Request, exc: Exception):
+    """Handler for unexpected exceptions with detailed logging."""
+    logger.error(
+        "💥 Unexpected Server Error",
+        method=request.method,
+        url=str(request.url),
+        exception_type=type(exc).__name__,
+        exception_message=str(exc),
+        client=request.client.host if request.client else "unknown",
+        exc_info=True  # This will include the full stack trace
+    )
+
+    return JSONResponse(
+        status_code=500,
+        content={
+            "error": {
+                "message": "Internal server error",
+                "type": "server_error"
+            }
+        }
+    )
 
 # ============================================================================
 # Model Type Detection
@@ -121,19 +308,91 @@ def get_models_from_provider(provider_name: str) -> List[str]:
         logger.debug(f"Failed to get models from provider {provider_name}: {e}")
         return []
 
+# ============================================================================
+# OpenAI Responses API Models (100% Compatible)
+# ============================================================================
+
+class OpenAIInputContent(BaseModel):
+    """OpenAI Responses API content item"""
+    type: Literal["input_text", "input_file"] = Field(
+        description="Content type - 'input_text' for text or 'input_file' for files"
+    )
+    text: Optional[str] = Field(
+        default=None,
+        description="Text content (required when type='input_text')"
+    )
+    file_url: Optional[str] = Field(
+        default=None,
+        description="Direct file URL (required when type='input_file')"
+    )
+
+class OpenAIResponsesInput(BaseModel):
+    """OpenAI Responses API input message"""
+    role: Literal["user"] = Field(
+        description="Message role (OpenAI responses only supports 'user')"
+    )
+    content: List[OpenAIInputContent] = Field(
+        description="Array of input content items"
+    )
+
+class OpenAIResponsesRequest(BaseModel):
+    """OpenAI Responses API request format (100% compatible)"""
+    model: str = Field(
+        description="Model identifier",
+        example="gpt-4o"
+    )
+    input: List[OpenAIResponsesInput] = Field(
+        description="Array of input messages"
+    )
+    max_tokens: Optional[int] = Field(
+        default=None,
+        description="Maximum tokens to generate"
+    )
+    temperature: Optional[float] = Field(
+        default=None,
+        description="Sampling temperature"
+    )
+    top_p: Optional[float] = Field(
+        default=None,
+        description="Top-p sampling"
+    )
+    stream: Optional[bool] = Field(
+        default=False,
+        description="Enable streaming (false by default, set to true for real-time responses)"
+    )
+
 # ============================================================================
 # Models
 # ============================================================================
 
+class ContentItem(BaseModel):
+    """Individual content item within a message (OpenAI Vision API format with file support)"""
+    type: Literal["text", "image_url", "file"] = Field(
+        description="Content type - 'text' for text content, 'image_url' for images, or 'file' for file attachments"
+    )
+    text: Optional[str] = Field(
+        default=None,
+        description="Text content (required when type='text')"
+    )
+    image_url: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description="Image URL object (required when type='image_url'). Should contain 'url' field with base64 data URL or HTTP(S) URL"
+    )
+    file_url: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description="File URL object (required when type='file'). Should contain 'url' field with HTTP(S) URL, local path, or base64 data URL"
+    )
+
 class ChatMessage(BaseModel):
-    """OpenAI-compatible message format"""
+    """Enhanced OpenAI-compatible message format with media support"""
     role: Literal["system", "user", "assistant", "tool"] = Field(
         description="The role of the message author. One of 'system', 'user', 'assistant', or 'tool'.",
         example="user"
     )
-    content: Optional[str] = Field(
+    content: Optional[Union[str, List[ContentItem]]] = Field(
         default=None,
-        description="The contents of the message. Can be null for assistant messages with tool calls.",
+        description="Message content - can be a string or array of content objects for multimodal messages. "
+                    "For multimodal messages, use array format with text, image_url, and file objects.",
         example="What is the capital of France?"
    )
    tool_call_id: Optional[str] = Field(
@@ -260,21 +519,209 @@ class ChatCompletionRequest(BaseModel):
 
     class Config:
         schema_extra = {
-            "example": {
-                "model": "openai/gpt-4",
-                "messages": [
-                    {
-                        "role": "system",
-                        "content": "You are a helpful assistant."
-                    },
-                    {
-                        "role": "user",
-                        "content": "What is the capital of France?"
+            "examples": {
+                "basic_text": {
+                    "summary": "Basic Text Chat",
+                    "description": "Simple text-based conversation",
+                    "value": {
+                        "model": "openai/gpt-4",
+                        "messages": [
+                            {
+                                "role": "system",
+                                "content": "You are a helpful assistant."
+                            },
+                            {
+                                "role": "user",
+                                "content": "What is the capital of France?"
+                            }
+                        ],
+                        "temperature": 0.7,
+                        "max_tokens": 150,
+                        "stream": False
+                    }
+                },
+                "vision_image": {
+                    "summary": "Image Analysis",
+                    "description": "Analyze images using vision-capable models with OpenAI Vision API format",
+                    "value": {
+                        "model": "ollama/qwen2.5vl:7b",
+                        "messages": [
+                            {
+                                "role": "user",
+                                "content": [
+                                    {
+                                        "type": "text",
+                                        "text": "What's in this image?"
+                                    },
+                                    {
+                                        "type": "image_url",
+                                        "image_url": {
+                                            "url": "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAYEBQYFBAYGBQYHBwYIChAKCgkJChQODwwQFxQYGBcUFhYaHSUfGhsjHBYWICwgIyYnKSopGR8tMC0oMCUoKSj/2wBDAQcHBwoIChMKChMoGhYaKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCj/wAARCAABAAEDASIAAhEBAxEB/8QAFQABAQAAAAAAAAAAAAAAAAAAAAv/xAAUEAEAAAAAAAAAAAAAAAAAAAAA/8QAFQEBAQAAAAAAAAAAAAAAAAAAAAX/xAAUEQEAAAAAAAAAAAAAAAAAAAAA/9oADAMBAAIRAxEAPwCdABmX/9k="
+                                        }
+                                    }
+                                ]
+                            }
+                        ],
+                        "temperature": 0.7,
+                        "max_tokens": 200
+                    }
+                },
+                "document_analysis": {
+                    "summary": "Document Analysis",
+                    "description": "Process documents (PDF, CSV, Excel, etc.) with file attachments",
+                    "value": {
+                        "model": "lmstudio/qwen/qwen3-next-80b",
+                        "messages": [
+                            {
+                                "role": "user",
+                                "content": [
+                                    {
+                                        "type": "text",
+                                        "text": "Analyze this CSV file and calculate the total sales"
+                                    },
+                                    {
+                                        "type": "image_url",
+                                        "image_url": {
+                                            "url": "data:text/csv;base64,RGF0ZSxQcm9kdWN0LFNhbGVzCjIwMjQtMDEtMDEsUHJvZHVjdCBBLDEwMDAwCjIwMjQtMDEtMDIsUHJvZHVjdCBCLDE1MDAwCjIwMjQtMDEtMDMsUHJvZHVjdCBDLDI1MDAw"
+                                        }
+                                    }
+                                ]
+                            }
+                        ],
+                        "temperature": 0.3,
+                        "max_tokens": 300
+                    }
+                },
+                "mixed_media": {
+                    "summary": "Mixed Media Analysis",
+                    "description": "Process multiple file types in a single request",
+                    "value": {
+                        "model": "ollama/qwen2.5vl:7b",
+                        "messages": [
+                            {
+                                "role": "user",
+                                "content": [
+                                    {
+                                        "type": "text",
+                                        "text": "Compare this chart image with the data in this PDF report"
+                                    },
+                                    {
+                                        "type": "image_url",
+                                        "image_url": {
+                                            "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNkYPhfDwAChwGA60e6kgAAAABJRU5ErkJggg=="
+                                        }
+                                    },
+                                    {
+                                        "type": "image_url",
+                                        "image_url": {
+                                            "url": "data:application/pdf;base64,JVBERi0xLjQKJdPr6eEKMSAwIG9iago8PAovVHlwZSAvQ2F0YWxvZwovUGFnZXMgMiAwIFIKPj4KZW5kb2JqCjIgMCBvYmoKPDwKL1R5cGUgL1BhZ2VzCi9LaWRzIFszIDAgUl0KL0NvdW50IDEKPJ4KZW5kb2JqCjMgMCBvYmoKPDwKL1R5cGUgL1BhZ2UKL1BhcmVudCAyIDAgUgovTWVkaWFCb3ggWzAgMCA2MTIgNzkyXQo+PgplbmRvYmoKeHJlZgowIDQKMDAwMDAwMDAwMCA2NTUzNSBmIAowMDAwMDAwMDA5IDAwMDAwIG4gCjAwMDAwMDAwNTggMDAwMDAgbiAKMDAwMDAwMDExNSAwMDAwMCBuIAp0cmFpbGVyCjw8Ci9TaXplIDQKL1Jvb3QgMSAwIFIKPj4Kc3RhcnR4cmVmCjE5NQolJUVPRgo="
+                                        }
+                                    }
+                                ]
+                            }
+                        ],
+                        "temperature": 0.5,
+                        "max_tokens": 500,
+                        "stream": False
+                    }
+                },
+                "tools_with_media": {
+                    "summary": "Tools + Media",
+                    "description": "Combine tool usage with file attachments for complex workflows",
+                    "value": {
+                        "model": "openai/gpt-4",
+                        "messages": [
+                            {
+                                "role": "user",
+                                "content": [
+                                    {
+                                        "type": "text",
+                                        "text": "Analyze this financial data and create a summary chart"
+                                    },
+                                    {
+                                        "type": "image_url",
+                                        "image_url": {
+                                            "url": "data:text/csv;base64,Q29tcGFueSxRMSxRMixRMyxRNApBY21lIEluYywyMDAsMjUwLDMwMCwzNTAKVGVjaCBDb3JwLDE1MCwyMDAsMjUwLDMwMApCaXogTHRkLDEwMCwxMjAsMTQwLDE2MA=="
+                                        }
+                                    }
+                                ]
+                            }
+                        ],
+                        "temperature": 0.7,
+                        "max_tokens": 2048,
+                        "stream": False,
+                        "tools": [
+                            {
+                                "type": "function",
+                                "function": {
+                                    "name": "create_chart",
+                                    "description": "Create a chart from data",
+                                    "parameters": {
+                                        "type": "object",
+                                        "properties": {
+                                            "chart_type": {"type": "string"},
+                                            "data": {"type": "array"}
+                                        }
+                                    }
+                                }
+                            }
+                        ],
+                        "tool_choice": "auto"
                     }
-                ],
-                "temperature": 0.7,
-                "max_tokens": 150,
-                "stream": False
+                },
+                "complete_request": {
+                    "summary": "Complete Request with Media",
+                    "description": "Full example showing all possible fields with file attachment",
+                    "value": {
+                        "model": "openai/gpt-4",
+                        "messages": [
+                            {
+                                "role": "user",
+                                "content": [
+                                    {
+                                        "type": "text",
+                                        "text": "Analyze this CSV file and provide insights"
+                                    },
+                                    {
+                                        "type": "image_url",
+                                        "image_url": {
+                                            "url": "data:text/csv;base64,RGF0ZSxQcm9kdWN0LFNhbGVzCjIwMjQtMDEtMDEsUHJvZHVjdCBBLDEwMDAwCjIwMjQtMDEtMDIsUHJvZHVjdCBCLDE1MDAwCjIwMjQtMDEtMDMsUHJvZHVjdCBDLDI1MDAw"
+                                        }
+                                    }
+                                ],
+                                "tool_call_id": None,
+                                "tool_calls": None,
+                                "name": "DataAnalyst"
+                            }
+                        ],
+                        "temperature": 0.7,
+                        "max_tokens": 2048,
+                        "top_p": 1,
+                        "stream": False,
+                        "tools": [
+                            {
+                                "type": "function",
+                                "function": {
+                                    "name": "analyze_data",
+                                    "description": "Analyze data and generate insights",
+                                    "parameters": {
+                                        "type": "object",
+                                        "properties": {
+                                            "analysis_type": {"type": "string"},
+                                            "metrics": {"type": "array"}
+                                        }
+                                    }
+                                }
+                            }
+                        ],
+                        "tool_choice": "auto",
+                        "stop": ["END"],
+                        "seed": 12345,
+                        "frequency_penalty": 0.0,
+                        "presence_penalty": 0.0,
+                        "agent_format": "auto"
+                    }
+                }
             }
         }
 
@@ -324,6 +771,117 @@ class EmbeddingRequest(BaseModel):
         }
     }
 
+# ============================================================================
+# Union Request Model for /v1/responses endpoint
+# ============================================================================
+
+class ResponsesAPIRequest(BaseModel):
+    """
+    Union request model for /v1/responses endpoint supporting both OpenAI and legacy formats.
+
+    The endpoint automatically detects the format based on the presence of 'input' vs 'messages' field.
+    """
+    class Config:
+        schema_extra = {
+            "oneOf": [
+                {
+                    "title": "OpenAI Responses API Format",
+                    "description": "OpenAI-compatible responses format with input_file support",
+                    "$ref": "#/components/schemas/OpenAIResponsesRequest"
+                },
+                {
+                    "title": "Legacy Format (ChatCompletionRequest)",
+                    "description": "Backward-compatible format using messages array",
+                    "$ref": "#/components/schemas/ChatCompletionRequest"
+                }
+            ],
+            "examples": {
+                "openai_format": {
+                    "summary": "OpenAI Responses API Format",
+                    "description": "Use input array with input_text and input_file objects",
+                    "value": {
+                        "model": "gpt-4o",
+                        "input": [
+                            {
+                                "role": "user",
+                                "content": [
+                                    {"type": "input_text", "text": "Analyze this document"},
+                                    {"type": "input_file", "file_url": "https://example.com/doc.pdf"}
+                                ]
+                            }
+                        ],
+                        "stream": False
+                    }
+                },
+                "legacy_format": {
+                    "summary": "Legacy Format (Backward Compatible)",
+                    "description": "Use messages array like standard chat completions",
+                    "value": {
+                        "model": "openai/gpt-4",
+                        "messages": [
+                            {"role": "user", "content": "Tell me a story"}
+                        ],
+                        "stream": False
+                    }
+                }
+            }
+        }
+
+# ============================================================================
+# OpenAI Responses API Compatibility
+# ============================================================================
+
+def convert_openai_responses_to_chat_completion(openai_request: OpenAIResponsesRequest) -> ChatCompletionRequest:
+    """
+    Convert OpenAI Responses API format to internal ChatCompletionRequest format.
+
+    Transforms:
+    - input -> messages
+    - input_text -> text
+    - input_file -> file with file_url
+
+    Args:
+        openai_request: OpenAI responses API request
+
+    Returns:
+        ChatCompletionRequest compatible with our internal processing
+    """
+    # Convert input messages to chat messages
+    messages = []
+
+    for input_msg in openai_request.input:
+        # Build content array as list of dicts (not ContentItem objects)
+        content_items = []
+
+        for content in input_msg.content:
+            if content.type == "input_text":
+                content_items.append({
+                    "type": "text",
+                    "text": content.text
+                })
+            elif content.type == "input_file":
+                content_items.append({
+                    "type": "file",
+                    "file_url": {"url": content.file_url}  # Convert to our format
+                })
+
+        # Create chat message with list content (not ContentItem objects)
+        message_dict = {
+            "role": input_msg.role,
+            "content": content_items
+        }
+        messages.append(ChatMessage(**message_dict))
+
+    # Build ChatCompletionRequest
+    return ChatCompletionRequest(
+        model=openai_request.model,
+        messages=messages,
+        max_tokens=openai_request.max_tokens,
+        temperature=openai_request.temperature,
+        top_p=openai_request.top_p,
+        stream=openai_request.stream
+    )
+
 # ============================================================================
 # Helper Functions
 # ============================================================================
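To make the conversion above concrete, this is the shape transformation `convert_openai_responses_to_chat_completion` performs — a sketch with illustrative values, not part of the diff:

```python
# OpenAI Responses API shape (input / input_text / input_file)...
openai_style = {
    "model": "gpt-4o",
    "input": [{
        "role": "user",
        "content": [
            {"type": "input_text", "text": "Analyze this document"},
            {"type": "input_file", "file_url": "https://example.com/doc.pdf"},
        ],
    }],
}

# ...becomes the internal chat-completions shape (messages / text / file),
# with the bare file URL wrapped in a {"url": ...} object.
internal_style = {
    "model": "gpt-4o",
    "messages": [{
        "role": "user",
        "content": [
            {"type": "text", "text": "Analyze this document"},
            {"type": "file", "file_url": {"url": "https://example.com/doc.pdf"}},
        ],
    }],
}
```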
@@ -588,31 +1146,101 @@ async def list_providers():
     }
 
 @app.post("/v1/responses")
-async def create_response(request: ChatCompletionRequest, http_request: Request):
+async def create_response(
+    http_request: Request,
+    request_body: Annotated[
+        Dict[str, Any],
+        Body(
+            ...,
+            examples={
+                "openai_format": {
+                    "summary": "OpenAI Responses API Format",
+                    "description": "Use input array with input_text and input_file objects",
+                    "value": {
+                        "model": "gpt-4o",
+                        "input": [
+                            {
+                                "role": "user",
+                                "content": [
+                                    {"type": "input_text", "text": "Analyze this document"},
+                                    {"type": "input_file", "file_url": "https://example.com/doc.pdf"}
+                                ]
+                            }
+                        ],
+                        "stream": False
+                    }
+                },
+                "legacy_format": {
+                    "summary": "Legacy Format (Backward Compatible)",
+                    "description": "Use messages array like standard chat completions",
+                    "value": {
+                        "model": "openai/gpt-4",
+                        "messages": [
+                            {"role": "user", "content": "Tell me a story"}
+                        ],
+                        "stream": False
+                    }
+                },
+                "file_analysis": {
+                    "summary": "Document Analysis",
+                    "description": "Analyze files using OpenAI format",
+                    "value": {
+                        "model": "openai/gpt-4",
+                        "input": [
+                            {
+                                "role": "user",
+                                "content": [
+                                    {"type": "input_text", "text": "What's the key information in this CSV?"},
+                                    {"type": "input_file", "file_url": "data:text/csv;base64,RGF0ZSxQcm9kdWN0LFNhbGVzCjIwMjQtMDEtMDEsUHJvZHVjdCBBLDEwMDAwCjIwMjQtMDEtMDIsUHJvZHVjdCBCLDE1MDAwCjIwMjQtMDEtMDMsUHJvZHVjdCBDLDI1MDAw"}
+                                ]
+                            }
+                        ]
+                    }
+                },
+                "streaming_example": {
+                    "summary": "Streaming Response",
+                    "description": "Enable streaming for real-time responses",
+                    "value": {
+                        "model": "lmstudio/qwen/qwen3-next-80b",
+                        "input": [
+                            {
+                                "role": "user",
+                                "content": [
+                                    {"type": "input_text", "text": "Analyze the letter and provide a summary of the key points."},
+                                    {"type": "input_file", "file_url": "https://www.berkshirehathaway.com/letters/2024ltr.pdf"}
+                                ]
+                            }
+                        ],
+                        "stream": True
+                    }
+                }
+            }
+        )
+    ]
+):
     """
-    Create a real-time streaming response for the given chat conversation.
-
-    This endpoint provides real-time conversation capabilities optimized for streaming interaction.
-    It's similar to OpenAI's Realtime/Responses API, automatically enabling streaming for immediate token delivery.
-
-    **Key Features:**
-    - **Always Streams**: Streaming is automatically enabled for real-time interaction
-    - **Lower Latency**: Optimized for quick first-token delivery
-    - **Same Parameters**: Uses the same request format as `/v1/chat/completions`
-    - **Multi-Provider**: Supports all providers (OpenAI, Anthropic, Ollama, etc.)
-
-    **Use Cases:**
-    - Real-time chat interfaces
-    - Voice-to-text streaming
-    - Live coding assistants
-    - Interactive agents
-
-    **Differences from `/v1/chat/completions`:**
-    - Streaming is always enabled (ignores `stream: false`)
-    - Optimized for immediate response delivery
-    - Better for user-facing real-time applications
-
-    **Example:**
+    OpenAI Responses API (100% Compatible) + Backward Compatibility
+
+    Supports both OpenAI's responses format and our legacy format for seamless migration.
+    Streaming can be enabled by setting "stream": true for real-time interaction.
+
+    **OpenAI Format (input_file support):**
+    ```json
+    {
+        "model": "gpt-4o",
+        "input": [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "input_text", "text": "Analyze this document"},
+                    {"type": "input_file", "file_url": "https://example.com/doc.pdf"}
+                ]
+            }
+        ]
+    }
+    ```
+
+    **Legacy Format (backward compatibility):**
     ```json
     {
         "model": "openai/gpt-4",
@@ -621,24 +1249,65 @@ async def create_response(request: ChatCompletionRequest, http_request: Request)
         ]
     }
     ```
-
-    **Returns:** Server-sent events stream of chat completion chunks, terminated by `data: [DONE]`.
+
+    **Key Features:**
+    - **100% OpenAI Compatible**: Supports input_file with file_url
+    - **Universal File Support**: PDF, DOCX, XLSX, CSV, images, and more
+    - **Multi-Provider**: Works with all providers (OpenAI, Anthropic, Ollama, etc.)
+    - **Optional Streaming**: Set "stream": true for real-time responses
+    - **Backward Compatible**: Existing clients continue to work
+
+    **Returns:** Chat completion object, or server-sent events stream if streaming is enabled.
     """
-    # For now, delegate to chat completions with streaming enabled
-    # The OpenAI Responses API is essentially streaming chat completions with enhanced real-time features
-    request.stream = True  # Force streaming for responses API
+    try:
+        # Use the parsed request body directly
+        request_data = request_body
 
-    provider, model = parse_model_string(request.model)
+        # Detect OpenAI responses format vs legacy format
+        if "input" in request_data:
+            # OpenAI Responses API format
+            logger.info("📡 OpenAI Responses API format detected")
 
-    logger.info(
-        "📡 Responses API Request",
-        provider=provider,
-        model=model,
-        messages=len(request.messages),
-        has_tools=bool(request.tools)
-    )
+            # Parse as OpenAI format
+            openai_request = OpenAIResponsesRequest(**request_data)
 
-    return await process_chat_completion(provider, model, request, http_request)
+            # Convert to internal format
+            chat_request = convert_openai_responses_to_chat_completion(openai_request)
+
+        elif "messages" in request_data:
+            # Legacy format (backward compatibility)
+            logger.info("📡 Legacy responses format detected")
+
+            # Parse as ChatCompletionRequest
+            chat_request = ChatCompletionRequest(**request_data)
+
+        else:
+            raise HTTPException(
+                status_code=400,
+                detail={"error": {"message": "Request must contain either 'input' (OpenAI format) or 'messages' (legacy format)", "type": "invalid_request"}}
+            )
+
+        # Respect user's streaming preference (defaults to False)
+
+        # Process using our standard pipeline
+        provider, model = parse_model_string(chat_request.model)
+
+        logger.info(
+            "📡 Responses API Request",
+            provider=provider,
+            model=model,
+            format="openai" if "input" in request_data else "legacy",
+            messages=len(chat_request.messages)
+        )
+
+        return await process_chat_completion(provider, model, chat_request, http_request)
+
+    except Exception as e:
+        logger.error(f"Responses API error: {e}")
+        raise HTTPException(
+            status_code=400,
+            detail={"error": {"message": str(e), "type": "processing_error"}}
+        )
 
 @app.post("/v1/embeddings")
 async def create_embeddings(request: EmbeddingRequest):
@@ -755,25 +1424,491 @@ async def create_embeddings(request: EmbeddingRequest):
             detail={"error": {"message": str(e), "type": "embedding_error"}}
         )
 
+# ============================================================================
+# Media Processing Utilities
+# ============================================================================
+
+def handle_base64_image(data_url: str) -> str:
+    """
+    Process base64 data URL and save to temporary file.
+
+    Args:
+        data_url: Base64 data URL (e.g., "data:image/jpeg;base64,..." or "data:application/pdf;base64,...")
+
+    Returns:
+        Path to temporary file
+    """
+    try:
+        # Parse data URL
+        if not data_url.startswith("data:"):
+            raise ValueError("Invalid data URL format")
+
+        # Extract media type and base64 data
+        header, data = data_url.split(",", 1)
+        media_type = header.split(";")[0].split(":")[1]
+
+        # Determine file extension for all supported media types
+        ext_map = {
+            # Images
+            "image/jpeg": ".jpg",
+            "image/jpg": ".jpg",
+            "image/png": ".png",
+            "image/gif": ".gif",
+            "image/webp": ".webp",
+            "image/bmp": ".bmp",
+            "image/tiff": ".tiff",
+            # Documents
+            "application/pdf": ".pdf",
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
+            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
+            "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
+            # Data files
+            "text/csv": ".csv",
+            "text/tab-separated-values": ".tsv",
+            "application/json": ".json",
+            "application/xml": ".xml",
+            "text/xml": ".xml",
+            "text/plain": ".txt",
+            "text/markdown": ".md",
+            # Generic fallback
+            "application/octet-stream": ".bin"
+        }
+        extension = ext_map.get(media_type, ".bin")
+
+        # Decode base64 data
+        file_data = base64.b64decode(data)
+
+        # Save to temporary file with request-specific prefix for better isolation
+        import hashlib
+        data_hash = hashlib.md5(data[:100].encode() if len(data) > 100 else data.encode()).hexdigest()[:8]
+        request_id = uuid.uuid4().hex[:8]
+        prefix = f"abstractcore_b64_{data_hash}_{request_id}_"
+
+        with tempfile.NamedTemporaryFile(delete=False, suffix=extension, prefix=prefix) as temp_file:
+            temp_file.write(file_data)
+            temp_file_path = temp_file.name
+
+        # Log the temporary file creation for debugging
+        logger.debug(f"Processed base64 media to temporary file: {temp_file_path} (size: {len(file_data)} bytes)")
+        return temp_file_path
+
+    except Exception as e:
+        logger.error(f"Failed to process base64 media: {e}")
+        raise HTTPException(
+            status_code=400,
+            detail={"error": {"message": f"Invalid base64 media data: {e}", "type": "media_error"}}
+        )
+
+def download_file_temporarily(url: str) -> str:
+    """
+    Download file from URL to temporary file (supports images, documents, data files).
+
+    Args:
+        url: HTTP(S) URL to file
+
+    Returns:
+        Path to temporary file
+    """
+    try:
+        # Validate URL
+        parsed = urllib.parse.urlparse(url)
+        if parsed.scheme not in ("http", "https"):
+            raise ValueError("Only HTTP and HTTPS URLs are allowed")
+
+        # Create request with browser-like headers to avoid 403 Forbidden errors
+        request = urllib.request.Request(url)
+        request.add_header('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36')
+        # Generic accept header for all file types
+        request.add_header('Accept', '*/*')
+        request.add_header('Accept-Language', 'en-US,en;q=0.9')
+        request.add_header('Accept-Encoding', 'gzip, deflate, br')
+        request.add_header('Connection', 'keep-alive')
+        request.add_header('Upgrade-Insecure-Requests', '1')
+        request.add_header('Sec-Fetch-Dest', 'document')  # More generic than 'image'
+        request.add_header('Sec-Fetch-Mode', 'no-cors')
+        request.add_header('Sec-Fetch-Site', 'cross-site')
+
+        # Download with size limit (10MB)
+        response = urllib.request.urlopen(request, timeout=30)
+        if response.getheader('content-length'):
+            size = int(response.getheader('content-length'))
+            if size > 10 * 1024 * 1024:  # 10MB limit
+                raise ValueError("File too large (max 10MB)")
+
+        # Read data with size check
+        data = b""
+        while True:
+            chunk = response.read(8192)
+            if not chunk:
+                break
+            data += chunk
+            if len(data) > 10 * 1024 * 1024:  # 10MB limit
+                raise ValueError("File too large (max 10MB)")
+
+        # Determine extension from content-type or URL
+        content_type = response.getheader('content-type', '').lower()
+        ext_map = {
+            # Images
+            "image/jpeg": ".jpg",
+            "image/jpg": ".jpg",
+            "image/png": ".png",
+            "image/gif": ".gif",
+            "image/webp": ".webp",
+            "image/bmp": ".bmp",
+            "image/tiff": ".tiff",
+            # Documents
+            "application/pdf": ".pdf",
+            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
+            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
+            "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
+            # Data files
+            "text/csv": ".csv",
+            "text/tab-separated-values": ".tsv",
+            "application/json": ".json",
+            "application/xml": ".xml",
+            "text/xml": ".xml",
+            "text/plain": ".txt",
+            "text/markdown": ".md",
+            # Generic fallback
+            "application/octet-stream": ".bin"
+        }
+
+        # Try to get extension from content-type first, then URL
+        extension = ext_map.get(content_type)
+        if not extension:
+            # Try to get extension from URL
+            url_path = parsed.path.lower()
+            if url_path.endswith('.pdf'):
+                extension = '.pdf'
+            elif url_path.endswith('.jpg') or url_path.endswith('.jpeg'):
+                extension = '.jpg'
+            elif url_path.endswith('.png'):
+                extension = '.png'
+            elif url_path.endswith('.docx'):
+                extension = '.docx'
+            elif url_path.endswith('.xlsx'):
+                extension = '.xlsx'
+            elif url_path.endswith('.csv'):
+                extension = '.csv'
+            else:
+                extension = '.bin'  # Generic fallback
+
+        # Save to temporary file with request-specific prefix for better isolation
+        import hashlib
+        url_hash = hashlib.md5(url.encode()).hexdigest()[:8]
+        request_id = uuid.uuid4().hex[:8]
+        prefix = f"abstractcore_file_{url_hash}_{request_id}_"
+
+        with tempfile.NamedTemporaryFile(delete=False, suffix=extension, prefix=prefix) as temp_file:
+            temp_file.write(data)
+            temp_file_path = temp_file.name
+
+        # Log the temporary file creation for debugging
+        logger.info(f"Downloaded file to temporary file: {temp_file_path} (size: {len(data)} bytes, type: {content_type})")
+        return temp_file_path
+
+    except Exception as e:
+        logger.error(f"Failed to download file from URL {url}: {e}")
+        raise HTTPException(
+            status_code=400,
+            detail={"error": {"message": f"Failed to download file: {e}", "type": "media_error"}}
+        )
+
+def download_image_temporarily(url: str) -> str:
+    """
+    Download image from URL to temporary file (backward compatibility wrapper).
+
+    Args:
+        url: HTTP(S) URL to image
+
+    Returns:
+        Path to temporary file
+    """
+    return download_file_temporarily(url)
+
+def process_image_url_object(image_url_obj: Dict[str, Any]) -> Optional[str]:
+    """
+    Process OpenAI image_url object and return local file path.
+
+    Args:
+        image_url_obj: Image URL object with 'url' field
+
+    Returns:
+        Local file path or None if processing failed
+    """
+    try:
+        url = image_url_obj.get("url", "")
+        if not url:
+            return None
+
+        if url.startswith("data:"):
+            # Base64 encoded image
+            return handle_base64_image(url)
+        elif url.startswith(("http://", "https://")):
+            # Download from URL
+            return download_image_temporarily(url)
+        else:
+            # Assume local file path
+            if os.path.exists(url):
+                return url
+            else:
+                logger.warning(f"Local file not found: {url}")
+                return None
+
+    except Exception as e:
+        logger.error(f"Failed to process image URL object: {e}")
+        return None
+
+def process_file_url_object(file_url_obj: Dict[str, Any]) -> Optional[str]:
+    """
+    Process OpenAI file_url object and return local file path.
+
+    Simplified format (consistent with image_url):
+        {"url": "https://example.com/file.pdf"} or
+        {"url": "/local/path/file.pdf"} or
+        {"url": "data:application/pdf;base64,..."}
+
+    Args:
+        file_url_obj: File URL object with 'url' field (same as image_url)
+
+    Returns:
+        Local file path or None if processing failed
+    """
+    try:
+        # Reuse existing image URL processing logic - works perfectly for any file type
+        return process_image_url_object(file_url_obj)
+
+    except Exception as e:
+        logger.error(f"Failed to process file URL object: {e}")
+        return None
+
+def process_message_content(message: ChatMessage) -> Tuple[str, List[str]]:
+    """
+    Extract media files from message content and return clean text + media list.
+
+    Supports both OpenAI formats:
+    - content as string: "Analyze this @image.jpg"
+    - content as array: [{"type": "text", "text": "..."}, {"type": "image_url", "image_url": {...}}, {"type": "file", "file_url": {...}}]
+
+    Args:
+        message: ChatMessage with content to process
+
+    Returns:
+        Tuple of (clean_text, media_file_paths)
+    """
+    if message.content is None:
+        return "", []
+
+    if isinstance(message.content, str):
+        # Legacy format: extract @filename references
+        clean_text, media_files = MessagePreprocessor.parse_file_attachments(
+            message.content,
+            validate_existence=True,
+            verbose=False
+        )
+        return clean_text, media_files
+
+    elif isinstance(message.content, list):
+        # OpenAI array format: extract image_url objects
+        text_parts = []
+        media_files = []
+
+        for item in message.content:
+            if isinstance(item, dict):
+                item_type = item.get("type")
+                if item_type == "text" and item.get("text"):
+                    text_parts.append(item["text"])
+                elif item_type == "image_url" and item.get("image_url"):
+                    media_file = process_image_url_object(item["image_url"])
+                    if media_file:
+                        media_files.append(media_file)
+                elif item_type == "file" and item.get("file_url"):
+                    media_file = process_file_url_object(item["file_url"])
+                    if media_file:
+                        media_files.append(media_file)
+            elif hasattr(item, 'type'):
+                # Pydantic ContentItem object
+                if item.type == "text" and item.text:
+                    text_parts.append(item.text)
+                elif item.type == "image_url" and item.image_url:
+                    media_file = process_image_url_object(item.image_url)
+                    if media_file:
+                        media_files.append(media_file)
+                elif item.type == "file" and item.file_url:
+                    media_file = process_file_url_object(item.file_url)
+                    if media_file:
+                        media_files.append(media_file)
+
+        return " ".join(text_parts), media_files
+
+    return str(message.content), []
+
+def adapt_prompt_for_media_types(text: str, media_files: List[str]) -> str:
+    """
+    Intelligently adapt prompts based on attached media file types.
+
+    Fixes common mismatches like "What is in this image?" when sending documents.
+
+    Args:
+        text: Original text content
+        media_files: List of media file paths
+
+    Returns:
+        Adapted text content
+    """
+    if not media_files or not text:
+        return text
+
+    # Analyze media file types
+    image_extensions = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'}
+    document_extensions = {'.pdf', '.docx', '.xlsx', '.pptx'}
+    data_extensions = {'.csv', '.tsv', '.json', '.xml'}
+    text_extensions = {'.txt', '.md'}
+
+    has_images = False
+    has_documents = False
+    has_data = False
+    has_text = False
+
+    for file_path in media_files:
+        ext = os.path.splitext(file_path)[1].lower()
+        if ext in image_extensions:
+            has_images = True
+        elif ext in document_extensions:
+            has_documents = True
+        elif ext in data_extensions:
+            has_data = True
+        elif ext in text_extensions:
+            has_text = True
+
+    # Common prompt adaptations
+    adapted_text = text
+
+    # Fix "What is in this image?" when not dealing with images
+    if "what is in this image" in text.lower():
+        if has_documents and not has_images:
+            adapted_text = text.replace("What is in this image?", "What is in this document?")
+            adapted_text = adapted_text.replace("what is in this image?", "what is in this document?")
+            adapted_text = adapted_text.replace("What is in this image", "What is in this document")
+            adapted_text = adapted_text.replace("what is in this image", "what is in this document")
+        elif has_data and not has_images:
+            adapted_text = text.replace("What is in this image?", "What data is in this file?")
+            adapted_text = adapted_text.replace("what is in this image?", "what data is in this file?")
+            adapted_text = adapted_text.replace("What is in this image", "What data is in this file")
+            adapted_text = adapted_text.replace("what is in this image", "what data is in this file")
+        elif has_text and not has_images:
+            adapted_text = text.replace("What is in this image?", "What is in this text file?")
+            adapted_text = adapted_text.replace("what is in this image?", "what is in this text file?")
+            adapted_text = adapted_text.replace("What is in this image", "What is in this text file")
+            adapted_text = adapted_text.replace("what is in this image", "what is in this text file")
+
+    # Fix "What is in this document?" when dealing with images
+    elif "what is in this document" in text.lower() and has_images and not (has_documents or has_data or has_text):
+        adapted_text = text.replace("What is in this document?", "What is in this image?")
+        adapted_text = adapted_text.replace("what is in this document?", "what is in this image?")
+        adapted_text = adapted_text.replace("What is in this document", "What is in this image")
+        adapted_text = adapted_text.replace("what is in this document", "what is in this image")
+
+    # Handle mixed content with specific naming
+    if adapted_text != text:
+        # Count media types for better description
+        total_files = len(media_files)
+        if total_files > 1:
+            types = []
+            if has_images:
+                types.append("image(s)")
+            if has_documents:
+                types.append("document(s)")
+            if has_data:
+                types.append("data file(s)")
+            if has_text:
+                types.append("text file(s)")
+
+            if len(types) > 1:
+                adapted_text = adapted_text.replace("this document", f"these {' and '.join(types)}")
+                adapted_text = adapted_text.replace("this image", f"these {' and '.join(types)}")
+                adapted_text = adapted_text.replace("this file", f"these {' and '.join(types)}")
+
+    if adapted_text != text:
+        logger.info(f"Adapted prompt for media types: '{text}' → '{adapted_text}'")
+
+    return adapted_text
+
+def validate_media_files(files: List[str]) -> None:
+    """
+    Validate media files for security and size limits.
+
+    Args:
+        files: List of file paths to validate
+
+    Raises:
+        HTTPException: If validation fails
+    """
+    ALLOWED_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff',
+                          '.pdf', '.docx', '.xlsx', '.pptx', '.csv', '.tsv', '.txt', '.md',
+                          '.json', '.xml'}
+
+    total_size = 0
+    max_total_size = 32 * 1024 * 1024  # 32MB total limit
+
+    for file_path in files:
+        if not os.path.exists(file_path):
+            raise HTTPException(
+                status_code=400,
+                detail={"error": {"message": f"File not found: {file_path}", "type": "file_not_found"}}
+            )
+
+        # Check extension
+        ext = os.path.splitext(file_path)[1].lower()
+        if ext not in ALLOWED_EXTENSIONS:
+            raise HTTPException(
+                status_code=400,
+                detail={"error": {"message": f"File type {ext} not allowed", "type": "invalid_file_type"}}
+            )
+
+        # Check individual file size (10MB per file)
+        file_size = os.path.getsize(file_path)
+        if file_size > 10 * 1024 * 1024:
+            raise HTTPException(
+                status_code=400,
+                detail={"error": {"message": f"File too large: {file_path} (max 10MB per file)", "type": "file_too_large"}}
+            )
+
+        total_size += file_size
+
+    # Check total size across all files
+    if total_size > max_total_size:
+        raise HTTPException(
+            status_code=400,
+            detail={"error": {"message": "Total file size exceeds 32MB limit", "type": "total_size_exceeded"}}
+        )
+
 @app.post("/v1/chat/completions")
 async def chat_completions(request: ChatCompletionRequest, http_request: Request):
     """
-    Create a model response for the given chat conversation.
-
-    Given a list of messages comprising a conversation, the model will return a response.
-    This endpoint supports streaming, tool calling, and multiple providers.
-
+    Create a model response for the given chat conversation with optional media attachments.
+
+    Given a list of messages comprising a conversation, the model will return a response.
+    This endpoint supports streaming, tool calling, media attachments, and multiple providers.
+
     **Key Features:**
     - Multi-provider support (OpenAI, Anthropic, Ollama, LMStudio, etc.)
     - Streaming responses with server-sent events
     - Tool/function calling with automatic syntax conversion
-    - OpenAI-compatible format
+    - Media attachments (images, documents, data files)
+    - OpenAI Vision API compatible format
 
     **Provider Format:** Use `provider/model` format in the model field:
     - `openai/gpt-4` - OpenAI GPT-4
     - `ollama/llama3:latest` - Ollama LLaMA 3
     - `anthropic/claude-3-opus-20240229` - Anthropic Claude 3 Opus
-
+
+    **Media Attachments:** Support for OpenAI Vision API compatible format:
+    - String content: "Analyze this @image.jpg" (AbstractCore @filename syntax)
+    - Array content: [{"type": "text", "text": "..."}, {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,..."}}]
+    - Supported formats: Images (PNG, JPEG, GIF, WEBP), Documents (PDF, DOCX, XLSX, PPTX), Data (CSV, TSV, TXT, MD)
+    - Size limits: 10MB per file, 32MB total per request
+
     **To see available models:** `GET /v1/models?type=text-generation`
 
     **Returns:** A chat completion object, or a stream of chat completion chunks if streaming is enabled.
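The utilities above accept the two attachment styles the docstring lists. A small sketch of building each message form — a hedged example, not part of the diff; `report.csv` is a placeholder path, and for the string form the file must exist where the server process runs, since `MessagePreprocessor.parse_file_attachments` validates existence server-side:

```python
import base64

# 1) String content using AbstractCore's @filename syntax; the path is
#    resolved on the server host.
msg_string = {"role": "user", "content": "Total the sales in @report.csv"}

# 2) OpenAI Vision-style array content carrying the file inline as a base64
#    data URL, the exact shape handle_base64_image() parses.
with open("report.csv", "rb") as f:
    data_url = "data:text/csv;base64," + base64.b64encode(f.read()).decode()

msg_array = {
    "role": "user",
    "content": [
        {"type": "text", "text": "Total the sales"},
        {"type": "file", "file_url": {"url": data_url}},
    ],
}
```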
@@ -833,11 +1968,40 @@ async def process_chat_completion(
             user_agent=http_request.headers.get("user-agent", "")[:50]
         )
 
+        # Process media from messages
+        all_media_files = []
+        processed_messages = []
+
+        for message in request.messages:
+            clean_text, media_files = process_message_content(message)
+            all_media_files.extend(media_files)
+
+            # Adapt prompt based on media file types to avoid confusion
+            if media_files:
+                adapted_text = adapt_prompt_for_media_types(clean_text, media_files)
+            else:
+                adapted_text = clean_text
+
+            # Create processed message with adapted text
+            processed_message = message.model_copy()
+            processed_message.content = adapted_text
+            processed_messages.append(processed_message)
+
+        # Validate media files if any were found
+        if all_media_files:
+            validate_media_files(all_media_files)
+            logger.info(
+                "📎 Media Files Processed",
+                request_id=request_id,
+                file_count=len(all_media_files),
+                files=[os.path.basename(f) for f in all_media_files[:5]]  # Log first 5 filenames
+            )
+
         # Create LLM instance
         llm = create_llm(provider, model=model)
 
         # Convert messages
-        messages = convert_to_abstractcore_messages(request.messages)
+        messages = convert_to_abstractcore_messages(processed_messages)
 
         # Create syntax rewriter
         syntax_rewriter = create_syntax_rewriter(target_format, f"{provider}/{model}")
@@ -846,6 +2010,7 @@ async def process_chat_completion(
         gen_kwargs = {
             "prompt": "",  # Empty when using messages
             "messages": messages,
+            "media": all_media_files if all_media_files else None,  # Add media files
             "temperature": request.temperature,
             "max_tokens": request.max_tokens,
             "stream": request.stream,
@@ -865,19 +2030,53 @@ async def process_chat_completion(
             gen_kwargs["presence_penalty"] = request.presence_penalty
 
         # Generate response
-        if request.stream:
-            return StreamingResponse(
-                generate_streaming_response(
-                    llm, gen_kwargs, provider, model, syntax_rewriter, request_id
-                ),
-                media_type="text/event-stream",
-                headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}
-            )
-        else:
-            response = llm.generate(**gen_kwargs)
-            return convert_to_openai_response(
-                response, provider, model, syntax_rewriter, request_id
+        # Only cleanup files created by this request (with our specific prefixes)
+        temp_files_to_cleanup = [
+            f for f in all_media_files
+            if f.startswith("/tmp/") and (
+                "abstractcore_img_" in f or
+                "abstractcore_file_" in f or
+                "abstractcore_b64_" in f or
+                "temp" in f
             )
+        ]
+
+        try:
+            if request.stream:
+                return StreamingResponse(
+                    generate_streaming_response(
+                        llm, gen_kwargs, provider, model, syntax_rewriter, request_id, temp_files_to_cleanup
+                    ),
+                    media_type="text/event-stream",
+                    headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}
+                )
+            else:
+                response = llm.generate(**gen_kwargs)
+                return convert_to_openai_response(
+                    response, provider, model, syntax_rewriter, request_id
+                )
+        finally:
+            # Cleanup temporary files (base64 and downloaded images) with delay to avoid race conditions
+            import threading
+
+            def delayed_cleanup():
+                """Cleanup temporary files after a short delay to avoid race conditions"""
+                time.sleep(1)  # Short delay to ensure generation is complete
+                for temp_file in temp_files_to_cleanup:
+                    try:
+                        if os.path.exists(temp_file):
+                            # Additional check: only delete files created by this session
+                            if ("abstractcore_img_" in temp_file or "abstractcore_file_" in temp_file or "abstractcore_b64_" in temp_file):
+                                os.unlink(temp_file)
+                                logger.debug(f"Cleaned up temporary file: {temp_file}")
+                            else:
+                                logger.debug(f"Skipped cleanup of non-AbstractCore file: {temp_file}")
+                    except Exception as e:
+                        logger.warning(f"Failed to cleanup temporary file {temp_file}: {e}")
+
+            # Run cleanup in background thread to avoid blocking response
+            cleanup_thread = threading.Thread(target=delayed_cleanup, daemon=True)
+            cleanup_thread.start()
 
     except Exception as e:
         logger.error(
@@ -897,7 +2096,8 @@ def generate_streaming_response(
     provider: str,
     model: str,
     syntax_rewriter: ToolCallSyntaxRewriter,
-    request_id: str
+    request_id: str,
+    temp_files_to_cleanup: List[str] = None
 ) -> Iterator[str]:
     """Generate OpenAI-compatible streaming response with syntax rewriting."""
     try:
@@ -983,6 +2183,29 @@ def generate_streaming_response(
             has_tool_calls=has_tool_calls
         )
 
+        # Cleanup temporary files for streaming with delay to avoid race conditions
+        if temp_files_to_cleanup:
+            import threading
+
+            def delayed_streaming_cleanup():
+                """Cleanup temporary files after streaming completes"""
+                time.sleep(2)  # Longer delay for streaming to ensure all chunks are sent
+                for temp_file in temp_files_to_cleanup:
+                    try:
+                        if os.path.exists(temp_file):
+                            # Additional check: only delete files created by this session
+                            if ("abstractcore_img_" in temp_file or "abstractcore_file_" in temp_file or "abstractcore_b64_" in temp_file):
+                                os.unlink(temp_file)
+                                logger.debug(f"Cleaned up temporary file during streaming: {temp_file}")
+                            else:
+                                logger.debug(f"Skipped cleanup of non-AbstractCore streaming file: {temp_file}")
+                    except Exception as cleanup_error:
+                        logger.warning(f"Failed to cleanup temporary file {temp_file}: {cleanup_error}")
+
+            # Run cleanup in background thread
+            cleanup_thread = threading.Thread(target=delayed_streaming_cleanup, daemon=True)
+            cleanup_thread.start()
+
     except Exception as e:
         logger.error(
             "❌ Streaming failed",
@@ -1072,25 +2295,88 @@ def run_server(host: str = "0.0.0.0", port: int = 8000):
     uvicorn.run(app, host=host, port=port)
 
 # ============================================================================
-# Startup
+# Server Runner Function
 # ============================================================================
 
-if __name__ == "__main__":
-    import uvicorn
+def run_server_with_args():
+    """Run the server with argument parsing for CLI usage."""
+    parser = argparse.ArgumentParser(
+        description="AbstractCore Server - Universal LLM Gateway with Media Processing",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  python -m abstractcore.server.app                                # Start server with defaults
+  python -m abstractcore.server.app --debug                        # Start with debug logging
+  python -m abstractcore.server.app --host 127.0.0.1 --port 8080   # Custom host/port
+  python -m abstractcore.server.app --debug --port 8080            # Debug on custom port
+
+Environment Variables:
+  ABSTRACTCORE_DEBUG=true    # Enable debug mode (equivalent to --debug)
+  HOST=127.0.0.1             # Server host (overridden by --host)
+  PORT=8080                  # Server port (overridden by --port)
+
+Debug Mode:
+  The --debug flag enables verbose logging and better error reporting, including:
+  - Detailed HTTP request/response logging
+  - Full error traces for 422 Unprocessable Entity errors
+  - Media processing diagnostics
+  - Provider initialization details
+        """
+    )
+
+    parser.add_argument(
+        '--debug',
+        action='store_true',
+        help='Enable debug logging and show detailed diagnostics (overrides centralized config)'
+    )
+    parser.add_argument(
+        '--host',
+        default=os.getenv("HOST", "0.0.0.0"),
+        help='Host to bind the server to (default: 0.0.0.0)'
+    )
+    parser.add_argument(
+        '--port',
+        type=int,
+        default=int(os.getenv("PORT", "8000")),
+        help='Port to bind the server to (default: 8000)'
+    )
 
-    port = int(os.getenv("PORT", "8000"))
-    host = os.getenv("HOST", "0.0.0.0")
+    args = parser.parse_args()
+
+    # Reconfigure logging if debug mode is requested (--debug overrides config defaults)
+    if args.debug:
+        reconfigure_for_debug()
+        print("🐛 Debug mode enabled - detailed logging active")
 
     logger.info(
         "🚀 Starting AbstractCore Server",
-        host=host,
-        port=port,
-        debug=debug_mode
+        host=args.host,
+        port=args.port,
+        debug=debug_mode,
+        version=__version__
     )
 
-    uvicorn.run(
-        app,
-        host=host,
-        port=port,
-        log_level="debug" if debug_mode else "info"
-    )
+    # Enhanced uvicorn configuration for debug mode
+    uvicorn_config = {
+        "app": app,
+        "host": args.host,
+        "port": args.port,
+        "log_level": "debug" if debug_mode else "info"
+    }
+
+    # In debug mode, enable more detailed uvicorn logging
+    if debug_mode:
+        uvicorn_config.update({
+            "access_log": True,
+            "use_colors": True,
+        })
+
+    import uvicorn
+    uvicorn.run(**uvicorn_config)
+
+# ============================================================================
+# Startup
+# ============================================================================
+
+if __name__ == "__main__":
+    run_server_with_args()