abstractcore-2.4.9-py3-none-any.whl → abstractcore-2.5.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/apps/__main__.py +8 -1
- abstractcore/apps/deepsearch.py +644 -0
- abstractcore/apps/intent.py +614 -0
- abstractcore/assets/model_capabilities.json +50 -34
- abstractcore/config/__init__.py +10 -0
- abstractcore/{cli → config}/main.py +13 -1
- abstractcore/config/manager.py +355 -0
- abstractcore/core/session.py +46 -1
- abstractcore/processing/__init__.py +5 -1
- abstractcore/processing/basic_deepsearch.py +2173 -0
- abstractcore/processing/basic_intent.py +690 -0
- abstractcore/providers/anthropic_provider.py +1 -0
- abstractcore/providers/base.py +1 -0
- abstractcore/providers/huggingface_provider.py +95 -4
- abstractcore/providers/lmstudio_provider.py +14 -0
- abstractcore/providers/mlx_provider.py +76 -2
- abstractcore/providers/ollama_provider.py +6 -2
- abstractcore/providers/openai_provider.py +1 -0
- abstractcore/providers/registry.py +6 -6
- abstractcore/structured/handler.py +161 -1
- abstractcore/tools/common_tools.py +98 -3
- abstractcore/utils/cli.py +114 -1
- abstractcore/utils/version.py +1 -1
- {abstractcore-2.4.9.dist-info → abstractcore-2.5.2.dist-info}/METADATA +38 -18
- {abstractcore-2.4.9.dist-info → abstractcore-2.5.2.dist-info}/RECORD +30 -25
- {abstractcore-2.4.9.dist-info → abstractcore-2.5.2.dist-info}/entry_points.txt +6 -2
- abstractcore/cli/__init__.py +0 -9
- /abstractcore/{cli → config}/vision_config.py +0 -0
- {abstractcore-2.4.9.dist-info → abstractcore-2.5.2.dist-info}/WHEEL +0 -0
- {abstractcore-2.4.9.dist-info → abstractcore-2.5.2.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.4.9.dist-info → abstractcore-2.5.2.dist-info}/top_level.txt +0 -0
abstractcore/providers/anthropic_provider.py CHANGED
@@ -32,6 +32,7 @@ class AnthropicProvider(BaseProvider):
 
     def __init__(self, model: str = "claude-3-haiku-20240307", api_key: Optional[str] = None, **kwargs):
         super().__init__(model, **kwargs)
+        self.provider = "anthropic"
 
         if not ANTHROPIC_AVAILABLE:
             raise ImportError("Anthropic package not installed. Install with: pip install anthropic")
abstractcore/providers/base.py CHANGED
@@ -38,6 +38,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
 
     def __init__(self, model: str, **kwargs):
         AbstractCoreInterface.__init__(self, model, **kwargs)
+        self.provider = None
 
         # Setup structured logging
         self.logger = get_logger(self.__class__.__name__)
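Each provider constructor now stamps a `self.provider` label ("anthropic", "openai", "ollama", ...), with `BaseProvider` defaulting it to `None`. A minimal usage sketch, with import paths assumed from the package layout listed above:

```python
# Illustrative only: the import path is inferred from the file list, and the
# Ollama defaults come from the __init__ shown further down in this diff.
from abstractcore.providers.ollama_provider import OllamaProvider

llm = OllamaProvider()        # defaults to qwen3:4b-instruct-2507-q4_K_M
print(llm.provider)           # -> "ollama"

# Downstream code can branch on the label instead of the class name:
if llm.provider in ("ollama", "lmstudio"):
    print("local REST provider")
```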
abstractcore/providers/huggingface_provider.py CHANGED
@@ -35,6 +35,13 @@ try:
 except ImportError:
     LLAMACPP_AVAILABLE = False
 
+# Try to import Outlines (native structured output for transformers models)
+try:
+    import outlines
+    OUTLINES_AVAILABLE = True
+except ImportError:
+    OUTLINES_AVAILABLE = False
+
 # We no longer download models - cache-only approach
 # huggingface_hub not required for basic operation
 
@@ -42,9 +49,10 @@ except ImportError:
 class HuggingFaceProvider(BaseProvider):
     """HuggingFace provider with dual support for transformers and GGUF models"""
 
-    def __init__(self, model: str = "
+    def __init__(self, model: str = "unsloth/Qwen3-4B-Instruct-2507-GGUF",
                  device: Optional[str] = None,
                  n_gpu_layers: Optional[int] = None,
+                 structured_output_method: str = "auto",
                  **kwargs):
 
         # Handle legacy context_size parameter with deprecation warning
@@ -61,10 +69,18 @@ class HuggingFaceProvider(BaseProvider):
             kwargs["max_tokens"] = context_size
 
         super().__init__(model, **kwargs)
+        self.provider = "huggingface"
 
         # Handle timeout parameter for local models
         self._handle_timeout_parameter(kwargs)
 
+        # Structured output method: "auto", "native_outlines", "prompted"
+        # auto: Use Outlines if available (for transformers), otherwise prompted (default)
+        # native_outlines: Force Outlines (error if unavailable)
+        # prompted: Always use prompted fallback (fastest for transformers, still 100% success)
+        # Note: GGUF models always use llama-cpp-python native support regardless of this setting
+        self.structured_output_method = structured_output_method
+
         # Initialize tool handler
         self.tool_handler = UniversalToolHandler(model)
 
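The new `structured_output_method` keyword controls how structured output is produced for HuggingFace models. A hedged construction sketch (import path assumed from the file list; model loading behaviour is unchanged and happens from the local HF cache):

```python
from abstractcore.providers.huggingface_provider import HuggingFaceProvider

# "auto" (default): use Outlines for transformers models when installed,
# otherwise fall back to the prompted JSON strategy.
llm = HuggingFaceProvider(structured_output_method="auto")

# Force the Outlines path; per the comments above this errors out if Outlines
# is not installed (GGUF models ignore the setting and use llama-cpp-python).
strict = HuggingFaceProvider(
    model="unsloth/Qwen3-4B-Instruct-2507-GGUF",
    structured_output_method="native_outlines",
)
```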
@@ -481,9 +497,9 @@ class HuggingFaceProvider(BaseProvider):
         """Generate response using appropriate backend"""
 
         if self.model_type == "gguf":
-            return self._generate_gguf(prompt, messages, system_prompt, tools, media, stream, **kwargs)
+            return self._generate_gguf(prompt, messages, system_prompt, tools, media, stream, response_model, **kwargs)
         else:
-            return self._generate_transformers(prompt, messages, system_prompt, tools, media, stream, **kwargs)
+            return self._generate_transformers(prompt, messages, system_prompt, tools, media, stream, response_model, **kwargs)
 
     def _generate_transformers(self,
                                prompt: str,
@@ -492,8 +508,9 @@ class HuggingFaceProvider(BaseProvider):
                                tools: Optional[List[Dict[str, Any]]] = None,
                                media: Optional[List['MediaContent']] = None,
                                stream: bool = False,
+                               response_model: Optional[Type[BaseModel]] = None,
                                **kwargs) -> Union[GenerateResponse, Iterator[GenerateResponse]]:
-        """Generate using transformers backend
+        """Generate using transformers backend with optional Outlines native structured output"""
 
         if not self.pipeline:
             return GenerateResponse(
@@ -502,6 +519,66 @@ class HuggingFaceProvider(BaseProvider):
                 finish_reason="error"
             )
 
+        # Native structured output via Outlines (if configured and available)
+        should_use_outlines = (
+            response_model and
+            PYDANTIC_AVAILABLE and
+            not stream and
+            self.structured_output_method != "prompted"  # Skip if explicitly prompted
+        )
+
+        if should_use_outlines:
+            # Check if Outlines is required but unavailable
+            if self.structured_output_method == "native_outlines" and not OUTLINES_AVAILABLE:
+                return GenerateResponse(
+                    content="Error: structured_output_method='native_outlines' requires Outlines library. Install with: pip install abstractcore[huggingface]",
+                    model=self.model,
+                    finish_reason="error"
+                )
+
+            # Try Outlines if available (auto or native_outlines mode)
+            if OUTLINES_AVAILABLE:
+                try:
+                    # Cache Outlines model wrapper to avoid re-initialization
+                    if not hasattr(self, '_outlines_model') or self._outlines_model is None:
+                        self.logger.debug("Creating Outlines model wrapper for native structured output")
+                        self._outlines_model = outlines.from_transformers(
+                            self.model_instance,
+                            self.tokenizer
+                        )
+
+                    # Build input text (same as normal generation)
+                    input_text = self._build_input_text_transformers(prompt, messages, system_prompt, tools)
+
+                    # Create constrained generator with JSON schema
+                    self.logger.debug(f"Using Outlines native structured output for {response_model.__name__}")
+                    generator = self._outlines_model(
+                        input_text,
+                        outlines.json_schema(response_model),
+                        max_tokens=kwargs.get("max_tokens", self.max_tokens or 512)
+                    )
+
+                    # Validate and return
+                    validated_obj = response_model.model_validate(generator)
+
+                    return GenerateResponse(
+                        content=validated_obj.model_dump_json(),
+                        model=self.model,
+                        finish_reason="stop",
+                        validated_object=validated_obj
+                    )
+                except Exception as e:
+                    # If native_outlines was explicitly requested, don't fall back
+                    if self.structured_output_method == "native_outlines":
+                        return GenerateResponse(
+                            content=f"Error: Outlines native structured output failed: {str(e)}",
+                            model=self.model,
+                            finish_reason="error"
+                        )
+                    # Otherwise fall back to prompted approach
+                    self.logger.debug(f"Outlines generation failed, falling back to prompted: {e}")
+                    # Continue with normal generation below
+
         # Build input text with tool and media support
         # Handle media content first if present
         if media:
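With a Pydantic `response_model` and no streaming, the transformers path now wraps the loaded model with `outlines.from_transformers` and constrains decoding to the model's JSON schema, falling back to the prompted strategy in "auto" mode if Outlines fails. A sketch of the intended call; the public `generate()` name and the example model are assumptions (the diff only shows the internal dispatch):

```python
from pydantic import BaseModel
from abstractcore.providers.huggingface_provider import HuggingFaceProvider

class Ticket(BaseModel):
    title: str
    priority: int

llm = HuggingFaceProvider(model="Qwen/Qwen2.5-0.5B-Instruct")  # any transformers model
resp = llm.generate("Summarise this bug report as a ticket: ...",
                    response_model=Ticket)

# With Outlines installed, decoding is constrained to Ticket's schema and the
# parsed object rides along on the response; otherwise the prompted path runs.
print(resp.content)            # JSON string
print(resp.validated_object)   # Ticket instance (when validation succeeded)
```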
@@ -568,6 +645,7 @@ class HuggingFaceProvider(BaseProvider):
                        tools: Optional[List[Dict[str, Any]]] = None,
                        media: Optional[List['MediaContent']] = None,
                        stream: bool = False,
+                       response_model: Optional[Type[BaseModel]] = None,
                        **kwargs) -> Union[GenerateResponse, Iterator[GenerateResponse]]:
         """Generate using GGUF backend with llama-cpp-python"""
 
@@ -663,6 +741,19 @@ class HuggingFaceProvider(BaseProvider):
         if seed_value is not None:
             generation_kwargs["seed"] = seed_value
 
+        # Add native structured output support (llama-cpp-python format)
+        # llama-cpp-python supports native structured outputs using the response_format parameter
+        # This provides server-side guaranteed schema compliance
+        if response_model and PYDANTIC_AVAILABLE:
+            json_schema = response_model.model_json_schema()
+            generation_kwargs["response_format"] = {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": response_model.__name__,
+                    "schema": json_schema
+                }
+            }
+
         # Handle tools - both native and prompted support
         has_native_tools = False
         if tools:
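For GGUF models the schema is handed to llama-cpp-python through its OpenAI-style `response_format` parameter, which enforces the schema during decoding. The dict built above looks like this for a concrete Pydantic model (Pydantic only, no abstractcore imports):

```python
from pydantic import BaseModel

class Answer(BaseModel):
    text: str
    confidence: float

response_format = {
    "type": "json_schema",
    "json_schema": {
        "name": Answer.__name__,               # "Answer"
        "schema": Answer.model_json_schema(),  # full JSON schema, enums and all
    },
}
# llama-cpp-python constrains token sampling to this schema, so the returned
# text is guaranteed to parse as an Answer.
```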
abstractcore/providers/lmstudio_provider.py CHANGED
@@ -25,6 +25,7 @@ class LMStudioProvider(BaseProvider):
 
     def __init__(self, model: str = "local-model", base_url: str = "http://localhost:1234/v1", **kwargs):
         super().__init__(model, **kwargs)
+        self.provider = "lmstudio"
 
         # Initialize tool handler
         self.tool_handler = UniversalToolHandler(model)
@@ -207,6 +208,19 @@ class LMStudioProvider(BaseProvider):
         if seed_value is not None:
             payload["seed"] = seed_value
 
+        # Add structured output support (OpenAI-compatible format)
+        # LMStudio supports native structured outputs using the response_format parameter
+        # This provides server-side guaranteed schema compliance
+        if response_model and PYDANTIC_AVAILABLE:
+            json_schema = response_model.model_json_schema()
+            payload["response_format"] = {
+                "type": "json_schema",
+                "json_schema": {
+                    "name": response_model.__name__,
+                    "schema": json_schema
+                }
+            }
+
         if stream:
             # Return streaming response - BaseProvider will handle tag rewriting via UnifiedStreamProcessor
             return self._stream_generate(payload)
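LMStudio receives the same OpenAI-compatible `response_format` block. A hedged sketch of the raw request the provider now effectively sends (endpoint from the default `base_url`; the provider's actual payload assembly may differ in detail):

```python
import httpx
from pydantic import BaseModel

class Sentiment(BaseModel):
    label: str
    score: float

payload = {
    "model": "qwen/qwen3-4b-2507",
    "messages": [{"role": "user", "content": "Classify: 'great product!'"}],
    "response_format": {
        "type": "json_schema",
        "json_schema": {"name": "Sentiment", "schema": Sentiment.model_json_schema()},
    },
}
r = httpx.post("http://localhost:1234/v1/chat/completions", json=payload, timeout=120)
print(r.json()["choices"][0]["message"]["content"])  # schema-conforming JSON
```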
abstractcore/providers/mlx_provider.py CHANGED
@@ -11,6 +11,14 @@ try:
 except ImportError:
     PYDANTIC_AVAILABLE = False
     BaseModel = None
+
+# Try to import Outlines (native structured output for MLX models)
+try:
+    import outlines
+    OUTLINES_AVAILABLE = True
+except ImportError:
+    OUTLINES_AVAILABLE = False
+
 from .base import BaseProvider
 from ..core.types import GenerateResponse
 from ..exceptions import ProviderAPIError, ModelNotFoundError, format_model_error
@@ -21,12 +29,20 @@ from ..events import EventType
 class MLXProvider(BaseProvider):
     """MLX provider for Apple Silicon models with full integration"""
 
-    def __init__(self, model: str = "mlx-community/Mistral-7B-Instruct-v0.1-4bit",
+    def __init__(self, model: str = "mlx-community/Mistral-7B-Instruct-v0.1-4bit",
+                 structured_output_method: str = "auto", **kwargs):
         super().__init__(model, **kwargs)
+        self.provider = "mlx"
 
         # Handle timeout parameter for local models
         self._handle_timeout_parameter(kwargs)
 
+        # Structured output method: "auto", "native_outlines", "prompted"
+        # auto: Use Outlines if available, otherwise prompted (default)
+        # native_outlines: Force Outlines (error if unavailable)
+        # prompted: Always use prompted fallback (fastest, still 100% success)
+        self.structured_output_method = structured_output_method
+
         # Initialize tool handler
         self.tool_handler = UniversalToolHandler(model)
 
@@ -143,7 +159,7 @@ class MLXProvider(BaseProvider):
                            stream: bool = False,
                            response_model: Optional[Type[BaseModel]] = None,
                            **kwargs) -> Union[GenerateResponse, Iterator[GenerateResponse]]:
-        """Internal generation with MLX"""
+        """Internal generation with MLX and optional Outlines native structured output"""
 
         if not self.llm or not self.tokenizer:
             return GenerateResponse(
@@ -152,6 +168,64 @@ class MLXProvider(BaseProvider):
                 finish_reason="error"
             )
 
+        # Native structured output via Outlines (if configured and available)
+        should_use_outlines = (
+            response_model and
+            PYDANTIC_AVAILABLE and
+            not stream and
+            self.structured_output_method != "prompted"  # Skip if explicitly prompted
+        )
+
+        if should_use_outlines:
+            # Check if Outlines is required but unavailable
+            if self.structured_output_method == "native_outlines" and not OUTLINES_AVAILABLE:
+                return GenerateResponse(
+                    content="Error: structured_output_method='native_outlines' requires Outlines library. Install with: pip install abstractcore[mlx]",
+                    model=self.model,
+                    finish_reason="error"
+                )
+
+            # Try Outlines if available (auto or native_outlines mode)
+            if OUTLINES_AVAILABLE:
+                try:
+                    # Cache Outlines MLX model wrapper to avoid re-initialization
+                    if not hasattr(self, '_outlines_model') or self._outlines_model is None:
+                        self.logger.debug("Creating Outlines MLX model wrapper for native structured output")
+                        self._outlines_model = outlines.from_mlxlm(self.llm, self.tokenizer)
+
+                    # Build full prompt (same as normal generation)
+                    processed_prompt = prompt
+                    full_prompt = self._build_prompt(processed_prompt, messages, system_prompt, tools)
+
+                    # Create constrained generator with JSON schema
+                    self.logger.debug(f"Using Outlines native structured output for {response_model.__name__}")
+                    generator = self._outlines_model(
+                        full_prompt,
+                        outlines.json_schema(response_model),
+                        max_tokens=kwargs.get("max_tokens", self.max_tokens or 512)
+                    )
+
+                    # Validate and return
+                    validated_obj = response_model.model_validate(generator)
+
+                    return GenerateResponse(
+                        content=validated_obj.model_dump_json(),
+                        model=self.model,
+                        finish_reason="stop",
+                        validated_object=validated_obj
+                    )
+                except Exception as e:
+                    # If native_outlines was explicitly requested, don't fall back
+                    if self.structured_output_method == "native_outlines":
+                        return GenerateResponse(
+                            content=f"Error: Outlines native structured output failed: {str(e)}",
+                            model=self.model,
+                            finish_reason="error"
+                        )
+                    # Otherwise fall back to prompted approach
+                    self.logger.debug(f"Outlines generation failed, falling back to prompted: {e}")
+                    # Continue with normal generation below
+
         # Handle media content first if present
         processed_prompt = prompt
         if media:
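The MLX path mirrors the transformers one, wrapping the loaded weights with `outlines.from_mlxlm` and applying the same fallback rules. A usage sketch (Apple Silicon and the `mlx` extra assumed; `generate()` is the presumed public entry point):

```python
from pydantic import BaseModel
from abstractcore.providers.mlx_provider import MLXProvider

class CityFact(BaseModel):
    city: str
    population: int

llm = MLXProvider(model="mlx-community/Qwen3-4B",
                  structured_output_method="native_outlines")  # errors if Outlines missing

resp = llm.generate("Give one fact about Tokyo.", response_model=CityFact)
print(resp.validated_object)   # CityFact(...), produced via the Outlines-constrained path
```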
abstractcore/providers/ollama_provider.py CHANGED
@@ -23,8 +23,10 @@ from ..events import EventType
 class OllamaProvider(BaseProvider):
     """Ollama provider for local models with full integration"""
 
-    def __init__(self, model: str = "
+    def __init__(self, model: str = "qwen3:4b-instruct-2507-q4_K_M", base_url: str = "http://localhost:11434", **kwargs):
         super().__init__(model, **kwargs)
+        self.provider = "ollama"
+
         self.base_url = base_url.rstrip('/')
         self.client = httpx.Client(timeout=self._timeout)
 
@@ -143,9 +145,11 @@ class OllamaProvider(BaseProvider):
             payload["options"]["seed"] = seed_value
 
         # Add structured output support (Ollama native JSON schema)
+        # Ollama accepts the full JSON schema in the "format" parameter
+        # This provides server-side guaranteed schema compliance
         if response_model and PYDANTIC_AVAILABLE:
             json_schema = response_model.model_json_schema()
-            payload["format"] = json_schema
+            payload["format"] = json_schema  # Pass the full schema, not just "json"
 
         # Use chat format by default (recommended by Ollama docs), especially when tools are present
         # Only use generate format for very simple cases without tools or messages
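Ollama takes the complete JSON schema in the `format` field of `/api/chat` rather than the bare string `"json"`, which is exactly what the changed line passes through. A raw-request illustration of the effect:

```python
import httpx
from pydantic import BaseModel

class Recipe(BaseModel):
    name: str
    minutes: int

payload = {
    "model": "qwen3:4b-instruct-2507-q4_K_M",
    "messages": [{"role": "user", "content": "A quick pasta recipe, as JSON."}],
    "format": Recipe.model_json_schema(),   # full schema, not just "json"
    "stream": False,
}
r = httpx.post("http://localhost:11434/api/chat", json=payload, timeout=120)
print(r.json()["message"]["content"])       # JSON matching Recipe's schema
```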
abstractcore/providers/openai_provider.py CHANGED
@@ -32,6 +32,7 @@ class OpenAIProvider(BaseProvider):
 
     def __init__(self, model: str = "gpt-3.5-turbo", api_key: Optional[str] = None, **kwargs):
         super().__init__(model, **kwargs)
+        self.provider = "openai"
 
         if not OPENAI_AVAILABLE:
             raise ImportError("OpenAI package not installed. Install with: pip install openai")
abstractcore/providers/registry.py CHANGED
@@ -86,8 +86,8 @@ class ProviderRegistry:
             display_name="Ollama",
             provider_class=None,
             description="Local LLM server for running open-source models",
-            default_model="qwen3-
-            supported_features=["chat", "completion", "embeddings", "prompted_tools", "streaming"],
+            default_model="qwen3:4b-instruct-2507-q4_K_M",
+            supported_features=["chat", "completion", "embeddings", "prompted_tools", "streaming", "structured_output"],
             authentication_required=False,
             local_provider=True,
             installation_extras="ollama",
@@ -101,7 +101,7 @@ class ProviderRegistry:
             provider_class=None,
             description="Local model development and testing platform",
             default_model="qwen/qwen3-4b-2507",
-            supported_features=["chat", "completion", "embeddings", "prompted_tools", "streaming"],
+            supported_features=["chat", "completion", "embeddings", "prompted_tools", "streaming", "structured_output"],
             authentication_required=False,
             local_provider=True,
             installation_extras=None,
@@ -115,7 +115,7 @@ class ProviderRegistry:
             provider_class=None,
             description="Apple Silicon optimized local inference",
             default_model="mlx-community/Qwen3-4B",
-            supported_features=["chat", "completion", "prompted_tools", "streaming", "apple_silicon"],
+            supported_features=["chat", "completion", "prompted_tools", "streaming", "structured_output", "apple_silicon"],
             authentication_required=False,
             local_provider=True,
             installation_extras="mlx",
@@ -128,8 +128,8 @@ class ProviderRegistry:
             display_name="HuggingFace",
             provider_class=None,
             description="Access to HuggingFace models (transformers and embeddings)",
-            default_model="
-            supported_features=["chat", "completion", "embeddings", "prompted_tools", "local_models"],
+            default_model="unsloth/Qwen3-4B-Instruct-2507-GGUF",
+            supported_features=["chat", "completion", "embeddings", "prompted_tools", "local_models", "structured_output"],
             authentication_required=False,  # Optional for public models
             local_provider=True,
             installation_extras="huggingface",
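All four local providers now advertise `structured_output` in `supported_features`, so callers can filter on the capability. A hypothetical check (the entry shape with a `supported_features` list is from the diff; the helper name is invented for illustration):

```python
def supports_structured_output(provider_info) -> bool:
    # provider_info is a registry entry like the ones configured above
    return "structured_output" in getattr(provider_info, "supported_features", [])
```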
abstractcore/structured/handler.py CHANGED
@@ -6,6 +6,7 @@ import json
 import re
 import time
 from typing import Type, Dict, Any, Optional
+from enum import Enum
 from pydantic import BaseModel, ValidationError
 
 from .retry import FeedbackRetry
@@ -69,6 +70,9 @@ class StructuredOutputHandler:
                           max_retries=self.retry_strategy.max_attempts)
 
         try:
+            # Store provider for schema generation
+            self.current_provider = provider
+
             # Strategy 1: Use native support if available
             if self._has_native_support(provider):
                 self.logger.debug("Using native structured output support",
@@ -125,12 +129,44 @@ class StructuredOutputHandler:
         """
         Check if provider has native structured output support.
 
+        Checks both provider type (Ollama, LMStudio, HuggingFace, MLX with Outlines)
+        and model capabilities configuration as fallback.
+
         Args:
             provider: The LLM provider instance
 
         Returns:
             True if provider supports native structured outputs
         """
+        # Ollama and LMStudio always support native structured outputs
+        # via the format and response_format parameters respectively
+        provider_name = provider.__class__.__name__
+        if provider_name in ['OllamaProvider', 'LMStudioProvider']:
+            return True
+
+        # HuggingFaceProvider supports native via GGUF or Transformers+Outlines
+        if provider_name == 'HuggingFaceProvider':
+            # Check if it's a GGUF model - these use llama-cpp-python which supports native structured outputs
+            if hasattr(provider, 'model_type') and provider.model_type == 'gguf':
+                return True
+
+            # Check if it's a Transformers model with Outlines available
+            if hasattr(provider, 'model_type') and provider.model_type == 'transformers':
+                try:
+                    import outlines
+                    return True
+                except ImportError:
+                    return False
+
+        # MLXProvider supports native via Outlines
+        if provider_name == 'MLXProvider':
+            try:
+                import outlines
+                return True
+            except ImportError:
+                return False
+
+        # For other providers, check model capabilities
         capabilities = getattr(provider, 'model_capabilities', {})
         return capabilities.get("structured_output") == "native"
 
@@ -242,6 +278,9 @@ class StructuredOutputHandler:
         # Try parsing the extracted JSON
         try:
             data = json.loads(json_content)
+            # Preprocess enum responses if we have mappings
+            if hasattr(self, '_enum_mappings') and self._enum_mappings:
+                data = self._preprocess_enum_response(data, self._enum_mappings)
             result = response_model.model_validate(data)
         except (json.JSONDecodeError, ValidationError) as parse_error:
             # Try to fix the JSON
@@ -254,6 +293,9 @@ class StructuredOutputHandler:
             if fixed_json:
                 try:
                     data = json.loads(fixed_json)
+                    # Preprocess enum responses if we have mappings
+                    if hasattr(self, '_enum_mappings') and self._enum_mappings:
+                        data = self._preprocess_enum_response(data, self._enum_mappings)
                     result = response_model.model_validate(data)
                     self.logger.info("JSON self-fix successful", attempt=attempt + 1)
                 except (json.JSONDecodeError, ValidationError) as fix_error:
@@ -350,6 +392,14 @@ class StructuredOutputHandler:
             Enhanced prompt with schema information
         """
         schema = response_model.model_json_schema()
+
+        # For prompted providers, simplify enum schemas to avoid LLM confusion
+        # Store original enum mappings for response preprocessing
+        if hasattr(self, 'current_provider') and not self._has_native_support(self.current_provider):
+            schema, self._enum_mappings = self._simplify_enum_schemas(schema)
+        else:
+            self._enum_mappings = {}
+
         model_name = response_model.__name__
 
         # Create example from schema
@@ -432,4 +482,114 @@ Important: Return ONLY the JSON object, no additional text or formatting."""
             return match.group(0)
 
         # If nothing found, try the original content
-        return content
+        return content
+
+    def _simplify_enum_schemas(self, schema: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Dict[str, str]]]:
+        """
+        Simplify enum schemas for prompted providers while preserving enum mappings.
+
+        Args:
+            schema: Original JSON schema
+
+        Returns:
+            Tuple of (simplified_schema, enum_mappings)
+            enum_mappings maps field_paths to {enum_notation: enum_value}
+        """
+        if '$defs' not in schema:
+            return schema, {}
+
+        # Find enum definitions and build mappings
+        enum_mappings = {}
+        enum_refs_to_simplify = {}
+
+        for def_name, def_schema in schema['$defs'].items():
+            if def_schema.get('type') == 'string' and 'enum' in def_schema:
+                ref_key = f"#/$defs/{def_name}"
+                enum_values = def_schema['enum']
+
+                # Build mapping from Python enum notation to actual values
+                enum_class_name = def_name
+                field_mappings = {}
+                for value in enum_values:
+                    # Map both "EnumClass.VALUE_NAME" and "<EnumClass.VALUE_NAME: 'value'>" patterns
+                    enum_notation = f"{enum_class_name}.{value.upper().replace(' ', '_')}"
+                    field_mappings[enum_notation] = value
+                    # Also handle the repr format
+                    repr_notation = f"<{enum_class_name}.{value.upper().replace(' ', '_')}: '{value}'>"
+                    field_mappings[repr_notation] = value
+
+                enum_refs_to_simplify[ref_key] = {
+                    'type': 'string',
+                    'description': f"Use one of: {', '.join(enum_values)}. IMPORTANT: Use the exact string values, not Python enum notation.",
+                    'enum': enum_values
+                }
+
+                # Store mappings by reference for later use
+                enum_mappings[ref_key] = field_mappings
+
+        # Create simplified schema by replacing enum references
+        def replace_enum_refs(obj, path=""):
+            if isinstance(obj, dict):
+                if '$ref' in obj and obj['$ref'] in enum_refs_to_simplify:
+                    # Store the field path for this enum reference
+                    if path:
+                        enum_mappings[path] = enum_mappings[obj['$ref']]
+                    return enum_refs_to_simplify[obj['$ref']]
+                return {k: replace_enum_refs(v, f"{path}.{k}" if path else k) for k, v in obj.items()}
+            elif isinstance(obj, list):
+                return [replace_enum_refs(item, path) for item in obj]
+            return obj
+
+        simplified_schema = replace_enum_refs(schema)
+
+        # Remove the $defs section since we've inlined the enum definitions
+        if '$defs' in simplified_schema:
+            # Only remove enum definitions, keep other definitions
+            remaining_defs = {k: v for k, v in simplified_schema['$defs'].items()
+                              if not (v.get('type') == 'string' and 'enum' in v)}
+            if remaining_defs:
+                simplified_schema['$defs'] = remaining_defs
+            else:
+                del simplified_schema['$defs']
+
+        return simplified_schema, enum_mappings
+
+    def _preprocess_enum_response(self, data: Dict[str, Any], enum_mappings: Dict[str, Dict[str, str]]) -> Dict[str, Any]:
+        """
+        Preprocess LLM response to convert Python enum notation back to valid enum values.
+
+        Args:
+            data: Parsed JSON data from LLM
+            enum_mappings: Mappings from field paths to enum notation conversions
+
+        Returns:
+            Preprocessed data with enum notations converted to valid values
+        """
+        if not enum_mappings:
+            return data
+
+        def convert_enum_values(obj, path=""):
+            if isinstance(obj, dict):
+                result = {}
+                for key, value in obj.items():
+                    field_path = f"{path}.{key}" if path else key
+
+                    # Check if this field has enum mappings
+                    field_mappings = None
+                    for enum_path, mappings in enum_mappings.items():
+                        if field_path in enum_path or enum_path in field_path:
+                            field_mappings = mappings
+                            break
+
+                    if field_mappings and isinstance(value, str):
+                        # Try to convert enum notation to actual value
+                        converted_value = field_mappings.get(value, value)
+                        result[key] = converted_value
+                    else:
+                        result[key] = convert_enum_values(value, field_path)
+                return result
+            elif isinstance(obj, list):
+                return [convert_enum_values(item, path) for item in obj]
+            return obj
+
+        return convert_enum_values(data)