abstractcore-2.4.2-py3-none-any.whl → abstractcore-2.4.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. abstractcore/apps/app_config_utils.py +19 -0
  2. abstractcore/apps/summarizer.py +85 -56
  3. abstractcore/architectures/detection.py +15 -4
  4. abstractcore/assets/architecture_formats.json +1 -1
  5. abstractcore/assets/model_capabilities.json +420 -11
  6. abstractcore/core/interface.py +2 -0
  7. abstractcore/core/session.py +4 -0
  8. abstractcore/embeddings/manager.py +54 -16
  9. abstractcore/media/__init__.py +116 -148
  10. abstractcore/media/auto_handler.py +363 -0
  11. abstractcore/media/base.py +456 -0
  12. abstractcore/media/capabilities.py +335 -0
  13. abstractcore/media/types.py +300 -0
  14. abstractcore/media/vision_fallback.py +260 -0
  15. abstractcore/providers/anthropic_provider.py +18 -1
  16. abstractcore/providers/base.py +187 -0
  17. abstractcore/providers/huggingface_provider.py +111 -12
  18. abstractcore/providers/lmstudio_provider.py +88 -5
  19. abstractcore/providers/mlx_provider.py +33 -1
  20. abstractcore/providers/ollama_provider.py +37 -3
  21. abstractcore/providers/openai_provider.py +18 -1
  22. abstractcore/server/app.py +1390 -104
  23. abstractcore/tools/common_tools.py +12 -8
  24. abstractcore/utils/__init__.py +9 -5
  25. abstractcore/utils/cli.py +199 -17
  26. abstractcore/utils/message_preprocessor.py +182 -0
  27. abstractcore/utils/structured_logging.py +117 -16
  28. abstractcore/utils/version.py +1 -1
  29. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/METADATA +214 -20
  30. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/RECORD +34 -27
  31. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/entry_points.txt +1 -0
  32. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/WHEEL +0 -0
  33. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/licenses/LICENSE +0 -0
  34. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/top_level.txt +0 -0
@@ -13,16 +13,40 @@
  "aliases": [],
  "max_tokens": 128000
  },
- "gpt-4o": {
+ "gpt-4-turbo-with-vision": {
  "max_output_tokens": 4096,
  "tool_support": "native",
  "structured_output": "native",
  "parallel_tools": true,
  "max_tools": -1,
  "vision_support": true,
+ "audio_support": false,
+ "image_resolutions": [
+ "variable"
+ ],
+ "notes": "GPT-4 Turbo with vision capabilities",
+ "source": "OpenAI official docs 2025",
+ "canonical_name": "gpt-4-turbo-with-vision",
+ "aliases": [
+ "gpt-4-turbo-vision",
+ "gpt-4-vision-preview"
+ ],
+ "max_tokens": 128000
+ },
+ "gpt-4o": {
+ "max_output_tokens": 16384,
+ "tool_support": "native",
+ "structured_output": "native",
+ "parallel_tools": true,
+ "max_tools": -1,
+ "vision_support": true,
  "audio_support": true,
- "notes": "Multimodal, optimized for speed",
- "source": "OpenAI official docs",
+ "video_support": true,
+ "image_resolutions": [
+ "variable"
+ ],
+ "notes": "Multimodal omni model, 2x faster, half price, 5x higher rate limits (updated Nov 2024)",
+ "source": "OpenAI official docs 2025",
  "canonical_name": "gpt-4o",
  "aliases": [],
  "max_tokens": 128000
@@ -168,7 +192,9 @@
  "notes": "More likely to call unnecessary tools",
  "source": "Anthropic official docs",
  "canonical_name": "claude-3.5-haiku",
- "aliases": [],
+ "aliases": [
+ "claude-3-5-haiku-20241022"
+ ],
  "max_tokens": 200000
  },
  "claude-3-opus": {
@@ -790,6 +816,128 @@
  "aliases": [],
  "max_tokens": 32768
  },
+ "magistral-small-2509": {
+ "max_output_tokens": 8192,
+ "tool_support": "native",
+ "structured_output": "native",
+ "parallel_tools": true,
+ "vision_support": true,
+ "audio_support": false,
+ "video_support": false,
+ "image_resolutions": [
+ "variable"
+ ],
+ "max_image_resolution": "variable",
+ "notes": "Mistral vision model optimized for multimodal tasks",
+ "source": "Mistral AI 2025 release",
+ "canonical_name": "magistral-small-2509",
+ "aliases": [
+ "mistralai/magistral-small-2509"
+ ],
+ "max_tokens": 128000
+ },
+ "Qwen/Qwen3-VL-8B-Instruct-FP8": {
+ "max_output_tokens": 8192,
+ "tool_support": "native",
+ "structured_output": "native",
+ "parallel_tools": true,
+ "vision_support": true,
+ "audio_support": false,
+ "video_support": false,
+ "image_resolutions": [
+ "variable"
+ ],
+ "max_image_resolution": "variable",
+ "image_patch_size": 16,
+ "max_image_tokens": 24576,
+ "pixel_grouping": "32x32",
+ "notes": "Qwen3-VL 8B model with FP8 quantization for HuggingFace, optimized for efficient inference",
+ "source": "Qwen team 2025 HuggingFace release",
+ "canonical_name": "Qwen/Qwen3-VL-8B-Instruct-FP8",
+ "aliases": [
+ "qwen3-vl-8b-fp8",
+ "qwen3-vl-8b-instruct-fp8"
+ ],
+ "max_tokens": 262144
+ },
+ "llama3.2-vision:11b": {
+ "max_output_tokens": 4096,
+ "tool_support": "prompted",
+ "structured_output": "prompted",
+ "parallel_tools": false,
+ "vision_support": true,
+ "audio_support": false,
+ "video_support": false,
+ "image_resolutions": [
+ "560x560",
+ "1120x560",
+ "560x1120",
+ "1120x1120"
+ ],
+ "max_image_resolution": "1120x1120",
+ "image_patch_size": 14,
+ "max_image_tokens": 6400,
+ "notes": "Llama 3.2 Vision 11B model with multimodal capabilities for visual recognition and reasoning",
+ "source": "Meta AI Llama 3.2 release",
+ "canonical_name": "llama3.2-vision:11b",
+ "aliases": [
+ "llama3.2-vision-11b",
+ "llama-3.2-vision:11b"
+ ],
+ "max_tokens": 131072
+ },
+ "llama3.2-vision:70b": {
+ "max_output_tokens": 4096,
+ "tool_support": "prompted",
+ "structured_output": "prompted",
+ "parallel_tools": false,
+ "vision_support": true,
+ "audio_support": false,
+ "video_support": false,
+ "image_resolutions": [
+ "560x560",
+ "1120x560",
+ "560x1120",
+ "1120x1120"
+ ],
+ "max_image_resolution": "1120x1120",
+ "image_patch_size": 14,
+ "max_image_tokens": 6400,
+ "notes": "Llama 3.2 Vision 70B model with advanced multimodal capabilities for complex visual reasoning",
+ "source": "Meta AI Llama 3.2 release",
+ "canonical_name": "llama3.2-vision:70b",
+ "aliases": [
+ "llama3.2-vision-70b",
+ "llama-3.2-vision:70b"
+ ],
+ "max_tokens": 131072
+ },
+ "llama3.2-vision:90b": {
+ "max_output_tokens": 4096,
+ "tool_support": "prompted",
+ "structured_output": "prompted",
+ "parallel_tools": false,
+ "vision_support": true,
+ "audio_support": false,
+ "video_support": false,
+ "image_resolutions": [
+ "560x560",
+ "1120x560",
+ "560x1120",
+ "1120x1120"
+ ],
+ "max_image_resolution": "1120x1120",
+ "image_patch_size": 14,
+ "max_image_tokens": 6400,
+ "notes": "Llama 3.2 Vision 90B model with top-tier multimodal capabilities for advanced visual understanding",
+ "source": "Meta AI Llama 3.2 release",
+ "canonical_name": "llama3.2-vision:90b",
+ "aliases": [
+ "llama3.2-vision-90b",
+ "llama-3.2-vision:90b"
+ ],
+ "max_tokens": 131072
+ },
  "gemma-2b": {
  "max_output_tokens": 8192,
  "tool_support": "none",
@@ -1089,26 +1237,202 @@
  "image_resolutions": [
  "variable"
  ],
- "notes": "Qwen3-VL multimodal model with vision and video support",
+ "image_patch_size": 16,
+ "max_image_tokens": 24576,
+ "pixel_grouping": "32x32",
+ "notes": "Qwen3-VL multimodal model with vision and video support, 32x32 pixel patches",
  "source": "Alibaba Qwen3-VL technical report",
  "canonical_name": "qwen3-vl",
  "aliases": [],
  "max_tokens": 131072
  },
- "gemma3n": {
+ "qwen3-vl-4b": {
+ "max_output_tokens": 8192,
+ "tool_support": "prompted",
+ "structured_output": "prompted",
+ "parallel_tools": false,
+ "vision_support": true,
+ "video_support": true,
+ "audio_support": false,
+ "image_resolutions": [
+ "variable"
+ ],
+ "max_image_resolution": "variable",
+ "image_patch_size": 16,
+ "max_image_tokens": 24576,
+ "pixel_grouping": "32x32",
+ "notes": "Qwen3-VL 4B dense model with 256K context, optimized for LMStudio",
+ "source": "Alibaba Qwen3-VL technical report 2025",
+ "canonical_name": "qwen3-vl-4b",
+ "aliases": [
+ "qwen/qwen3-vl-4b"
+ ],
+ "max_tokens": 262144
+ },
+ "qwen3-vl-8b": {
+ "max_output_tokens": 8192,
+ "tool_support": "prompted",
+ "structured_output": "prompted",
+ "parallel_tools": false,
+ "vision_support": true,
+ "video_support": true,
+ "audio_support": false,
+ "image_resolutions": [
+ "variable"
+ ],
+ "max_image_resolution": "variable",
+ "image_patch_size": 16,
+ "max_image_tokens": 24576,
+ "pixel_grouping": "32x32",
+ "notes": "Qwen3-VL 8B dense model with 256K context, optimized for LMStudio",
+ "source": "Alibaba Qwen3-VL technical report 2025",
+ "canonical_name": "qwen3-vl-8b",
+ "aliases": [
+ "qwen/qwen3-vl-8b"
+ ],
+ "max_tokens": 262144
+ },
+ "qwen3-vl-30b": {
+ "max_output_tokens": 8192,
+ "tool_support": "prompted",
+ "structured_output": "prompted",
+ "parallel_tools": false,
+ "vision_support": true,
+ "video_support": true,
+ "audio_support": false,
+ "image_resolutions": [
+ "variable"
+ ],
+ "max_image_resolution": "variable",
+ "image_patch_size": 16,
+ "max_image_tokens": 24576,
+ "pixel_grouping": "32x32",
+ "notes": "Qwen3-VL 30B MoE model (30.5B total/3.3B active), best performing vision model, 256K context",
+ "source": "Alibaba Qwen3-VL technical report 2025",
+ "canonical_name": "qwen3-vl-30b",
+ "aliases": [
+ "qwen/qwen3-vl-30b"
+ ],
+ "max_tokens": 262144
+ },
+ "qwen2.5-vl-7b": {
+ "max_output_tokens": 8192,
+ "tool_support": "prompted",
+ "structured_output": "native",
+ "parallel_tools": false,
+ "vision_support": true,
+ "audio_support": false,
+ "image_resolutions": [
+ "56x56 to 3584x3584"
+ ],
+ "max_image_resolution": "3584x3584",
+ "image_patch_size": 14,
+ "max_image_tokens": 16384,
+ "pixel_grouping": "28x28",
+ "notes": "Qwen2.5-VL 7B parameter vision model, 28x28 pixel patches, max 3584x3584 resolution",
+ "source": "Alibaba official docs",
+ "canonical_name": "qwen2.5-vl-7b",
+ "aliases": [
+ "qwen/qwen2.5-vl-7b",
+ "unsloth/Qwen2.5-VL-7B-Instruct-GGUF"
+ ],
+ "max_tokens": 128000
+ },
+ "gemma3-4b": {
  "max_output_tokens": 8192,
  "tool_support": "native",
  "structured_output": "native",
  "parallel_tools": true,
  "vision_support": true,
- "audio_support": true,
- "video_support": true,
- "notes": "Gemma3n device-optimized multimodal model",
- "source": "Google Gemma3n announcement",
- "canonical_name": "gemma3n",
+ "audio_support": false,
+ "video_support": false,
+ "image_resolutions": [
+ "896x896"
+ ],
+ "max_image_resolution": "896x896",
+ "vision_encoder": "SigLIP-400M",
+ "image_tokens_per_image": 256,
+ "adaptive_windowing": true,
+ "notes": "Gemma3 4B parameter model with vision support, 896x896 fixed resolution with adaptive windowing",
+ "source": "Google Gemma3 documentation 2025",
+ "canonical_name": "gemma3-4b",
+ "aliases": [
+ "gemma3:4b"
+ ],
+ "max_tokens": 128000
+ },
+ "qwen2.5vl:7b": {
+ "max_output_tokens": 8192,
+ "tool_support": "prompted",
+ "structured_output": "native",
+ "parallel_tools": false,
+ "vision_support": true,
+ "audio_support": false,
+ "image_resolutions": [
+ "56x56 to 3584x3584"
+ ],
+ "max_image_resolution": "3584x3584",
+ "image_patch_size": 14,
+ "max_image_tokens": 16384,
+ "pixel_grouping": "28x28",
+ "notes": "Qwen2.5-VL 7B Ollama variant, 28x28 pixel patches, max 3584x3584 resolution",
+ "source": "Ollama model library",
+ "canonical_name": "qwen2.5vl:7b",
+ "aliases": [
+ "qwen2.5vl"
+ ],
+ "max_tokens": 128000
+ },
+ "gemma3:4b-it-qat": {
+ "max_output_tokens": 8192,
+ "tool_support": "native",
+ "structured_output": "native",
+ "parallel_tools": true,
+ "vision_support": true,
+ "audio_support": false,
+ "video_support": false,
+ "image_resolutions": [
+ "896x896"
+ ],
+ "max_image_resolution": "896x896",
+ "vision_encoder": "SigLIP-400M",
+ "image_tokens_per_image": 256,
+ "adaptive_windowing": true,
+ "notes": "Gemma3 4B instruct-tuned quantized model for Ollama, 896x896 fixed resolution",
+ "source": "Ollama model library",
+ "canonical_name": "gemma3:4b-it-qat",
  "aliases": [],
  "max_tokens": 128000
  },
+ "gemma3n:e4b": {
+ "max_output_tokens": 8192,
+ "tool_support": "native",
+ "structured_output": "native",
+ "parallel_tools": true,
+ "vision_support": true,
+ "audio_support": true,
+ "video_support": true,
+ "image_resolutions": [
+ "896x896"
+ ],
+ "max_image_resolution": "896x896",
+ "vision_encoder": "SigLIP-400M",
+ "image_tokens_per_image": 256,
+ "adaptive_windowing": true,
+ "memory_footprint": "3GB",
+ "notes": "Gemma3n device-optimized multimodal model, 896x896 fixed resolution",
+ "source": "Google Gemma3n documentation 2025",
+ "canonical_name": "gemma3n:e4b",
+ "aliases": [
+ "gemma3n:e4b:latest",
+ "gemma-3n-e4b",
+ "google/gemma-3n-e4b",
+ "gemma3n",
+ "gemma3n:e2b:latest",
+ "gemma3n:e2b"
+ ],
+ "max_tokens": 32768
+ },
  "seed-oss": {
  "max_output_tokens": 8192,
  "tool_support": "prompted",
@@ -1178,6 +1502,91 @@
  "canonical_name": "llama-4-109b",
  "aliases": [],
  "max_tokens": 10000000
+ },
+ "granite3.2:2b": {
+ "max_output_tokens": 8192,
+ "tool_support": "prompted",
+ "structured_output": "prompted",
+ "parallel_tools": false,
+ "vision_support": false,
+ "audio_support": false,
+ "notes": "IBM Granite 3.2 2B text-only model with reasoning capabilities",
+ "source": "IBM Granite 3.2 technical report",
+ "canonical_name": "granite3.2:2b",
+ "aliases": [
+ "granite3.2-2b"
+ ],
+ "max_tokens": 32768
+ },
+ "granite3.2:8b": {
+ "max_output_tokens": 8192,
+ "tool_support": "prompted",
+ "structured_output": "prompted",
+ "parallel_tools": false,
+ "vision_support": false,
+ "audio_support": false,
+ "notes": "IBM Granite 3.2 8B text-only model with reasoning capabilities",
+ "source": "IBM Granite 3.2 technical report",
+ "canonical_name": "granite3.2:8b",
+ "aliases": [
+ "granite3.2-8b"
+ ],
+ "max_tokens": 32768
+ },
+ "granite3.2-vision:2b": {
+ "max_output_tokens": 8192,
+ "tool_support": "prompted",
+ "structured_output": "prompted",
+ "parallel_tools": false,
+ "vision_support": true,
+ "audio_support": false,
+ "video_support": false,
+ "image_resolutions": [
+ "768x768"
+ ],
+ "max_image_resolution": "768x768",
+ "vision_encoder": "SigLIP2-so400m-patch14-384",
+ "image_patch_size": 14,
+ "notes": "IBM Granite 3.2-Vision 2B model with SigLIP2 encoder, optimized for visual document understanding",
+ "source": "IBM Granite 3.2 technical report arXiv:2502.09927",
+ "canonical_name": "granite3.2-vision:2b",
+ "aliases": [
+ "granite3.2-vision:latest",
+ "granite3.2-vision",
+ "granite-vision",
+ "ibm-granite-vision"
+ ],
+ "max_tokens": 32768
+ },
+ "granite3.3:2b": {
+ "max_output_tokens": 8192,
+ "tool_support": "prompted",
+ "structured_output": "prompted",
+ "parallel_tools": false,
+ "vision_support": false,
+ "audio_support": false,
+ "notes": "IBM Granite 3.3 2B text-only model with enhanced reasoning capabilities",
+ "source": "IBM Granite 3.3 release announcement",
+ "canonical_name": "granite3.3:2b",
+ "aliases": [
+ "granite3.3-2b"
+ ],
+ "max_tokens": 32768
+ },
+ "granite3.3:8b": {
+ "max_output_tokens": 8192,
+ "tool_support": "prompted",
+ "structured_output": "prompted",
+ "parallel_tools": false,
+ "vision_support": false,
+ "audio_support": false,
+ "notes": "IBM Granite 3.3 8B text-only model with enhanced reasoning capabilities",
+ "source": "IBM Granite 3.3 release announcement",
+ "canonical_name": "granite3.3:8b",
+ "aliases": [
+ "granite3.3-8b"
+ ],
+ "max_tokens": 32768
  }
  },
  "tool_support_levels": {
@@ -90,6 +90,7 @@ class AbstractCoreInterface(ABC):
  messages: Optional[List[Dict[str, str]]] = None,
  system_prompt: Optional[str] = None,
  tools: Optional[List[Dict[str, Any]]] = None,
+ media: Optional[List[Union[str, Dict[str, Any], 'MediaContent']]] = None,
  stream: bool = False,
  **kwargs) -> Union[GenerateResponse, Iterator[GenerateResponse]]:
  """
@@ -100,6 +101,7 @@ class AbstractCoreInterface(ABC):
  messages: Optional conversation history
  system_prompt: Optional system prompt
  tools: Optional list of available tools
+ media: Optional list of media files (file paths, MediaContent objects, or dicts)
  stream: Whether to stream the response
  **kwargs: Additional provider-specific parameters
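
The new media parameter is now part of the abstract generate() signature, so every provider accepts it alongside prompt, messages, tools, and stream. A minimal usage sketch follows; only the signature above and the provider import path (seen later in this diff) are taken from the source, while the model name, the sample file, and the response .content attribute are assumptions.

    from abstractcore.providers.ollama_provider import OllamaProvider

    llm = OllamaProvider(model="gemma3n:e4b")  # any vision-capable model from the capabilities table

    # media accepts file paths, dicts, or MediaContent objects, per the docstring above.
    response = llm.generate(
        prompt="Describe the chart in this image.",
        media=["./reports/q3_revenue.png"],  # hypothetical local file
    )
    print(response.content)  # assumed attribute on GenerateResponse
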
@@ -186,11 +186,15 @@ class BasicSession:
  if hasattr(self, 'tool_call_tags') and self.tool_call_tags is not None and 'tool_call_tags' not in kwargs:
  kwargs['tool_call_tags'] = self.tool_call_tags

+ # Extract media parameter explicitly (fix for media parameter passing)
+ media = kwargs.pop('media', None)
+
  # Call provider
  response = self.provider.generate(
  prompt=prompt,
  messages=messages,
  system_prompt=self.system_prompt,
+ media=media,
  **kwargs
  )
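
Before this change, a media value passed to BasicSession.generate() stayed inside **kwargs and never reached the provider's explicit media parameter; popping it and forwarding it explicitly closes that gap. A short sketch of the now-working call path, where the BasicSession constructor arguments are assumptions beyond what the diff shows:

    from abstractcore.core.session import BasicSession
    from abstractcore.providers.ollama_provider import OllamaProvider

    # Constructor keywords (provider, system_prompt) are assumed, not shown in this diff.
    session = BasicSession(provider=OllamaProvider(model="gemma3n:e4b"),
                           system_prompt="You are a meticulous analyst.")
    reply = session.generate("What does this scan show?", media=["./scans/page1.jpg"])
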
@@ -119,7 +119,7 @@ class EmbeddingManager:
  def __init__(
  self,
  model: str = None,
- provider: str = "huggingface",
+ provider: str = None,
  backend: Union[str, EmbeddingBackend] = "auto",
  cache_dir: Optional[Path] = None,
  cache_size: int = 1000,
@@ -130,15 +130,20 @@

  Args:
  model: Model identifier (HuggingFace model ID for HF provider, model name for others).
- provider: Embedding provider ('huggingface', 'ollama', 'lmstudio'). Defaults to 'huggingface'.
+ If None, uses configured default from AbstractCore config system.
+ provider: Embedding provider ('huggingface', 'ollama', 'lmstudio').
+ If None, uses configured default from AbstractCore config system.
  backend: Inference backend for HuggingFace ('auto', 'pytorch', 'onnx', 'openvino')
  cache_dir: Directory for persistent cache. Defaults to ~/.abstractcore/embeddings
  cache_size: Maximum number of embeddings to cache in memory
  output_dims: Output dimensions for Matryoshka truncation (if supported by provider)
  trust_remote_code: Whether to trust remote code (HuggingFace only)
  """
- # Store provider
- self.provider = provider.lower()
+ # Load configuration defaults, but ONLY if parameters weren't explicitly provided
+ self._load_config_defaults(model, provider)
+
+ # Store provider (after config loading)
+ self.provider = self._resolved_provider.lower()

  # Validate provider
  if self.provider not in ["huggingface", "ollama", "lmstudio"]:
@@ -151,16 +156,16 @@
  # Set up model identifier
  if self.provider == "huggingface":
  # Model configuration - HuggingFace only
- if model is None:
- model = get_default_model() # Returns alias "all-minilm-l6-v2"
+ # Use resolved model (which includes config defaults if not explicitly provided)
+ resolved_model = self._resolved_model

  # Handle model aliases from our favored models config
- if model in list_available_models():
- self.model_config = get_model_config(model)
+ if resolved_model in list_available_models():
+ self.model_config = get_model_config(resolved_model)
  self.model_id = self.model_config.model_id
  else:
  # Direct HuggingFace model ID
- self.model_id = model
+ self.model_id = resolved_model
  self.model_config = None

  self.backend = EmbeddingBackend(backend) if backend != "auto" else None
@@ -175,22 +180,22 @@
  logger.warning(f"Dimension {output_dims} not in supported dims {self.model_config.matryoshka_dims}")
  else:
  # Ollama or LMStudio provider
- if model is None:
+ if self._resolved_model is None:
  raise ValueError(f"Model name is required for {self.provider} provider")
-
- self.model_id = model
+
+ self.model_id = self._resolved_model
  self.backend = None
  self.trust_remote_code = False

  # Create provider instance for delegation
  if self.provider == "ollama":
  from ..providers.ollama_provider import OllamaProvider
- self._provider_instance = OllamaProvider(model=model)
- logger.info(f"Initialized Ollama embedding provider with model: {model}")
+ self._provider_instance = OllamaProvider(model=self._resolved_model)
+ logger.info(f"Initialized Ollama embedding provider with model: {self._resolved_model}")
  elif self.provider == "lmstudio":
  from ..providers.lmstudio_provider import LMStudioProvider
- self._provider_instance = LMStudioProvider(model=model)
- logger.info(f"Initialized LMStudio embedding provider with model: {model}")
+ self._provider_instance = LMStudioProvider(model=self._resolved_model)
+ logger.info(f"Initialized LMStudio embedding provider with model: {self._resolved_model}")

  # Common setup for all providers
  self.cache_dir = Path(cache_dir) if cache_dir else Path.home() / ".abstractcore" / "embeddings"
@@ -235,6 +240,39 @@
  else:
  self.has_events = False

+ def _load_config_defaults(self, model: Optional[str], provider: Optional[str]) -> None:
+ """Load configuration defaults, but ONLY for parameters not explicitly provided.
+
+ This ensures that direct parameters always take precedence over config defaults.
+ """
+ try:
+ # Import config manager - use lazy import to avoid circular dependencies
+ from ..config import get_config_manager
+ config_manager = get_config_manager()
+ embeddings_config = config_manager.config.embeddings
+
+ # Apply defaults ONLY if not explicitly provided
+ if provider is None:
+ self._resolved_provider = embeddings_config.provider or "huggingface"
+ logger.debug(f"Using configured default provider: {self._resolved_provider}")
+ else:
+ self._resolved_provider = provider
+ logger.debug(f"Using explicit provider parameter: {self._resolved_provider}")
+
+ if model is None:
+ # Use config default, or fallback to EmbeddingManager's original default
+ self._resolved_model = embeddings_config.model or "all-minilm-l6-v2"
+ logger.debug(f"Using configured default model: {self._resolved_model}")
+ else:
+ self._resolved_model = model
+ logger.debug(f"Using explicit model parameter: {self._resolved_model}")
+
+ except Exception as e:
+ # Fallback to hardcoded defaults if config system fails
+ logger.debug(f"Config system unavailable, using fallback defaults: {e}")
+ self._resolved_provider = provider or "huggingface"
+ self._resolved_model = model or "all-minilm-l6-v2"
+
  def _load_model(self):
  """Load the HuggingFace embedding model with optimal backend and reduced warnings."""
  try:
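
The net effect of _load_config_defaults is a clear precedence order: explicit constructor arguments first, then values from the AbstractCore config system, then the hardcoded fallbacks "huggingface" and "all-minilm-l6-v2". A construction-only sketch of that behavior; embedding method names are not shown in this diff, so none are called, and the Ollama model name below is purely illustrative.

    from abstractcore.embeddings.manager import EmbeddingManager

    # No arguments: provider and model come from the configured defaults,
    # or fall back to "huggingface" / "all-minilm-l6-v2" if no config is available.
    default_manager = EmbeddingManager()

    # Explicit arguments always override configured defaults.
    ollama_manager = EmbeddingManager(provider="ollama", model="nomic-embed-text")  # illustrative model name
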