abstractcore-2.4.2-py3-none-any.whl → abstractcore-2.4.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. abstractcore/apps/app_config_utils.py +19 -0
  2. abstractcore/apps/summarizer.py +85 -56
  3. abstractcore/architectures/detection.py +15 -4
  4. abstractcore/assets/architecture_formats.json +1 -1
  5. abstractcore/assets/model_capabilities.json +420 -11
  6. abstractcore/core/interface.py +2 -0
  7. abstractcore/core/session.py +4 -0
  8. abstractcore/embeddings/manager.py +54 -16
  9. abstractcore/media/__init__.py +116 -148
  10. abstractcore/media/auto_handler.py +363 -0
  11. abstractcore/media/base.py +456 -0
  12. abstractcore/media/capabilities.py +335 -0
  13. abstractcore/media/types.py +300 -0
  14. abstractcore/media/vision_fallback.py +260 -0
  15. abstractcore/providers/anthropic_provider.py +18 -1
  16. abstractcore/providers/base.py +187 -0
  17. abstractcore/providers/huggingface_provider.py +111 -12
  18. abstractcore/providers/lmstudio_provider.py +88 -5
  19. abstractcore/providers/mlx_provider.py +33 -1
  20. abstractcore/providers/ollama_provider.py +37 -3
  21. abstractcore/providers/openai_provider.py +18 -1
  22. abstractcore/server/app.py +1390 -104
  23. abstractcore/tools/common_tools.py +12 -8
  24. abstractcore/utils/__init__.py +9 -5
  25. abstractcore/utils/cli.py +199 -17
  26. abstractcore/utils/message_preprocessor.py +182 -0
  27. abstractcore/utils/structured_logging.py +117 -16
  28. abstractcore/utils/version.py +1 -1
  29. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/METADATA +214 -20
  30. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/RECORD +34 -27
  31. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/entry_points.txt +1 -0
  32. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/WHEEL +0 -0
  33. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/licenses/LICENSE +0 -0
  34. {abstractcore-2.4.2.dist-info → abstractcore-2.4.4.dist-info}/top_level.txt +0 -0
abstractcore/apps/app_config_utils.py
@@ -0,0 +1,19 @@
+"""
+Shared configuration utilities for AbstractCore apps.
+"""
+
+def get_app_defaults(app_name: str) -> tuple[str, str]:
+    """Get default provider and model for an app."""
+    try:
+        from ..config import get_config_manager
+        config_manager = get_config_manager()
+        return config_manager.get_app_default(app_name)
+    except Exception:
+        # Fallback to hardcoded defaults if config unavailable
+        hardcoded_defaults = {
+            'summarizer': ('huggingface', 'unsloth/Qwen3-4B-Instruct-2507-GGUF'),
+            'extractor': ('huggingface', 'unsloth/Qwen3-4B-Instruct-2507-GGUF'),
+            'judge': ('huggingface', 'unsloth/Qwen3-4B-Instruct-2507-GGUF'),
+            'cli': ('huggingface', 'unsloth/Qwen3-4B-Instruct-2507-GGUF'),
+        }
+        return hardcoded_defaults.get(app_name, ('huggingface', 'unsloth/Qwen3-4B-Instruct-2507-GGUF'))
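
For orientation, a minimal sketch of how an app consumes this helper. The call site is hypothetical; the import paths are assumed from the file list above and from the "from ..core.factory import create_llm" import visible in summarizer.py below:

    from abstractcore.apps.app_config_utils import get_app_defaults
    from abstractcore.core.factory import create_llm

    # Resolves the configured default, falling back to
    # ('huggingface', 'unsloth/Qwen3-4B-Instruct-2507-GGUF') whenever
    # the config manager is unavailable.
    provider, model = get_app_defaults('summarizer')
    llm = create_llm(provider, model=model)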
abstractcore/apps/summarizer.py
@@ -35,6 +35,23 @@ from ..processing import BasicSummarizer, SummaryStyle, SummaryLength
 from ..core.factory import create_llm
 
 
+def get_app_defaults(app_name: str) -> tuple[str, str]:
+    """Get default provider and model for an app."""
+    try:
+        from ..config import get_config_manager
+        config_manager = get_config_manager()
+        return config_manager.get_app_default(app_name)
+    except Exception:
+        # Fallback to hardcoded defaults if config unavailable
+        hardcoded_defaults = {
+            'summarizer': ('huggingface', 'unsloth/Qwen3-4B-Instruct-2507-GGUF'),
+            'extractor': ('huggingface', 'unsloth/Qwen3-4B-Instruct-2507-GGUF'),
+            'judge': ('huggingface', 'unsloth/Qwen3-4B-Instruct-2507-GGUF'),
+            'cli': ('huggingface', 'unsloth/Qwen3-4B-Instruct-2507-GGUF'),
+        }
+        return hardcoded_defaults.get(app_name, ('huggingface', 'unsloth/Qwen3-4B-Instruct-2507-GGUF'))
+
+
 def read_file_content(file_path: str) -> str:
     """
     Read content from various file types
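
(This is the same helper just added in app_config_utils.py, duplicated verbatim in summarizer.py rather than imported.)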
@@ -168,9 +185,9 @@ Examples:
 Supported file types: .txt, .md, .py, .js, .html, .json, .csv, and most text-based files
 
 Default model setup:
-  - Requires Ollama: https://ollama.com/
-  - Download model: ollama pull gemma3:1b-it-qat
-  - Or use --provider and --model for other providers
+  - Fresh installs use: huggingface/unsloth/Qwen3-4B-Instruct-2507-GGUF (HuggingFace local model)
+  - Configure defaults: abstractcore --set-app-default summarizer <provider> <model>
+  - Or use --provider and --model for explicit override
 """
 )
 
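
(The help text drops the hard Ollama dependency: fresh installs no longer need an Ollama daemon and a gemma3:1b-it-qat pull, matching the HuggingFace fallback in get_app_defaults above.)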
@@ -240,6 +257,12 @@ Default model setup:
         help='Show detailed progress information'
     )
 
+    parser.add_argument(
+        '--debug',
+        action='store_true',
+        help='Enable debug logging and show detailed diagnostics'
+    )
+
     parser.add_argument(
         '--timeout',
         default=None,
@@ -249,6 +272,17 @@ Default model setup:
     # Parse arguments
     args = parser.parse_args()
 
+    # Configure logging based on arguments (--debug overrides config defaults)
+    if args.debug:
+        from ..utils.structured_logging import configure_logging
+        import logging
+        configure_logging(
+            console_level=logging.DEBUG,
+            file_level=logging.DEBUG,
+            verbatim_enabled=True
+        )
+        print("🐛 Debug logging enabled")
+
     try:
         # Validate chunk size
        if args.chunk_size < 1000:
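
The same opt-in pattern can be reproduced outside the CLI. A minimal sketch reusing the configure_logging signature visible in this hunk (the effect of verbatim_enabled is assumed from its name; it is not documented in this diff):

    import logging
    from abstractcore.utils.structured_logging import configure_logging

    # Mirror the --debug branch: route DEBUG to both console and file
    # handlers, and turn on verbatim logging (assumed: raw prompt/response
    # capture, per the structured_logging changes listed in this release).
    configure_logging(
        console_level=logging.DEBUG,
        file_level=logging.DEBUG,
        verbatim_enabled=True,
    )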
@@ -285,66 +319,61 @@ Default model setup:
         style = parse_style(args.style)
         length = parse_length(args.length)
 
-        # Initialize LLM and summarizer
+        # Get provider and model using centralized configuration
         if args.provider and args.model:
-            # Custom provider/model with max_tokens adjusted for chunk size
-            max_tokens = max(16000, args.chunk_size)  # Ensure max_tokens >= chunk_size
-            if args.verbose:
-                print(f"Initializing summarizer ({args.provider}, {args.model}, {args.max_tokens} token context, {args.max_output_tokens} output tokens)...")
+            # Use explicit parameters
+            provider, model = args.provider, args.model
+            config_source = "explicit parameters"
+        else:
+            # Use configured defaults
+            provider, model = get_app_defaults('summarizer')
+            config_source = "configured defaults"
+
+        # Adjust max_tokens based on chunk size
+        max_tokens = max(args.max_tokens, args.chunk_size)
+
+        if args.verbose:
+            print(f"Initializing summarizer ({provider}, {model}, {max_tokens} token context, {args.max_output_tokens} output tokens) - using {config_source}...")
+
+        if args.debug:
+            print(f"🐛 Debug - Configuration details:")
+            print(f"   Provider: {provider}")
+            print(f"   Model: {model}")
+            print(f"   Config source: {config_source}")
+            print(f"   Max tokens: {max_tokens}")
+            print(f"   Max output tokens: {args.max_output_tokens}")
+            print(f"   Chunk size: {args.chunk_size}")
+            print(f"   Timeout: {args.timeout}")
+            print(f"   Style: {args.style}")
+            print(f"   Length: {args.length}")
+            print(f"   Focus: {args.focus}")
 
-        llm = create_llm(args.provider, model=args.model, max_tokens=args.max_tokens, max_output_tokens=args.max_output_tokens, timeout=args.timeout)
+        try:
+            llm = create_llm(provider, model=model, max_tokens=max_tokens, max_output_tokens=args.max_output_tokens, timeout=args.timeout)
             summarizer = BasicSummarizer(
-            llm,
+                llm,
                 max_chunk_size=args.chunk_size,
-            max_tokens=args.max_tokens,
+                max_tokens=max_tokens,
                 max_output_tokens=args.max_output_tokens,
                 timeout=args.timeout
             )
-        else:
-            # Default configuration with chunk size override
-            if args.chunk_size != 8000:
-                # Custom chunk size, need to adjust max_tokens if necessary
-                max_tokens = max(16000, args.chunk_size)
-                if args.verbose:
-                    print(f"Initializing summarizer (ollama, gemma3:1b-it-qat, {args.max_tokens} token context, {args.max_output_tokens} output tokens, {args.chunk_size} chunk size)...")
-
-                try:
-                    llm = create_llm("ollama", model="gemma3:1b-it-qat", max_tokens=args.max_tokens, max_output_tokens=args.max_output_tokens, timeout=args.timeout)
-                    summarizer = BasicSummarizer(
-                        llm,
-                        max_chunk_size=args.chunk_size,
-                        max_tokens=args.max_tokens,
-                        max_output_tokens=args.max_output_tokens,
-                        timeout=args.timeout
-                    )
-                except Exception as e:
-                    # Handle default model not available
-                    print(f"\n❌ Failed to initialize default Ollama model 'gemma3:1b-it-qat': {e}")
-                    print("\n💡 To use the default model, please:")
-                    print("   1. Install Ollama from: https://ollama.com/")
-                    print("   2. Download the model: ollama pull gemma3:1b-it-qat")
-                    print("   3. Start Ollama service")
-                    print("\n🚀 Alternatively, specify a different provider:")
-                    print("   - Example: summarizer document.txt --provider openai --model gpt-4o-mini")
-                    sys.exit(1)
-            else:
-                # Default configuration
-                if args.verbose:
-                    print(f"Initializing summarizer (ollama, gemma3:1b-it-qat, {args.max_tokens} token context, {args.max_output_tokens} output tokens, {args.chunk_size} chunk size)...")
-                try:
-                    summarizer = BasicSummarizer(
-                        max_chunk_size=args.chunk_size,
-                        max_tokens=args.max_tokens,
-                        max_output_tokens=args.max_output_tokens,
-                        timeout=args.timeout
-                    )
-                except RuntimeError as e:
-                    # Handle default model not available
-                    print(f"\n{e}")
-                    print("\n🚀 Quick alternatives to get started:")
-                    print("   - Use --provider and --model to specify an available provider")
-                    print("   - Example: summarizer document.txt --provider openai --model gpt-4o-mini")
-                    sys.exit(1)
+        except Exception as e:
+            # Handle model initialization failure
+            print(f"\n❌ Failed to initialize LLM '{provider}/{model}': {e}")
+
+            print(f"\n💡 Solutions:")
+            if provider == "ollama":
+                print(f"   - Install Ollama: https://ollama.com/")
+                print(f"   - Download the model: ollama pull {model}")
+                print(f"   - Verify with: ollama list")
+
+            print(f"\n🚀 Alternatively, specify a different provider:")
+            print(f"   - Example: summarizer document.txt --provider openai --model gpt-4o-mini")
+            print(f"   - Example: summarizer document.txt --provider anthropic --model claude-3-5-haiku-20241022")
+            print(f"\n🔧 Or configure a different default:")
+            print(f"   - abstractcore --set-app-default summarizer openai gpt-4o-mini")
+            print(f"   - abstractcore --status")
+            sys.exit(1)
 
         # Generate summary
         if args.verbose:
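
Two behavioral changes ride along with this consolidation: the context floor moves from max(16000, args.chunk_size) to max(args.max_tokens, args.chunk_size), so an explicit --max-tokens below 16000 is now honored as long as it covers the chunk size; and initialization failures for any provider now get recovery guidance, with the Ollama-specific pull instructions printed only when provider == "ollama".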
abstractcore/architectures/detection.py
@@ -100,6 +100,8 @@ def resolve_model_alias(model_name: str, models: Dict[str, Any]) -> str:
     """
     Resolve a model name to its canonical name by checking aliases.
 
+    Automatically converts "--" to "/" for HuggingFace cache format compatibility.
+
     Args:
         model_name: Model name that might be an alias
         models: Models dictionary from capabilities JSON
@@ -111,15 +113,24 @@ def resolve_model_alias(model_name: str, models: Dict[str, Any]) -> str:
     if model_name in models:
         return model_name
 
-    # Check if it's an alias of any model
+    # Convert "--" to "/" for HuggingFace cache format compatibility
+    normalized_model_name = model_name.replace("--", "/")
+    if normalized_model_name != model_name:
+        logger.debug(f"Normalized model name '{model_name}' to '{normalized_model_name}'")
+
+    # Check if normalized name is a canonical name
+    if normalized_model_name in models:
+        return normalized_model_name
+
+    # Check if it's an alias of any model (try both original and normalized)
     for canonical_name, model_info in models.items():
         aliases = model_info.get("aliases", [])
-        if model_name in aliases:
+        if model_name in aliases or normalized_model_name in aliases:
             logger.debug(f"Resolved alias '{model_name}' to canonical name '{canonical_name}'")
             return canonical_name
 
-    # Return original name if no alias found
-    return model_name
+    # Return normalized name if no alias found
+    return normalized_model_name
 
 
 def get_model_capabilities(model_name: str) -> Dict[str, Any]:
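
A toy reproduction of the normalization this hunk introduces; the models dict below is illustrative, not taken from the shipped capabilities JSON:

    # HuggingFace caches repos as directories like "models--org--repo",
    # so cache-derived names arrive with "--" where the repo id has "/".
    models = {
        "unsloth/Qwen3-4B-Instruct-2507-GGUF": {"aliases": ["qwen3-4b"]},
    }

    name = "unsloth--Qwen3-4B-Instruct-2507-GGUF"
    normalized = name.replace("--", "/")
    assert normalized in models  # resolves where the raw name would not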
abstractcore/assets/architecture_formats.json
@@ -233,7 +233,7 @@
       "assistant_prefix": "Assistant: ",
       "assistant_suffix": "\n",
       "tool_format": "native",
-      "patterns": ["gemma3n", "gemma3n:e4b", "gemma3n:e2b"]
+      "patterns": ["gemma3n", "gemma3n:e4b", "gemma3n:e2b", "gemma-3n", "google/gemma-3n"]
     },
     "paligemma": {
       "description": "Google's PaliGemma vision-language architecture",