cosma-backend 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. cosma_backend/__init__.py +14 -0
  2. cosma_backend/__main__.py +4 -0
  3. cosma_backend/api/__init__.py +29 -0
  4. cosma_backend/api/files.py +154 -0
  5. cosma_backend/api/index.py +114 -0
  6. cosma_backend/api/models.py +28 -0
  7. cosma_backend/api/search.py +166 -0
  8. cosma_backend/api/status.py +28 -0
  9. cosma_backend/api/updates.py +67 -0
  10. cosma_backend/api/watch.py +156 -0
  11. cosma_backend/app.py +192 -0
  12. cosma_backend/db/__init__.py +2 -0
  13. cosma_backend/db/database.py +638 -0
  14. cosma_backend/discoverer/__init__.py +1 -0
  15. cosma_backend/discoverer/discoverer.py +34 -0
  16. cosma_backend/embedder/__init__.py +1 -0
  17. cosma_backend/embedder/embedder.py +637 -0
  18. cosma_backend/logging.py +73 -0
  19. cosma_backend/models/__init__.py +3 -0
  20. cosma_backend/models/file.py +169 -0
  21. cosma_backend/models/status.py +10 -0
  22. cosma_backend/models/update.py +202 -0
  23. cosma_backend/models/watch.py +132 -0
  24. cosma_backend/pipeline/__init__.py +2 -0
  25. cosma_backend/pipeline/pipeline.py +222 -0
  26. cosma_backend/schema.sql +319 -0
  27. cosma_backend/searcher/__init__.py +1 -0
  28. cosma_backend/searcher/searcher.py +397 -0
  29. cosma_backend/summarizer/__init__.py +44 -0
  30. cosma_backend/summarizer/summarizer.py +1075 -0
  31. cosma_backend/utils/bundled.py +24 -0
  32. cosma_backend/utils/pubsub.py +31 -0
  33. cosma_backend/utils/sse.py +92 -0
  34. cosma_backend/watcher/__init__.py +1 -0
  35. cosma_backend/watcher/awatchdog.py +80 -0
  36. cosma_backend/watcher/watcher.py +257 -0
  37. cosma_backend-0.1.0.dist-info/METADATA +23 -0
  38. cosma_backend-0.1.0.dist-info/RECORD +39 -0
  39. cosma_backend-0.1.0.dist-info/WHEEL +4 -0
cosma_backend/summarizer/summarizer.py
@@ -0,0 +1,1075 @@
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ '''
4
+ @File : summarizer.py
5
+ @Time : 2025/07/06 10:39:00
6
+ @Author : Ethan Pan
7
+ @Version : 1.0
8
+ @Contact : epan@cs.wisc.edu
9
+ @License : (C)Copyright 2025, Ethan Pan
10
+ @Desc : None
11
+ '''
12
+
13
+
14
+ import base64
15
+ import json
16
+ import logging
17
+ import os
18
+ import re
19
+ from abc import ABC, abstractmethod
20
+ from typing import List, Optional, Dict, Any
21
+
22
+ # Import AI libraries
23
+ import litellm
24
+ import ollama
25
+ import tiktoken
26
+
27
+ from backend.models import ProcessingStatus
28
+ from backend.models.file import File
29
+ from backend.logging import sm
30
+
31
+ # Configure standard logger
32
+ logger = logging.getLogger(__name__)
33
+
34
+
35
+ def get_encoding_for_model(model: str) -> tiktoken.Encoding:
36
+ """
37
+ Get the tiktoken encoding for a given model name.
38
+
39
+ Args:
40
+ model: Model name (e.g., "gpt-4", "gpt-3.5-turbo", "llama3.2")
41
+
42
+ Returns:
43
+ tiktoken.Encoding object for the model
44
+ """
45
+ # Try to get encoding directly from tiktoken for known models
46
+ try:
47
+ return tiktoken.encoding_for_model(model)
48
+ except KeyError:
49
+ pass
50
+
51
+ # Handle common model families and aliases
52
+ model_lower = model.lower()
53
+
54
+ # OpenAI models
55
+ if any(x in model_lower for x in ["gpt-4", "gpt-3.5", "gpt-35"]):
56
+ return tiktoken.get_encoding("cl100k_base")
57
+ elif "gpt-3" in model_lower or "davinci" in model_lower or "curie" in model_lower:
58
+ return tiktoken.get_encoding("p50k_base")
59
+
60
+ # Claude models use cl100k_base approximation
61
+ elif "claude" in model_lower:
62
+ return tiktoken.get_encoding("cl100k_base")
63
+
64
+ # Gemini models use cl100k_base approximation
65
+ elif "gemini" in model_lower:
66
+ return tiktoken.get_encoding("cl100k_base")
67
+
68
+ # Llama models (including Ollama) - use cl100k_base as approximation
69
+ elif any(x in model_lower for x in ["llama", "mistral", "mixtral", "phi", "qwen", "gemma", "deepseek"]):
70
+ return tiktoken.get_encoding("cl100k_base")
71
+
72
+ # Default to cl100k_base for unknown models (GPT-4 tokenizer)
73
+ logger.debug(f"Unknown model '{model}', defaulting to cl100k_base encoding")
74
+ return tiktoken.get_encoding("cl100k_base")
75
+
76
+
77
+ def estimate_tokens_fast(text: str, model: Optional[str] = None) -> int:
78
+ """
79
+ Fast token estimation using length-based heuristics.
80
+ Much faster than tiktoken but less accurate.
81
+
82
+ Args:
83
+ text: The text to estimate tokens for
84
+ model: Optional model name (not used in fast estimation)
85
+
86
+ Returns:
87
+ Estimated number of tokens in the text
88
+ """
89
+ return len(text) // 4
90
+
91
+
92
+ def estimate_tokens(text: str, model: Optional[str] = None, use_fast: bool = False) -> int:
93
+ """
94
+ Estimate the number of tokens in a text string.
95
+
96
+ Args:
97
+ text: The text to tokenize
98
+ model: Optional model name to get the correct encoding. If not provided,
99
+ uses cl100k_base (GPT-4 tokenizer) as default.
100
+ use_fast: Use fast character-based estimation instead of tiktoken
101
+
102
+ Returns:
103
+ Number of tokens in the text
104
+ """
105
+ if use_fast or not text:
106
+ return estimate_tokens_fast(text, model)
107
+
108
+ try:
109
+ if model:
110
+ encoding = get_encoding_for_model(model)
111
+ else:
112
+ # Default to cl100k_base (used by GPT-4, GPT-3.5-turbo, etc.)
113
+ encoding = tiktoken.get_encoding("cl100k_base")
114
+
115
+ return len(encoding.encode(text))
116
+ except Exception as e:
117
+ logger.warning(f"Error using tiktoken: {e}, falling back to fast estimation")
118
+ # Fallback to fast estimation
119
+ return estimate_tokens_fast(text, model)
120
+
121
+
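# Illustrative sketch (hypothetical helper, not part of the packaged module):
# contrasts the ~4-characters-per-token heuristic with the tiktoken-backed
# count that estimate_tokens() performs when use_fast is False.
def _demo_token_estimation(sample: str = "Token counts drive the chunking decisions below. " * 40) -> None:
    fast = estimate_tokens(sample, use_fast=True)       # len(sample) // 4 heuristic
    accurate = estimate_tokens(sample, model="gpt-4")   # resolves to cl100k_base via tiktoken
    logger.debug(sm("Token estimate comparison", fast=fast, accurate=accurate))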
122
+ def chunk_content(content: str, max_tokens: int, overlap_tokens: int = 50, model: Optional[str] = None) -> List[str]:
123
+ """
124
+ Split content into chunks that fit within token limits.
125
+ Uses fast token estimation for efficiency with accuracy validation.
126
+
127
+ Args:
128
+ content: The text content to chunk
129
+ max_tokens: Maximum tokens per chunk
130
+ overlap_tokens: Number of tokens to overlap between chunks
131
+ model: Optional model name for accurate tokenization
132
+
133
+ Returns:
134
+ List of content chunks
135
+ """
136
+ # Fast initial check
137
+ if estimate_tokens(content, model, use_fast=True) <= max_tokens:
138
+ # Verify with accurate tokenization if it's close to the limit
139
+ if estimate_tokens(content, model, use_fast=False) <= max_tokens:
140
+ return [content]
141
+
142
+ # Use sentence-based chunking with fast estimation for efficiency
143
+ sentences = content.split('. ')
144
+ chunks = []
145
+ current_chunk = []
146
+ current_tokens = 0
147
+ safety_buffer = int(max_tokens * 0.1) # 10% safety buffer
148
+
149
+ for sentence in sentences:
150
+ sentence_tokens = estimate_tokens(sentence, model, use_fast=True)
151
+
152
+ if sentence_tokens > (max_tokens - safety_buffer):
153
+ logger.info(sm("Sentence too big", sentence_tokens=sentence_tokens, current_tokens=current_tokens, max=max_tokens - safety_buffer))
154
+ continue
155
+
156
+ if current_tokens + sentence_tokens > (max_tokens - safety_buffer) and current_chunk:
157
+ logger.info(sm("Chunk created", chunk=len(chunks) + 1, tokens=current_tokens))
158
+ # Finalize current chunk and verify it's within limits
159
+ chunk_text = '. '.join(current_chunk) + '.'
160
+
161
+ # Safety check: verify the chunk doesn't exceed the limit with accurate tokenization
162
+ accurate_tokens = estimate_tokens(chunk_text, model, use_fast=False)
163
+ if accurate_tokens > max_tokens:
164
+ # Chunk is too large, split it further
165
+ chunk_text = _oversized_chunk_fix(chunk_text, max_tokens, model)
166
+
167
+ chunks.append(chunk_text)
168
+
169
+ # Start new chunk with overlap (fast estimation)
170
+ overlap_sentences = max(1, overlap_tokens // 50) # Rough overlap in sentences
171
+ overlap_content = '. '.join(current_chunk[-overlap_sentences:])
172
+ current_chunk = [overlap_content, sentence] if overlap_content else [sentence]
173
+ current_tokens = estimate_tokens('. '.join(current_chunk), model, use_fast=True)
174
+ else:
175
+ current_chunk.append(sentence)
176
+ current_tokens += sentence_tokens
177
+
178
+ # Add final chunk with safety check
179
+ if current_chunk:
180
+ chunk_text = '. '.join(current_chunk) + '.'
181
+ accurate_tokens = estimate_tokens(chunk_text, model, use_fast=False)
182
+ if accurate_tokens > max_tokens:
183
+ chunk_text = _oversized_chunk_fix(chunk_text, max_tokens, model)
184
+ chunks.append(chunk_text)
185
+
186
+ return chunks
187
+
188
+
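# Illustrative sketch (hypothetical helper, not part of the packaged module):
# splits a long text with a deliberately small limit so the sentence-based
# chunking and overlap behaviour are easy to inspect in the logs.
def _demo_chunking(text: str, max_tokens: int = 200) -> List[str]:
    chunks = chunk_content(text, max_tokens=max_tokens, overlap_tokens=20, model="gpt-4")
    for n, chunk in enumerate(chunks, start=1):
        logger.debug(sm("Demo chunk", chunk=n, tokens=estimate_tokens(chunk, "gpt-4")))
    return chunks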
189
+ def _oversized_chunk_fix(chunk_text: str, max_tokens: int, model: Optional[str] = None) -> str:
190
+ """
191
+ Fix an oversized chunk by splitting it more aggressively.
192
+ Uses accurate tokenization for this critical operation.
193
+
194
+ Args:
195
+ chunk_text: The oversized chunk text
196
+ max_tokens: Maximum allowed tokens
197
+ model: Model name for tokenization
198
+
199
+ Returns:
200
+ Fixed chunk text within token limits
201
+ """
202
+ # If the chunk is still too large, split by paragraphs then by character count
203
+ paragraphs = chunk_text.split('\n\n')
204
+ if len(paragraphs) > 1:
205
+ # Try including paragraphs one by one
206
+ result_chunks = []
207
+ current_chunk = ""
208
+
209
+ for paragraph in paragraphs:
210
+ test_chunk = current_chunk + ("\n\n" if current_chunk else "") + paragraph
211
+ if estimate_tokens(test_chunk, model, use_fast=False) <= max_tokens:
212
+ current_chunk = test_chunk
213
+ else:
214
+ if current_chunk:
215
+ result_chunks.append(current_chunk)
216
+ current_chunk = paragraph
217
+
218
+ if current_chunk:
219
+ result_chunks.append(current_chunk)
220
+
221
+ # Return the first chunk that fits
222
+ return result_chunks[0] if result_chunks else chunk_text[:max_tokens * 4] # Rough character fallback
223
+
224
+ # Last resort: character-based splitting
225
+ # Estimate characters needed (roughly 4 chars per token)
226
+ max_chars = max_tokens * 4
227
+ if len(chunk_text) <= max_chars:
228
+ return chunk_text
229
+
230
+ # Find a good breaking point near the limit
231
+ break_point = max_chars
232
+ # Try to break at sentence boundary
233
+ for i in range(min(break_point, len(chunk_text)), max(0, break_point - 200), -1):
234
+ if chunk_text[i] == '.' and i + 1 < len(chunk_text) and chunk_text[i + 1] == ' ':
235
+ return chunk_text[:i + 1]
236
+
237
+ # Fallback to hard character limit
238
+ return chunk_text[:max_chars]
239
+
240
+ def extract_json_from_response(content: str):
241
+ """Extract JSON from Gemma 3 response (handles markdown code fences)"""
242
+
243
+ # Try to find JSON in code fence
244
+ json_match = re.search(r'```(?:json)?\s*\n?(.*?)\n?```', content, re.DOTALL)
245
+ if json_match:
246
+ json_str = json_match.group(1).strip()
247
+ else:
248
+ json_str = content.strip()
249
+
250
+ return json_str
251
+
252
+
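# Illustrative sketch (hypothetical helper, not part of the packaged module):
# local models often wrap their JSON in a markdown code fence, which
# extract_json_from_response() strips before json.loads() is applied.
def _demo_extract_json() -> Dict[str, Any]:
    raw = '```json\n{"summary": "Notes on the Q3 report", "keywords": ["report", "Q3"]}\n```'
    return json.loads(extract_json_from_response(raw))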
253
+ class SummarizerError(Exception):
254
+ """Base exception for summarizer errors."""
255
+ pass
256
+
257
+
258
+ class AIProviderError(SummarizerError):
259
+ """Exception for AI provider-specific errors."""
260
+ pass
261
+
262
+
263
+ class BaseSummarizer(ABC):
264
+ """Abstract base class for file summarizers."""
265
+
266
+
267
+ def __init__(self, max_tokens: Optional[int] = None, model: Optional[str] = None):
268
+ """
269
+ Initialize summarizer with context length limit.
270
+
271
+ Args:
272
+ max_tokens: Maximum tokens for the model context
273
+ model: Model name for accurate tokenization (optional)
274
+ """
275
+ self.max_tokens = max_tokens or int(os.getenv("MAX_TOKENS_PER_REQUEST", "100000"))
276
+ self.chunk_overlap = int(os.getenv("CHUNK_OVERLAP_TOKENS", "1000"))
277
+ self.model = model
278
+
279
+ @abstractmethod
280
+ async def summarize(self, file_metadata: File) -> File:
281
+ """
282
+ Summarize the content of a file metadata object.
283
+
284
+ Args:
285
+ file_metadata: The file metadata object to summarize
286
+
287
+ Returns:
288
+ Enhanced file metadata with summary and keywords
289
+ """
290
+ pass
291
+
292
+ @abstractmethod
293
+ async def is_available(self) -> bool:
294
+ """Check if this summarizer is available for use."""
295
+ pass
296
+
297
+ def _validate_content(self, file_metadata: File) -> bool:
298
+ """Validate that the file metadata has content to summarize."""
299
+ if not file_metadata.content:
300
+ logger.warning(sm("File content is empty, cannot summarize", filename=file_metadata.filename))
301
+ return False
302
+
303
+ if len(file_metadata.content.strip()) < 10:
304
+ logger.warning(sm("File content too short to summarize", filename=file_metadata.filename, length=len(file_metadata.content)))
305
+ return False
306
+
307
+ return True
308
+
309
+ def _prepare_images(self, file_metadata: File) -> list[str]:
310
+ images = []
311
+
312
+ if file_metadata.content_type.startswith("image"):
313
+ with open(file_metadata.path, 'rb') as f:
314
+ images.append(base64.b64encode(f.read()).decode('utf-8'))
315
+
316
+ return images
317
+
318
+ def _prepare_content(self, content: str) -> List[str]:
319
+ """
320
+ Prepare content for summarization, chunking if necessary.
321
+ Uses fast token estimation for efficiency.
322
+
323
+ Args:
324
+ content: The content to prepare
325
+
326
+ Returns:
327
+ List of content chunks ready for processing
328
+ """
329
+ # Estimate tokens with the model-aware tokenizer
330
+ estimated_tokens = estimate_tokens(content, self.model)
331
+
332
+ # If the content fits within the limit, no chunking is needed
333
+ if estimated_tokens <= self.max_tokens:
334
+ return [content]
335
+
336
+ # Content is too large, need to chunk
337
+ if estimated_tokens >= 200_000:
338
+ logger.warning(sm("Content exceeds max token limit, will not summarize", estimated_tokens=estimated_tokens))
339
+ raise RuntimeError("File too large to summarize")
340
+
341
+ logger.info(sm("Content exceeds token limit, chunking required", estimated_tokens=estimated_tokens, max_tokens=self.max_tokens))
342
+
343
+ # Use optimized chunking
344
+ chunks = chunk_content(content, self.max_tokens, self.chunk_overlap, self.model)
345
+ logger.info(sm("Content chunked (not yet verified)", num_chunks=len(chunks)))
346
+
347
+ # Use fast estimation for chunk statistics (sample a few chunks for accurate check)
348
+ if len(chunks) <= 5:
349
+ # For small number of chunks, verify all with accurate tokenization
350
+ accurate_chunk_tokens = [estimate_tokens(chunk, self.model, use_fast=False) for chunk in chunks]
351
+ avg_chunk_tokens = sum(accurate_chunk_tokens) // len(chunks)
352
+ max_chunk_tokens = max(accurate_chunk_tokens)
353
+ logger.info(sm("Content chunked and verified", num_chunks=len(chunks), avg_chunk_tokens=avg_chunk_tokens, max_chunk_tokens=max_chunk_tokens))
354
+ else:
355
+ # For many chunks, sample a few for accurate verification and use fast for rest
356
+ sample_size = min(3, len(chunks))
357
+ sample_chunks = chunks[:sample_size]
358
+ accurate_sample_tokens = [estimate_tokens(chunk, self.model, use_fast=False) for chunk in sample_chunks]
359
+ fast_chunk_tokens = [estimate_tokens(chunk, self.model, use_fast=True) for chunk in chunks]
360
+ avg_chunk_tokens = sum(fast_chunk_tokens) // len(chunks)
361
+ max_chunk_tokens = max(accurate_sample_tokens)
362
+ logger.info(sm("Content chunked", num_chunks=len(chunks), avg_chunk_tokens=avg_chunk_tokens, max_chunk_sample=max_chunk_tokens))
363
+
364
+ if len(chunks) > 5:
365
+ logger.warning(sm("More than 5 chunks, will not summarize", chunks=len(chunks)))
366
+ raise RuntimeError("Too many chunks to summarize")
367
+
368
+ return chunks
369
+
370
+ def _combine_chunk_summaries(self, chunk_summaries: List[Dict[str, Any]]) -> tuple[str, List[str]]:
371
+ """
372
+ Combine summaries and keywords from multiple chunks.
373
+
374
+ Args:
375
+ chunk_summaries: List of summary dictionaries from chunks
376
+
377
+ Returns:
378
+ Tuple of (combined_summary, combined_keywords)
379
+ """
380
+ if not chunk_summaries:
381
+ return "No content available for summarization.", []
382
+
383
+ if len(chunk_summaries) == 1:
384
+ return chunk_summaries[0]["summary"], chunk_summaries[0]["keywords"]
385
+
386
+ # Combine summaries
387
+ summaries = [cs["summary"] for cs in chunk_summaries]
388
+ combined_summary = " ".join(summaries)
389
+
390
+ # If combined summary is too long, summarize it again
391
+ if len(combined_summary) > 500: # Rough character limit
392
+ combined_summary = f"Multi-part document covering: {'; '.join(summaries[:3])}"
393
+ if len(summaries) > 3:
394
+ combined_summary += f" and {len(summaries) - 3} additional topics."
395
+
396
+ # Combine and deduplicate keywords
397
+ all_keywords = []
398
+ for cs in chunk_summaries:
399
+ all_keywords.extend(cs["keywords"])
400
+
401
+ # Remove duplicates while preserving order
402
+ unique_keywords = []
403
+ seen = set()
404
+ for keyword in all_keywords:
405
+ keyword_lower = keyword.lower()
406
+ if keyword_lower not in seen:
407
+ unique_keywords.append(keyword)
408
+ seen.add(keyword_lower)
409
+
410
+ # Limit to reasonable number of keywords
411
+ combined_keywords = unique_keywords[:15]
412
+
413
+ logger.info(sm("Combined chunk summaries", num_chunks=len(chunk_summaries), final_keywords=len(combined_keywords)))
414
+
415
+ return combined_summary, combined_keywords
416
+
417
+ def _parse_ai_response(self, response_content: str) -> tuple[str, str, List[str]]:
418
+ """
419
+ Parse AI response JSON to extract summary and keywords.
420
+
421
+ Args:
422
+ response_content: Raw response from AI model
423
+
424
+ Returns:
425
+ Tuple of (title, summary, keywords)
426
+
427
+ Raises:
428
+ ValueError: If response format is invalid
429
+ """
430
+ try:
431
+ data = json.loads(response_content.strip())
432
+
433
+ title = data.get("title", "").strip()
434
+ summary = data.get("summary", "").strip()
435
+ keywords = data.get("keywords", [])
436
+
437
+ # Ensure keywords is a list of strings
438
+ if not isinstance(keywords, list):
439
+ keywords = []
440
+ keywords = [str(kw).strip() for kw in keywords if str(kw).strip()]
441
+
442
+ if not summary:
443
+ logger.error(sm("Response did not contain a valid summary", response=response_content))
444
+ raise ValueError("Response did not contain a valid summary")
445
+
446
+ return title, summary, keywords
447
+
448
+ except json.JSONDecodeError as e:
449
+ logger.error(sm("Failed to parse AI response as JSON", response=response_content, error=str(e)))
450
+ raise ValueError(f"Invalid JSON response: {str(e)}")
451
+
452
+ def _get_system_prompt(self, include_title: bool = False):
453
+ if include_title:
454
+ return (
455
+ "You are a concise summarization assistant. "
456
+ "**Return valid JSON only** with keys `title`, `summary`, and `keywords` (array). "
457
+ "Title should be an extremely concise, 1-5 word title for the content. "
458
+ "Summary should be 1-2 sentences capturing the main topic and key points. "
459
+ "Keywords should be 5-12 relevant nouns or noun-phrases that describe the content."
460
+ "Example: {{'title': 'Proper Title', 'summary': 'A concise summary of the file content', 'keywords': ['keyword1', 'keyword2', 'keyword3']}}"
461
+ )
462
+ else:
463
+ return (
464
+ "You are a concise summarization assistant. "
465
+ "**Return valid JSON only** with keys `summary` and `keywords` (array). "
466
+ "Summary should be 1-2 sentences capturing the main topic and key points. "
467
+ "Keywords should be 5-12 relevant nouns or noun-phrases that describe the content."
468
+ "Example: {{'summary': 'A concise summary of the file content', 'keywords': ['keyword1', 'keyword2', 'keyword3']}}"
469
+ )
470
+
471
+
472
+ class OllamaSummarizer(BaseSummarizer):
473
+ """Summarizer using local Ollama models."""
474
+
475
+ def __init__(self, host: Optional[str] = None, model: Optional[str] = None, max_tokens: Optional[int] = None):
476
+ """
477
+ Initialize Ollama summarizer.
478
+
479
+ Args:
480
+ host: Ollama host URL (default from env)
481
+ model: Model name (default from env)
482
+ max_tokens: Maximum context tokens (default from env)
483
+ """
484
+ # Get model name before initializing base class
485
+ model_name = model or os.getenv("OLLAMA_MODEL", "llama3.2")
486
+
487
+ # Initialize base class with context length and model
488
+ context_length = max_tokens or int(os.getenv("OLLAMA_MODEL_CONTEXT_LENGTH", "128000"))
489
+ super().__init__(max_tokens=context_length, model=model_name)
490
+
491
+ try:
492
+ import ollama
493
+ self.ollama_available = True
494
+ except ImportError:
495
+ self.ollama_available = False
496
+ raise ImportError("ollama package is not installed")
497
+
498
+ self.host = host or os.getenv("OLLAMA_HOST", "http://localhost:11434")
499
+
500
+ try:
501
+ self.client = ollama.AsyncClient(host=self.host)
502
+ logger.info(sm("Ollama summarizer initialized", host=self.host, model=self.model, max_tokens=self.max_tokens))
503
+ except Exception as e:
504
+ logger.error(sm("Failed to initialize Ollama client", host=self.host, error=str(e)))
505
+ raise AIProviderError(f"Failed to initialize Ollama: {str(e)}")
506
+
507
+ async def is_available(self) -> bool:
508
+ """Check if Ollama is available."""
509
+ try:
510
+ # Try to list models to check if Ollama is running
511
+ await self.client.list()
512
+ return True
513
+ except Exception as e:
514
+ logger.debug(f"Ollama not available - error: {str(e)}")
515
+ return False
516
+
517
+ async def summarize(self, file_metadata: File) -> File:
518
+ """Summarize using Ollama with chunking support."""
519
+ if not self._validate_content(file_metadata):
520
+ return file_metadata
521
+
522
+ logger.info(sm("Summarizing with Ollama", filename=file_metadata.filename, model=self.model))
523
+
524
+ try:
525
+ # Prepare content chunks
526
+ content_chunks = self._prepare_content(file_metadata.content)
527
+ chunk_summaries = []
528
+ resolved_title = None
529
+
530
+ images = self._prepare_images(file_metadata)
531
+
532
+ # Process each chunk
533
+ for i, chunk in enumerate(content_chunks):
534
+ logger.info(sm(f"Processing chunk {i+1}/{len(content_chunks)}", length=len(chunk), images=len(images)))
535
+
536
+ # Prepare message - only include images if we have any
537
+ user_message = {"role": "user", "content": chunk}
538
+ if images:
539
+ user_message["images"] = images
540
+
541
+ response = await self.client.chat(
542
+ model=self.model,
543
+ messages=[
544
+ {"role": "system", "content": self._get_system_prompt(include_title=(i == 0))},
545
+ user_message,
546
+ ],
547
+ think=False,
548
+ # format="json",
549
+ options=ollama.Options(
550
+ num_predict=500,
551
+ # temperature=0.3,
552
+ num_ctx=16_000,
553
+ )
554
+ )
555
+
556
+ logger.info(sm("Ollama response", response=response))
557
+ response_content = extract_json_from_response(response['message']['content'])
558
+ if not response_content:
559
+ logger.warning(sm("Empty response for chunk", chunk_num=i+1))
560
+ continue
561
+
562
+ try:
563
+ title, summary, keywords = self._parse_ai_response(response_content)
564
+ chunk_summaries.append({"summary": summary, "keywords": keywords})
565
+ if i == 0 and title:
566
+ resolved_title = title
567
+ except ValueError as e:
568
+ logger.warning(sm("Failed to parse chunk response", chunk_num=i+1, error=str(e)))
569
+ continue
570
+
571
+ if not chunk_summaries:
572
+ raise AIProviderError("No valid responses from Ollama")
573
+
574
+ # Combine chunk summaries
575
+ final_summary, final_keywords = self._combine_chunk_summaries(chunk_summaries)
576
+
577
+ # Update file metadata
578
+ file_metadata.title = resolved_title
579
+ file_metadata.summary = final_summary
580
+ file_metadata.keywords = final_keywords
581
+ file_metadata.status = ProcessingStatus.SUMMARIZED
582
+
583
+ logger.info(sm("Successfully summarized with Ollama", filename=file_metadata.filename,
584
+ title=resolved_title,
585
+ summary_length=len(final_summary), keyword_count=len(final_keywords),
586
+ chunks_processed=len(chunk_summaries)))
587
+
588
+ return file_metadata
589
+
590
+ except Exception as e:
591
+ error_msg = f"Ollama summarization failed: {str(e)}"
592
+ logger.error(sm("Ollama summarization failed", filename=file_metadata.filename, model=self.model, error=str(e)))
593
+ raise AIProviderError(error_msg)
594
+
595
+
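# Illustrative sketch (hypothetical helper, not part of the packaged module):
# constructs OllamaSummarizer with explicit values instead of the
# OLLAMA_HOST / OLLAMA_MODEL / OLLAMA_MODEL_CONTEXT_LENGTH env defaults.
async def _demo_ollama_summarize(file_metadata: File) -> File:
    summarizer = OllamaSummarizer(host="http://localhost:11434", model="llama3.2", max_tokens=32_000)
    if not await summarizer.is_available():
        return file_metadata
    return await summarizer.summarize(file_metadata)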
596
+ class OnlineSummarizer(BaseSummarizer):
597
+ """Summarizer using online AI models via LiteLLM."""
598
+
599
+ def __init__(self, model: Optional[str] = None, api_key: Optional[str] = None, max_tokens: Optional[int] = None):
600
+ """
601
+ Initialize online summarizer.
602
+
603
+ Args:
604
+ model: Model name (default from env)
605
+ api_key: API key (default from env)
606
+ max_tokens: Maximum context tokens (default from env)
607
+ """
608
+ # Get model name before initializing base class
609
+ model_name = model or os.getenv("ONLINE_MODEL", "gpt-4o")
610
+
611
+ # Initialize base class with context length and model
612
+ context_length = max_tokens or int(os.getenv("ONLINE_MODEL_CONTEXT_LENGTH", "128000"))
613
+ super().__init__(max_tokens=context_length, model=model_name)
614
+
615
+ try:
616
+ import litellm
617
+ self.litellm_available = True
618
+ except ImportError:
619
+ self.litellm_available = False
620
+ raise ImportError("litellm package is not installed")
621
+
622
+ # Set API key if provided
623
+ if api_key:
624
+ os.environ["OPENAI_API_KEY"] = api_key
625
+
626
+ logger.info(sm("Online summarizer initialized", model=self.model, max_tokens=self.max_tokens))
627
+
628
+ async def is_available(self) -> bool:
629
+ """Check if online models are available."""
630
+ # Check for required API keys based on model
631
+ if self.model.startswith("gpt-") or self.model.startswith("o1-"):
632
+ return bool(os.getenv("OPENAI_API_KEY"))
633
+ elif self.model.startswith("claude-"):
634
+ return bool(os.getenv("ANTHROPIC_API_KEY"))
635
+ elif self.model.startswith("gemini-"):
636
+ return bool(os.getenv("GOOGLE_API_KEY"))
637
+ else:
638
+ # Assume OpenAI by default
639
+ return bool(os.getenv("OPENAI_API_KEY"))
640
+
641
+ async def summarize(self, file_metadata: File) -> File:
642
+ """Summarize using online AI models with chunking support."""
643
+ if not self._validate_content(file_metadata):
644
+ return file_metadata
645
+
646
+ logger.info(sm("Summarizing with online model", filename=file_metadata.filename, model=self.model))
647
+
648
+ try:
649
+ import litellm
650
+
651
+ # Prepare content chunks
652
+ content_chunks = self._prepare_content(file_metadata.content)
653
+ chunk_summaries = []
654
+ resolved_title = None
655
+
656
+ images = self._prepare_images(file_metadata)
657
+
658
+ # Process each chunk
659
+ for i, chunk in enumerate(content_chunks):
660
+ logger.info(sm(f"Processing chunk {i+1}/{len(content_chunks)}", length=len(chunk), images=len(images)))
661
+
662
+ user_message = {"role": "user", "content": chunk}
663
+ if images:
664
+ user_message["images"] = images
665
+
666
+ response = litellm.completion(
667
+ model=self.model,
668
+ messages=[
669
+ {"role": "system", "content": self._get_system_prompt(include_title=(i == 0))},
670
+ user_message,
671
+ ],
672
+ temperature=0.1,
673
+ max_tokens=300,
674
+ top_p=1,
675
+ frequency_penalty=0,
676
+ presence_penalty=0,
677
+ response_format={"type": "json_object"},
678
+ timeout=120,
679
+ max_retries=2,
680
+ )
681
+
682
+ response_content = response.choices[0].message.content
683
+ if not response_content:
684
+ logger.warning(sm("Empty response for chunk", chunk_num=i+1))
685
+ continue
686
+
687
+ try:
688
+ title, summary, keywords = self._parse_ai_response(response_content)
689
+ chunk_summaries.append({"summary": summary, "keywords": keywords})
690
+ if i == 0 and title:
691
+ resolved_title = title
692
+ except ValueError as e:
693
+ logger.warning(sm("Failed to parse chunk response", chunk_num=i+1, error=str(e)))
694
+ continue
695
+
696
+ if not chunk_summaries:
697
+ raise AIProviderError("No valid responses from online model")
698
+
699
+ # Combine chunk summaries
700
+ final_summary, final_keywords = self._combine_chunk_summaries(chunk_summaries)
701
+
702
+ # Update file metadata
703
+ file_metadata.title = resolved_title
704
+ file_metadata.summary = final_summary
705
+ file_metadata.keywords = final_keywords
706
+ file_metadata.status = ProcessingStatus.SUMMARIZED
707
+
708
+ logger.info(sm("Successfully summarized with online model", filename=file_metadata.filename,
709
+ title=resolved_title,
710
+ model=self.model, summary_length=len(final_summary),
711
+ keyword_count=len(final_keywords), chunks_processed=len(chunk_summaries)))
712
+
713
+ return file_metadata
714
+
715
+ except Exception as e:
716
+ error_msg = f"Online summarization failed: {str(e)}"
717
+ logger.error(sm("Online summarization failed", filename=file_metadata.filename, model=self.model, error=str(e)))
718
+ raise AIProviderError(error_msg)
719
+
720
+
721
+ class LlamaCppSummarizer(BaseSummarizer):
722
+ """Summarizer using local llama.cpp models."""
723
+
724
+ def __init__(self, model_path: Optional[str] = None, max_tokens: Optional[int] = None, n_ctx: Optional[int] = None):
725
+ """
726
+ Initialize llama.cpp summarizer.
727
+
728
+ Args:
729
+ model_path: Path to GGUF model file (default from env: LLAMACPP_MODEL_PATH)
730
+ max_tokens: Maximum context tokens (default from env: LLAMACPP_MODEL_CONTEXT_LENGTH)
731
+ n_ctx: Context window size for the model (default from env: LLAMACPP_N_CTX)
732
+ """
733
+ # Get model path from env if not provided
734
+ self.model_path = model_path or os.getenv("LLAMACPP_MODEL_PATH")
735
+ self.repo_id = os.getenv("LLAMACPP_REPO_ID")
736
+ self.filename = os.getenv("LLAMACPP_FILENAME")
737
+ if not (self.model_path or all((self.repo_id, self.filename))):
738
+ raise ValueError("Either LLAMACPP_MODEL_PATH (or model_path) or both LLAMACPP_REPO_ID and LLAMACPP_FILENAME must be set")
739
+
740
+ # Initialize base class with context length
741
+ context_length = max_tokens or int(os.getenv("LLAMACPP_MODEL_CONTEXT_LENGTH", "8192"))
742
+ super().__init__(max_tokens=context_length, model="llama.cpp")
743
+
744
+ self.n_ctx = n_ctx or int(os.getenv("LLAMACPP_N_CTX", "8192"))
745
+
746
+ try:
747
+ from llama_cpp import Llama
748
+ self.llamacpp_available = True
749
+ except ImportError:
750
+ self.llamacpp_available = False
751
+ raise ImportError("llama-cpp-python package is not installed. Install with: pip install llama-cpp-python")
752
+
753
+ try:
754
+ # Initialize llama.cpp model
755
+ if self.repo_id and self.filename:
756
+ self.llm = Llama.from_pretrained(
757
+ repo_id=self.repo_id,
758
+ filename=self.filename,
759
+ n_ctx=self.n_ctx,
760
+ n_threads=int(os.getenv("LLAMACPP_N_THREADS", "4")),
761
+ n_gpu_layers=int(os.getenv("LLAMACPP_N_GPU_LAYERS", "0")), # 0 = CPU only, -1 = all layers on GPU
762
+ verbose=os.getenv("LLAMACPP_VERBOSE", "false").lower() == "true",
763
+ )
764
+ logger.info(sm("llama.cpp summarizer initialized",
765
+ repo_id=self.repo_id,
766
+ filename=self.filename,
767
+ n_ctx=self.n_ctx,
768
+ max_tokens=self.max_tokens))
769
+ else:
770
+ self.llm = Llama(
772
+ model_path=self.model_path,
773
+ n_ctx=self.n_ctx,
774
+ n_threads=int(os.getenv("LLAMACPP_N_THREADS", "4")),
775
+ n_gpu_layers=int(os.getenv("LLAMACPP_N_GPU_LAYERS", "0")), # 0 = CPU only, -1 = all layers on GPU
776
+ verbose=os.getenv("LLAMACPP_VERBOSE", "false").lower() == "true",
777
+ )
778
+ logger.info(sm("llama.cpp summarizer initialized",
779
+ model_path=self.model_path,
780
+ n_ctx=self.n_ctx,
781
+ max_tokens=self.max_tokens))
782
+ except Exception as e:
783
+ logger.error(sm("Failed to initialize llama.cpp model", model_path=self.model_path, error=str(e)))
784
+ raise AIProviderError(f"Failed to initialize llama.cpp: {str(e)}")
785
+
786
+ async def is_available(self) -> bool:
787
+ """Check if llama.cpp is available."""
788
+ try:
789
+ # Check if model file exists and is accessible
790
+ # if not self.model_path or not os.path.exists(self.model_path):
791
+ # logger.debug(f"llama.cpp model not found at path: {self.model_path}")
792
+ # return False
793
+
794
+ # Check if llm is initialized
795
+ return hasattr(self, 'llm') and self.llm is not None
796
+ except Exception as e:
797
+ logger.error(f"llama.cpp not available - error: {str(e)}")
798
+ return False
799
+
800
+ async def summarize(self, file_metadata: File) -> File:
801
+ """Summarize using llama.cpp with chunking support."""
802
+ if not self._validate_content(file_metadata):
803
+ return file_metadata
804
+
805
+ logger.info(sm("Summarizing with llama.cpp", filename=file_metadata.filename, model_path=self.model_path))
806
+
807
+ try:
808
+ # Prepare content chunks
809
+ content_chunks = self._prepare_content(file_metadata.content)
810
+ chunk_summaries = []
811
+ resolved_title = None
812
+
813
+ # Note: llama.cpp doesn't natively support image processing in the same way as Ollama
814
+ # Images would need to be handled by a multimodal GGUF model if available
815
+ images = self._prepare_images(file_metadata)
816
+ if images:
817
+ logger.warning("Image support in llama.cpp requires multimodal GGUF models and is not fully implemented")
818
+
819
+ # Process each chunk
820
+ for i, chunk in enumerate(content_chunks):
821
+ logger.info(sm(f"Processing chunk {i+1}/{len(content_chunks)}", length=len(chunk)))
822
+
823
+ # Use create_chat_completion to properly format the prompt with the model's chat template
824
+ response = self.llm.create_chat_completion(
825
+ messages=[
826
+ {"role": "system", "content": self._get_system_prompt(include_title=(i == 0))},
827
+ {"role": "user", "content": chunk},
828
+ ],
829
+ max_tokens=500,
830
+ temperature=0.1,
831
+ top_p=0.95,
832
+ stream=False,
833
+ )
834
+
835
+ response_content = response['choices'][0]['message']['content'].strip()
836
+ logger.info(sm("llama.cpp raw response", response=response_content))
837
+
838
+ if not response_content:
839
+ logger.warning(sm("Empty response for chunk", chunk_num=i+1))
840
+ continue
841
+
842
+ # Try to extract JSON from response
843
+ json_str = extract_json_from_response(response_content)
844
+
845
+ try:
846
+ title, summary, keywords = self._parse_ai_response(json_str)
847
+ chunk_summaries.append({"summary": summary, "keywords": keywords})
848
+ if i == 0 and title:
849
+ resolved_title = title
850
+ except ValueError as e:
851
+ logger.warning(sm("Failed to parse chunk response", chunk_num=i+1, error=str(e), response=json_str))
852
+ continue
853
+
854
+ if not chunk_summaries:
855
+ raise AIProviderError("No valid responses from llama.cpp")
856
+
857
+ # Combine chunk summaries
858
+ final_summary, final_keywords = self._combine_chunk_summaries(chunk_summaries)
859
+
860
+ # Update file metadata
861
+ file_metadata.title = resolved_title
862
+ file_metadata.summary = final_summary
863
+ file_metadata.keywords = final_keywords
864
+ file_metadata.status = ProcessingStatus.SUMMARIZED
865
+
866
+ logger.info(sm("Successfully summarized with llama.cpp", filename=file_metadata.filename,
867
+ title=resolved_title,
868
+ summary_length=len(final_summary), keyword_count=len(final_keywords),
869
+ chunks_processed=len(chunk_summaries)))
870
+
871
+ return file_metadata
872
+
873
+ except Exception as e:
874
+ error_msg = f"llama.cpp summarization failed: {str(e)}"
875
+ logger.error(sm("llama.cpp summarization failed", filename=file_metadata.filename,
876
+ model_path=self.model_path, error=str(e)))
877
+ raise AIProviderError(error_msg)
878
+
879
+
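# Illustrative sketch (hypothetical values, not part of the packaged module):
# LlamaCppSummarizer accepts either a local GGUF file via LLAMACPP_MODEL_PATH /
# model_path, or a Hugging Face download via LLAMACPP_REPO_ID + LLAMACPP_FILENAME.
def _demo_llamacpp_config() -> LlamaCppSummarizer:
    os.environ.setdefault("LLAMACPP_REPO_ID", "example-org/example-model-GGUF")  # hypothetical repo id
    os.environ.setdefault("LLAMACPP_FILENAME", "example-model-Q4_K_M.gguf")      # hypothetical filename
    os.environ.setdefault("LLAMACPP_N_GPU_LAYERS", "0")                          # CPU-only inference
    return LlamaCppSummarizer(n_ctx=8192)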
880
+ class AutoSummarizer:
881
+ """
882
+ Automatic summarizer that selects the best available provider.
883
+
884
+ Tries providers in order of preference:
885
+ 1. User-specified provider
886
+ 2. Local llama.cpp (fastest local inference)
887
+ 3. Local Ollama (privacy-focused)
888
+ 4. Online models (fallback)
889
+ """
890
+
891
+ def __init__(self, preferred_provider: Optional[str] = None):
892
+ """
893
+ Initialize auto summarizer.
894
+
895
+ Args:
896
+ preferred_provider: Preferred provider ('llamacpp', 'ollama', 'online', 'auto')
897
+ """
898
+ self.preferred_provider = preferred_provider or os.getenv("AI_PROVIDER", "auto")
899
+ self.summarizers = {}
900
+
901
+ logger.info(sm("AutoSummarizer initialized", preferred_provider=self.preferred_provider))
902
+
903
+ async def _get_llamacpp_summarizer(self) -> Optional[LlamaCppSummarizer]:
904
+ """Get or create llama.cpp summarizer if available."""
905
+ if "llamacpp" not in self.summarizers:
906
+ try:
907
+ logger.info(sm("llama.cpp summarizer initializing"))
908
+ summarizer = LlamaCppSummarizer()
909
+ if await summarizer.is_available():
910
+ self.summarizers["llamacpp"] = summarizer
911
+ logger.info(sm("llama.cpp summarizer available"))
912
+ else:
913
+ logger.info("llama.cpp summarizer not available")
914
+ return None
915
+ except Exception as e:
916
+ logger.error(sm("Failed to create llama.cpp summarizer", error=str(e)))
917
+ return None
918
+
919
+ return self.summarizers.get("llamacpp")
920
+
921
+ async def _get_ollama_summarizer(self) -> Optional[OllamaSummarizer]:
922
+ """Get or create Ollama summarizer if available."""
923
+ if "ollama" not in self.summarizers:
924
+ try:
925
+ summarizer = OllamaSummarizer()
926
+ if await summarizer.is_available():
927
+ self.summarizers["ollama"] = summarizer
928
+ logger.info(sm("Ollama summarizer available"))
929
+ else:
930
+ logger.debug("Ollama summarizer not available")
931
+ return None
932
+ except Exception as e:
933
+ logger.debug(f"Failed to create Ollama summarizer - error: {str(e)}")
934
+ return None
935
+
936
+ return self.summarizers.get("ollama")
937
+
938
+ async def _get_online_summarizer(self) -> Optional[OnlineSummarizer]:
939
+ """Get or create online summarizer if available."""
940
+ if "online" not in self.summarizers:
941
+ try:
942
+ summarizer = OnlineSummarizer()
943
+ if await summarizer.is_available():
944
+ self.summarizers["online"] = summarizer
945
+ logger.info(sm("Online summarizer available"))
946
+ else:
947
+ logger.debug("Online summarizer not available")
948
+ return None
949
+ except Exception as e:
950
+ logger.debug(f"Failed to create online summarizer - error: {str(e)}")
951
+ return None
952
+
953
+ return self.summarizers.get("online")
954
+
955
+ async def _select_summarizer(self) -> Optional[BaseSummarizer]:
956
+ """
957
+ Select the best available summarizer based on preference.
958
+
959
+ Returns:
960
+ An available summarizer instance or None
961
+ """
962
+ # Prioritize preferred provider if available
963
+ if self.preferred_provider == "llamacpp" and await self._get_llamacpp_summarizer():
964
+ return await self._get_llamacpp_summarizer()
965
+
966
+ if self.preferred_provider == "ollama" and await self._get_ollama_summarizer():
967
+ return await self._get_ollama_summarizer()
968
+
969
+ if self.preferred_provider == "online" and await self._get_online_summarizer():
970
+ return await self._get_online_summarizer()
971
+
972
+ # Auto-selection logic (prefer local models first)
973
+ if await self._get_llamacpp_summarizer():
974
+ return await self._get_llamacpp_summarizer()
975
+
976
+ if await self._get_ollama_summarizer():
977
+ return await self._get_ollama_summarizer()
978
+
979
+ if await self._get_online_summarizer():
980
+ return await self._get_online_summarizer()
981
+
982
+ return None
983
+
984
+ async def summarize(self, file_metadata: File) -> File:
985
+ """
986
+ Summarize using the best available provider with fallback.
987
+
988
+ Args:
989
+ file_metadata: File metadata to summarize
990
+
991
+ Returns:
992
+ Enhanced file metadata with summary and keywords
993
+
994
+ Raises:
995
+ SummarizerError: If no summarizers are available or all fail
996
+ """
997
+ # Get all available providers in priority order
998
+ providers = [
999
+ await self._get_llamacpp_summarizer(),
1000
+ await self._get_ollama_summarizer(),
1001
+ await self._get_online_summarizer()
1002
+ ]
1003
+
1004
+ # Sort providers based on preference
1005
+ if self.preferred_provider == "online":
1006
+ providers.reverse()
1007
+ elif self.preferred_provider == "ollama":
1008
+ # Move Ollama to front
1009
+ providers = [p for p in providers if isinstance(p, OllamaSummarizer)] + \
1010
+ [p for p in providers if not isinstance(p, OllamaSummarizer)]
1011
+ elif self.preferred_provider == "llamacpp":
1012
+ # Move llama.cpp to front (already first by default)
1013
+ pass
1014
+
1015
+ summarizer = None
1016
+ for provider in providers:
1017
+ if provider and await provider.is_available():
1018
+ summarizer = provider
1019
+ try:
1020
+ logger.info(sm("Attempting summarization", provider=type(summarizer).__name__))
1021
+ return await summarizer.summarize(file_metadata)
1022
+ except Exception as e:
1023
+ logger.warning(sm("Summarizer failed, trying next provider", provider=type(summarizer).__name__, error=str(e)))
1024
+ continue # Try next provider
1025
+
1026
+ error_msg = "All AI summarizers failed or are unavailable"
1027
+ logger.error(sm("All AI summarizers failed or are unavailable", preferred_provider=self.preferred_provider))
1028
+ raise SummarizerError(error_msg)
1029
+
1030
+ async def get_available_providers(self) -> List[str]:
1031
+ """Get list of available providers."""
1032
+ providers = []
1033
+
1034
+ if await self._get_llamacpp_summarizer():
1035
+ providers.append("llamacpp")
1036
+
1037
+ if await self._get_ollama_summarizer():
1038
+ providers.append("ollama")
1039
+
1040
+ if await self._get_online_summarizer():
1041
+ providers.append("online")
1042
+
1043
+ return providers
1044
+
1045
+
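# Illustrative sketch (hypothetical helper, not part of the packaged module):
# lists the providers AutoSummarizer can reach, then summarizes with the
# preferred one, falling back automatically if it fails.
async def _demo_auto_summarize(file_metadata: File) -> File:
    auto = AutoSummarizer(preferred_provider="ollama")
    logger.debug(sm("Providers available", providers=await auto.get_available_providers()))
    return await auto.summarize(file_metadata)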
1046
+ # Convenience functions for easier usage
1047
+ async def summarize_file(file_metadata: File,
1048
+ provider: Optional[str] = None) -> File:
1049
+ """
1050
+ Convenience function to summarize a file.
1051
+
1052
+ Args:
1053
+ file_metadata: File metadata to summarize
1054
+ provider: Preferred AI provider
1055
+
1056
+ Returns:
1057
+ Enhanced file metadata with summary and keywords
1058
+ """
1059
+ summarizer = AutoSummarizer(preferred_provider=provider)
1060
+ return await summarizer.summarize(file_metadata)
1061
+
1062
+
1063
+ async def get_available_providers() -> List[str]:
1064
+ """Get list of available AI providers."""
1065
+ summarizer = AutoSummarizer()
1066
+ return await summarizer.get_available_providers()
1067
+
1068
+
1069
+ async def is_summarizer_available() -> bool:
1070
+ """Check if any summarizer is available."""
1071
+ try:
1072
+ providers = await get_available_providers()
1073
+ return len(providers) > 0
1074
+ except Exception:
1075
+ return False
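# Illustrative sketch (hypothetical File construction, not part of the packaged
# module): drives the module-level convenience helpers from an async entry point.
async def _demo_summarize_path(path: str) -> None:
    if not await is_summarizer_available():
        logger.warning(sm("No summarizer providers available"))
        return
    file_metadata = File(path=path)  # assumed keyword; the real fields are defined in models/file.py
    enriched = await summarize_file(file_metadata, provider="auto")
    logger.info(sm("Summary ready", title=enriched.title, keywords=enriched.keywords))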