ragit 0.8-py3-none-any.whl → 0.8.2-py3-none-any.whl

This diff shows the changes between package versions publicly released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
ragit/__init__.py CHANGED
@@ -1,2 +1,116 @@
1
- # __init__.py
2
- from .main import VectorDBManager
1
+ #
2
+ # Copyright RODMENA LIMITED 2025
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ """
6
+ Ragit - RAG toolkit for document Q&A and hyperparameter optimization.
7
+
8
+ Quick Start
9
+ -----------
10
+ >>> from ragit import RAGAssistant
11
+ >>>
12
+ >>> # With custom embedding function (retrieval-only)
13
+ >>> def my_embed(text: str) -> list[float]:
14
+ ... # Your embedding implementation
15
+ ... pass
16
+ >>> assistant = RAGAssistant("docs/", embed_fn=my_embed)
17
+ >>> results = assistant.retrieve("How do I create a REST API?")
18
+ >>>
19
+ >>> # With SentenceTransformers (offline, requires ragit[transformers])
20
+ >>> from ragit.providers import SentenceTransformersProvider
21
+ >>> assistant = RAGAssistant("docs/", provider=SentenceTransformersProvider())
22
+ >>>
23
+ >>> # With Ollama (explicit)
24
+ >>> from ragit.providers import OllamaProvider
25
+ >>> assistant = RAGAssistant("docs/", provider=OllamaProvider())
26
+ >>> answer = assistant.ask("How do I create a REST API?")
27
+
28
+ Optimization
29
+ ------------
30
+ >>> from ragit import RagitExperiment, Document, BenchmarkQuestion
31
+ >>>
32
+ >>> docs = [Document(id="doc1", content="...")]
33
+ >>> benchmark = [BenchmarkQuestion(question="What is X?", ground_truth="...")]
34
+ >>>
35
+ >>> # With explicit provider
36
+ >>> experiment = RagitExperiment(docs, benchmark, provider=OllamaProvider())
37
+ >>> results = experiment.run()
38
+ >>> print(results[0]) # Best configuration
39
+ """
40
+
41
+ import logging
42
+ import os
43
+
44
+ from ragit.version import __version__
45
+
46
+ # Set up logging
47
+ logger = logging.getLogger("ragit")
48
+ logger.setLevel(os.getenv("RAGIT_LOG_LEVEL", "INFO"))
49
+
50
+ if not logger.handlers:
51
+ formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
52
+ handler = logging.StreamHandler()
53
+ handler.setFormatter(formatter)
54
+ logger.addHandler(handler)
55
+
56
+ # Public API (imports after logging setup)
57
+ from ragit.assistant import RAGAssistant # noqa: E402
58
+ from ragit.core.experiment.experiment import ( # noqa: E402
59
+ BenchmarkQuestion,
60
+ Chunk,
61
+ Document,
62
+ RAGConfig,
63
+ RagitExperiment,
64
+ )
65
+ from ragit.core.experiment.results import EvaluationResult, ExperimentResults # noqa: E402
66
+ from ragit.loaders import ( # noqa: E402
67
+ chunk_by_separator,
68
+ chunk_document,
69
+ chunk_rst_sections,
70
+ chunk_text,
71
+ load_directory,
72
+ load_text,
73
+ )
74
+ from ragit.providers import ( # noqa: E402
75
+ BaseEmbeddingProvider,
76
+ BaseLLMProvider,
77
+ FunctionProvider,
78
+ OllamaProvider,
79
+ )
80
+
81
+ __all__ = [
82
+ "__version__",
83
+ # High-level API
84
+ "RAGAssistant",
85
+ # Document loading
86
+ "load_text",
87
+ "load_directory",
88
+ "chunk_text",
89
+ "chunk_document",
90
+ "chunk_by_separator",
91
+ "chunk_rst_sections",
92
+ # Core classes
93
+ "Document",
94
+ "Chunk",
95
+ # Providers
96
+ "OllamaProvider",
97
+ "FunctionProvider",
98
+ "BaseLLMProvider",
99
+ "BaseEmbeddingProvider",
100
+ # Optimization
101
+ "RagitExperiment",
102
+ "BenchmarkQuestion",
103
+ "RAGConfig",
104
+ "EvaluationResult",
105
+ "ExperimentResults",
106
+ ]
107
+
108
+ # Conditionally add SentenceTransformersProvider if available
109
+ try:
110
+ from ragit.providers import ( # noqa: E402
111
+ SentenceTransformersProvider as SentenceTransformersProvider,
112
+ )
113
+
114
+ __all__ += ["SentenceTransformersProvider"]
115
+ except ImportError:
116
+ pass
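A minimal, self-contained sketch of the retrieval-only usage described in the new Quick Start docstring above. The hashed bag-of-words embedding and the sample documents are illustrative stand-ins, not part of the package; only RAGAssistant, Document, and retrieve() come from the diff itself.

import hashlib

from ragit import Document, RAGAssistant

def toy_embed(text: str, dim: int = 64) -> list[float]:
    # Illustrative stand-in embedding: hashed bag-of-words, deterministic and dependency-free.
    vec = [0.0] * dim
    for token in text.lower().split():
        bucket = int(hashlib.md5(token.encode()).hexdigest(), 16) % dim
        vec[bucket] += 1.0
    return vec

docs = [
    Document(id="routing", content="Register a REST endpoint with @app.route('/users', methods=['GET'])."),
    Document(id="logging", content="Configure logging early so handlers attach before the first request."),
]

assistant = RAGAssistant(docs, embed_fn=toy_embed)
for chunk, score in assistant.retrieve("How do I create a REST API?", top_k=2):
    print(f"{score:.2f}  {chunk.content[:60]}")

Because no generate_fn or LLM-capable provider is configured here, ask(), generate(), and generate_code() would raise; retrieve() and get_context() are the available surface in this mode.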
ragit/assistant.py ADDED
@@ -0,0 +1,577 @@
1
+ #
2
+ # Copyright RODMENA LIMITED 2025
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ """
6
+ High-level RAG Assistant for document Q&A and code generation.
7
+
8
+ Provides a simple interface for RAG-based tasks.
9
+
10
+ Note: This class is NOT thread-safe. Do not share instances across threads.
11
+ """
12
+
13
+ from collections.abc import Callable
14
+ from pathlib import Path
15
+ from typing import TYPE_CHECKING
16
+
17
+ import numpy as np
18
+ from numpy.typing import NDArray
19
+
20
+ from ragit.core.experiment.experiment import Chunk, Document
21
+ from ragit.loaders import chunk_document, chunk_rst_sections, load_directory, load_text
22
+ from ragit.providers.base import BaseEmbeddingProvider, BaseLLMProvider
23
+ from ragit.providers.function_adapter import FunctionProvider
24
+
25
+ if TYPE_CHECKING:
26
+ from numpy.typing import NDArray
27
+
28
+
29
+ class RAGAssistant:
30
+ """
31
+ High-level RAG assistant for document Q&A and generation.
32
+
33
+ Handles document indexing, retrieval, and LLM generation in one simple API.
34
+
35
+ Parameters
36
+ ----------
37
+ documents : list[Document] or str or Path
38
+ Documents to index. Can be:
39
+ - List of Document objects
40
+ - Path to a single file
41
+ - Path to a directory (loads .txt, .md, .rst, and common source-code files)
42
+ embed_fn : Callable[[str], list[float]], optional
43
+ Function that takes text and returns an embedding vector.
44
+ If provided, creates a FunctionProvider internally.
45
+ generate_fn : Callable, optional
46
+ Function for text generation. Supports (prompt) or (prompt, system_prompt).
47
+ Requires embed_fn to also be provided.
48
+ provider : BaseEmbeddingProvider, optional
49
+ Provider for embeddings (and optionally LLM). If embed_fn is provided,
50
+ this is ignored for embeddings.
51
+ embedding_model : str, optional
52
+ Embedding model name (used with provider).
53
+ llm_model : str, optional
54
+ LLM model name (used with provider).
55
+ chunk_size : int, optional
56
+ Chunk size for splitting documents (default: 512).
57
+ chunk_overlap : int, optional
58
+ Overlap between chunks (default: 50).
59
+
60
+ Raises
61
+ ------
62
+ ValueError
63
+ If neither embed_fn nor provider is provided.
64
+
65
+ Note
66
+ ----
67
+ This class is NOT thread-safe. Each thread should have its own instance.
68
+
69
+ Examples
70
+ --------
71
+ >>> # With custom embedding function (retrieval-only)
72
+ >>> assistant = RAGAssistant(docs, embed_fn=my_embed)
73
+ >>> results = assistant.retrieve("query")
74
+ >>>
75
+ >>> # With custom embedding and LLM functions (full RAG)
76
+ >>> assistant = RAGAssistant(docs, embed_fn=my_embed, generate_fn=my_llm)
77
+ >>> answer = assistant.ask("What is X?")
78
+ >>>
79
+ >>> # With explicit provider
80
+ >>> from ragit.providers import OllamaProvider
81
+ >>> assistant = RAGAssistant(docs, provider=OllamaProvider())
82
+ >>>
83
+ >>> # With SentenceTransformers (offline)
84
+ >>> from ragit.providers import SentenceTransformersProvider
85
+ >>> assistant = RAGAssistant(docs, provider=SentenceTransformersProvider())
86
+ """
87
+
88
+ def __init__(
89
+ self,
90
+ documents: list[Document] | str | Path,
91
+ embed_fn: Callable[[str], list[float]] | None = None,
92
+ generate_fn: Callable[..., str] | None = None,
93
+ provider: BaseEmbeddingProvider | BaseLLMProvider | None = None,
94
+ embedding_model: str | None = None,
95
+ llm_model: str | None = None,
96
+ chunk_size: int = 512,
97
+ chunk_overlap: int = 50,
98
+ ):
99
+ # Resolve provider from embed_fn/generate_fn or explicit provider
100
+ self._embedding_provider: BaseEmbeddingProvider
101
+ self._llm_provider: BaseLLMProvider | None = None
102
+
103
+ if embed_fn is not None:
104
+ # Create FunctionProvider from provided functions
105
+ function_provider = FunctionProvider(
106
+ embed_fn=embed_fn,
107
+ generate_fn=generate_fn,
108
+ )
109
+ self._embedding_provider = function_provider
110
+ if generate_fn is not None:
111
+ self._llm_provider = function_provider
112
+ elif provider is not None and isinstance(provider, BaseLLMProvider):
113
+ # Use explicit provider for LLM if function_provider doesn't have LLM
114
+ self._llm_provider = provider
115
+ elif provider is not None:
116
+ # Use explicit provider
117
+ if not isinstance(provider, BaseEmbeddingProvider):
118
+ raise ValueError(
119
+ "Provider must implement BaseEmbeddingProvider for embeddings. Alternatively, provide embed_fn."
120
+ )
121
+ self._embedding_provider = provider
122
+ if isinstance(provider, BaseLLMProvider):
123
+ self._llm_provider = provider
124
+ else:
125
+ raise ValueError(
126
+ "Must provide embed_fn or provider for embeddings. "
127
+ "Examples:\n"
128
+ " RAGAssistant(docs, embed_fn=my_embed_function)\n"
129
+ " RAGAssistant(docs, provider=OllamaProvider())\n"
130
+ " RAGAssistant(docs, provider=SentenceTransformersProvider())"
131
+ )
132
+
133
+ self.embedding_model = embedding_model or "default"
134
+ self.llm_model = llm_model or "default"
135
+ self.chunk_size = chunk_size
136
+ self.chunk_overlap = chunk_overlap
137
+
138
+ # Load documents if path provided
139
+ self.documents = self._load_documents(documents)
140
+
141
+ # Index chunks - embeddings stored as pre-normalized numpy matrix for fast search
142
+ self._chunks: tuple[Chunk, ...] = ()
143
+ self._embedding_matrix: NDArray[np.float64] | None = None # Pre-normalized
144
+ self._build_index()
145
+
146
+ def _load_documents(self, documents: list[Document] | str | Path) -> list[Document]:
147
+ """Load documents from various sources."""
148
+ if isinstance(documents, list):
149
+ return documents
150
+
151
+ path = Path(documents)
152
+
153
+ if path.is_file():
154
+ return [load_text(path)]
155
+
156
+ if path.is_dir():
157
+ docs: list[Document] = []
158
+ for pattern in (
159
+ "*.txt",
160
+ "*.md",
161
+ "*.rst",
162
+ "*.py",
163
+ "*.js",
164
+ "*.ts",
165
+ "*.go",
166
+ "*.java",
167
+ "*.c",
168
+ "*.cpp",
169
+ "*.h",
170
+ "*.hpp",
171
+ ):
172
+ docs.extend(load_directory(path, pattern))
173
+ return docs
174
+
175
+ raise ValueError(f"Invalid documents source: {documents}")
176
+
177
+ def _build_index(self) -> None:
178
+ """Build vector index from documents using batch embedding."""
179
+ all_chunks: list[Chunk] = []
180
+
181
+ for doc in self.documents:
182
+ # Use RST section chunking for .rst files, otherwise regular chunking
183
+ if doc.metadata.get("filename", "").endswith(".rst"):
184
+ chunks = chunk_rst_sections(doc.content, doc.id, metadata=doc.metadata)
185
+ else:
186
+ chunks = chunk_document(doc, self.chunk_size, self.chunk_overlap)
187
+ all_chunks.extend(chunks)
188
+
189
+ if not all_chunks:
190
+ self._chunks = ()
191
+ self._embedding_matrix = None
192
+ return
193
+
194
+ # Batch embed all chunks at once (single API call)
195
+ texts = [chunk.content for chunk in all_chunks]
196
+ responses = self._embedding_provider.embed_batch(texts, self.embedding_model)
197
+
198
+ # Build embedding matrix directly (skip storing in chunks to avoid duplication)
199
+ embedding_matrix = np.array([response.embedding for response in responses], dtype=np.float64)
200
+
201
+ # Pre-normalize for fast cosine similarity (normalize once, use many times)
202
+ norms = np.linalg.norm(embedding_matrix, axis=1, keepdims=True)
203
+ norms[norms == 0] = 1 # Avoid division by zero
204
+
205
+ # Store as immutable tuple and pre-normalized numpy matrix
206
+ self._chunks = tuple(all_chunks)
207
+ self._embedding_matrix = embedding_matrix / norms
208
+
209
+ def add_documents(self, documents: list[Document] | str | Path) -> int:
210
+ """Add documents to the existing index incrementally.
211
+
212
+ Args:
213
+ documents: Documents to add.
214
+
215
+ Returns:
216
+ Number of chunks added.
217
+ """
218
+ new_docs = self._load_documents(documents)
219
+ if not new_docs:
220
+ return 0
221
+
222
+ self.documents.extend(new_docs)
223
+
224
+ # Chunk new docs
225
+ new_chunks: list[Chunk] = []
226
+ for doc in new_docs:
227
+ if doc.metadata.get("filename", "").endswith(".rst"):
228
+ chunks = chunk_rst_sections(doc.content, doc.id, metadata=doc.metadata)
229
+ else:
230
+ chunks = chunk_document(doc, self.chunk_size, self.chunk_overlap)
231
+ new_chunks.extend(chunks)
232
+
233
+ if not new_chunks:
234
+ return 0
235
+
236
+ # Embed new chunks
237
+ texts = [chunk.content for chunk in new_chunks]
238
+ responses = self._embedding_provider.embed_batch(texts, self.embedding_model)
239
+
240
+ new_matrix = np.array([response.embedding for response in responses], dtype=np.float64)
241
+
242
+ # Normalize
243
+ norms = np.linalg.norm(new_matrix, axis=1, keepdims=True)
244
+ norms[norms == 0] = 1
245
+ new_matrix_norm = new_matrix / norms
246
+
247
+ # Update state
248
+ current_chunks = list(self._chunks)
249
+ current_chunks.extend(new_chunks)
250
+ self._chunks = tuple(current_chunks)
251
+
252
+ if self._embedding_matrix is None:
253
+ self._embedding_matrix = new_matrix_norm
254
+ else:
255
+ self._embedding_matrix = np.vstack((self._embedding_matrix, new_matrix_norm))
256
+
257
+ return len(new_chunks)
258
+
259
+ def remove_documents(self, source_path_pattern: str) -> int:
260
+ """Remove documents matching a source path pattern.
261
+
262
+ Args:
263
+ source_path_pattern: Glob pattern to match 'source' metadata.
264
+
265
+ Returns:
266
+ Number of chunks removed.
267
+ """
268
+ import fnmatch
269
+
270
+ if not self._chunks:
271
+ return 0
272
+
273
+ indices_to_keep = []
274
+ kept_chunks = []
275
+ removed_count = 0
276
+
277
+ for i, chunk in enumerate(self._chunks):
278
+ source = chunk.metadata.get("source", "")
279
+ if not source or not fnmatch.fnmatch(source, source_path_pattern):
280
+ indices_to_keep.append(i)
281
+ kept_chunks.append(chunk)
282
+ else:
283
+ removed_count += 1
284
+
285
+ if removed_count == 0:
286
+ return 0
287
+
288
+ self._chunks = tuple(kept_chunks)
289
+
290
+ if self._embedding_matrix is not None:
291
+ if not kept_chunks:
292
+ self._embedding_matrix = None
293
+ else:
294
+ self._embedding_matrix = self._embedding_matrix[indices_to_keep]
295
+
296
+ # Also remove from self.documents
297
+ self.documents = [
298
+ doc for doc in self.documents if not fnmatch.fnmatch(doc.metadata.get("source", ""), source_path_pattern)
299
+ ]
300
+
301
+ return removed_count
302
+
303
+ def update_documents(self, documents: list[Document] | str | Path) -> int:
304
+ """Update existing documents (remove old, add new).
305
+
306
+ Uses document source path to identify what to remove.
307
+
308
+ Args:
309
+ documents: New versions of documents.
310
+
311
+ Returns:
312
+ Number of chunks added.
313
+ """
314
+ new_docs = self._load_documents(documents)
315
+ if not new_docs:
316
+ return 0
317
+
318
+ # Identify sources to remove
319
+ sources_to_remove = set()
320
+ for doc in new_docs:
321
+ source = doc.metadata.get("source")
322
+ if source:
323
+ sources_to_remove.add(source)
324
+
325
+ # Remove old versions
326
+ for source in sources_to_remove:
327
+ self.remove_documents(source)
328
+
329
+ # Add new versions
330
+ return self.add_documents(new_docs)
331
+
332
+ def retrieve(self, query: str, top_k: int = 3) -> list[tuple[Chunk, float]]:
333
+ """
334
+ Retrieve relevant chunks for a query.
335
+
336
+ Uses vectorized cosine similarity for fast search over all chunks.
337
+
338
+ Parameters
339
+ ----------
340
+ query : str
341
+ Search query.
342
+ top_k : int
343
+ Number of chunks to return (default: 3).
344
+
345
+ Returns
346
+ -------
347
+ list[tuple[Chunk, float]]
348
+ List of (chunk, similarity_score) tuples, sorted by relevance.
349
+
350
+ Examples
351
+ --------
352
+ >>> results = assistant.retrieve("how to create a route")
353
+ >>> for chunk, score in results:
354
+ ... print(f"{score:.2f}: {chunk.content[:100]}...")
355
+ """
356
+ if not self._chunks or self._embedding_matrix is None:
357
+ return []
358
+
359
+ # Get query embedding and normalize
360
+ query_response = self._embedding_provider.embed(query, self.embedding_model)
361
+ query_vec = np.array(query_response.embedding, dtype=np.float64)
362
+ query_norm = np.linalg.norm(query_vec)
363
+ if query_norm == 0:
364
+ return []
365
+ query_normalized = query_vec / query_norm
366
+
367
+ # Fast cosine similarity: matrix is pre-normalized, just dot product
368
+ similarities = self._embedding_matrix @ query_normalized
369
+
370
+ # Get top_k indices using argpartition (faster than full sort for large arrays)
371
+ if len(similarities) <= top_k:
372
+ top_indices = np.argsort(similarities)[::-1]
373
+ else:
374
+ # Partial sort - only find top_k elements
375
+ top_indices = np.argpartition(similarities, -top_k)[-top_k:]
376
+ # Sort the top_k by score
377
+ top_indices = top_indices[np.argsort(similarities[top_indices])[::-1]]
378
+
379
+ return [(self._chunks[i], float(similarities[i])) for i in top_indices]
380
+
381
+ def get_context(self, query: str, top_k: int = 3) -> str:
382
+ """
383
+ Get formatted context string from retrieved chunks.
384
+
385
+ Parameters
386
+ ----------
387
+ query : str
388
+ Search query.
389
+ top_k : int
390
+ Number of chunks to include.
391
+
392
+ Returns
393
+ -------
394
+ str
395
+ Formatted context string.
396
+ """
397
+ results = self.retrieve(query, top_k)
398
+ return "\n\n---\n\n".join(chunk.content for chunk, _ in results)
399
+
400
+ def _ensure_llm(self) -> BaseLLMProvider:
401
+ """Ensure LLM provider is available."""
402
+ if self._llm_provider is None:
403
+ raise NotImplementedError(
404
+ "No LLM configured. Provide generate_fn or a provider with LLM support "
405
+ "to use ask(), generate(), or generate_code() methods."
406
+ )
407
+ return self._llm_provider
408
+
409
+ def generate(
410
+ self,
411
+ prompt: str,
412
+ system_prompt: str | None = None,
413
+ temperature: float = 0.7,
414
+ ) -> str:
415
+ """
416
+ Generate text using the LLM (without retrieval).
417
+
418
+ Parameters
419
+ ----------
420
+ prompt : str
421
+ User prompt.
422
+ system_prompt : str, optional
423
+ System prompt for context.
424
+ temperature : float
425
+ Sampling temperature (default: 0.7).
426
+
427
+ Returns
428
+ -------
429
+ str
430
+ Generated text.
431
+
432
+ Raises
433
+ ------
434
+ NotImplementedError
435
+ If no LLM is configured.
436
+ """
437
+ llm = self._ensure_llm()
438
+ response = llm.generate(
439
+ prompt=prompt,
440
+ model=self.llm_model,
441
+ system_prompt=system_prompt,
442
+ temperature=temperature,
443
+ )
444
+ return response.text
445
+
446
+ def ask(
447
+ self,
448
+ question: str,
449
+ system_prompt: str | None = None,
450
+ top_k: int = 3,
451
+ temperature: float = 0.7,
452
+ ) -> str:
453
+ """
454
+ Ask a question using RAG (retrieve + generate).
455
+
456
+ Parameters
457
+ ----------
458
+ question : str
459
+ Question to answer.
460
+ system_prompt : str, optional
461
+ System prompt. Defaults to a helpful assistant prompt.
462
+ top_k : int
463
+ Number of context chunks to retrieve (default: 3).
464
+ temperature : float
465
+ Sampling temperature (default: 0.7).
466
+
467
+ Returns
468
+ -------
469
+ str
470
+ Generated answer.
471
+
472
+ Raises
473
+ ------
474
+ NotImplementedError
475
+ If no LLM is configured.
476
+
477
+ Examples
478
+ --------
479
+ >>> answer = assistant.ask("How do I create a REST API?")
480
+ >>> print(answer)
481
+ """
482
+ # Retrieve context
483
+ context = self.get_context(question, top_k)
484
+
485
+ # Default system prompt
486
+ if system_prompt is None:
487
+ system_prompt = """You are a helpful assistant. Answer questions based on the provided context.
488
+ If the context doesn't contain enough information, say so. Be concise and accurate."""
489
+
490
+ # Build prompt with context
491
+ prompt = f"""Context:
492
+ {context}
493
+
494
+ Question: {question}
495
+
496
+ Answer:"""
497
+
498
+ return self.generate(prompt, system_prompt, temperature)
499
+
500
+ def generate_code(
501
+ self,
502
+ request: str,
503
+ language: str = "python",
504
+ top_k: int = 3,
505
+ temperature: float = 0.7,
506
+ ) -> str:
507
+ """
508
+ Generate code based on documentation context.
509
+
510
+ Parameters
511
+ ----------
512
+ request : str
513
+ Description of what code to generate.
514
+ language : str
515
+ Programming language (default: "python").
516
+ top_k : int
517
+ Number of context chunks to retrieve.
518
+ temperature : float
519
+ Sampling temperature.
520
+
521
+ Returns
522
+ -------
523
+ str
524
+ Generated code (cleaned, without markdown).
525
+
526
+ Raises
527
+ ------
528
+ NotImplementedError
529
+ If no LLM is configured.
530
+
531
+ Examples
532
+ --------
533
+ >>> code = assistant.generate_code("create a REST API with user endpoints")
534
+ >>> print(code)
535
+ """
536
+ context = self.get_context(request, top_k)
537
+
538
+ system_prompt = f"""You are an expert {language} developer. Generate ONLY valid {language} code.
539
+
540
+ RULES:
541
+ 1. Output PURE CODE ONLY - no explanations, no markdown code blocks
542
+ 2. Include necessary imports
543
+ 3. Write clean, production-ready code
544
+ 4. Add brief comments for clarity"""
545
+
546
+ prompt = f"""Documentation:
547
+ {context}
548
+
549
+ Request: {request}
550
+
551
+ Generate the {language} code:"""
552
+
553
+ response = self.generate(prompt, system_prompt, temperature)
554
+
555
+ # Clean up response - remove markdown if present
556
+ code = response
557
+ if f"```{language}" in code:
558
+ code = code.split(f"```{language}")[1].split("```")[0]
559
+ elif "```" in code:
560
+ code = code.split("```")[1].split("```")[0]
561
+
562
+ return code.strip()
563
+
564
+ @property
565
+ def num_chunks(self) -> int:
566
+ """Return number of indexed chunks."""
567
+ return len(self._chunks)
568
+
569
+ @property
570
+ def num_documents(self) -> int:
571
+ """Return number of loaded documents."""
572
+ return len(self.documents)
573
+
574
+ @property
575
+ def has_llm(self) -> bool:
576
+ """Check if LLM is configured."""
577
+ return self._llm_provider is not None
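For reference, the search math in _build_index() and retrieve() reduces cosine similarity to a dot product against a pre-normalized matrix, with argpartition for top-k selection. The standalone NumPy sketch below reproduces that logic outside the class; the array sizes and random values are arbitrary toy data.

import numpy as np

rng = np.random.default_rng(0)
embeddings = rng.normal(size=(5, 8))   # 5 chunks, 8-dim vectors (toy values)

# Normalize rows once at index time so each search is a single matrix-vector product.
norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
norms[norms == 0] = 1                  # guard against zero vectors, as the class does
matrix = embeddings / norms

query = rng.normal(size=8)
query /= np.linalg.norm(query)

similarities = matrix @ query          # cosine similarity, since both sides are unit-length

top_k = 3
# argpartition finds the top_k entries without a full sort; only those are then ordered.
top = np.argpartition(similarities, -top_k)[-top_k:]
top = top[np.argsort(similarities[top])[::-1]]
print(list(top), similarities[top].round(3))

Pre-normalizing trades a little indexing work for cheaper queries, which matches the "normalize once, use many times" comment in _build_index().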