ragit 0.3__py3-none-any.whl → 0.10.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ragit/logging.py ADDED
@@ -0,0 +1,194 @@
1
+ #
2
+ # Copyright RODMENA LIMITED 2025
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ """
6
+ Structured logging for ragit.
7
+
8
+ Provides consistent logging across all ragit components with:
9
+ - Operation timing
10
+ - Context tracking
11
+ - Configurable log levels
12
+ """
13
+
14
+ import logging
15
+ import time
16
+ from collections.abc import Callable, Generator
17
+ from contextlib import contextmanager
18
+ from functools import wraps
19
+ from typing import Any, TypeVar
20
+
21
+ # Create ragit logger
22
+ logger = logging.getLogger("ragit")
23
+
24
+ # Type variable for decorated functions
25
+ F = TypeVar("F", bound=Callable[..., Any])
26
+
27
+
28
+ def setup_logging(level: str = "INFO", format_string: str | None = None) -> None:
29
+ """Configure ragit logging.
30
+
31
+ Parameters
32
+ ----------
33
+ level : str
34
+ Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL).
35
+ format_string : str, optional
36
+ Custom format string. If None, uses default format.
37
+
38
+ Examples
39
+ --------
40
+ >>> from ragit.logging import setup_logging
41
+ >>> setup_logging("DEBUG")
42
+ """
43
+ logger.setLevel(level.upper())
44
+
45
+ # Only add handler if none exist
46
+ if not logger.handlers:
47
+ handler = logging.StreamHandler()
48
+ handler.setLevel(level.upper())
49
+
50
+ if format_string is None:
51
+ format_string = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
52
+
53
+ formatter = logging.Formatter(format_string)
54
+ handler.setFormatter(formatter)
55
+ logger.addHandler(handler)
56
+
57
+
58
@contextmanager
def log_operation(operation: str, **context: Any) -> Generator[dict[str, Any], None, None]:
    """Context manager for logging operations with timing.

    Emits ``<operation>.start`` at DEBUG on entry, then either
    ``<operation>.success`` at INFO or ``<operation>.failed`` at ERROR
    (with traceback) on exit, each tagged with the elapsed milliseconds.

    Parameters
    ----------
    operation : str
        Name of the operation being performed.
    **context
        Additional context to include in log messages.

    Yields
    ------
    dict
        Mutable dict to add additional context during the operation.

    Examples
    --------
    >>> with log_operation("embed", model="nomic-embed-text") as ctx:
    ...     result = provider.embed(text, model)
    ...     ctx["dimensions"] = len(result.embedding)
    """

    def render(items: dict[str, Any]) -> str:
        # Format context pairs as "k=v, k=v" for log messages.
        return ", ".join(f"{key}={value}" for key, value in items.items())

    started = time.perf_counter()
    extra_context: dict[str, Any] = {}

    opening = f" [{render(context)}]" if context else ""
    logger.debug(f"{operation}.start{opening}")

    try:
        yield extra_context
    except Exception as exc:
        elapsed = (time.perf_counter() - started) * 1000
        merged = {**context, **extra_context, "duration_ms": f"{elapsed:.2f}", "error": str(exc)}
        logger.error(f"{operation}.failed [{render(merged)}]", exc_info=True)
        raise
    else:
        elapsed = (time.perf_counter() - started) * 1000
        merged = {**context, **extra_context, "duration_ms": f"{elapsed:.2f}"}
        logger.info(f"{operation}.success [{render(merged)}]")
104
+
105
+
106
def log_method(operation: str) -> Callable[[F], F]:
    """Decorator for logging method calls with timing.

    Parameters
    ----------
    operation : str
        Name of the operation for logging.

    Returns
    -------
    Callable
        Decorated function.

    Examples
    --------
    >>> class MyProvider:
    ...     @log_method("embed")
    ...     def embed(self, text: str, model: str):
    ...         ...
    """

    def decorator(func: F) -> F:
        # Delegate to log_operation so timing and message formatting stay
        # uniform across decorated methods and explicit context managers.
        @wraps(func)
        def timed(*args: Any, **kwargs: Any) -> Any:
            with log_operation(operation, method=func.__name__):
                return func(*args, **kwargs)

        return timed  # type: ignore

    return decorator
136
+
137
+
138
+ class LogContext:
139
+ """Context tracker for correlating related log messages.
140
+
141
+ Useful for tracing operations across multiple components.
142
+
143
+ Examples
144
+ --------
145
+ >>> ctx = LogContext("query-123")
146
+ >>> ctx.log("Starting retrieval", top_k=5)
147
+ >>> ctx.log("Retrieved chunks", count=3)
148
+ """
149
+
150
+ def __init__(self, request_id: str | None = None):
151
+ """Initialize log context.
152
+
153
+ Parameters
154
+ ----------
155
+ request_id : str, optional
156
+ Unique identifier for this context. Auto-generated if not provided.
157
+ """
158
+ self.request_id = request_id or f"req-{int(time.time() * 1000) % 100000}"
159
+ self._start_time = time.perf_counter()
160
+
161
+ def log(self, message: str, level: str = "INFO", **context: Any) -> None:
162
+ """Log a message with this context.
163
+
164
+ Parameters
165
+ ----------
166
+ message : str
167
+ Log message.
168
+ level : str
169
+ Log level (DEBUG, INFO, WARNING, ERROR).
170
+ **context
171
+ Additional context key-value pairs.
172
+ """
173
+ elapsed_ms = (time.perf_counter() - self._start_time) * 1000
174
+ ctx_str = ", ".join(f"{k}={v}" for k, v in context.items())
175
+ full_msg = f"[{self.request_id}] {message}" + (f" [{ctx_str}]" if ctx_str else "") + f" (+{elapsed_ms:.0f}ms)"
176
+
177
+ log_level = getattr(logging, level.upper(), logging.INFO)
178
+ logger.log(log_level, full_msg)
179
+
180
+ def debug(self, message: str, **context: Any) -> None:
181
+ """Log debug message."""
182
+ self.log(message, "DEBUG", **context)
183
+
184
+ def info(self, message: str, **context: Any) -> None:
185
+ """Log info message."""
186
+ self.log(message, "INFO", **context)
187
+
188
+ def warning(self, message: str, **context: Any) -> None:
189
+ """Log warning message."""
190
+ self.log(message, "WARNING", **context)
191
+
192
+ def error(self, message: str, **context: Any) -> None:
193
+ """Log error message."""
194
+ self.log(message, "ERROR", **context)
ragit/monitor.py ADDED
@@ -0,0 +1,307 @@
1
+ #
2
+ # Copyright RODMENA LIMITED 2025
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ """
6
+ Execution monitoring with timing and JSON export.
7
+
8
+ Pattern inspired by ai4rag experiment_monitor.py.
9
+
10
+ Provides structured tracking of:
11
+ - Pattern execution times (e.g., experiment configurations)
12
+ - Step execution times within patterns
13
+ - Summary statistics and JSON export
14
+ """
15
+
16
+ import json
17
+ import time
18
+ from collections.abc import Generator
19
+ from contextlib import contextmanager
20
+ from dataclasses import dataclass, field
21
+ from pathlib import Path
22
+ from typing import Any
23
+
24
+
25
+ @dataclass
26
+ class StepTiming:
27
+ """Timing information for a single step."""
28
+
29
+ name: str
30
+ start_time: float
31
+ end_time: float | None = None
32
+ metadata: dict[str, Any] = field(default_factory=dict)
33
+
34
+ @property
35
+ def duration_ms(self) -> float | None:
36
+ """Duration in milliseconds."""
37
+ if self.end_time is None:
38
+ return None
39
+ return (self.end_time - self.start_time) * 1000
40
+
41
+ def to_dict(self) -> dict[str, Any]:
42
+ """Convert to dictionary for JSON serialization."""
43
+ return {
44
+ "name": self.name,
45
+ "duration_ms": self.duration_ms,
46
+ **self.metadata,
47
+ }
48
+
49
+
50
@dataclass
class PatternTiming:
    """Timing information for a pattern (e.g., experiment configuration)."""

    # Pattern name (e.g., a configuration identifier).
    name: str
    # perf_counter() timestamp when the pattern started.
    start_time: float
    # perf_counter() timestamp when the pattern finished; None while running.
    end_time: float | None = None
    # Steps recorded while this pattern was active.
    steps: list[StepTiming] = field(default_factory=list)
    # Arbitrary caller-supplied context attached to this pattern.
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def duration_ms(self) -> float | None:
        """Elapsed time in milliseconds, or None if the pattern has not finished."""
        return None if self.end_time is None else (self.end_time - self.start_time) * 1000

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for JSON serialization, including steps.

        Metadata keys are merged in last and may shadow the fixed keys.
        """
        payload: dict[str, Any] = {
            "name": self.name,
            "duration_ms": self.duration_ms,
            "steps": [step.to_dict() for step in self.steps],
        }
        payload.update(self.metadata)
        return payload
75
+
76
+
77
class ExecutionMonitor:
    """
    Monitor experiment execution with timing and export.

    Tracks pattern execution times, step timings within patterns,
    and provides summary statistics and JSON export.

    Pattern from ai4rag experiment_monitor.py.

    Notes
    -----
    Steps recorded while no pattern is active are timed but discarded.
    Nested ``pattern``/``step`` contexts are not supported: the inner one
    overwrites the monitor's notion of "current".

    Examples
    --------
    >>> monitor = ExecutionMonitor()
    >>> with monitor.pattern("config-1"):
    ...     with monitor.step("indexing", chunk_size=512):
    ...         # Index documents
    ...         pass
    ...     with monitor.step("retrieval", top_k=3):
    ...         # Retrieve results
    ...         pass
    >>> monitor.print_summary()
    >>> monitor.export_json("timing.json")
    """

    def __init__(self) -> None:
        self._patterns: list[PatternTiming] = []  # completed patterns only
        self._current_pattern: PatternTiming | None = None
        self._current_step: StepTiming | None = None
        self._start_time = time.perf_counter()

    @contextmanager
    def pattern(self, name: str, **metadata: Any) -> Generator[PatternTiming, None, None]:
        """
        Context manager for timing a pattern execution.

        Parameters
        ----------
        name : str
            Pattern name (e.g., configuration identifier).
        **metadata
            Additional metadata to attach to the pattern.

        Yields
        ------
        PatternTiming
            The pattern timing object (can be modified).
        """
        pattern = PatternTiming(name=name, start_time=time.perf_counter(), metadata=metadata)
        self._current_pattern = pattern
        try:
            yield pattern
        finally:
            # Record the pattern even if its body raised, so partial runs
            # still appear in the summary.
            pattern.end_time = time.perf_counter()
            self._patterns.append(pattern)
            self._current_pattern = None

    @contextmanager
    def step(self, name: str, **metadata: Any) -> Generator[StepTiming, None, None]:
        """
        Context manager for timing a step within a pattern.

        Parameters
        ----------
        name : str
            Step name (e.g., "indexing", "retrieval", "evaluation").
        **metadata
            Additional metadata to attach to the step.

        Yields
        ------
        StepTiming
            The step timing object (can be modified).
        """
        step = StepTiming(name=name, start_time=time.perf_counter(), metadata=metadata)
        self._current_step = step
        try:
            yield step
        finally:
            step.end_time = time.perf_counter()
            # Steps are only retained when a pattern is currently active.
            if self._current_pattern is not None:
                self._current_pattern.steps.append(step)
            self._current_step = None

    def on_pattern_start(self, pattern_name: str, **metadata: Any) -> None:
        """Manual pattern start (alternative to context manager)."""
        self._current_pattern = PatternTiming(name=pattern_name, start_time=time.perf_counter(), metadata=metadata)

    def on_pattern_finish(self, **metadata: Any) -> None:
        """Manual pattern finish (alternative to context manager); no-op if none active."""
        if self._current_pattern:
            self._current_pattern.end_time = time.perf_counter()
            self._current_pattern.metadata.update(metadata)
            self._patterns.append(self._current_pattern)
            self._current_pattern = None

    def on_step_start(self, step_name: str, **metadata: Any) -> None:
        """Manual step start (alternative to context manager)."""
        self._current_step = StepTiming(name=step_name, start_time=time.perf_counter(), metadata=metadata)

    def on_step_finish(self, **metadata: Any) -> None:
        """Manual step finish (alternative to context manager); no-op if none active."""
        if self._current_step:
            self._current_step.end_time = time.perf_counter()
            self._current_step.metadata.update(metadata)
            if self._current_pattern is not None:
                self._current_pattern.steps.append(self._current_step)
            self._current_step = None

    @property
    def total_duration_ms(self) -> float:
        """Total duration since monitor creation in milliseconds."""
        return (time.perf_counter() - self._start_time) * 1000

    @property
    def pattern_count(self) -> int:
        """Number of completed patterns."""
        return len(self._patterns)

    def get_summary(self) -> dict[str, Any]:
        """
        Get summary statistics as dictionary.

        Returns
        -------
        dict
            Summary with total duration, pattern count, and pattern details.
        """
        return {
            "total_duration_ms": self.total_duration_ms,
            "pattern_count": self.pattern_count,
            "patterns": [p.to_dict() for p in self._patterns],
        }

    def get_step_aggregates(self) -> dict[str, dict[str, float]]:
        """
        Get aggregated step statistics across all patterns.

        Returns
        -------
        dict
            Step name -> {count, total_ms, avg_ms, min_ms, max_ms}
        """
        durations_by_step: dict[str, list[float]] = {}
        for pattern in self._patterns:
            for step in pattern.steps:
                # Skip steps that never finished (duration unknown).
                if step.duration_ms is not None:
                    durations_by_step.setdefault(step.name, []).append(step.duration_ms)

        return {
            name: {
                "count": len(durations),
                "total_ms": sum(durations),
                "avg_ms": sum(durations) / len(durations),
                "min_ms": min(durations),
                "max_ms": max(durations),
            }
            for name, durations in durations_by_step.items()
        }

    def export_json(self, path: Path | str, indent: int = 2) -> None:
        """
        Export monitoring data to JSON file.

        Parameters
        ----------
        path : Path or str
            Output file path.
        indent : int
            JSON indentation (default: 2).
        """
        data = {
            **self.get_summary(),
            "step_aggregates": self.get_step_aggregates(),
        }
        # Fix: write with explicit UTF-8 so the export does not depend on the
        # platform's default locale encoding (original used open() without one).
        Path(path).write_text(json.dumps(data, indent=indent), encoding="utf-8")

    def print_summary(self, show_steps: bool = True) -> None:
        """
        Print human-readable summary to console.

        Parameters
        ----------
        show_steps : bool
            Include step-level details (default: True).
        """
        summary = self.get_summary()

        print(f"\n{'=' * 60}")
        print(f"Execution Summary (Total: {summary['total_duration_ms']:.0f}ms)")
        print(f"Patterns: {summary['pattern_count']}")
        print(f"{'=' * 60}")

        for pattern in summary["patterns"]:
            duration = pattern.get("duration_ms")
            # Fix: compare against None — a genuine 0ms duration is falsy and
            # was previously misreported as "in progress".
            duration_str = f"{duration:.0f}ms" if duration is not None else "in progress"
            print(f"\n{pattern['name']}: {duration_str}")

            if show_steps:
                for step in pattern.get("steps", []):
                    step_duration = step.get("duration_ms")
                    step_duration_str = f"{step_duration:.0f}ms" if step_duration is not None else "in progress"
                    # Show first few metadata items only, to keep lines short.
                    meta_items = [(k, v) for k, v in step.items() if k not in ("name", "duration_ms")][:3]
                    meta_str = ", ".join(f"{k}={v}" for k, v in meta_items) if meta_items else ""
                    print(f" - {step['name']}: {step_duration_str}" + (f" ({meta_str})" if meta_str else ""))

        # Print step aggregates, largest total time first.
        aggregates = self.get_step_aggregates()
        if aggregates:
            print(f"\n{'-' * 60}")
            print("Step Aggregates:")
            for name, stats in sorted(aggregates.items(), key=lambda x: -x[1]["total_ms"]):
                print(
                    f" {name}: {stats['count']}x, total={stats['total_ms']:.0f}ms, "
                    f"avg={stats['avg_ms']:.0f}ms, range=[{stats['min_ms']:.0f}-{stats['max_ms']:.0f}]ms"
                )

    def reset(self) -> None:
        """Reset the monitor, clearing all recorded patterns."""
        self._patterns.clear()
        self._current_pattern = None
        self._current_step = None
        self._start_time = time.perf_counter()
@@ -0,0 +1,35 @@
1
+ #
2
+ # Copyright RODMENA LIMITED 2025
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ """
6
+ Ragit Providers - LLM and Embedding providers for RAG optimization.
7
+
8
+ Supported providers:
9
+ - OllamaProvider: Connect to local or remote Ollama servers (supports nomic-embed-text)
10
+ - FunctionProvider: Wrap custom embedding/LLM functions
11
+
12
+ Base classes for implementing custom providers:
13
+ - BaseLLMProvider: Abstract base for LLM providers
14
+ - BaseEmbeddingProvider: Abstract base for embedding providers
15
+ """
16
+
17
+ from ragit.providers.base import (
18
+ BaseEmbeddingProvider,
19
+ BaseLLMProvider,
20
+ EmbeddingResponse,
21
+ LLMResponse,
22
+ )
23
+ from ragit.providers.function_adapter import FunctionProvider
24
+ from ragit.providers.ollama import OllamaProvider
25
+
26
# Public API of ragit.providers.
__all__ = [
    # Abstract interfaces and response dataclasses
    "BaseLLMProvider",
    "BaseEmbeddingProvider",
    "LLMResponse",
    "EmbeddingResponse",
    # Concrete provider implementations
    "OllamaProvider",
    "FunctionProvider",
]
@@ -0,0 +1,147 @@
1
+ #
2
+ # Copyright RODMENA LIMITED 2025
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ """
6
+ Base provider interfaces for LLM and Embedding providers.
7
+
8
+ These abstract classes define the interface that all providers must implement,
9
+ making it easy to add new providers (Gemini, Claude, OpenAI, etc.)
10
+ """
11
+
12
+ from abc import ABC, abstractmethod
13
+ from dataclasses import dataclass
14
+
15
+
16
+ @dataclass
17
+ class LLMResponse:
18
+ """Response from an LLM call."""
19
+
20
+ text: str
21
+ model: str
22
+ provider: str
23
+ usage: dict[str, int] | None = None
24
+
25
+
26
@dataclass(frozen=True)
class EmbeddingResponse:
    """Response from an embedding call (immutable)."""

    # Embedding vector as an immutable tuple of floats.
    embedding: tuple[float, ...]
    # Model identifier that produced the embedding.
    model: str
    # Name of the provider that served the call.
    provider: str
    # Length of the embedding vector.
    dimensions: int
34
+
35
+
36
class BaseLLMProvider(ABC):
    """
    Abstract base class for LLM providers.

    Implement this to add support for new LLM providers like Gemini, Claude, etc.
    """

    @property
    @abstractmethod
    def provider_name(self) -> str:
        """Return the provider name (e.g., 'ollama', 'gemini', 'claude')."""
        ...

    @abstractmethod
    def generate(
        self,
        prompt: str,
        model: str,
        system_prompt: str | None = None,
        temperature: float = 0.7,
        max_tokens: int | None = None,
    ) -> LLMResponse:
        """
        Generate text from the LLM.

        Parameters
        ----------
        prompt : str
            The user prompt/query.
        model : str
            Model identifier (e.g., 'llama3', 'qwen3-vl:235b-instruct-cloud').
        system_prompt : str, optional
            System prompt for context/instructions.
        temperature : float
            Sampling temperature (0.0 to 1.0).
        max_tokens : int, optional
            Maximum tokens to generate.

        Returns
        -------
        LLMResponse
            The generated response.
        """
        ...

    @abstractmethod
    def is_available(self) -> bool:
        """Check if the provider is available and configured."""
        ...
85
+
86
+
87
class BaseEmbeddingProvider(ABC):
    """
    Abstract base class for embedding providers.

    Implement this to add support for new embedding providers.
    """

    @property
    @abstractmethod
    def provider_name(self) -> str:
        """Return the provider name."""
        ...

    @property
    @abstractmethod
    def dimensions(self) -> int:
        """Return the embedding dimensions for the current model."""
        ...

    @abstractmethod
    def embed(self, text: str, model: str) -> EmbeddingResponse:
        """
        Generate embedding for text.

        Parameters
        ----------
        text : str
            Text to embed.
        model : str
            Model identifier (e.g., 'nomic-embed-text').

        Returns
        -------
        EmbeddingResponse
            The embedding response.
        """
        ...

    @abstractmethod
    def embed_batch(self, texts: list[str], model: str) -> list[EmbeddingResponse]:
        """
        Generate embeddings for multiple texts.

        Parameters
        ----------
        texts : list[str]
            Texts to embed.
        model : str
            Model identifier.

        Returns
        -------
        list[EmbeddingResponse]
            List of embedding responses.
        """
        ...

    @abstractmethod
    def is_available(self) -> bool:
        """Check if the provider is available and configured."""
        ...