ragit 0.8.2__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragit/__init__.py +27 -15
- ragit/assistant.py +431 -40
- ragit/config.py +165 -22
- ragit/core/experiment/experiment.py +7 -1
- ragit/exceptions.py +271 -0
- ragit/loaders.py +200 -44
- ragit/logging.py +194 -0
- ragit/monitor.py +307 -0
- ragit/providers/__init__.py +1 -13
- ragit/providers/ollama.py +379 -121
- ragit/utils/__init__.py +0 -22
- ragit/version.py +1 -1
- {ragit-0.8.2.dist-info → ragit-0.11.0.dist-info}/METADATA +48 -25
- ragit-0.11.0.dist-info/RECORD +22 -0
- {ragit-0.8.2.dist-info → ragit-0.11.0.dist-info}/WHEEL +1 -1
- ragit/providers/sentence_transformers.py +0 -225
- ragit-0.8.2.dist-info/RECORD +0 -20
- {ragit-0.8.2.dist-info → ragit-0.11.0.dist-info}/licenses/LICENSE +0 -0
- {ragit-0.8.2.dist-info → ragit-0.11.0.dist-info}/top_level.txt +0 -0
ragit/monitor.py
ADDED
|
@@ -0,0 +1,307 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright RODMENA LIMITED 2025
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
#
|
|
5
|
+
"""
|
|
6
|
+
Execution monitoring with timing and JSON export.
|
|
7
|
+
|
|
8
|
+
Pattern inspired by ai4rag experiment_monitor.py.
|
|
9
|
+
|
|
10
|
+
Provides structured tracking of:
|
|
11
|
+
- Pattern execution times (e.g., experiment configurations)
|
|
12
|
+
- Step execution times within patterns
|
|
13
|
+
- Summary statistics and JSON export
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
import time
|
|
18
|
+
from collections.abc import Generator
|
|
19
|
+
from contextlib import contextmanager
|
|
20
|
+
from dataclasses import dataclass, field
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Any
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
|
|
26
|
+
class StepTiming:
|
|
27
|
+
"""Timing information for a single step."""
|
|
28
|
+
|
|
29
|
+
name: str
|
|
30
|
+
start_time: float
|
|
31
|
+
end_time: float | None = None
|
|
32
|
+
metadata: dict[str, Any] = field(default_factory=dict)
|
|
33
|
+
|
|
34
|
+
@property
|
|
35
|
+
def duration_ms(self) -> float | None:
|
|
36
|
+
"""Duration in milliseconds."""
|
|
37
|
+
if self.end_time is None:
|
|
38
|
+
return None
|
|
39
|
+
return (self.end_time - self.start_time) * 1000
|
|
40
|
+
|
|
41
|
+
def to_dict(self) -> dict[str, Any]:
|
|
42
|
+
"""Convert to dictionary for JSON serialization."""
|
|
43
|
+
return {
|
|
44
|
+
"name": self.name,
|
|
45
|
+
"duration_ms": self.duration_ms,
|
|
46
|
+
**self.metadata,
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class PatternTiming:
    """Timing information for a pattern (e.g., experiment configuration).

    ``start_time`` and ``end_time`` are timestamps in seconds (their
    difference is multiplied by 1000 to obtain milliseconds). ``end_time``
    stays ``None`` until the pattern has finished.
    """

    name: str
    start_time: float
    end_time: float | None = None
    # Steps recorded for this pattern, in the order they were appended.
    steps: list[StepTiming] = field(default_factory=list)
    # Free-form key/value pairs that are flattened into ``to_dict`` output.
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def duration_ms(self) -> float | None:
        """Duration in milliseconds, or ``None`` if ``end_time`` is not set."""
        if self.end_time is None:
            return None
        return (self.end_time - self.start_time) * 1000

    def to_dict(self) -> dict[str, Any]:
        """
        Convert to a flat dictionary for JSON serialization.

        Returns
        -------
        dict
            ``name``, ``duration_ms`` and ``steps`` (each step converted with
            its own ``to_dict``) followed by the metadata entries. Metadata
            keys that collide with those three names are left out so
            free-form metadata can never overwrite the recorded values; they
            remain available on ``self.metadata``.
        """
        core: dict[str, Any] = {
            "name": self.name,
            "duration_ms": self.duration_ms,
            "steps": [step.to_dict() for step in self.steps],
        }
        # Core fields win over metadata; key order matches the original layout.
        extras = {key: value for key, value in self.metadata.items() if key not in core}
        return {**core, **extras}
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class ExecutionMonitor:
    """
    Monitor experiment execution with timing and export.

    Tracks pattern execution times, step timings within patterns,
    and provides summary statistics and JSON export.

    Pattern from ai4rag experiment_monitor.py.

    Notes
    -----
    The monitor keeps one "current pattern" and one "current step". The
    ``pattern`` and ``step`` context managers restore whichever pattern or
    step was current when they were entered, so they can be nested. A step
    that finishes while no pattern is current is timed but not recorded.

    Examples
    --------
    >>> monitor = ExecutionMonitor()
    >>> with monitor.pattern("config-1"):
    ...     with monitor.step("indexing", chunk_size=512):
    ...         # Index documents
    ...         pass
    ...     with monitor.step("retrieval", top_k=3):
    ...         # Retrieve results
    ...         pass
    >>> monitor.print_summary()
    >>> monitor.export_json("timing.json")
    """

    def __init__(self) -> None:
        # Finished patterns, in the order they finished.
        self._patterns: list[PatternTiming] = []
        self._current_pattern: PatternTiming | None = None
        self._current_step: StepTiming | None = None
        # Reference point for ``total_duration_ms``.
        self._start_time = time.perf_counter()

    @contextmanager
    def pattern(self, name: str, **metadata: Any) -> Generator[PatternTiming, None, None]:
        """
        Context manager for timing a pattern execution.

        The pattern is recorded when the block exits, including when it
        exits through an exception.

        Parameters
        ----------
        name : str
            Pattern name (e.g., configuration identifier).
        **metadata
            Additional metadata to attach to the pattern.

        Yields
        ------
        PatternTiming
            The pattern timing object (can be modified).
        """
        pattern = PatternTiming(name=name, start_time=time.perf_counter(), metadata=metadata)
        previous = self._current_pattern
        self._current_pattern = pattern

        try:
            yield pattern
        finally:
            pattern.end_time = time.perf_counter()
            self._patterns.append(pattern)
            # Restore the enclosing pattern (None at top level) so steps that
            # follow a nested pattern are still attached to the outer one.
            self._current_pattern = previous

    @contextmanager
    def step(self, name: str, **metadata: Any) -> Generator[StepTiming, None, None]:
        """
        Context manager for timing a step within a pattern.

        On exit the step is appended to the pattern that is current at that
        moment; if there is none, the step is not recorded.

        Parameters
        ----------
        name : str
            Step name (e.g., "indexing", "retrieval", "evaluation").
        **metadata
            Additional metadata to attach to the step.

        Yields
        ------
        StepTiming
            The step timing object (can be modified).
        """
        step = StepTiming(name=name, start_time=time.perf_counter(), metadata=metadata)
        previous = self._current_step
        self._current_step = step

        try:
            yield step
        finally:
            step.end_time = time.perf_counter()
            if self._current_pattern is not None:
                self._current_pattern.steps.append(step)
            # Restore the enclosing step (None at top level) instead of
            # unconditionally clearing it.
            self._current_step = previous

    def on_pattern_start(self, pattern_name: str, **metadata: Any) -> None:
        """Manual pattern start (alternative to context manager).

        Replaces whatever pattern is currently tracked.
        """
        self._current_pattern = PatternTiming(name=pattern_name, start_time=time.perf_counter(), metadata=metadata)

    def on_pattern_finish(self, **metadata: Any) -> None:
        """Manual pattern finish (alternative to context manager).

        Merges ``metadata`` into the current pattern and records it.
        Does nothing when no pattern is current.
        """
        current = self._current_pattern
        if current is None:
            return
        current.end_time = time.perf_counter()
        current.metadata.update(metadata)
        self._patterns.append(current)
        self._current_pattern = None

    def on_step_start(self, step_name: str, **metadata: Any) -> None:
        """Manual step start (alternative to context manager).

        Replaces whatever step is currently tracked.
        """
        self._current_step = StepTiming(name=step_name, start_time=time.perf_counter(), metadata=metadata)

    def on_step_finish(self, **metadata: Any) -> None:
        """Manual step finish (alternative to context manager).

        Merges ``metadata`` into the current step and appends it to the
        current pattern, if any. Does nothing when no step is current.
        """
        current = self._current_step
        if current is None:
            return
        current.end_time = time.perf_counter()
        current.metadata.update(metadata)
        if self._current_pattern is not None:
            self._current_pattern.steps.append(current)
        self._current_step = None

    @property
    def total_duration_ms(self) -> float:
        """Total duration since monitor creation (or last reset) in milliseconds."""
        return (time.perf_counter() - self._start_time) * 1000

    @property
    def pattern_count(self) -> int:
        """Number of completed patterns."""
        return len(self._patterns)

    def get_summary(self) -> dict[str, Any]:
        """
        Get summary statistics as dictionary.

        Returns
        -------
        dict
            Summary with total duration, pattern count, and pattern details.
            Only finished patterns are included.
        """
        return {
            "total_duration_ms": self.total_duration_ms,
            "pattern_count": self.pattern_count,
            "patterns": [p.to_dict() for p in self._patterns],
        }

    def get_step_aggregates(self) -> dict[str, dict[str, float]]:
        """
        Get aggregated step statistics across all finished patterns.

        Steps without a duration are skipped.

        Returns
        -------
        dict
            Step name -> {count, total_ms, avg_ms, min_ms, max_ms}
        """
        durations_by_step: dict[str, list[float]] = {}
        for pattern in self._patterns:
            for step in pattern.steps:
                duration = step.duration_ms
                if duration is not None:
                    durations_by_step.setdefault(step.name, []).append(duration)

        aggregates: dict[str, dict[str, float]] = {}
        for name, durations in durations_by_step.items():
            total = sum(durations)
            aggregates[name] = {
                "count": len(durations),
                "total_ms": total,
                "avg_ms": total / len(durations),
                "min_ms": min(durations),
                "max_ms": max(durations),
            }

        return aggregates

    def export_json(self, path: Path | str, indent: int = 2) -> None:
        """
        Export monitoring data to JSON file.

        The file contains the summary from ``get_summary`` plus a
        ``step_aggregates`` entry from ``get_step_aggregates``.

        Parameters
        ----------
        path : Path or str
            Output file path.
        indent : int
            JSON indentation (default: 2).
        """
        path = Path(path)
        data = {
            **self.get_summary(),
            "step_aggregates": self.get_step_aggregates(),
        }

        # Explicit encoding so the output does not depend on the locale default.
        with path.open("w", encoding="utf-8") as f:
            json.dump(data, f, indent=indent)

    def print_summary(self, show_steps: bool = True) -> None:
        """
        Print human-readable summary to console.

        Parameters
        ----------
        show_steps : bool
            Include step-level details (default: True).
        """
        summary = self.get_summary()

        print(f"\n{'=' * 60}")
        print(f"Execution Summary (Total: {summary['total_duration_ms']:.0f}ms)")
        print(f"Patterns: {summary['pattern_count']}")
        print(f"{'=' * 60}")

        for pattern in summary["patterns"]:
            duration = pattern.get("duration_ms")
            # Compare against None: a measured 0.0 ms is a finished pattern.
            duration_str = f"{duration:.0f}ms" if duration is not None else "in progress"
            print(f"\n{pattern['name']}: {duration_str}")

            if show_steps:
                for step in pattern.get("steps", []):
                    step_duration = step.get("duration_ms")
                    step_duration_str = f"{step_duration:.0f}ms" if step_duration is not None else "in progress"
                    # Show first few metadata items
                    meta_items = [(k, v) for k, v in step.items() if k not in ("name", "duration_ms")][:3]
                    meta_str = ", ".join(f"{k}={v}" for k, v in meta_items) if meta_items else ""
                    print(f"  - {step['name']}: {step_duration_str}" + (f" ({meta_str})" if meta_str else ""))

        # Print step aggregates, slowest total first
        aggregates = self.get_step_aggregates()
        if aggregates:
            print(f"\n{'-' * 60}")
            print("Step Aggregates:")
            for name, stats in sorted(aggregates.items(), key=lambda x: -x[1]["total_ms"]):
                print(
                    f"  {name}: {stats['count']}x, total={stats['total_ms']:.0f}ms, "
                    f"avg={stats['avg_ms']:.0f}ms, range=[{stats['min_ms']:.0f}-{stats['max_ms']:.0f}]ms"
                )

    def reset(self) -> None:
        """Reset the monitor, clearing all recorded patterns and restarting the clock."""
        self._patterns.clear()
        self._current_pattern = None
        self._current_step = None
        self._start_time = time.perf_counter()
|
ragit/providers/__init__.py
CHANGED
|
@@ -6,9 +6,8 @@
|
|
|
6
6
|
Ragit Providers - LLM and Embedding providers for RAG optimization.
|
|
7
7
|
|
|
8
8
|
Supported providers:
|
|
9
|
-
- OllamaProvider: Connect to local or remote Ollama servers
|
|
9
|
+
- OllamaProvider: Connect to local or remote Ollama servers (supports nomic-embed-text)
|
|
10
10
|
- FunctionProvider: Wrap custom embedding/LLM functions
|
|
11
|
-
- SentenceTransformersProvider: Offline embedding (requires ragit[transformers])
|
|
12
11
|
|
|
13
12
|
Base classes for implementing custom providers:
|
|
14
13
|
- BaseLLMProvider: Abstract base for LLM providers
|
|
@@ -34,14 +33,3 @@ __all__ = [
|
|
|
34
33
|
"OllamaProvider",
|
|
35
34
|
"FunctionProvider",
|
|
36
35
|
]
|
|
37
|
-
|
|
38
|
-
# Conditionally export SentenceTransformersProvider if available
|
|
39
|
-
try:
|
|
40
|
-
from ragit.providers.sentence_transformers import (
|
|
41
|
-
SentenceTransformersProvider as SentenceTransformersProvider,
|
|
42
|
-
)
|
|
43
|
-
|
|
44
|
-
__all__ += ["SentenceTransformersProvider"]
|
|
45
|
-
except ImportError:
|
|
46
|
-
# sentence-transformers not installed, SentenceTransformersProvider not available
|
|
47
|
-
pass
|