ragit 0.8.2__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ragit/monitor.py ADDED
@@ -0,0 +1,307 @@
1
+ #
2
+ # Copyright RODMENA LIMITED 2025
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+ """
6
+ Execution monitoring with timing and JSON export.
7
+
8
+ Pattern inspired by ai4rag experiment_monitor.py.
9
+
10
+ Provides structured tracking of:
11
+ - Pattern execution times (e.g., experiment configurations)
12
+ - Step execution times within patterns
13
+ - Summary statistics and JSON export
14
+ """
15
+
16
+ import json
17
+ import time
18
+ from collections.abc import Generator
19
+ from contextlib import contextmanager
20
+ from dataclasses import dataclass, field
21
+ from pathlib import Path
22
+ from typing import Any
23
+
24
+
25
+ @dataclass
26
+ class StepTiming:
27
+ """Timing information for a single step."""
28
+
29
+ name: str
30
+ start_time: float
31
+ end_time: float | None = None
32
+ metadata: dict[str, Any] = field(default_factory=dict)
33
+
34
+ @property
35
+ def duration_ms(self) -> float | None:
36
+ """Duration in milliseconds."""
37
+ if self.end_time is None:
38
+ return None
39
+ return (self.end_time - self.start_time) * 1000
40
+
41
+ def to_dict(self) -> dict[str, Any]:
42
+ """Convert to dictionary for JSON serialization."""
43
+ return {
44
+ "name": self.name,
45
+ "duration_ms": self.duration_ms,
46
+ **self.metadata,
47
+ }
48
+
49
+
50
+ @dataclass
51
+ class PatternTiming:
52
+ """Timing information for a pattern (e.g., experiment configuration)."""
53
+
54
+ name: str
55
+ start_time: float
56
+ end_time: float | None = None
57
+ steps: list[StepTiming] = field(default_factory=list)
58
+ metadata: dict[str, Any] = field(default_factory=dict)
59
+
60
+ @property
61
+ def duration_ms(self) -> float | None:
62
+ """Duration in milliseconds."""
63
+ if self.end_time is None:
64
+ return None
65
+ return (self.end_time - self.start_time) * 1000
66
+
67
+ def to_dict(self) -> dict[str, Any]:
68
+ """Convert to dictionary for JSON serialization."""
69
+ return {
70
+ "name": self.name,
71
+ "duration_ms": self.duration_ms,
72
+ "steps": [s.to_dict() for s in self.steps],
73
+ **self.metadata,
74
+ }
75
+
76
+
77
+ class ExecutionMonitor:
78
+ """
79
+ Monitor experiment execution with timing and export.
80
+
81
+ Tracks pattern execution times, step timings within patterns,
82
+ and provides summary statistics and JSON export.
83
+
84
+ Pattern from ai4rag experiment_monitor.py.
85
+
86
+ Examples
87
+ --------
88
+ >>> monitor = ExecutionMonitor()
89
+ >>> with monitor.pattern("config-1"):
90
+ ... with monitor.step("indexing", chunk_size=512):
91
+ ... # Index documents
92
+ ... pass
93
+ ... with monitor.step("retrieval", top_k=3):
94
+ ... # Retrieve results
95
+ ... pass
96
+ >>> monitor.print_summary()
97
+ >>> monitor.export_json("timing.json")
98
+ """
99
+
100
+ def __init__(self) -> None:
101
+ self._patterns: list[PatternTiming] = []
102
+ self._current_pattern: PatternTiming | None = None
103
+ self._current_step: StepTiming | None = None
104
+ self._start_time = time.perf_counter()
105
+
106
+ @contextmanager
107
+ def pattern(self, name: str, **metadata: Any) -> Generator[PatternTiming, None, None]:
108
+ """
109
+ Context manager for timing a pattern execution.
110
+
111
+ Parameters
112
+ ----------
113
+ name : str
114
+ Pattern name (e.g., configuration identifier).
115
+ **metadata
116
+ Additional metadata to attach to the pattern.
117
+
118
+ Yields
119
+ ------
120
+ PatternTiming
121
+ The pattern timing object (can be modified).
122
+ """
123
+ pattern = PatternTiming(name=name, start_time=time.perf_counter(), metadata=metadata)
124
+ self._current_pattern = pattern
125
+
126
+ try:
127
+ yield pattern
128
+ finally:
129
+ pattern.end_time = time.perf_counter()
130
+ self._patterns.append(pattern)
131
+ self._current_pattern = None
132
+
133
+ @contextmanager
134
+ def step(self, name: str, **metadata: Any) -> Generator[StepTiming, None, None]:
135
+ """
136
+ Context manager for timing a step within a pattern.
137
+
138
+ Parameters
139
+ ----------
140
+ name : str
141
+ Step name (e.g., "indexing", "retrieval", "evaluation").
142
+ **metadata
143
+ Additional metadata to attach to the step.
144
+
145
+ Yields
146
+ ------
147
+ StepTiming
148
+ The step timing object (can be modified).
149
+ """
150
+ step = StepTiming(name=name, start_time=time.perf_counter(), metadata=metadata)
151
+ self._current_step = step
152
+
153
+ try:
154
+ yield step
155
+ finally:
156
+ step.end_time = time.perf_counter()
157
+ if self._current_pattern is not None:
158
+ self._current_pattern.steps.append(step)
159
+ self._current_step = None
160
+
161
+ def on_pattern_start(self, pattern_name: str, **metadata: Any) -> None:
162
+ """Manual pattern start (alternative to context manager)."""
163
+ self._current_pattern = PatternTiming(name=pattern_name, start_time=time.perf_counter(), metadata=metadata)
164
+
165
+ def on_pattern_finish(self, **metadata: Any) -> None:
166
+ """Manual pattern finish (alternative to context manager)."""
167
+ if self._current_pattern:
168
+ self._current_pattern.end_time = time.perf_counter()
169
+ self._current_pattern.metadata.update(metadata)
170
+ self._patterns.append(self._current_pattern)
171
+ self._current_pattern = None
172
+
173
+ def on_step_start(self, step_name: str, **metadata: Any) -> None:
174
+ """Manual step start (alternative to context manager)."""
175
+ self._current_step = StepTiming(name=step_name, start_time=time.perf_counter(), metadata=metadata)
176
+
177
+ def on_step_finish(self, **metadata: Any) -> None:
178
+ """Manual step finish (alternative to context manager)."""
179
+ if self._current_step:
180
+ self._current_step.end_time = time.perf_counter()
181
+ self._current_step.metadata.update(metadata)
182
+ if self._current_pattern is not None:
183
+ self._current_pattern.steps.append(self._current_step)
184
+ self._current_step = None
185
+
186
+ @property
187
+ def total_duration_ms(self) -> float:
188
+ """Total duration since monitor creation in milliseconds."""
189
+ return (time.perf_counter() - self._start_time) * 1000
190
+
191
+ @property
192
+ def pattern_count(self) -> int:
193
+ """Number of completed patterns."""
194
+ return len(self._patterns)
195
+
196
+ def get_summary(self) -> dict[str, Any]:
197
+ """
198
+ Get summary statistics as dictionary.
199
+
200
+ Returns
201
+ -------
202
+ dict
203
+ Summary with total duration, pattern count, and pattern details.
204
+ """
205
+ return {
206
+ "total_duration_ms": self.total_duration_ms,
207
+ "pattern_count": self.pattern_count,
208
+ "patterns": [p.to_dict() for p in self._patterns],
209
+ }
210
+
211
+ def get_step_aggregates(self) -> dict[str, dict[str, float]]:
212
+ """
213
+ Get aggregated step statistics across all patterns.
214
+
215
+ Returns
216
+ -------
217
+ dict
218
+ Step name -> {count, total_ms, avg_ms, min_ms, max_ms}
219
+ """
220
+ step_stats: dict[str, list[float]] = {}
221
+
222
+ for pattern in self._patterns:
223
+ for step in pattern.steps:
224
+ if step.duration_ms is not None:
225
+ if step.name not in step_stats:
226
+ step_stats[step.name] = []
227
+ step_stats[step.name].append(step.duration_ms)
228
+
229
+ aggregates = {}
230
+ for name, durations in step_stats.items():
231
+ aggregates[name] = {
232
+ "count": len(durations),
233
+ "total_ms": sum(durations),
234
+ "avg_ms": sum(durations) / len(durations),
235
+ "min_ms": min(durations),
236
+ "max_ms": max(durations),
237
+ }
238
+
239
+ return aggregates
240
+
241
+ def export_json(self, path: Path | str, indent: int = 2) -> None:
242
+ """
243
+ Export monitoring data to JSON file.
244
+
245
+ Parameters
246
+ ----------
247
+ path : Path or str
248
+ Output file path.
249
+ indent : int
250
+ JSON indentation (default: 2).
251
+ """
252
+ path = Path(path)
253
+ data = {
254
+ **self.get_summary(),
255
+ "step_aggregates": self.get_step_aggregates(),
256
+ }
257
+
258
+ with open(path, "w") as f:
259
+ json.dump(data, f, indent=indent)
260
+
261
+ def print_summary(self, show_steps: bool = True) -> None:
262
+ """
263
+ Print human-readable summary to console.
264
+
265
+ Parameters
266
+ ----------
267
+ show_steps : bool
268
+ Include step-level details (default: True).
269
+ """
270
+ summary = self.get_summary()
271
+
272
+ print(f"\n{'=' * 60}")
273
+ print(f"Execution Summary (Total: {summary['total_duration_ms']:.0f}ms)")
274
+ print(f"Patterns: {summary['pattern_count']}")
275
+ print(f"{'=' * 60}")
276
+
277
+ for pattern in summary["patterns"]:
278
+ duration = pattern.get("duration_ms")
279
+ duration_str = f"{duration:.0f}ms" if duration else "in progress"
280
+ print(f"\n{pattern['name']}: {duration_str}")
281
+
282
+ if show_steps:
283
+ for step in pattern.get("steps", []):
284
+ step_duration = step.get("duration_ms")
285
+ step_duration_str = f"{step_duration:.0f}ms" if step_duration else "in progress"
286
+ # Show first few metadata items
287
+ meta_items = [(k, v) for k, v in step.items() if k not in ("name", "duration_ms")][:3]
288
+ meta_str = ", ".join(f"{k}={v}" for k, v in meta_items) if meta_items else ""
289
+ print(f" - {step['name']}: {step_duration_str}" + (f" ({meta_str})" if meta_str else ""))
290
+
291
+ # Print step aggregates
292
+ aggregates = self.get_step_aggregates()
293
+ if aggregates:
294
+ print(f"\n{'-' * 60}")
295
+ print("Step Aggregates:")
296
+ for name, stats in sorted(aggregates.items(), key=lambda x: -x[1]["total_ms"]):
297
+ print(
298
+ f" {name}: {stats['count']}x, total={stats['total_ms']:.0f}ms, "
299
+ f"avg={stats['avg_ms']:.0f}ms, range=[{stats['min_ms']:.0f}-{stats['max_ms']:.0f}]ms"
300
+ )
301
+
302
+ def reset(self) -> None:
303
+ """Reset the monitor, clearing all recorded patterns."""
304
+ self._patterns.clear()
305
+ self._current_pattern = None
306
+ self._current_step = None
307
+ self._start_time = time.perf_counter()
@@ -6,9 +6,8 @@
6
6
  Ragit Providers - LLM and Embedding providers for RAG optimization.
7
7
 
8
8
  Supported providers:
9
- - OllamaProvider: Connect to local or remote Ollama servers
9
+ - OllamaProvider: Connect to local or remote Ollama servers (supports nomic-embed-text)
10
10
  - FunctionProvider: Wrap custom embedding/LLM functions
11
- - SentenceTransformersProvider: Offline embedding (requires ragit[transformers])
12
11
 
13
12
  Base classes for implementing custom providers:
14
13
  - BaseLLMProvider: Abstract base for LLM providers
@@ -34,14 +33,3 @@ __all__ = [
34
33
  "OllamaProvider",
35
34
  "FunctionProvider",
36
35
  ]
37
-
38
- # Conditionally export SentenceTransformersProvider if available
39
- try:
40
- from ragit.providers.sentence_transformers import (
41
- SentenceTransformersProvider as SentenceTransformersProvider,
42
- )
43
-
44
- __all__ += ["SentenceTransformersProvider"]
45
- except ImportError:
46
- # sentence-transformers not installed, SentenceTransformersProvider not available
47
- pass