ragit 0.8.2__py3-none-any.whl → 0.11.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ragit/__init__.py +27 -15
- ragit/assistant.py +431 -40
- ragit/config.py +165 -22
- ragit/core/experiment/experiment.py +7 -1
- ragit/exceptions.py +271 -0
- ragit/loaders.py +200 -44
- ragit/logging.py +194 -0
- ragit/monitor.py +307 -0
- ragit/providers/__init__.py +1 -13
- ragit/providers/ollama.py +379 -121
- ragit/utils/__init__.py +0 -22
- ragit/version.py +1 -1
- {ragit-0.8.2.dist-info → ragit-0.11.0.dist-info}/METADATA +48 -25
- ragit-0.11.0.dist-info/RECORD +22 -0
- {ragit-0.8.2.dist-info → ragit-0.11.0.dist-info}/WHEEL +1 -1
- ragit/providers/sentence_transformers.py +0 -225
- ragit-0.8.2.dist-info/RECORD +0 -20
- {ragit-0.8.2.dist-info → ragit-0.11.0.dist-info}/licenses/LICENSE +0 -0
- {ragit-0.8.2.dist-info → ragit-0.11.0.dist-info}/top_level.txt +0 -0
ragit/loaders.py
CHANGED
@@ -6,15 +6,78 @@
 Document loading and chunking utilities.

 Provides simple functions to load documents from files and chunk text.
+
+Includes ai4rag-inspired patterns:
+- Auto-generated document IDs via SHA256 hash
+- Sequence numbering for chunk ordering
+- Deduplication via content hashing
 """

+import hashlib
 import re
 from pathlib import Path
-from typing import Any

 from ragit.core.experiment.experiment import Chunk, Document


+def generate_document_id(content: str) -> str:
+    """
+    Generate a unique document ID from content using SHA256 hash.
+
+    Pattern from ai4rag langchain_chunker.py.
+
+    Parameters
+    ----------
+    content : str
+        Document content to hash.
+
+    Returns
+    -------
+    str
+        16-character hex string (first 64 bits of SHA256).
+
+    Examples
+    --------
+    >>> doc_id = generate_document_id("Hello, world!")
+    >>> len(doc_id)
+    16
+    """
+    return hashlib.sha256(content.encode()).hexdigest()[:16]
+
+
+def deduplicate_documents(documents: list[Document]) -> list[Document]:
+    """
+    Remove duplicate documents based on content hash.
+
+    Pattern from ai4rag chroma.py.
+
+    Parameters
+    ----------
+    documents : list[Document]
+        Documents to deduplicate.
+
+    Returns
+    -------
+    list[Document]
+        Unique documents (first occurrence kept).
+
+    Examples
+    --------
+    >>> unique_docs = deduplicate_documents(docs)
+    >>> print(f"Removed {len(docs) - len(unique_docs)} duplicates")
+    """
+    seen_hashes: set[str] = set()
+    unique_docs: list[Document] = []
+
+    for doc in documents:
+        content_hash = generate_document_id(doc.content)
+        if content_hash not in seen_hashes:
+            seen_hashes.add(content_hash)
+            unique_docs.append(doc)
+
+    return unique_docs
+
+
 def load_text(path: str | Path) -> Document:
     """
     Load a single text file as a Document.
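The two new helpers compose naturally at ingestion time: the hash-derived ID makes duplicates cheap to detect before chunking. A minimal sketch using the functions added above (the corpus directory is illustrative):

    from pathlib import Path

    from ragit.loaders import deduplicate_documents, generate_document_id, load_text

    docs = [load_text(p) for p in Path("corpus").glob("*.txt")]
    unique_docs = deduplicate_documents(docs)
    print(f"Removed {len(docs) - len(unique_docs)} duplicates")

    # The same hash doubles as a stable document ID: 16 hex chars = 64 bits,
    # so accidental collisions are negligible at realistic corpus sizes.
    print(generate_document_id(unique_docs[0].content))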
@@ -77,11 +140,16 @@ def chunk_text(
     text: str,
     chunk_size: int = 512,
     chunk_overlap: int = 50,
-    doc_id: str = "doc",
-    metadata: dict[str, Any] | None = None,
+    doc_id: str | None = None,
+    include_metadata: bool = True,
 ) -> list[Chunk]:
     """
-    Split text into overlapping chunks.
+    Split text into overlapping chunks with rich metadata.
+
+    Includes ai4rag-inspired metadata:
+    - document_id: SHA256 hash for deduplication and window search
+    - sequence_number: Order within the document
+    - chunk_start/chunk_end: Character positions in original text

     Parameters
     ----------
@@ -91,37 +159,55 @@ def chunk_text(
         Maximum characters per chunk (default: 512).
     chunk_overlap : int
         Overlap between chunks (default: 50).
-    doc_id : str
-        Document ID for the chunks
-    metadata : dict[str, Any], optional
-        Metadata to attach to each chunk.
+    doc_id : str, optional
+        Document ID for the chunks. If None, generates from content hash.
+    include_metadata : bool
+        Include rich metadata in chunks (default: True).

     Returns
     -------
     list[Chunk]
-        List of text chunks.
+        List of text chunks with metadata.

     Examples
     --------
-    >>> chunks = chunk_text("Long document...", chunk_size=256)
+    >>> chunks = chunk_text("Long document...", chunk_size=256)
+    >>> print(chunks[0].metadata)
+    {'document_id': 'a1b2c3...', 'sequence_number': 0, 'chunk_start': 0, 'chunk_end': 256}
     """
     if chunk_overlap >= chunk_size:
         raise ValueError("chunk_overlap must be less than chunk_size")

+    # Generate document ID if not provided
+    effective_doc_id = doc_id or generate_document_id(text)
+
     chunks = []
     start = 0
-
-    chunk_metadata = metadata or {}
+    sequence_number = 0

     while start < len(text):
-        end = start + chunk_size
+        end = min(start + chunk_size, len(text))
         chunk_content = text[start:end].strip()

         if chunk_content:
+            metadata = {}
+            if include_metadata:
+                metadata = {
+                    "document_id": effective_doc_id,
+                    "sequence_number": sequence_number,
+                    "chunk_start": start,
+                    "chunk_end": end,
+                }
+
             chunks.append(
-                Chunk(content=chunk_content, doc_id=doc_id, chunk_index=len(chunks), metadata=chunk_metadata)
+                Chunk(
+                    content=chunk_content,
+                    doc_id=effective_doc_id,
+                    chunk_index=sequence_number,
+                    metadata=metadata,
+                )
             )
-
+            sequence_number += 1

         start = end - chunk_overlap
         if start >= len(text) - chunk_overlap:
@@ -130,9 +216,14 @@ def chunk_text(
     return chunks


-def chunk_document(doc: Document, chunk_size: int = 512, chunk_overlap: int = 50) -> list[Chunk]:
+def chunk_document(
+    doc: Document,
+    chunk_size: int = 512,
+    chunk_overlap: int = 50,
+    include_metadata: bool = True,
+) -> list[Chunk]:
     """
-    Split a Document into overlapping chunks.
+    Split a Document into overlapping chunks with rich metadata.

     Parameters
     ----------
@@ -142,17 +233,29 @@ def chunk_document(doc: Document, chunk_size: int = 512, chunk_overlap: int = 50
         Maximum characters per chunk.
     chunk_overlap : int
         Overlap between chunks.
+    include_metadata : bool
+        Include rich metadata in chunks (default: True).

     Returns
     -------
     list[Chunk]
-        List of chunks from the document.
+        List of chunks from the document with metadata.
     """
-    return chunk_text(doc.content, chunk_size, chunk_overlap, doc.id, doc.metadata)
+    chunks = chunk_text(doc.content, chunk_size, chunk_overlap, doc.id, include_metadata)
+
+    # Merge document metadata into chunk metadata
+    if doc.metadata and include_metadata:
+        for chunk in chunks:
+            chunk.metadata = {**doc.metadata, **chunk.metadata}
+
+    return chunks


 def chunk_by_separator(
-    text: str, separator: str = "\n\n", doc_id: str = "doc", metadata: dict[str, Any] | None = None
+    text: str,
+    separator: str = "\n\n",
+    doc_id: str | None = None,
+    include_metadata: bool = True,
 ) -> list[Chunk]:
     """
     Split text by a separator (e.g., paragraphs, sections).
@@ -163,64 +266,96 @@ def chunk_by_separator(
         Text to split.
     separator : str
         Separator string (default: double newline for paragraphs).
-    doc_id : str
-        Document ID for the chunks.
-    metadata : dict[str, Any], optional
-        Metadata to attach to each chunk.
+    doc_id : str, optional
+        Document ID for the chunks. If None, generates from content hash.
+    include_metadata : bool
+        Include rich metadata in chunks (default: True).

     Returns
     -------
     list[Chunk]
-        List of chunks.
+        List of chunks with metadata.

     Examples
     --------
     >>> chunks = chunk_by_separator(text, separator="\\n---\\n")
     """
+    effective_doc_id = doc_id or generate_document_id(text)
     parts = text.split(separator)
-    chunks = []
-    chunk_metadata = metadata or {}
+    chunks: list[Chunk] = []
+    current_pos = 0

-    for i, part in enumerate(parts):
+    for _idx, part in enumerate(parts):
         content = part.strip()
         if content:
-            chunks.append(Chunk(content=content, doc_id=doc_id, chunk_index=len(chunks), metadata=chunk_metadata))
+            metadata = {}
+            if include_metadata:
+                # Find actual position in original text
+                part_start = text.find(part, current_pos)
+                part_end = part_start + len(part) if part_start >= 0 else current_pos + len(part)
+                metadata = {
+                    "document_id": effective_doc_id,
+                    "sequence_number": len(chunks),
+                    "chunk_start": part_start if part_start >= 0 else current_pos,
+                    "chunk_end": part_end,
+                }
+                current_pos = part_end
+
+            chunks.append(
+                Chunk(
+                    content=content,
+                    doc_id=effective_doc_id,
+                    chunk_index=len(chunks),
+                    metadata=metadata,
+                )
+            )

     return chunks


-def chunk_rst_sections(text: str, doc_id: str = "doc", metadata: dict[str, Any] | None = None) -> list[Chunk]:
+def chunk_rst_sections(
+    text: str,
+    doc_id: str | None = None,
+    include_metadata: bool = True,
+) -> list[Chunk]:
     """
-    Split RST document by section headers.
+    Split RST document by section headers with rich metadata.

     Parameters
     ----------
     text : str
         RST document text.
-    doc_id : str
-        Document ID for the chunks.
-    metadata : dict[str, Any], optional
-        Metadata to attach to each chunk.
+    doc_id : str, optional
+        Document ID for the chunks. If None, generates from content hash.
+    include_metadata : bool
+        Include rich metadata in chunks (default: True).

     Returns
     -------
     list[Chunk]
-        List of section chunks.
+        List of section chunks with metadata.
     """
+    effective_doc_id = doc_id or generate_document_id(text)
+
     # Match RST section headers (title followed by underline of =, -, ~, etc.)
     pattern = r"\n([^\n]+)\n([=\-~`\'\"^_*+#]+)\n"
-    chunk_metadata = metadata or {}

     # Find all section positions
     matches = list(re.finditer(pattern, text))

     if not matches:
         # No sections found, return whole text as one chunk
-        content = text.strip()
-
-        if content:
-            return [Chunk(content=content, doc_id=doc_id, chunk_index=0, metadata=chunk_metadata)]
-        return []
+        if text.strip():
+            metadata = {}
+            if include_metadata:
+                metadata = {
+                    "document_id": effective_doc_id,
+                    "sequence_number": 0,
+                    "chunk_start": 0,
+                    "chunk_end": len(text),
+                }
+            return [Chunk(content=text.strip(), doc_id=effective_doc_id, chunk_index=0, metadata=metadata)]
+        return []

     chunks = []

@@ -229,7 +364,15 @@ def chunk_rst_sections(text: str, doc_id: str = "doc", metadata: dict[str, Any]
     if first_pos > 0:
         pre_content = text[:first_pos].strip()
         if pre_content:
-            chunks.append(Chunk(content=pre_content, doc_id=doc_id, chunk_index=0, metadata=chunk_metadata))
+            metadata = {}
+            if include_metadata:
+                metadata = {
+                    "document_id": effective_doc_id,
+                    "sequence_number": 0,
+                    "chunk_start": 0,
+                    "chunk_end": first_pos,
+                }
+            chunks.append(Chunk(content=pre_content, doc_id=effective_doc_id, chunk_index=0, metadata=metadata))

     # Extract each section
     for i, match in enumerate(matches):
@@ -238,8 +381,21 @@ def chunk_rst_sections(text: str, doc_id: str = "doc", metadata: dict[str, Any]

         section_content = text[start:end].strip()
         if section_content:
+            metadata = {}
+            if include_metadata:
+                metadata = {
+                    "document_id": effective_doc_id,
+                    "sequence_number": len(chunks),
+                    "chunk_start": start,
+                    "chunk_end": end,
+                }
             chunks.append(
-                Chunk(content=section_content, doc_id=doc_id, chunk_index=len(chunks), metadata=chunk_metadata)
+                Chunk(
+                    content=section_content,
+                    doc_id=effective_doc_id,
+                    chunk_index=len(chunks),
+                    metadata=metadata,
+                )
             )

     return chunks
ragit/logging.py
ADDED
@@ -0,0 +1,194 @@
+#
+# Copyright RODMENA LIMITED 2025
+# SPDX-License-Identifier: Apache-2.0
+#
+"""
+Structured logging for ragit.
+
+Provides consistent logging across all ragit components with:
+- Operation timing
+- Context tracking
+- Configurable log levels
+"""
+
+import logging
+import time
+from collections.abc import Callable, Generator
+from contextlib import contextmanager
+from functools import wraps
+from typing import Any, TypeVar
+
+# Create ragit logger
+logger = logging.getLogger("ragit")
+
+# Type variable for decorated functions
+F = TypeVar("F", bound=Callable[..., Any])
+
+
+def setup_logging(level: str = "INFO", format_string: str | None = None) -> None:
+    """Configure ragit logging.
+
+    Parameters
+    ----------
+    level : str
+        Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL).
+    format_string : str, optional
+        Custom format string. If None, uses default format.
+
+    Examples
+    --------
+    >>> from ragit.logging import setup_logging
+    >>> setup_logging("DEBUG")
+    """
+    logger.setLevel(level.upper())
+
+    # Only add handler if none exist
+    if not logger.handlers:
+        handler = logging.StreamHandler()
+        handler.setLevel(level.upper())
+
+        if format_string is None:
+            format_string = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+
+        formatter = logging.Formatter(format_string)
+        handler.setFormatter(formatter)
+        logger.addHandler(handler)
+
+
+@contextmanager
+def log_operation(operation: str, **context: Any) -> Generator[dict[str, Any], None, None]:
+    """Context manager for logging operations with timing.
+
+    Parameters
+    ----------
+    operation : str
+        Name of the operation being performed.
+    **context
+        Additional context to include in log messages.
+
+    Yields
+    ------
+    dict
+        Mutable dict to add additional context during the operation.
+
+    Examples
+    --------
+    >>> with log_operation("embed", model="nomic-embed-text") as ctx:
+    ...     result = provider.embed(text, model)
+    ...     ctx["dimensions"] = len(result.embedding)
+    """
+    start = time.perf_counter()
+    extra_context: dict[str, Any] = {}
+
+    # Build context string
+    ctx_str = ", ".join(f"{k}={v}" for k, v in context.items()) if context else ""
+
+    logger.debug(f"{operation}.start" + (f" [{ctx_str}]" if ctx_str else ""))
+
+    try:
+        yield extra_context
+        duration_ms = (time.perf_counter() - start) * 1000
+
+        # Combine original context with extra context
+        all_context = {**context, **extra_context, "duration_ms": f"{duration_ms:.2f}"}
+        ctx_str = ", ".join(f"{k}={v}" for k, v in all_context.items())
+
+        logger.info(f"{operation}.success [{ctx_str}]")
+    except Exception as e:
+        duration_ms = (time.perf_counter() - start) * 1000
+        all_context = {**context, **extra_context, "duration_ms": f"{duration_ms:.2f}", "error": str(e)}
+        ctx_str = ", ".join(f"{k}={v}" for k, v in all_context.items())
+
+        logger.error(f"{operation}.failed [{ctx_str}]", exc_info=True)
+        raise
+
+
+def log_method(operation: str) -> Callable[[F], F]:
+    """Decorator for logging method calls with timing.
+
+    Parameters
+    ----------
+    operation : str
+        Name of the operation for logging.
+
+    Returns
+    -------
+    Callable
+        Decorated function.
+
+    Examples
+    --------
+    >>> class MyProvider:
+    ...     @log_method("embed")
+    ...     def embed(self, text: str, model: str):
+    ...         ...
+    """
+
+    def decorator(func: F) -> F:
+        @wraps(func)
+        def wrapper(*args: Any, **kwargs: Any) -> Any:
+            with log_operation(operation, method=func.__name__):
+                return func(*args, **kwargs)
+
+        return wrapper  # type: ignore
+
+    return decorator
+
+
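setup_logging, log_operation, and log_method are designed to nest: the decorator is a thin wrapper over the context manager. A small usage sketch (the Embedder class and its fake embedding are illustrative):

    from ragit.logging import log_method, log_operation, setup_logging

    setup_logging("DEBUG")

    class Embedder:
        @log_method("embed")  # emits embed.start / embed.success with duration_ms
        def embed(self, text: str) -> list[float]:
            return [0.0] * 768  # stand-in for a real model call

    with log_operation("index", source="corpus") as ctx:
        vectors = [Embedder().embed(t) for t in ("alpha", "beta")]
        ctx["count"] = len(vectors)  # surfaces in the index.success log line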
+class LogContext:
+    """Context tracker for correlating related log messages.
+
+    Useful for tracing operations across multiple components.
+
+    Examples
+    --------
+    >>> ctx = LogContext("query-123")
+    >>> ctx.log("Starting retrieval", top_k=5)
+    >>> ctx.log("Retrieved chunks", count=3)
+    """
+
+    def __init__(self, request_id: str | None = None):
+        """Initialize log context.
+
+        Parameters
+        ----------
+        request_id : str, optional
+            Unique identifier for this context. Auto-generated if not provided.
+        """
+        self.request_id = request_id or f"req-{int(time.time() * 1000) % 100000}"
+        self._start_time = time.perf_counter()
+
+    def log(self, message: str, level: str = "INFO", **context: Any) -> None:
+        """Log a message with this context.
+
+        Parameters
+        ----------
+        message : str
+            Log message.
+        level : str
+            Log level (DEBUG, INFO, WARNING, ERROR).
+        **context
+            Additional context key-value pairs.
+        """
+        elapsed_ms = (time.perf_counter() - self._start_time) * 1000
+        ctx_str = ", ".join(f"{k}={v}" for k, v in context.items())
+        full_msg = f"[{self.request_id}] {message}" + (f" [{ctx_str}]" if ctx_str else "") + f" (+{elapsed_ms:.0f}ms)"
+
+        log_level = getattr(logging, level.upper(), logging.INFO)
+        logger.log(log_level, full_msg)
+
+    def debug(self, message: str, **context: Any) -> None:
+        """Log debug message."""
+        self.log(message, "DEBUG", **context)
+
+    def info(self, message: str, **context: Any) -> None:
+        """Log info message."""
+        self.log(message, "INFO", **context)
+
+    def warning(self, message: str, **context: Any) -> None:
+        """Log warning message."""
+        self.log(message, "WARNING", **context)
+
+    def error(self, message: str, **context: Any) -> None:
+        """Log error message."""
+        self.log(message, "ERROR", **context)
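LogContext threads one request id and a running elapsed time through related messages, so a single query can be traced across components. A sketch (the retrieval step is a placeholder, not a ragit call):

    from ragit.logging import LogContext, setup_logging

    setup_logging("INFO")

    ctx = LogContext("query-123")
    ctx.info("Starting retrieval", top_k=5)
    # ... run retrieval here ...
    ctx.info("Retrieved chunks", count=3)
    # Output lines share the [query-123] prefix and carry (+Nms) offsets,
    # so they can be grepped and ordered after the fact.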