dossier 1.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dossier-1.1.3/PKG-INFO +302 -0
- dossier-1.1.3/README.md +276 -0
- dossier-1.1.3/dossier/__init__.py +21 -0
- dossier-1.1.3/dossier/dossier.py +379 -0
- dossier-1.1.3/dossier/processors.py +109 -0
- dossier-1.1.3/dossier/py.typed +0 -0
- dossier-1.1.3/pyproject.toml +145 -0
dossier-1.1.3/PKG-INFO
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: dossier
|
|
3
|
+
Version: 1.1.3
|
|
4
|
+
Summary: A structured logging library for AI agents with session management and object unpacking
|
|
5
|
+
Keywords: logging,structlog,ai,agents,session-management
|
|
6
|
+
Author: Ricardo Decal
|
|
7
|
+
Author-email: Ricardo Decal <dossier-project@ricardodecal.com>
|
|
8
|
+
License-Expression: Apache-2.0
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
15
|
+
Classifier: Topic :: System :: Logging
|
|
16
|
+
Requires-Dist: beartype>=0.22.5
|
|
17
|
+
Requires-Dist: better-exceptions>=0.3.3
|
|
18
|
+
Requires-Dist: rich>=14.2.0
|
|
19
|
+
Requires-Dist: structlog>=25.5.0
|
|
20
|
+
Requires-Python: >=3.13
|
|
21
|
+
Project-URL: Documentation, https://dossier.ricardodecal.com/
|
|
22
|
+
Project-URL: Homepage, https://dossier.ricardodecal.com/
|
|
23
|
+
Project-URL: Issues, https://github.com/crypdick/dossier/issues
|
|
24
|
+
Project-URL: Repository, https://github.com/crypdick/dossier
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# dossier
|
|
28
|
+
|
|
29
|
+
A structured logging library with session management and object unpacking, built on [structlog](https://www.structlog.org/).
|
|
30
|
+
|
|
31
|
+
## Why `dossier`?
|
|
32
|
+
|
|
33
|
+
`structlog` is great, but while I was using it for AI agents, I found myself writing a lot of boilerplate code to handle session management and object unpacking:
|
|
34
|
+
|
|
35
|
+
- I want to organize my logs by agent session, so I can easily find logs for a specific session.
|
|
36
|
+
- I want to throw objects like dataclasses, Pydantic models, or any other object at the logger and let it handle unpacking them for structured logging.
|
|
37
|
+
|
|
38
|
+
With `dossier`, each session is automatically organized:
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
logs/
|
|
42
|
+
└── session_20251118_120000/
|
|
43
|
+
└── events.jsonl # Structured JSONL logs
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
And you can throw objects like dataclasses, Pydantic models, regular dicts, and `dossier` will handle the unpacking.
|
|
47
|
+
|
|
48
|
+
## Installation
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pip install dossier
|
|
52
|
+
# or
|
|
53
|
+
uv add dossier
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Quick Start
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
from dataclasses import dataclass
|
|
60
|
+
import dossier
|
|
61
|
+
|
|
62
|
+
log = dossier.get_session()
|
|
63
|
+
|
|
64
|
+
# Bind info to the logger for the entire session
|
|
65
|
+
log.bind(model="gpt-4", mode="agent", user_id="user_123", experiment="feature_test")
|
|
66
|
+
|
|
67
|
+
log.info("session_start")
|
|
68
|
+
# logs: {"event": "session_start", "timestamp": "2025-11-18T12:00:00.000Z", "level": "info",
|
|
69
|
+
# "model": "gpt-4", "mode": "agent", "user_id": "user_123", "experiment": "feature_test"}
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
# Unbind keys
|
|
73
|
+
log.unbind("mode", "user_id")
|
|
74
|
+
|
|
75
|
+
# Log similarly to structlog
|
|
76
|
+
log.info("user_message", content="What's the weather?", role="user")
|
|
77
|
+
|
|
78
|
+
# Or use dataclasses - event type auto-detected!
|
|
79
|
+
@dataclass
|
|
80
|
+
class ToolCall:
|
|
81
|
+
tool_name: str
|
|
82
|
+
arguments: dict
|
|
83
|
+
|
|
84
|
+
tool_call = ToolCall(tool_name="web_search", arguments={"q": "weather"})
|
|
85
|
+
log.info(tool_call) # Same as log.info("tool_call", tool_name=tool_call.tool_name, arguments=tool_call.arguments)
|
|
86
|
+
```
|
|
87
|
+
Get current session information:
|
|
88
|
+
|
|
89
|
+
```python
|
|
90
|
+
session_id = log.get_session_id()
|
|
91
|
+
session_dir = log.get_session_path()
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Reusing a session
|
|
95
|
+
|
|
96
|
+
`dossier` implements a session registry similar to Python's standard `logging.getLogger(name)`. This means you can retrieve the same session instance from anywhere in your application using its `session_id` without needing to pass it around or set global variables.
|
|
97
|
+
|
|
98
|
+
**How it works:** Session IDs are simple identifiers (like `"main"`), while log directories are automatically timestamped (like `main_20251118_120000/`). This gives you easy session retrieval while maintaining chronological log organization.
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
from dossier import get_session
|
|
102
|
+
|
|
103
|
+
# First call creates the session (creates logs/main_TIMESTAMP/)
|
|
104
|
+
logger = get_session(session_id="main")
|
|
105
|
+
logger.bind(app_version="1.0.0", user_id="user_123")
|
|
106
|
+
|
|
107
|
+
# Later, anywhere else in your app: this returns the same instance
|
|
108
|
+
logger2 = get_session(session_id="main")
|
|
109
|
+
assert logger is logger2 # True!
|
|
110
|
+
|
|
111
|
+
# Log to logs/main_NEW_TIMESTAMP/
|
|
112
|
+
logger3 = get_session(session_id="main", force_new=True)
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Sessions are isolated from each other by using different session_ids.
|
|
116
|
+
|
|
117
|
+
## Namespaced logging
|
|
118
|
+
|
|
119
|
+
Route logs to different files within the same session using the `namespace` parameter. This is useful for organizing logs by component, worker, or module:
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
from dossier import get_session
|
|
123
|
+
|
|
124
|
+
logger = get_session() # logs to logs/session_TIMESTAMP/events.jsonl
|
|
125
|
+
|
|
126
|
+
logger.info("task_started", namespace="worker") # logs to logs/session_TIMESTAMP/worker.jsonl
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Custom processors
|
|
130
|
+
|
|
131
|
+
Dossier allows you to register custom structlog processors for advanced use cases like cost tracking, metrics collection, or adding custom fields.
|
|
132
|
+
|
|
133
|
+
#### Function processors
|
|
134
|
+
|
|
135
|
+
Simple stateless processors that add fields or transform data:
|
|
136
|
+
|
|
137
|
+
```python
|
|
138
|
+
from dossier import get_session
|
|
139
|
+
|
|
140
|
+
# Add a custom field to every log
|
|
141
|
+
def add_hostname(logger, method_name, event_dict):
|
|
142
|
+
import socket
|
|
143
|
+
event_dict["hostname"] = socket.gethostname()
|
|
144
|
+
return event_dict
|
|
145
|
+
|
|
146
|
+
# Add environment info
|
|
147
|
+
def add_environment(logger, method_name, event_dict):
|
|
148
|
+
import os
|
|
149
|
+
event_dict["environment"] = os.environ.get("ENV", "development")
|
|
150
|
+
return event_dict
|
|
151
|
+
|
|
152
|
+
logger = get_session(
|
|
153
|
+
log_dir="logs",
|
|
154
|
+
processors=[add_hostname, add_environment],
|
|
155
|
+
)
|
|
156
|
+
logger.bind(model="gpt-4")
|
|
157
|
+
logger.info("test_event")
|
|
158
|
+
# Output includes: hostname, environment, model
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
### Stateful processors
|
|
162
|
+
|
|
163
|
+
For tracking state across log calls (like token counting, cost tracking, etc.):
|
|
164
|
+
|
|
165
|
+
```python
|
|
166
|
+
from dossier import get_session
|
|
167
|
+
|
|
168
|
+
class TokenCounter:
|
|
169
|
+
"""Track cumulative token usage across the session"""
|
|
170
|
+
|
|
171
|
+
def __init__(self):
|
|
172
|
+
self.total_tokens = 0
|
|
173
|
+
self.call_count = 0
|
|
174
|
+
|
|
175
|
+
def __call__(self, logger, method_name, event_dict):
|
|
176
|
+
# Only process token_usage events
|
|
177
|
+
if "input_tokens" in event_dict and "output_tokens" in event_dict:
|
|
178
|
+
total = event_dict["input_tokens"] + event_dict["output_tokens"]
|
|
179
|
+
self.total_tokens += total
|
|
180
|
+
self.call_count += 1
|
|
181
|
+
|
|
182
|
+
# Add cumulative info to the log
|
|
183
|
+
event_dict["cumulative_tokens"] = self.total_tokens
|
|
184
|
+
event_dict["token_call_count"] = self.call_count
|
|
185
|
+
|
|
186
|
+
return event_dict
|
|
187
|
+
|
|
188
|
+
# Create the counter instance
|
|
189
|
+
counter = TokenCounter()
|
|
190
|
+
|
|
191
|
+
logger = get_session(log_dir="logs", processors=[counter])
|
|
192
|
+
logger.bind(model="gpt-4")
|
|
193
|
+
|
|
194
|
+
# Log token usage
|
|
195
|
+
logger.info("token_usage", input_tokens=100, output_tokens=50)
|
|
196
|
+
logger.info("token_usage", input_tokens=200, output_tokens=100)
|
|
197
|
+
|
|
198
|
+
# Access the counter's state
|
|
199
|
+
print(f"Total tokens used: {counter.total_tokens}") # 450
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
#### Real-World Example: Cost Tracker
|
|
203
|
+
|
|
204
|
+
Here's a complete cost tracking processor similar to OpenAI's pricing:
|
|
205
|
+
|
|
206
|
+
```python
|
|
207
|
+
from dossier import get_session
|
|
208
|
+
|
|
209
|
+
# Pricing per million tokens (USD)
|
|
210
|
+
PRICING = {
|
|
211
|
+
"gpt-4": {"input": 30.00, "output": 60.00},
|
|
212
|
+
"gpt-4-turbo": {"input": 10.00, "output": 30.00},
|
|
213
|
+
"gpt-3.5-turbo": {"input": 0.50, "output": 1.50},
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
class CostTracker:
|
|
217
|
+
"""Track API costs across the session"""
|
|
218
|
+
|
|
219
|
+
def __init__(self):
|
|
220
|
+
self.total_cost = 0.0
|
|
221
|
+
self.total_calls = 0
|
|
222
|
+
|
|
223
|
+
def __call__(self, logger, method_name, event_dict):
|
|
224
|
+
# Process token_usage events
|
|
225
|
+
if event_dict.get("event") == "token_usage":
|
|
226
|
+
model = event_dict.get("model", "gpt-4")
|
|
227
|
+
input_tokens = event_dict.get("input_tokens", 0)
|
|
228
|
+
output_tokens = event_dict.get("output_tokens", 0)
|
|
229
|
+
|
|
230
|
+
if model in PRICING:
|
|
231
|
+
pricing = PRICING[model]
|
|
232
|
+
cost = (
|
|
233
|
+
(input_tokens / 1_000_000) * pricing["input"] +
|
|
234
|
+
(output_tokens / 1_000_000) * pricing["output"]
|
|
235
|
+
)
|
|
236
|
+
self.total_cost += cost
|
|
237
|
+
self.total_calls += 1
|
|
238
|
+
|
|
239
|
+
# Add cost info to log
|
|
240
|
+
event_dict["call_cost_usd"] = round(cost, 6)
|
|
241
|
+
event_dict["cumulative_cost_usd"] = round(self.total_cost, 6)
|
|
242
|
+
|
|
243
|
+
return event_dict
|
|
244
|
+
|
|
245
|
+
def get_summary(self):
|
|
246
|
+
"""Get formatted summary"""
|
|
247
|
+
return f"Total cost: ${self.total_cost:.4f} across {self.total_calls} calls"
|
|
248
|
+
|
|
249
|
+
# Use the cost tracker
|
|
250
|
+
cost_tracker = CostTracker()
|
|
251
|
+
logger = get_session(
|
|
252
|
+
log_dir="logs",
|
|
253
|
+
processors=[cost_tracker],
|
|
254
|
+
)
|
|
255
|
+
logger.bind(model="gpt-4-turbo")
|
|
256
|
+
|
|
257
|
+
# Log some API usage
|
|
258
|
+
logger.info("token_usage", model="gpt-4-turbo", input_tokens=1000, output_tokens=500)
|
|
259
|
+
logger.info("token_usage", model="gpt-4-turbo", input_tokens=2000, output_tokens=1000)
|
|
260
|
+
|
|
261
|
+
# Get cost summary
|
|
262
|
+
print(cost_tracker.get_summary())
|
|
263
|
+
# Output: Total cost: $0.0500 across 2 calls
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
## Integrations
|
|
267
|
+
|
|
268
|
+
Dossier automatically unpacks objects from popular libraries.
|
|
269
|
+
|
|
270
|
+
### LangChain
|
|
271
|
+
|
|
272
|
+
Works seamlessly with LangChain objects:
|
|
273
|
+
|
|
274
|
+
```python
|
|
275
|
+
from langchain_core.messages import HumanMessage, AIMessage
|
|
276
|
+
|
|
277
|
+
user_msg = HumanMessage(content="What's 2+2?")
|
|
278
|
+
logger.info(user_msg) # Event type: "human_message"
|
|
279
|
+
|
|
280
|
+
ai_msg = AIMessage(content="4")
|
|
281
|
+
logger.info(ai_msg) # Event type: "ai_message"
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
### Pydantic
|
|
285
|
+
|
|
286
|
+
```python
|
|
287
|
+
from pydantic import BaseModel
|
|
288
|
+
|
|
289
|
+
class RequestModel(BaseModel):
|
|
290
|
+
method: str
|
|
291
|
+
path: str
|
|
292
|
+
body: dict
|
|
293
|
+
|
|
294
|
+
request = RequestModel(method="POST", path="/api/chat", body={"msg": "hi"})
|
|
295
|
+
logger.info(request) # Auto-unpacks to flat dict
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
## License
|
|
301
|
+
|
|
302
|
+
Apache 2.0
|
dossier-1.1.3/README.md
ADDED
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
# dossier
|
|
2
|
+
|
|
3
|
+
A structured logging library with session management and object unpacking, built on [structlog](https://www.structlog.org/).
|
|
4
|
+
|
|
5
|
+
## Why `dossier`?
|
|
6
|
+
|
|
7
|
+
`structlog` is great, but while I was using it for AI agents, I found myself writing a lot of boilerplate code to handle session management and object unpacking:
|
|
8
|
+
|
|
9
|
+
- I want to organize my logs by agent session, so I can easily find logs for a specific session.
|
|
10
|
+
- I want to throw objects like dataclasses, Pydantic models, or any other object at the logger and let it handle unpacking them for structured logging.
|
|
11
|
+
|
|
12
|
+
With `dossier`, each session is automatically organized:
|
|
13
|
+
|
|
14
|
+
```
|
|
15
|
+
logs/
|
|
16
|
+
└── session_20251118_120000/
|
|
17
|
+
└── events.jsonl # Structured JSONL logs
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
And you can throw objects like dataclasses, Pydantic models, regular dicts, and `dossier` will handle the unpacking.
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install dossier
|
|
26
|
+
# or
|
|
27
|
+
uv add dossier
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Quick Start
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
from dataclasses import dataclass
|
|
34
|
+
import dossier
|
|
35
|
+
|
|
36
|
+
log = dossier.get_session()
|
|
37
|
+
|
|
38
|
+
# Bind info to the logger for the entire session
|
|
39
|
+
log.bind(model="gpt-4", mode="agent", user_id="user_123", experiment="feature_test")
|
|
40
|
+
|
|
41
|
+
log.info("session_start")
|
|
42
|
+
# logs: {"event": "session_start", "timestamp": "2025-11-18T12:00:00.000Z", "level": "info",
|
|
43
|
+
# "model": "gpt-4", "mode": "agent", "user_id": "user_123", "experiment": "feature_test"}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# Unbind keys
|
|
47
|
+
log.unbind("mode", "user_id")
|
|
48
|
+
|
|
49
|
+
# Log similarly to structlog
|
|
50
|
+
log.info("user_message", content="What's the weather?", role="user")
|
|
51
|
+
|
|
52
|
+
# Or use dataclasses - event type auto-detected!
|
|
53
|
+
@dataclass
|
|
54
|
+
class ToolCall:
|
|
55
|
+
tool_name: str
|
|
56
|
+
arguments: dict
|
|
57
|
+
|
|
58
|
+
tool_call = ToolCall(tool_name="web_search", arguments={"q": "weather"})
|
|
59
|
+
log.info(tool_call) # Same as log.info("tool_call", tool_name=tool_call.tool_name, arguments=tool_call.arguments)
|
|
60
|
+
```
|
|
61
|
+
Get current session information:
|
|
62
|
+
|
|
63
|
+
```python
|
|
64
|
+
session_id = log.get_session_id()
|
|
65
|
+
session_dir = log.get_session_path()
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Reusing a session
|
|
69
|
+
|
|
70
|
+
`dossier` implements a session registry similar to Python's standard `logging.getLogger(name)`. This means you can retrieve the same session instance from anywhere in your application using its `session_id` without needing to pass it around or set global variables.
|
|
71
|
+
|
|
72
|
+
**How it works:** Session IDs are simple identifiers (like `"main"`), while log directories are automatically timestamped (like `main_20251118_120000/`). This gives you easy session retrieval while maintaining chronological log organization.
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
from dossier import get_session
|
|
76
|
+
|
|
77
|
+
# First call creates the session (creates logs/main_TIMESTAMP/)
|
|
78
|
+
logger = get_session(session_id="main")
|
|
79
|
+
logger.bind(app_version="1.0.0", user_id="user_123")
|
|
80
|
+
|
|
81
|
+
# Later, anywhere else in your app: this returns the same instance
|
|
82
|
+
logger2 = get_session(session_id="main")
|
|
83
|
+
assert logger is logger2 # True!
|
|
84
|
+
|
|
85
|
+
# Log to logs/main_NEW_TIMESTAMP/
|
|
86
|
+
logger3 = get_session(session_id="main", force_new=True)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Sessions are isolated from each other by using different session_ids.
|
|
90
|
+
|
|
91
|
+
## Namespaced logging
|
|
92
|
+
|
|
93
|
+
Route logs to different files within the same session using the `namespace` parameter. This is useful for organizing logs by component, worker, or module:
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
from dossier import get_session
|
|
97
|
+
|
|
98
|
+
logger = get_session() # logs to logs/session_TIMESTAMP/events.jsonl
|
|
99
|
+
|
|
100
|
+
logger.info("task_started", namespace="worker") # logs to logs/session_TIMESTAMP/worker.jsonl
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Custom processors
|
|
104
|
+
|
|
105
|
+
Dossier allows you to register custom structlog processors for advanced use cases like cost tracking, metrics collection, or adding custom fields.
|
|
106
|
+
|
|
107
|
+
### Function processors
|
|
108
|
+
|
|
109
|
+
Simple stateless processors that add fields or transform data:
|
|
110
|
+
|
|
111
|
+
```python
|
|
112
|
+
from dossier import get_session
|
|
113
|
+
|
|
114
|
+
# Add a custom field to every log
|
|
115
|
+
def add_hostname(logger, method_name, event_dict):
|
|
116
|
+
import socket
|
|
117
|
+
event_dict["hostname"] = socket.gethostname()
|
|
118
|
+
return event_dict
|
|
119
|
+
|
|
120
|
+
# Add environment info
|
|
121
|
+
def add_environment(logger, method_name, event_dict):
|
|
122
|
+
import os
|
|
123
|
+
event_dict["environment"] = os.environ.get("ENV", "development")
|
|
124
|
+
return event_dict
|
|
125
|
+
|
|
126
|
+
logger = get_session(
|
|
127
|
+
log_dir="logs",
|
|
128
|
+
processors=[add_hostname, add_environment],
|
|
129
|
+
)
|
|
130
|
+
logger.bind(model="gpt-4")
|
|
131
|
+
logger.info("test_event")
|
|
132
|
+
# Output includes: hostname, environment, model
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
### Stateful processors
|
|
136
|
+
|
|
137
|
+
For tracking state across log calls (like token counting, cost tracking, etc.):
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
from dossier import get_session
|
|
141
|
+
|
|
142
|
+
class TokenCounter:
|
|
143
|
+
"""Track cumulative token usage across the session"""
|
|
144
|
+
|
|
145
|
+
def __init__(self):
|
|
146
|
+
self.total_tokens = 0
|
|
147
|
+
self.call_count = 0
|
|
148
|
+
|
|
149
|
+
def __call__(self, logger, method_name, event_dict):
|
|
150
|
+
# Only process token_usage events
|
|
151
|
+
if "input_tokens" in event_dict and "output_tokens" in event_dict:
|
|
152
|
+
total = event_dict["input_tokens"] + event_dict["output_tokens"]
|
|
153
|
+
self.total_tokens += total
|
|
154
|
+
self.call_count += 1
|
|
155
|
+
|
|
156
|
+
# Add cumulative info to the log
|
|
157
|
+
event_dict["cumulative_tokens"] = self.total_tokens
|
|
158
|
+
event_dict["token_call_count"] = self.call_count
|
|
159
|
+
|
|
160
|
+
return event_dict
|
|
161
|
+
|
|
162
|
+
# Create the counter instance
|
|
163
|
+
counter = TokenCounter()
|
|
164
|
+
|
|
165
|
+
logger = get_session(log_dir="logs", processors=[counter])
|
|
166
|
+
logger.bind(model="gpt-4")
|
|
167
|
+
|
|
168
|
+
# Log token usage
|
|
169
|
+
logger.info("token_usage", input_tokens=100, output_tokens=50)
|
|
170
|
+
logger.info("token_usage", input_tokens=200, output_tokens=100)
|
|
171
|
+
|
|
172
|
+
# Access the counter's state
|
|
173
|
+
print(f"Total tokens used: {counter.total_tokens}") # 450
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
#### Real-World Example: Cost Tracker
|
|
177
|
+
|
|
178
|
+
Here's a complete cost tracking processor similar to OpenAI's pricing:
|
|
179
|
+
|
|
180
|
+
```python
|
|
181
|
+
from dossier import get_session
|
|
182
|
+
|
|
183
|
+
# Pricing per million tokens (USD)
|
|
184
|
+
PRICING = {
|
|
185
|
+
"gpt-4": {"input": 30.00, "output": 60.00},
|
|
186
|
+
"gpt-4-turbo": {"input": 10.00, "output": 30.00},
|
|
187
|
+
"gpt-3.5-turbo": {"input": 0.50, "output": 1.50},
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
class CostTracker:
|
|
191
|
+
"""Track API costs across the session"""
|
|
192
|
+
|
|
193
|
+
def __init__(self):
|
|
194
|
+
self.total_cost = 0.0
|
|
195
|
+
self.total_calls = 0
|
|
196
|
+
|
|
197
|
+
def __call__(self, logger, method_name, event_dict):
|
|
198
|
+
# Process token_usage events
|
|
199
|
+
if event_dict.get("event") == "token_usage":
|
|
200
|
+
model = event_dict.get("model", "gpt-4")
|
|
201
|
+
input_tokens = event_dict.get("input_tokens", 0)
|
|
202
|
+
output_tokens = event_dict.get("output_tokens", 0)
|
|
203
|
+
|
|
204
|
+
if model in PRICING:
|
|
205
|
+
pricing = PRICING[model]
|
|
206
|
+
cost = (
|
|
207
|
+
(input_tokens / 1_000_000) * pricing["input"] +
|
|
208
|
+
(output_tokens / 1_000_000) * pricing["output"]
|
|
209
|
+
)
|
|
210
|
+
self.total_cost += cost
|
|
211
|
+
self.total_calls += 1
|
|
212
|
+
|
|
213
|
+
# Add cost info to log
|
|
214
|
+
event_dict["call_cost_usd"] = round(cost, 6)
|
|
215
|
+
event_dict["cumulative_cost_usd"] = round(self.total_cost, 6)
|
|
216
|
+
|
|
217
|
+
return event_dict
|
|
218
|
+
|
|
219
|
+
def get_summary(self):
|
|
220
|
+
"""Get formatted summary"""
|
|
221
|
+
return f"Total cost: ${self.total_cost:.4f} across {self.total_calls} calls"
|
|
222
|
+
|
|
223
|
+
# Use the cost tracker
|
|
224
|
+
cost_tracker = CostTracker()
|
|
225
|
+
logger = get_session(
|
|
226
|
+
log_dir="logs",
|
|
227
|
+
processors=[cost_tracker],
|
|
228
|
+
)
|
|
229
|
+
logger.bind(model="gpt-4-turbo")
|
|
230
|
+
|
|
231
|
+
# Log some API usage
|
|
232
|
+
logger.info("token_usage", model="gpt-4-turbo", input_tokens=1000, output_tokens=500)
|
|
233
|
+
logger.info("token_usage", model="gpt-4-turbo", input_tokens=2000, output_tokens=1000)
|
|
234
|
+
|
|
235
|
+
# Get cost summary
|
|
236
|
+
print(cost_tracker.get_summary())
|
|
237
|
+
# Output: Total cost: $0.0500 across 2 calls
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
## Integrations
|
|
241
|
+
|
|
242
|
+
Dossier automatically unpacks objects from popular libraries.
|
|
243
|
+
|
|
244
|
+
### LangChain
|
|
245
|
+
|
|
246
|
+
Works seamlessly with LangChain objects:
|
|
247
|
+
|
|
248
|
+
```python
|
|
249
|
+
from langchain_core.messages import HumanMessage, AIMessage
|
|
250
|
+
|
|
251
|
+
user_msg = HumanMessage(content="What's 2+2?")
|
|
252
|
+
logger.info(user_msg) # Event type: "human_message"
|
|
253
|
+
|
|
254
|
+
ai_msg = AIMessage(content="4")
|
|
255
|
+
logger.info(ai_msg) # Event type: "ai_message"
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
### Pydantic
|
|
259
|
+
|
|
260
|
+
```python
|
|
261
|
+
from pydantic import BaseModel
|
|
262
|
+
|
|
263
|
+
class RequestModel(BaseModel):
|
|
264
|
+
method: str
|
|
265
|
+
path: str
|
|
266
|
+
body: dict
|
|
267
|
+
|
|
268
|
+
request = RequestModel(method="POST", path="/api/chat", body={"msg": "hi"})
|
|
269
|
+
logger.info(request) # Auto-unpacks to flat dict
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
## License
|
|
275
|
+
|
|
276
|
+
Apache 2.0
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from beartype.claw import beartype_this_package

# Enable beartype runtime type-checking for all modules in this package.
# This must run BEFORE any dossier submodule is imported, which is why the
# package imports below deliberately come after it (hence the E402 suppression).
beartype_this_package()

from dossier.dossier import (  # noqa: E402
    close_logger,  # deprecated alias
    close_session,
    get_logger,  # deprecated alias
    get_session,
)

# Note: Dossier class is internal - use get_session() instead

__all__ = [
    "get_session",
    "close_session",
    # Backward compatibility
    "get_logger",
    "close_logger",
]
|
|
@@ -0,0 +1,379 @@
|
|
|
1
|
+
import functools
|
|
2
|
+
import logging
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, cast
|
|
7
|
+
|
|
8
|
+
import structlog
|
|
9
|
+
|
|
10
|
+
from dossier.processors import (
|
|
11
|
+
make_json_safe,
|
|
12
|
+
unpack_dataclasses,
|
|
13
|
+
unpack_generic_objects,
|
|
14
|
+
unpack_pydantic_models,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
# Module-level cache for logger instances (similar to logging.getLogger).
# NOTE(review): presumably keyed by session_id so repeated get_session() calls
# return the same instance — confirm against get_session(), not visible here.
_logger_cache: dict[str, Any] = {}

# Track if structlog has been configured globally; structlog.configure() is
# process-wide, so it must only ever run once (see _ensure_structlog_configured).
_structlog_configured = False
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _infer_event_type_from_object(obj: Any) -> str | None:
|
|
25
|
+
"""Infer event type from object class name."""
|
|
26
|
+
if isinstance(obj, (str, int, float, bool, type(None), list, tuple, dict)):
|
|
27
|
+
return None
|
|
28
|
+
return type(obj).__name__
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def infer_event(func: Callable[..., Any]) -> Callable[..., Any]:
|
|
32
|
+
"""
|
|
33
|
+
Decorator that handles event type inference from objects.
|
|
34
|
+
|
|
35
|
+
If the first arg (event) is an object (not a string):
|
|
36
|
+
- Infers event type from class name
|
|
37
|
+
- Adds object to kwargs as "_obj" for unpacking processor
|
|
38
|
+
- Calls the underlying method with inferred event type
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
@functools.wraps(func)
|
|
42
|
+
def wrapper(self: "Dossier", event: str | Any | None = None, **kwargs: Any) -> Any:
|
|
43
|
+
# Handle event type inference
|
|
44
|
+
if event is not None and not isinstance(event, str):
|
|
45
|
+
# Event is an object - infer type
|
|
46
|
+
inferred = _infer_event_type_from_object(event)
|
|
47
|
+
if inferred is None:
|
|
48
|
+
raise ValueError(
|
|
49
|
+
"Must provide event type string or object with inferrable type"
|
|
50
|
+
)
|
|
51
|
+
kwargs["_obj"] = event
|
|
52
|
+
event = inferred
|
|
53
|
+
elif event is None:
|
|
54
|
+
raise ValueError("Must provide event string or object")
|
|
55
|
+
|
|
56
|
+
# Call the original method
|
|
57
|
+
return func(self, event, **kwargs)
|
|
58
|
+
|
|
59
|
+
return wrapper
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class Dossier:
|
|
63
|
+
"""
|
|
64
|
+
Session-based structured logger with smart object unpacking and flexible metadata.
|
|
65
|
+
|
|
66
|
+
Wraps structlog for session management and automatic object unpacking.
|
|
67
|
+
"""
|
|
68
|
+
|
|
69
|
+
def __init__(
|
|
70
|
+
self,
|
|
71
|
+
session_id: str,
|
|
72
|
+
session_dir: Path,
|
|
73
|
+
stdlib_logger_base_name: str,
|
|
74
|
+
processors: list[Any] | None = None,
|
|
75
|
+
) -> None:
|
|
76
|
+
"""Internal initialization - use get_session() instead."""
|
|
77
|
+
self.session_id = session_id
|
|
78
|
+
self.session_dir = session_dir
|
|
79
|
+
self._stdlib_logger_base_name = stdlib_logger_base_name
|
|
80
|
+
self._processors = processors or []
|
|
81
|
+
self._namespaced_loggers: dict[str, Any] = {}
|
|
82
|
+
|
|
83
|
+
def _resolve_namespace(self, namespace: str | None) -> str:
|
|
84
|
+
"""Resolve namespace to a canonical string, defaulting to 'events'."""
|
|
85
|
+
return "events" if not namespace else namespace
|
|
86
|
+
|
|
87
|
+
def _get_namespaced_logger(self, namespace: str | None) -> Any | None:
|
|
88
|
+
"""Get a namespaced logger if it exists, return None otherwise."""
|
|
89
|
+
resolved = self._resolve_namespace(namespace)
|
|
90
|
+
return self._namespaced_loggers.get(resolved)
|
|
91
|
+
|
|
92
|
+
def _set_namespaced_logger(self, namespace: str | None, logger: Any) -> None:
|
|
93
|
+
"""Set/update a namespaced logger in the cache."""
|
|
94
|
+
resolved = self._resolve_namespace(namespace)
|
|
95
|
+
self._namespaced_loggers[resolved] = logger
|
|
96
|
+
|
|
97
|
+
def _get_or_create_namespaced_logger(self, namespace: str | None) -> Any:
|
|
98
|
+
"""Get or create a namespaced logger for routing logs to a separate file."""
|
|
99
|
+
# Return cached logger if it exists
|
|
100
|
+
cached = self._get_namespaced_logger(namespace)
|
|
101
|
+
if cached is not None:
|
|
102
|
+
return cached
|
|
103
|
+
|
|
104
|
+
# Create new namespaced logger
|
|
105
|
+
resolved = self._resolve_namespace(namespace)
|
|
106
|
+
log_file = self.session_dir / f"{resolved}.jsonl"
|
|
107
|
+
stdlib_logger_name = f"{self._stdlib_logger_base_name}.{resolved}"
|
|
108
|
+
|
|
109
|
+
structlog_logger = _create_logger_infrastructure(
|
|
110
|
+
log_file=log_file,
|
|
111
|
+
stdlib_logger_name=stdlib_logger_name,
|
|
112
|
+
processors=self._processors,
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
# Cache the logger using the setter
|
|
116
|
+
self._set_namespaced_logger(namespace, structlog_logger)
|
|
117
|
+
return structlog_logger
|
|
118
|
+
|
|
119
|
+
def _route_log(
|
|
120
|
+
self, method_name: str, event: str | Any | None, **kwargs: Any
|
|
121
|
+
) -> Any:
|
|
122
|
+
"""Route log to appropriate logger based on namespace kwarg."""
|
|
123
|
+
namespace = kwargs.pop("namespace", None)
|
|
124
|
+
logger = self._get_or_create_namespaced_logger(namespace)
|
|
125
|
+
|
|
126
|
+
# Call the appropriate log method
|
|
127
|
+
log_method = getattr(logger, method_name)
|
|
128
|
+
return log_method(event, **kwargs)
|
|
129
|
+
|
|
130
|
+
@infer_event
|
|
131
|
+
def info(self, event: str | Any | None = None, **kwargs: Any) -> Any:
|
|
132
|
+
"""Log info-level event."""
|
|
133
|
+
return self._route_log("info", event, **kwargs)
|
|
134
|
+
|
|
135
|
+
@infer_event
|
|
136
|
+
def error(self, event: str | Any | None = None, **kwargs: Any) -> Any:
|
|
137
|
+
"""Log error-level event."""
|
|
138
|
+
return self._route_log("error", event, **kwargs)
|
|
139
|
+
|
|
140
|
+
@infer_event
|
|
141
|
+
def debug(self, event: str | Any | None = None, **kwargs: Any) -> Any:
|
|
142
|
+
"""Log debug-level event."""
|
|
143
|
+
return self._route_log("debug", event, **kwargs)
|
|
144
|
+
|
|
145
|
+
@infer_event
|
|
146
|
+
def warning(self, event: str | Any | None = None, **kwargs: Any) -> Any:
|
|
147
|
+
"""Log warning-level event"""
|
|
148
|
+
return self._route_log("warning", event, **kwargs)
|
|
149
|
+
|
|
150
|
+
def bind(self, namespace: str | None = None, **kwargs: Any) -> "Dossier":
|
|
151
|
+
"""Add context to logger for subsequent log calls.
|
|
152
|
+
|
|
153
|
+
Example:
|
|
154
|
+
logger.bind(request_id="abc-123", user_id="user_456")
|
|
155
|
+
logger.info("processing_request")
|
|
156
|
+
# Includes: request_id="abc-123", user_id="user_456"
|
|
157
|
+
|
|
158
|
+
# Bind to specific namespace:
|
|
159
|
+
logger.bind(worker_id="w1", namespace="worker")
|
|
160
|
+
logger.info("task", namespace="worker") # Has worker_id="w1"
|
|
161
|
+
"""
|
|
162
|
+
bound_logger = self._get_or_create_namespaced_logger(namespace).bind(**kwargs)
|
|
163
|
+
|
|
164
|
+
self._set_namespaced_logger(namespace, bound_logger)
|
|
165
|
+
return self
|
|
166
|
+
|
|
167
|
+
def unbind(self, *keys: str, namespace: str | None = None) -> "Dossier":
|
|
168
|
+
"""Remove context keys from logger.
|
|
169
|
+
|
|
170
|
+
Example:
|
|
171
|
+
logger.bind(request_id="123", user_id="456")
|
|
172
|
+
logger.info("test") # Has both
|
|
173
|
+
|
|
174
|
+
logger.unbind("request_id")
|
|
175
|
+
logger.info("test2") # Only has user_id
|
|
176
|
+
|
|
177
|
+
# Unbind from specific namespace:
|
|
178
|
+
logger.unbind("worker_id", namespace="worker")
|
|
179
|
+
"""
|
|
180
|
+
# Get or create the logger for this namespace (defaults to "events" if None)
|
|
181
|
+
unbound_logger = self._get_or_create_namespaced_logger(namespace).unbind(*keys)
|
|
182
|
+
|
|
183
|
+
self._set_namespaced_logger(namespace, unbound_logger)
|
|
184
|
+
return self
|
|
185
|
+
|
|
186
|
+
def get_session_path(self) -> Path:
    """Return the directory this session writes its log files into."""
    return self.session_dir
|
|
189
|
+
|
|
190
|
+
def get_session_id(self) -> str:
    """Return the user-facing identifier of this session."""
    return self.session_id
|
|
193
|
+
|
|
194
|
+
def __enter__(self) -> "Dossier":
    """Enter the context manager; the session itself is the context object."""
    return self
|
|
196
|
+
|
|
197
|
+
def __exit__(
    self, exc_type: object | None, exc_val: object | None, exc_tb: object | None
) -> None:
    """Exit the context manager.

    No cleanup is performed here; returning None lets any active exception
    propagate to the caller.
    """
    return None
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _ensure_structlog_configured() -> None:
    """Install the shared structlog configuration exactly once per process."""
    global _structlog_configured
    if _structlog_configured:
        return

    # Order matters: object unpackers run before rendering so their output
    # flows through the timestamp/JSON-safety/rendering stages.
    structlog.configure(
        processors=[
            unpack_dataclasses,
            unpack_pydantic_models,
            unpack_generic_objects,
            structlog.stdlib.add_log_level,
            structlog.processors.TimeStamper(fmt="iso"),
            structlog.processors.format_exc_info,
            make_json_safe,
            structlog.processors.JSONRenderer(),
        ],
        wrapper_class=structlog.stdlib.BoundLogger,
        logger_factory=structlog.stdlib.LoggerFactory(),
        cache_logger_on_first_use=True,
    )

    _structlog_configured = True
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def _create_logger_infrastructure(
    log_file: Path,
    stdlib_logger_name: str,
    processors: list[Any] | None = None,
) -> Any:
    """Build a structlog logger whose output lands in ``log_file``.

    Args:
        log_file: Destination file for this namespace's log lines.
        stdlib_logger_name: Name of the backing standard-library logger.
        processors: Optional extra structlog processors applied via wrap_logger.

    Returns:
        A structlog logger (wrapped when custom processors were supplied).
    """
    _ensure_structlog_configured()

    # Raw message pass-through: structlog renders the line before it reaches
    # the stdlib handler, so no extra formatting is applied here.
    file_handler = logging.FileHandler(log_file)
    file_handler.setFormatter(logging.Formatter("%(message)s"))

    stdlib_logger = logging.getLogger(stdlib_logger_name)
    stdlib_logger.handlers.clear()
    stdlib_logger.addHandler(file_handler)
    stdlib_logger.setLevel(logging.DEBUG)
    stdlib_logger.propagate = False  # keep namespace output out of the root logger

    # Base structlog logger uses the global configuration.
    base_logger = structlog.get_logger(stdlib_logger_name)
    if not processors:
        return base_logger

    return structlog.wrap_logger(
        base_logger,
        wrapper_class=structlog.stdlib.BoundLogger,
        processors=processors,
    )
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def get_session(
    log_dir: str | Path = "logs",
    session_id: str | None = None,
    processors: list[Any] | None = None,
    force_new: bool = False,
) -> Dossier:
    """Get or create a dossier logging session, cached by ``session_id``.

    Works like ``logging.getLogger(name)``: the first call for a session_id
    creates the session; later calls with the same id return the cached
    instance. The session_id stays simple (e.g. "main", "production") while
    the on-disk directory is timestamped (e.g. "main_20251118_120000/") so
    runs stay chronologically organized.

    **Namespaced Logging:**
    Use the ``namespace`` kwarg on logging methods to route logs to separate
    files:
    ```python
    logger = get_session(session_id="main")
    logger.info("event")                            # logs to events.jsonl
    logger.info("event", namespace="worker")        # logs to worker.jsonl
    logger.info("event", namespace="api.requests")  # logs to api.requests.jsonl
    ```

    Args:
        log_dir: Directory that holds all session directories.
        session_id: Simple session identifier (e.g. "main", "worker"). When
            None, the single cached session is reused if exactly one exists;
            otherwise the id defaults to "session".
        processors: Optional list of custom structlog processors.
        force_new: If True, creates a new timestamped log directory even if
            session_id is already cached, replacing the cache entry. Useful
            for restarting sessions with the same name.

    Returns:
        Started Dossier instance (either cached or newly created).

    Example:
        # Simple session ID, timestamped directory created automatically
        logger = get_session(session_id="main")
        # Logs to: logs/main_TIMESTAMP/events.jsonl

        # Subsequent calls return the same instance
        assert logger is get_session(session_id="main")

        # Namespaced logging - single logger, multiple files
        logger.info("event", namespace="worker")  # logs/main_TIMESTAMP/worker.jsonl

        # Force new session - creates a new timestamped directory
        logger3 = get_session(session_id="main", force_new=True)
        # Logs to: logs/main_NEW_TIMESTAMP/events.jsonl; now cached under "main"

        # With context manager
        with get_session(session_id="task1") as logger:
            logger.info("log to temporary session")
    """
    base_dir = Path(log_dir)
    base_dir.mkdir(parents=True, exist_ok=True)

    if session_id is None:
        # Common case: a single anonymous call-site — reuse the lone session.
        if not force_new and len(_logger_cache) == 1:
            return cast(Dossier, next(iter(_logger_cache.values())))
        session_id = "session"

    if not force_new and session_id in _logger_cache:
        return cast(Dossier, _logger_cache[session_id])

    # On-disk directory name is the session id plus a creation timestamp.
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    dir_name = f"{session_id}_{stamp}"
    session_dir = base_dir / dir_name
    session_dir.mkdir(parents=True, exist_ok=True)

    # Per-namespace loggers are created lazily on first use by Dossier.
    instance = Dossier(
        session_id=session_id,
        session_dir=session_dir,
        stdlib_logger_base_name=f"session.{dir_name}",
        processors=processors,
    )

    # Cache before returning, keyed by the user-facing session id.
    _logger_cache[session_id] = instance
    return instance
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def close_session(session_id: str) -> None:
    """Remove a session from the cache and close all its namespaced loggers."""
    if session_id not in _logger_cache:
        return
    dossier = _logger_cache.pop(session_id)

    # Close every namespaced logger's file handlers (including "events").
    for namespace in dossier._namespaced_loggers:
        std = logging.getLogger(f"{dossier._stdlib_logger_base_name}.{namespace}")
        for handler in list(std.handlers):
            handler.close()
            std.removeHandler(handler)
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
# Backward compatibility aliases: keep the previous public names working.
get_logger = get_session
close_logger = close_session
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""Custom structlog processors for session logging."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
from dataclasses import asdict, is_dataclass
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _recursive_transform(value: Any, transform_func: Callable[[Any], Any]) -> Any:
    """Apply ``transform_func`` depth-first through lists, tuples and plain dicts.

    When ``transform_func`` replaces the value itself, the replacement is
    returned as-is (no further recursion into it).
    """
    replacement = transform_func(value)
    if replacement is not value:
        return replacement

    if isinstance(value, list):
        return [_recursive_transform(item, transform_func) for item in value]
    if isinstance(value, tuple):
        return tuple(_recursive_transform(item, transform_func) for item in value)
    # Only recurse into *plain* dicts; dict subclasses pass through untouched.
    if type(value) is dict:
        return {k: _recursive_transform(v, transform_func) for k, v in value.items()}

    return value


def make_json_safe(
    logger: Any, method_name: str, event_dict: dict[str, Any]
) -> dict[str, Any]:
    """Coerce every non-JSON-serializable value in the event dict to ``str``.

    Scalars (None/bool/int/float/str) pass through unchanged; lists, tuples
    and dicts are walked recursively; everything else becomes ``str(value)``.
    The event dict is updated in place and returned.
    """

    def coerce(value: Any) -> Any:
        if value is None or isinstance(value, (bool, int, float, str)):
            return value
        if isinstance(value, (list, dict, tuple)):
            # Containers are handled by _recursive_transform's own recursion.
            return value
        return str(value)

    for key in list(event_dict):
        event_dict[key] = _recursive_transform(event_dict[key], coerce)
    return event_dict


def _process_event_dict(
    event_dict: dict[str, Any], transform_func: Callable[[Any], Any]
) -> dict[str, Any]:
    """Build a new event dict, flattening values that ``transform_func`` expands.

    The reserved ``_obj`` key is transformed and, when the result is a dict,
    merged directly into the output (the ``_obj`` key itself disappears).
    Any other value that the transform turns into a dict is flattened under
    ``<key>_<field>`` names; everything else is transformed recursively.
    """
    flattened: dict[str, Any] = {}

    for key, value in event_dict.items():
        if key == "_obj":
            expanded = _recursive_transform(value, transform_func)
            if isinstance(expanded, dict):
                flattened.update(expanded)
            else:
                flattened[key] = expanded
            continue

        direct = transform_func(value)
        if direct is not value and isinstance(direct, dict):
            # NOTE(review): flattened field values are taken as-is (not
            # re-transformed); nested convertibles rely on the transform
            # producing plain data (asdict/model_dump are recursive).
            for field, field_value in direct.items():
                flattened[f"{key}_{field}"] = field_value
        else:
            flattened[key] = _recursive_transform(value, transform_func)

    return flattened


def unpack_dataclasses(
    logger: Any, method_name: str, event_dict: dict[str, Any]
) -> dict[str, Any]:
    """Unpack dataclass instances in the event dict to plain dicts recursively."""

    def transform(value: Any) -> Any:
        # is_dataclass is also True for dataclass *types*; only instances
        # can be converted with asdict().
        if is_dataclass(value) and not isinstance(value, type):
            return asdict(value)
        return value

    return _process_event_dict(event_dict, transform)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def unpack_pydantic_models(
    logger: Any, method_name: str, event_dict: dict[str, Any]
) -> dict[str, Any]:
    """Unpack Pydantic models to dicts recursively.

    Any value exposing a callable ``model_dump()`` is converted and flattened
    by ``_process_event_dict``, mirroring ``unpack_dataclasses``.

    Args:
        logger: Unused; required by the structlog processor protocol.
        method_name: Unused; required by the structlog processor protocol.
        event_dict: The structlog event dict to transform.

    Returns:
        A new event dict with model instances expanded into plain dicts.
    """

    def transform(value: Any) -> Any:
        # Bug fix: guard against model *classes*. A class also exposes a
        # callable `model_dump` attribute, but calling it unbound raises
        # TypeError. Only instances are unpacked — consistent with the
        # `not isinstance(value, type)` guard in unpack_dataclasses.
        if not isinstance(value, type) and callable(getattr(value, "model_dump", None)):
            return value.model_dump()
        return value

    return _process_event_dict(event_dict, transform)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def unpack_generic_objects(
    logger: Any, method_name: str, event_dict: dict[str, Any]
) -> dict[str, Any]:
    """Unpack objects with ``__dict__`` into dicts of their public attributes.

    Builtin containers/scalars, dataclass instances, and anything exposing
    ``model_dump`` are left untouched so the dedicated processors (or the
    JSON-safety pass) can handle them.
    """
    passthrough = (str, int, float, bool, type(None), list, dict, tuple)

    def transform(value: Any) -> Any:
        if isinstance(value, passthrough):
            return value
        if is_dataclass(value) and not isinstance(value, type):
            return value  # handled by unpack_dataclasses
        if callable(getattr(value, "model_dump", None)):
            return value  # handled by unpack_pydantic_models
        # NOTE(review): classes and functions also have __dict__, so they are
        # unpacked too — presumably acceptable; confirm against intended use.
        if hasattr(value, "__dict__"):
            return {k: v for k, v in vars(value).items() if not k.startswith("_")}
        return value

    return _process_event_dict(event_dict, transform)
|
|
File without changes
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# Version is managed by uv. To bump:
|
|
2
|
+
# - Patch (0.1.2 -> 0.1.3): uv version --bump patch
|
|
3
|
+
# - Minor (0.1.2 -> 0.2.0): uv version --bump minor
|
|
4
|
+
# - Major (0.1.2 -> 1.0.0): uv version --bump major
|
|
5
|
+
# Then create a git tag: git tag v<version> && git push origin v<version>
|
|
6
|
+
# The tag will trigger the publish workflow to PyPI
|
|
7
|
+
|
|
8
|
+
[project]
|
|
9
|
+
name = "dossier"
|
|
10
|
+
version = "1.1.3"
|
|
11
|
+
description = "A structured logging library for AI agents with session management and object unpacking"
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
requires-python = ">=3.13"
|
|
14
|
+
license = "Apache-2.0"
|
|
15
|
+
authors = [
|
|
16
|
+
{ name = "Ricardo Decal", email = "dossier-project@ricardodecal.com" }
|
|
17
|
+
]
|
|
18
|
+
keywords = ["logging", "structlog", "ai", "agents", "session-management"]
|
|
19
|
+
classifiers = [
|
|
20
|
+
"Development Status :: 4 - Beta",
|
|
21
|
+
"Intended Audience :: Developers",
|
|
22
|
+
"License :: OSI Approved :: Apache Software License",
|
|
23
|
+
"Programming Language :: Python :: 3",
|
|
24
|
+
"Programming Language :: Python :: 3.13",
|
|
25
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
26
|
+
"Topic :: System :: Logging",
|
|
27
|
+
]
|
|
28
|
+
# Do not edit manually. Use `uv add <dep>` and `uv remove <dep>`
|
|
29
|
+
dependencies = [
|
|
30
|
+
"beartype>=0.22.5",
|
|
31
|
+
"better-exceptions>=0.3.3",
|
|
32
|
+
"rich>=14.2.0",
|
|
33
|
+
"structlog>=25.5.0",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[project.urls]
|
|
37
|
+
Homepage = "https://dossier.ricardodecal.com/"
|
|
38
|
+
Documentation = "https://dossier.ricardodecal.com/"
|
|
39
|
+
Repository = "https://github.com/crypdick/dossier"
|
|
40
|
+
Issues = "https://github.com/crypdick/dossier/issues"
|
|
41
|
+
|
|
42
|
+
[dependency-groups]
|
|
43
|
+
# Note: use `uvx pre-commit run --all-files` to run the pre-commit hooks
|
|
44
|
+
dev = [
|
|
45
|
+
"langchain-core>=1.0.5",
|
|
46
|
+
"mkdocs>=1.6.1",
|
|
47
|
+
"mkdocs-autorefs>=1.4.3",
|
|
48
|
+
"mkdocs-gen-files>=0.5.0",
|
|
49
|
+
"mkdocs-literate-nav>=0.6.2",
|
|
50
|
+
"mkdocs-llmstxt>=0.4.0",
|
|
51
|
+
"mkdocs-material>=9.7.0",
|
|
52
|
+
"mkdocstrings[python]>=0.30.1",
|
|
53
|
+
"mypy>=1.18.2",
|
|
54
|
+
"openai>=2.8.1",
|
|
55
|
+
"openai-agents>=0.5.1",
|
|
56
|
+
"pydantic>=2.12.4",
|
|
57
|
+
"pytest>=9.0.1",
|
|
58
|
+
"pytest-asyncio>=1.3.0",
|
|
59
|
+
"pytest-cov>=7.0.0",
|
|
60
|
+
"pytest-xdist>=3.8.0",
|
|
61
|
+
"requests>=2.32.5",
|
|
62
|
+
"ruff>=0.14.5",
|
|
63
|
+
"types-requests>=2.32.0",
|
|
64
|
+
]
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
[tool.pytest.ini_options]
|
|
68
|
+
# Run with `uv run pytest`
|
|
69
|
+
testpaths = ["tests"]
|
|
70
|
+
pythonpath = ["."]
|
|
71
|
+
# Enable concurrent testing with pytest-xdist
|
|
72
|
+
# Use -n auto to automatically detect number of CPUs
|
|
73
|
+
# Or override with: uv run pytest -n <number>
|
|
74
|
+
addopts = "-n auto"
|
|
75
|
+
|
|
76
|
+
[tool.mypy]
|
|
77
|
+
# Enforce comprehensive type hinting
|
|
78
|
+
python_version = "3.13"
|
|
79
|
+
strict = true
|
|
80
|
+
warn_return_any = true
|
|
81
|
+
warn_unused_configs = true
|
|
82
|
+
disallow_untyped_defs = true
|
|
83
|
+
disallow_incomplete_defs = true
|
|
84
|
+
disallow_untyped_calls = true
|
|
85
|
+
disallow_untyped_decorators = true
|
|
86
|
+
check_untyped_defs = true
|
|
87
|
+
no_implicit_optional = true
|
|
88
|
+
warn_redundant_casts = true
|
|
89
|
+
warn_unused_ignores = true
|
|
90
|
+
warn_no_return = true
|
|
91
|
+
warn_unreachable = true
|
|
92
|
+
strict_equality = true
# NOTE: strict_concatenate is deprecated in newer mypy releases (subsumed by
# the `extra_checks` option); kept for compatibility with older mypy versions.
strict_concatenate = true
|
|
94
|
+
# Show error codes
|
|
95
|
+
show_error_codes = true
|
|
96
|
+
# Pretty output
|
|
97
|
+
pretty = true
|
|
98
|
+
|
|
99
|
+
# Less strict checking for test files
|
|
100
|
+
[[tool.mypy.overrides]]
|
|
101
|
+
module = [
|
|
102
|
+
"tests.*",
|
|
103
|
+
"test_.*",
|
|
104
|
+
]
|
|
105
|
+
disallow_untyped_defs = false
|
|
106
|
+
disallow_untyped_calls = false
|
|
107
|
+
check_untyped_defs = false
|
|
108
|
+
ignore_errors = true
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
[tool.ruff]
|
|
112
|
+
# Enable pycodestyle (`E`), Pylint (`PL`), and others
|
|
113
|
+
lint.select = [
|
|
114
|
+
"E", # pycodestyle errors
|
|
115
|
+
"W", # pycodestyle warnings
|
|
116
|
+
"F", # pyflakes
|
|
117
|
+
"I", # isort
|
|
118
|
+
"PLC", # Pylint convention
|
|
119
|
+
]
|
|
120
|
+
|
|
121
|
+
# Ignore line-too-long (E501) - many docstrings/messages are naturally longer
|
|
122
|
+
lint.ignore = ["E501"]
|
|
123
|
+
|
|
124
|
+
# PLC0415: Import outside top-level (import-outside-toplevel)
|
|
125
|
+
# This catches imports inside functions/classes which should generally be at module level
|
|
126
|
+
# Can be disabled per-line with # noqa: PLC0415
|
|
127
|
+
lint.extend-select = ["PLC0415"]
|
|
128
|
+
|
|
129
|
+
# Exclude test files from import-outside-toplevel check
|
|
130
|
+
# Tests commonly use function-scoped imports for isolation
|
|
131
|
+
[tool.ruff.lint.per-file-ignores]
|
|
132
|
+
"tests/**/*.py" = ["PLC0415"]
|
|
133
|
+
"**/test_*.py" = ["PLC0415"]
|
|
134
|
+
|
|
135
|
+
[build-system]
|
|
136
|
+
requires = ["uv_build>=0.9.2,<0.10.0"]
|
|
137
|
+
build-backend = "uv_build"
|
|
138
|
+
|
|
139
|
+
[tool.uv.build-backend]
|
|
140
|
+
module-name = "dossier"
|
|
141
|
+
module-root = ""
|
|
142
|
+
|
|
143
|
+
# Include py.typed marker for PEP 561 type hint support
|
|
144
|
+
[tool.uv.build-backend.package-data]
|
|
145
|
+
dossier = ["py.typed"]
|