brooklet 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- brooklet/__init__.py +23 -0
- brooklet/consumer.py +382 -0
- brooklet/contrib/__init__.py +2 -0
- brooklet/contrib/claude_analytics.py +665 -0
- brooklet/contrib/pytest_analytics.py +368 -0
- brooklet/envelope.py +72 -0
- brooklet/offsets.py +90 -0
- brooklet/registry.py +123 -0
- brooklet/stream.py +118 -0
- brooklet/types.py +64 -0
- brooklet-0.1.0.dist-info/METADATA +191 -0
- brooklet-0.1.0.dist-info/RECORD +15 -0
- brooklet-0.1.0.dist-info/WHEEL +4 -0
- brooklet-0.1.0.dist-info/entry_points.txt +3 -0
- brooklet-0.1.0.dist-info/licenses/LICENSE +21 -0
brooklet/__init__.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# ABOUTME: Public API for brooklet — lightweight JSONL event streaming library
|
|
2
|
+
# ABOUTME: Exports open() convenience function and __version__
|
|
3
|
+
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from brooklet.stream import Stream
|
|
7
|
+
from brooklet.types import Event, Mode, SourceDef # noqa: F401
|
|
8
|
+
|
|
9
|
+
__version__ = "0.1.0"
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def open(path: str | Path) -> Stream:
    """Return a ``Stream`` bound to the brooklet directory at *path*.

    This is the package-level convenience entry point; it intentionally
    shadows the builtin ``open`` so callers can write ``brooklet.open(...)``.
    The directory and its ``.brooklet/`` metadata are created if they
    don't exist.

    Args:
        path: Location of the stream directory, as a string or ``Path``.

    Returns:
        A ``Stream`` instance for registering sources and consuming events.
    """
    stream = Stream(path)
    return stream
|
brooklet/consumer.py
ADDED
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
# ABOUTME: Event consumer with batch and follow modes
|
|
2
|
+
# ABOUTME: Reads JSONL lines from registered sources with offset tracking
|
|
3
|
+
|
|
4
|
+
import fnmatch
|
|
5
|
+
import glob as glob_module
|
|
6
|
+
import logging
|
|
7
|
+
import warnings
|
|
8
|
+
from collections.abc import Iterator
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from brooklet.envelope import wrap
|
|
12
|
+
from brooklet.offsets import load, save
|
|
13
|
+
from brooklet.types import Event, GlobOffset, Mode, SingleFileOffset
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger("brooklet")
|
|
16
|
+
|
|
17
|
+
_OBSERVER_JOIN_TIMEOUT = 5
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Consumer:
    """Iterator over JSONL events with offset tracking.

    Supports two modes: ``"single-file"`` (one JSONL file) and ``"glob"``
    (every file matched by a glob pattern, visited in sorted order). Each
    mode can run as a one-shot batch or, with ``follow=True``, keep tailing
    for new data using a watchdog observer.

    Offsets are loaded and saved per (group, topic) pair so consumption
    resumes where it left off. The consumer is also a context manager;
    leaving the ``with`` block calls :meth:`close`.
    """

    def __init__(
        self,
        path: str,
        mode: Mode,
        group: str,
        topic: str,
        offsets_dir: str | Path,
        source: str | None = None,
        follow: bool = False,
    ) -> None:
        """Set up the consumer and load its previously saved offset.

        Args:
            path: File path (single-file mode) or glob pattern (glob mode).
            mode: ``"single-file"`` or ``"glob"``; anything else raises
                ``ValueError`` once iteration starts.
            group: Consumer group name used as part of the offset key.
            topic: Topic name used as part of the offset key.
            offsets_dir: Directory handed to the offset ``load``/``save``
                helpers.
            source: Optional source label passed through to ``wrap`` for
                every event.
            follow: When true, keep tailing for new data after catching up.
        """
        self._path = path
        self._mode: Mode = mode
        self._group = group
        self._topic = topic
        self._offsets_dir = Path(offsets_dir)
        self._source = source
        self._follow = follow
        self._seq = 0
        self._closed = False
        # Only set while a single-file read is in progress; close() uses it
        # to capture the live file position.
        self._file_handle = None
        # The currently running watchdog observer, if any.
        self._observer = None

        self._offset: SingleFileOffset | GlobOffset = self._load_offset()
        # Per-file byte positions used during glob+follow tailing
        self._file_positions: dict[str, int] = {}

    def _load_offset(self) -> SingleFileOffset | GlobOffset:
        """Load offset from storage, returning the appropriate typed offset."""
        raw = load(self._offsets_dir, self._group, self._topic)
        if self._mode == "glob":
            return GlobOffset.decode(raw)
        return SingleFileOffset.decode(raw)

    def _save_offset(self) -> None:
        """Save the current offset to storage."""
        save(self._offsets_dir, self._group, self._topic, self._offset.encode())

    def _stop_observer(self, observer) -> None:
        """Stop a watchdog observer with a bounded join timeout.

        If the observer thread is still alive after the timeout, an error
        is logged and the thread is abandoned.
        """
        observer.stop()
        observer.join(timeout=_OBSERVER_JOIN_TIMEOUT)

        # Forget the observer once it has been asked to stop so a later
        # close() does not try to stop it a second time.
        if self._observer is observer:
            self._observer = None

        if observer.is_alive():
            # Do NOT assign observer.daemon here: Thread.daemon can only be
            # set before start(); on a running thread it raises RuntimeError,
            # which would mask this log message and break close().
            # NOTE(review): watchdog observers appear to be created as daemon
            # threads already, so process exit should not be blocked - confirm.
            logger.error(
                "Watchdog observer did not stop within %ss "
                "(topic=%s, group=%s). Thread will be abandoned.",
                _OBSERVER_JOIN_TIMEOUT,
                self._topic,
                self._group,
            )

    def __iter__(self) -> Iterator[Event]:
        return self._iterate()

    def _iterate(self) -> Iterator[Event]:
        """Yield events from the source, dispatching on mode and follow flag.

        Raises:
            ValueError: If the configured mode is not recognised.
        """
        if self._mode == "single-file":
            yield from self._iterate_single_file()
        elif self._mode == "glob":
            if self._follow:
                yield from self._iterate_glob_follow()
            else:
                yield from self._iterate_glob()
        else:
            raise ValueError(f"Unknown consumer mode: {self._mode!r}")

    def _iterate_single_file(self) -> Iterator[Event]:
        """Read events from a single JSONL file.

        Emits a warning and yields nothing when the file does not exist.
        The offset is persisted once the read (or follow loop) finishes
        normally; close() covers the case where the file is still open.
        """
        path = Path(self._path).expanduser()
        if not path.exists():
            warnings.warn(
                f"Source file does not exist: {path} "
                f"(topic={self._topic!r}, group={self._group!r})",
                stacklevel=2,
            )
            return

        # JSONL is UTF-8; be explicit instead of relying on the locale.
        f = open(path, encoding="utf-8")  # noqa: SIM115
        self._file_handle = f
        try:
            assert isinstance(self._offset, SingleFileOffset)
            f.seek(self._offset.byte_offset)

            if self._follow:
                yield from self._iterate_follow(f, path)
            else:
                yield from self._read_lines(f)

            self._offset = SingleFileOffset(byte_offset=f.tell())
            self._save_offset()
        finally:
            self._file_handle = None
            f.close()

    def _read_lines(self, f) -> Iterator[Event]:
        """Read and yield all available lines from a file handle.

        Uses readline() instead of iteration to keep tell() available.
        The sequence number advances for every line read, including lines
        for which ``wrap`` returns ``None`` (those are not yielded).

        NOTE(review): when tailing, readline() can return a partially
        written final line; how ``wrap`` treats such fragments is not
        visible here - confirm.
        """
        while True:
            line = f.readline()
            if not line:
                break
            self._seq += 1
            event = wrap(line, seq=self._seq, source=self._source)
            if event is not None:
                yield event

    def _catch_up_glob(self, files: list[str]) -> Iterator[Event]:
        """Read all unread events from glob-matched files, updating offset.

        Shared between batch glob and glob+follow modes. During follow mode,
        also populates _file_positions for subsequent tailing.

        Args:
            files: Sorted list of paths matched by the glob pattern.

        Yields:
            Events from every file at or after the saved file index.
        """
        assert isinstance(self._offset, GlobOffset)

        if not files:
            if self._offset.file_index != 0 or self._offset.byte_offset != 0:
                logger.error(
                    "Glob matched no files but offset is non-zero "
                    "(file_index=%d, byte_offset=%d). "
                    "Resetting offset (topic=%s, group=%s).",
                    self._offset.file_index,
                    self._offset.byte_offset,
                    self._topic,
                    self._group,
                )
                self._offset = GlobOffset(file_index=0, byte_offset=0)
            return

        start_file_index = self._offset.file_index
        start_byte_offset = self._offset.byte_offset

        if start_file_index >= len(files):
            logger.error(
                "Saved file_index %d is out of bounds (only %d files matched). "
                "Files may have been added or removed between sessions. "
                "Resetting to start of all files (topic=%s, group=%s).",
                start_file_index,
                len(files),
                self._topic,
                self._group,
            )
            start_file_index = 0
            start_byte_offset = 0
            self._offset = GlobOffset(file_index=0, byte_offset=0)

        last_index = len(files) - 1

        for i, filepath in enumerate(files):
            if i < start_file_index:
                # Still record position for follow mode
                if self._follow:
                    try:
                        self._file_positions[filepath] = Path(filepath).stat().st_size
                    except OSError as e:
                        logger.warning(
                            "Cannot stat skipped file %s (topic=%s, group=%s): %s",
                            filepath,
                            self._topic,
                            self._group,
                            e,
                        )
                continue

            try:
                f = open(filepath, encoding="utf-8")  # noqa: SIM115
            except OSError as e:
                logger.warning(
                    "Cannot open file %s during catch-up (topic=%s, group=%s): %s",
                    filepath,
                    self._topic,
                    self._group,
                    e,
                )
                # Advance offset past this file (the last file keeps its own
                # index so the saved index stays within bounds).
                if i == last_index:
                    self._offset = GlobOffset(file_index=i, byte_offset=0)
                else:
                    self._offset = GlobOffset(file_index=i + 1, byte_offset=0)
                continue

            try:
                # Only the file we stopped in resumes mid-file; later files
                # are read from the beginning.
                if i == start_file_index:
                    f.seek(start_byte_offset)

                yield from self._read_lines(f)

                end_pos = f.tell()
                if self._follow:
                    self._file_positions[filepath] = end_pos

                # After reading this file, update offset to next file
                if i == last_index:
                    self._offset = GlobOffset(file_index=i, byte_offset=end_pos)
                else:
                    self._offset = GlobOffset(file_index=i + 1, byte_offset=0)
            finally:
                f.close()

    def _iterate_glob(self) -> Iterator[Event]:
        """Read events across multiple files matched by glob pattern."""
        files = sorted(glob_module.glob(self._path))
        if not files:
            logger.warning(
                "Glob pattern matched no files: %s (topic=%s, group=%s)",
                self._path,
                self._topic,
                self._group,
            )
        yield from self._catch_up_glob(files)
        self._save_offset()

    def _iterate_glob_follow(self) -> Iterator[Event]:
        """Catch up on existing glob files, then tail for changes and new files.

        The watchdog observer is started before the catch-up pass so that
        writes happening while (or just after) existing files are read are
        queued instead of being missed until the next modification.
        """
        import queue

        from watchdog.events import FileSystemEventHandler
        from watchdog.observers import Observer

        assert isinstance(self._offset, GlobOffset)

        glob_pattern = self._path
        watch_dir = str(Path(self._path).parent)
        event_queue = queue.Queue()

        class GlobHandler(FileSystemEventHandler):
            def on_modified(self, event):
                if not event.is_directory and fnmatch.fnmatch(event.src_path, glob_pattern):
                    event_queue.put(("modified", event.src_path))

            def on_created(self, event):
                if not event.is_directory and fnmatch.fnmatch(event.src_path, glob_pattern):
                    event_queue.put(("created", event.src_path))

        observer = Observer()
        observer.schedule(GlobHandler(), watch_dir, recursive=False)
        observer.start()
        self._observer = observer

        try:
            # Phase 1: catch-up on existing files
            files = sorted(glob_module.glob(self._path))
            yield from self._catch_up_glob(files)
            self._save_offset()

            # Phase 2: tail using watchdog events on the parent directory
            while not self._closed:
                try:
                    action, filepath = event_queue.get(timeout=0.5)
                except queue.Empty:
                    # Wake up periodically to re-check the closed flag.
                    continue

                # Drain the queue to batch process notifications
                pending = [(action, filepath)]
                while not event_queue.empty():
                    try:
                        pending.append(event_queue.get_nowait())
                    except queue.Empty:
                        break

                for _action, filepath in pending:
                    # Files never seen before are read from the start.
                    known_pos = self._file_positions.get(filepath, 0)

                    try:
                        with open(filepath, encoding="utf-8") as f:
                            f.seek(known_pos)
                            yield from self._read_lines(f)
                            self._file_positions[filepath] = f.tell()
                    except OSError as e:
                        logger.warning(
                            "Skipping file %s during glob+follow (topic=%s, group=%s): %s",
                            filepath,
                            self._topic,
                            self._group,
                            e,
                        )
                        continue

                    # Update GlobOffset: find this file's index in the sorted list
                    all_files = sorted(self._file_positions)
                    file_idx = all_files.index(filepath)
                    self._offset = GlobOffset(
                        file_index=file_idx,
                        byte_offset=self._file_positions[filepath],
                    )

                self._save_offset()
        finally:
            # Always stop the observer, even if persisting the offset fails.
            try:
                self._save_offset()
            finally:
                self._stop_observer(observer)

    def _iterate_follow(self, f, path) -> Iterator[Event]:
        """Tail a file using watchdog for filesystem events.

        Args:
            f: Open handle on the file, already positioned at the resume offset.
            path: ``Path`` of the file; its parent directory is watched.
        """
        import queue

        from watchdog.events import FileSystemEventHandler
        from watchdog.observers import Observer

        event_queue = queue.Queue()

        class Handler(FileSystemEventHandler):
            def on_modified(self, event):
                if Path(event.src_path).resolve() == path.resolve():
                    event_queue.put(True)

        observer = Observer()
        observer.schedule(Handler(), str(path.parent), recursive=False)
        observer.start()
        self._observer = observer

        try:
            # First, read any existing lines
            yield from self._read_lines(f)

            # Then tail for new lines
            while not self._closed:
                try:
                    event_queue.get(timeout=0.5)
                except queue.Empty:
                    # Wake up periodically to re-check the closed flag.
                    continue

                # Drain the queue (multiple notifications may have arrived)
                while not event_queue.empty():
                    try:
                        event_queue.get_nowait()
                    except queue.Empty:
                        break

                yield from self._read_lines(f)
        finally:
            self._stop_observer(observer)

    def close(self) -> None:
        """Stop the consumer and save the current offset.

        Sets the closed flag (which ends the follow loops), captures the
        live file position if a single-file read is still open, persists
        the offset, and stops any running observer even if saving fails.
        """
        self._closed = True

        try:
            # Save offset from current file position if still open
            if self._file_handle is not None and not self._file_handle.closed:
                if isinstance(self._offset, GlobOffset):
                    self._offset = GlobOffset(
                        file_index=self._offset.file_index,
                        byte_offset=self._file_handle.tell(),
                    )
                else:
                    self._offset = SingleFileOffset(byte_offset=self._file_handle.tell())
            self._save_offset()
        finally:
            if self._observer is not None:
                self._stop_observer(self._observer)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
        # Never suppress exceptions raised inside the with-block.
        return False
|