brooklet 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
brooklet/__init__.py ADDED
@@ -0,0 +1,23 @@
1
+ # ABOUTME: Public API for brooklet — lightweight JSONL event streaming library
2
+ # ABOUTME: Exports open() convenience function and __version__
3
+
4
+ from pathlib import Path
5
+
6
+ from brooklet.stream import Stream
7
+ from brooklet.types import Event, Mode, SourceDef # noqa: F401
8
+
9
+ __version__ = "0.1.0"
10
+
11
+
12
def open(path: str | Path) -> Stream:
    """Return a Stream bound to the brooklet stream directory at *path*.

    This is a convenience wrapper that simply constructs ``Stream(path)``.
    Per the library's own description, the directory and its ``.brooklet/``
    metadata are created if they do not exist yet.

    Note that inside this module the name shadows the builtin ``open``;
    that is the intended public API (``brooklet.open(...)``).

    Args:
        path: Location of the stream directory, as a string or ``Path``.

    Returns:
        A Stream instance for registering sources and consuming events.
    """
    stream = Stream(path)
    return stream
brooklet/consumer.py ADDED
@@ -0,0 +1,382 @@
1
+ # ABOUTME: Event consumer with batch and follow modes
2
+ # ABOUTME: Reads JSONL lines from registered sources with offset tracking
3
+
4
+ import fnmatch
5
+ import glob as glob_module
6
+ import logging
7
+ import warnings
8
+ from collections.abc import Iterator
9
+ from pathlib import Path
10
+
11
+ from brooklet.envelope import wrap
12
+ from brooklet.offsets import load, save
13
+ from brooklet.types import Event, GlobOffset, Mode, SingleFileOffset
14
+
15
# Logger named "brooklet"; every diagnostic emitted by this module goes through it.
logger = logging.getLogger("brooklet")

# Upper bound, in seconds, passed to observer.join() when a watchdog observer
# is stopped, so shutting down a consumer cannot block indefinitely.
_OBSERVER_JOIN_TIMEOUT = 5
18
+
19
+
20
class Consumer:
    """Iterator over JSONL events with offset tracking.

    Supports single-file and glob modes, each optionally in follow (tail)
    mode. The read position is persisted per consumer group and topic so
    that consumption resumes where it left off.

    Instances are iterable and can be used as context managers; leaving the
    ``with`` block calls :meth:`close`.
    """

    def __init__(
        self,
        path: str,
        mode: Mode,
        group: str,
        topic: str,
        offsets_dir: str | Path,
        source: str | None = None,
        follow: bool = False,
    ) -> None:
        """Configure the consumer and load its previously saved offset.

        Args:
            path: File path (single-file mode) or glob pattern (glob mode).
            mode: ``"single-file"`` or ``"glob"``. Any other value makes
                iteration raise ``ValueError``.
            group: Consumer group the offset is stored under.
            topic: Topic the offset is stored under.
            offsets_dir: Directory handed to the offset store.
            source: Optional source label forwarded to ``wrap`` for each line.
            follow: When true, keep tailing for new data after catching up.
        """
        self._path = path
        self._mode: Mode = mode
        self._group = group
        self._topic = topic
        self._offsets_dir = Path(offsets_dir)
        self._source = source
        self._follow = follow
        self._seq = 0  # running sequence number, incremented per line read
        self._closed = False  # set by close(); ends the follow loops
        self._file_handle = None  # open handle while single-file mode is reading
        self._observer = None  # most recently started watchdog observer, if any

        self._offset: SingleFileOffset | GlobOffset = self._load_offset()
        # Per-file byte positions used during glob+follow tailing
        self._file_positions: dict[str, int] = {}

    def _load_offset(self) -> SingleFileOffset | GlobOffset:
        """Load offset from storage, returning the appropriate typed offset."""
        raw = load(self._offsets_dir, self._group, self._topic)
        if self._mode == "glob":
            return GlobOffset.decode(raw)
        return SingleFileOffset.decode(raw)

    def _save_offset(self) -> None:
        """Save the current offset to storage."""
        save(self._offsets_dir, self._group, self._topic, self._offset.encode())

    def _stop_observer(self, observer) -> None:
        """Stop a watchdog observer with a bounded join timeout.

        If the observer thread is still alive after the timeout, an error is
        logged and the thread is left behind; this method never raises for a
        hung observer.
        """
        observer.stop()
        observer.join(timeout=_OBSERVER_JOIN_TIMEOUT)
        if observer.is_alive():
            # Do not assign observer.daemon here: threading.Thread raises
            # RuntimeError when the daemon flag is changed on a thread that
            # has already been started, which would replace this diagnostic
            # with a crash inside close() / generator cleanup.
            # NOTE(review): watchdog observer threads appear to be created as
            # daemon threads already, so a hung one should not block
            # interpreter exit - confirm for the pinned watchdog version.
            logger.error(
                "Watchdog observer did not stop within %ss "
                "(topic=%s, group=%s). Thread will be abandoned.",
                _OBSERVER_JOIN_TIMEOUT,
                self._topic,
                self._group,
            )

    def __iter__(self) -> Iterator[Event]:
        """Return a fresh generator over the events of the configured source."""
        return self._iterate()

    def _iterate(self) -> Iterator[Event]:
        """Yield events from the source, dispatching on mode and follow flag.

        Raises:
            ValueError: If the mode is neither ``"single-file"`` nor ``"glob"``.
        """
        if self._mode == "single-file":
            yield from self._iterate_single_file()
        elif self._mode == "glob":
            if self._follow:
                yield from self._iterate_glob_follow()
            else:
                yield from self._iterate_glob()
        else:
            raise ValueError(f"Unknown consumer mode: {self._mode!r}")

    def _iterate_single_file(self) -> Iterator[Event]:
        """Yield events from a single JSONL file, starting at the saved offset.

        A missing file produces a warning and no events. The offset is saved
        once reading (or following) finishes normally.
        """
        path = Path(self._path).expanduser()
        if not path.exists():
            warnings.warn(
                f"Source file does not exist: {path} "
                f"(topic={self._topic!r}, group={self._group!r})",
                stacklevel=2,
            )
            return

        # Not a ``with`` block: close() needs access to the live handle so it
        # can persist the current position while iteration is suspended.
        f = open(path)  # noqa: SIM115
        self._file_handle = f
        try:
            assert isinstance(self._offset, SingleFileOffset)
            f.seek(self._offset.byte_offset)

            if self._follow:
                yield from self._iterate_follow(f, path)
            else:
                yield from self._read_lines(f)

            self._offset = SingleFileOffset(byte_offset=f.tell())
            self._save_offset()
        finally:
            self._file_handle = None
            f.close()

    def _read_lines(self, f) -> Iterator[Event]:
        """Read and yield all currently available lines from a file handle.

        Uses readline() instead of iteration to keep tell() available.
        Every line read advances the sequence counter; lines for which
        ``wrap`` returns ``None`` are skipped.
        """
        while True:
            line = f.readline()
            if not line:
                break
            self._seq += 1
            event = wrap(line, seq=self._seq, source=self._source)
            if event is not None:
                yield event

    def _catch_up_glob(self, files: list[str]) -> Iterator[Event]:
        """Yield all unread events from glob-matched files, updating offset.

        Shared between batch glob and glob+follow modes. During follow mode,
        also populates _file_positions for subsequent tailing.

        Args:
            files: Matched file paths, in the order file indexes refer to.
        """
        assert isinstance(self._offset, GlobOffset)

        if not files:
            if self._offset.file_index != 0 or self._offset.byte_offset != 0:
                logger.error(
                    "Glob matched no files but offset is non-zero "
                    "(file_index=%d, byte_offset=%d). "
                    "Resetting offset (topic=%s, group=%s).",
                    self._offset.file_index,
                    self._offset.byte_offset,
                    self._topic,
                    self._group,
                )
                self._offset = GlobOffset(file_index=0, byte_offset=0)
            return

        start_file_index = self._offset.file_index
        start_byte_offset = self._offset.byte_offset

        if start_file_index >= len(files):
            logger.error(
                "Saved file_index %d is out of bounds (only %d files matched). "
                "Files may have been added or removed between sessions. "
                "Resetting to start of all files (topic=%s, group=%s).",
                start_file_index,
                len(files),
                self._topic,
                self._group,
            )
            start_file_index = 0
            start_byte_offset = 0
            self._offset = GlobOffset(file_index=0, byte_offset=0)

        last_index = len(files) - 1
        for i, filepath in enumerate(files):
            if i < start_file_index:
                # Already consumed in an earlier session. Still record its
                # size so follow mode only tails data appended from now on.
                if self._follow:
                    try:
                        self._file_positions[filepath] = Path(filepath).stat().st_size
                    except OSError as e:
                        logger.warning(
                            "Cannot stat skipped file %s (topic=%s, group=%s): %s",
                            filepath,
                            self._topic,
                            self._group,
                            e,
                        )
                continue

            try:
                f = open(filepath)  # noqa: SIM115
            except OSError as e:
                logger.warning(
                    "Cannot open file %s during catch-up (topic=%s, group=%s): %s",
                    filepath,
                    self._topic,
                    self._group,
                    e,
                )
                # Advance offset past this file
                if i == last_index:
                    self._offset = GlobOffset(file_index=i, byte_offset=0)
                else:
                    self._offset = GlobOffset(file_index=i + 1, byte_offset=0)
                continue

            try:
                # Only the file the saved offset points at resumes mid-file.
                if i == start_file_index:
                    f.seek(start_byte_offset)

                yield from self._read_lines(f)

                end_pos = f.tell()
                if self._follow:
                    self._file_positions[filepath] = end_pos

                # After reading this file, update offset to next file; the
                # last file keeps its own index plus the position reached.
                if i == last_index:
                    self._offset = GlobOffset(file_index=i, byte_offset=end_pos)
                else:
                    self._offset = GlobOffset(file_index=i + 1, byte_offset=0)
            finally:
                f.close()

    def _iterate_glob(self) -> Iterator[Event]:
        """Yield events across the files matched by the glob pattern, then save."""
        files = sorted(glob_module.glob(self._path))
        if not files:
            logger.warning(
                "Glob pattern matched no files: %s (topic=%s, group=%s)",
                self._path,
                self._topic,
                self._group,
            )
        yield from self._catch_up_glob(files)
        self._save_offset()

    def _iterate_glob_follow(self) -> Iterator[Event]:
        """Catch up on existing glob files, then tail for changes and new files.

        The watchdog observer is started before the catch-up scan so that
        files created or appended to while catching up still produce an
        event that the tail phase picks up afterwards.
        """
        import queue

        from watchdog.events import FileSystemEventHandler
        from watchdog.observers import Observer

        assert isinstance(self._offset, GlobOffset)

        glob_pattern = self._path
        watch_dir = str(Path(self._path).parent)
        event_queue = queue.Queue()

        # NOTE(review): event paths are compared with the glob pattern, and
        # looked up in _file_positions, as raw strings. If watchdog reports a
        # path in a different form than glob() returned (e.g. "./a" vs "a",
        # or absolute vs relative), events may be missed or a file re-read
        # from the start - confirm against the watchdog backend in use.
        class GlobHandler(FileSystemEventHandler):
            def on_modified(self, event):
                if not event.is_directory and fnmatch.fnmatch(event.src_path, glob_pattern):
                    event_queue.put(("modified", event.src_path))

            def on_created(self, event):
                if not event.is_directory and fnmatch.fnmatch(event.src_path, glob_pattern):
                    event_queue.put(("created", event.src_path))

        observer = Observer()
        observer.schedule(GlobHandler(), watch_dir, recursive=False)
        observer.start()
        self._observer = observer

        try:
            # Phase 1: catch-up on existing files
            files = sorted(glob_module.glob(self._path))
            yield from self._catch_up_glob(files)
            self._save_offset()

            # Phase 2: tail using the notifications collected by the observer
            try:
                while not self._closed:
                    try:
                        action, filepath = event_queue.get(timeout=0.5)
                    except queue.Empty:
                        # Timed wait so a close() request is noticed promptly.
                        continue

                    # Drain the queue to batch process notifications
                    pending = [(action, filepath)]
                    while not event_queue.empty():
                        try:
                            pending.append(event_queue.get_nowait())
                        except queue.Empty:
                            break

                    for _action, filepath in pending:
                        # Files never seen before are read from the start.
                        known_pos = self._file_positions.get(filepath, 0)

                        try:
                            with open(filepath) as f:
                                f.seek(known_pos)
                                yield from self._read_lines(f)
                                self._file_positions[filepath] = f.tell()
                        except OSError as e:
                            logger.warning(
                                "Skipping file %s during glob+follow (topic=%s, group=%s): %s",
                                filepath,
                                self._topic,
                                self._group,
                                e,
                            )
                            continue

                        # Update GlobOffset: find this file's index in the sorted list
                        all_files = sorted(self._file_positions)
                        file_idx = all_files.index(filepath)
                        self._offset = GlobOffset(
                            file_index=file_idx,
                            byte_offset=self._file_positions[filepath],
                        )

                    self._save_offset()
            finally:
                self._save_offset()
        finally:
            # Outer finally: the observer thread is stopped even when saving
            # the offset fails.
            self._stop_observer(observer)

    def _iterate_follow(self, f, path) -> Iterator[Event]:
        """Tail a file using watchdog for filesystem events.

        Args:
            f: Open handle on the file, already positioned at the start offset.
            path: ``Path`` of that file; its parent directory is watched
                (non-recursively) for modification events.
        """
        import queue

        from watchdog.events import FileSystemEventHandler
        from watchdog.observers import Observer

        event_queue = queue.Queue()
        # Resolve once rather than on every filesystem notification.
        target = path.resolve()

        class Handler(FileSystemEventHandler):
            def on_modified(self, event):
                if Path(event.src_path).resolve() == target:
                    event_queue.put(True)

        observer = Observer()
        observer.schedule(Handler(), str(path.parent), recursive=False)
        observer.start()
        self._observer = observer

        try:
            # First, read any existing lines
            yield from self._read_lines(f)

            # Then tail for new lines
            while not self._closed:
                try:
                    event_queue.get(timeout=0.5)
                except queue.Empty:
                    # Timed wait so a close() request is noticed promptly.
                    continue

                # Drain the queue (multiple notifications may have arrived)
                while not event_queue.empty():
                    try:
                        event_queue.get_nowait()
                    except queue.Empty:
                        break

                yield from self._read_lines(f)
        finally:
            self._stop_observer(observer)

    def close(self) -> None:
        """Stop the consumer and save the current offset.

        Marks the consumer closed (which ends the follow loops), persists the
        position of the file handle if one is still open, and stops the
        watchdog observer if one was started - even when saving fails.
        """
        self._closed = True

        try:
            # Save offset from current file position if still open
            if self._file_handle is not None and not self._file_handle.closed:
                if isinstance(self._offset, GlobOffset):
                    self._offset = GlobOffset(
                        file_index=self._offset.file_index,
                        byte_offset=self._file_handle.tell(),
                    )
                else:
                    self._offset = SingleFileOffset(byte_offset=self._file_handle.tell())
                self._save_offset()
        finally:
            if self._observer is not None:
                self._stop_observer(self._observer)

    def __enter__(self) -> "Consumer":
        """Return the consumer itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> bool:
        """Close the consumer; never suppresses an in-flight exception."""
        self.close()
        return False
@@ -0,0 +1,2 @@
1
+ # ABOUTME: Package init for brooklet contrib modules
2
+ # ABOUTME: Houses optional consumer modules built on top of brooklet core