claude-dev-env 1.30.0 → 1.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/CLAUDE.md +8 -0
  2. package/agents/clean-coder.md +275 -111
  3. package/agents/code-quality-agent.md +196 -209
  4. package/bin/install.mjs +81 -0
  5. package/bin/install.test.mjs +158 -0
  6. package/bin/install_mypy_ini.mjs +51 -0
  7. package/bin/install_mypy_ini.test.mjs +121 -0
  8. package/commands/hook-log-extract.md +70 -0
  9. package/commands/hook-log-init.md +76 -0
  10. package/docs/CODE_RULES.md +40 -0
  11. package/hooks/blocking/code_rules_enforcer.py +5 -3
  12. package/hooks/blocking/destructive_command_blocker.py +187 -0
  13. package/hooks/blocking/question_to_user_enforcer.py +140 -0
  14. package/hooks/blocking/test_code_rules_enforcer_file_global_constants.py +39 -0
  15. package/hooks/blocking/test_destructive_command_blocker.py +397 -0
  16. package/hooks/blocking/test_question_to_user_enforcer.py +163 -0
  17. package/hooks/config/hook_log_extractor_constants.py +221 -0
  18. package/hooks/config/messages.py +3 -0
  19. package/hooks/config/test_hook_log_extractor_constants.py +96 -0
  20. package/hooks/config/test_messages.py +5 -0
  21. package/hooks/diagnostic/hook_log_extractor.py +907 -0
  22. package/hooks/diagnostic/hook_log_init.py +202 -0
  23. package/hooks/diagnostic/hook_log_stop_wrapper.py +84 -0
  24. package/hooks/diagnostic/migrations/2026-04-25-drop-themes-hook-events.sql +3 -0
  25. package/hooks/diagnostic/migrations/README.md +77 -0
  26. package/hooks/diagnostic/queries/block_details_for_hook.sql +26 -0
  27. package/hooks/diagnostic/queries/blocks_by_category.sql +10 -0
  28. package/hooks/diagnostic/queries/blocks_by_tool.sql +9 -0
  29. package/hooks/diagnostic/queries/blocks_last_7_days.sql +11 -0
  30. package/hooks/diagnostic/queries/top_blockers_last_24_hours.sql +12 -0
  31. package/hooks/diagnostic/queries/top_blockers_overall.sql +12 -0
  32. package/hooks/diagnostic/requirements-hook-logs-dev.txt +2 -0
  33. package/hooks/diagnostic/requirements-hook-logs.txt +1 -0
  34. package/hooks/diagnostic/schema.sql +51 -0
  35. package/hooks/diagnostic/test_hook_log_extractor.py +1531 -0
  36. package/hooks/diagnostic/test_hook_log_init.py +227 -0
  37. package/hooks/diagnostic/test_hook_log_stop_wrapper.py +98 -0
  38. package/hooks/hooks.json +10 -0
  39. package/package.json +1 -1
  40. package/rules/ask-user-question-required.md +44 -0
  41. package/scripts/config/test_spec_implementer_prompt.py +0 -4
  42. package/scripts/test_groq_bugteam_spec.py +0 -8
@@ -0,0 +1,907 @@
1
+ #!/usr/bin/env python3
2
+ """Extract hook-firing records from per-session JSONL transcripts into Neon.
3
+
4
+ Reads JSONL transcripts at ``PROJECTS_TRANSCRIPT_ROOT`` and ingests only
5
+ ``attachment`` records whose inner ``attachment.type`` is one of the
6
+ five variants enumerated in ``OUTCOME_BY_ATTACHMENT_TYPE``
7
+ (``hook_success``, ``hook_blocking_error``, ``hook_non_blocking_error``,
8
+ ``hook_system_message``, ``hook_additional_context``). Unknown
9
+ ``hook_``-prefixed variants are skipped until
10
+ ``OUTCOME_BY_ATTACHMENT_TYPE`` is extended to cover them. Each ingested
11
+ record becomes one row in the ``hook_events`` table. Idempotence is
12
+ enforced at the database layer via a UNIQUE constraint on
13
+ ``(source_jsonl_path, source_line_number)`` combined with
14
+ ``ON CONFLICT DO NOTHING``. Per-file byte offsets in
15
+ ``OFFSET_STATE_FILE`` skip re-reading unchanged bytes.
16
+
17
+ Offline graceful behavior: ``psycopg.OperationalError`` or any
18
+ connect-time failure appends one ISO-8601 line to
19
+ ``OFFLINE_WARNING_LOG`` and exits 0, so the Stop hook never blocks
20
+ session end on a missing network.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import contextlib
26
+ import datetime
27
+ import errno
28
+ import glob
29
+ import io
30
+ import json
31
+ import os
32
+ import re
33
+ import sys
34
+ import tempfile
35
+ import time
36
+ from pathlib import Path
37
+ from typing import IO, Iterator, Optional, Sequence
38
+
39
+ if os.name == "nt":
40
+ try:
41
+ import msvcrt
42
+ except ImportError:
43
+ msvcrt = None
44
+ fcntl = None
45
+ else:
46
+ try:
47
+ import fcntl
48
+ except ImportError:
49
+ fcntl = None
50
+ msvcrt = None
51
+
52
+ if str(Path(__file__).resolve().parent.parent) not in sys.path:
53
+ sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
54
+
55
+ try:
56
+ import psycopg
57
+ except ImportError:
58
+ psycopg = None
59
+
60
+ from config.hook_log_extractor_constants import (
61
+ ATTACHMENT_TYPE_HOOK_ADDITIONAL_CONTEXT,
62
+ ATTACHMENT_TYPE_HOOK_BLOCKING_ERROR,
63
+ ATTACHMENT_TYPE_HOOK_SUCCESS,
64
+ ATTACHMENT_TYPE_HOOK_SYSTEM_MESSAGE,
65
+ ATTACHMENT_TYPE_PREFIX,
66
+ BYTE_OFFSET_KEY,
67
+ CATEGORY_PATH_MINIMUM_PARTS,
68
+ COMMAND_EXCERPT_MAX_CHARACTERS,
69
+ CONNECT_TIMEOUT_SECONDS,
70
+ DEFAULT_QUERY_FOR_SUMMARY,
71
+ EMPTY_STRING,
72
+ EXIT_CODE_EXTRACTOR_ENVIRONMENT_MISSING,
73
+ EXIT_CODE_SUCCESS,
74
+ EXIT_CODE_UNKNOWN_QUERY,
75
+ FLAG_FULL_REBUILD,
76
+ FLAG_INCREMENTAL,
77
+ FLAG_QUERY,
78
+ FLAG_SUMMARY,
79
+ HOOK_CATEGORY_UNCATEGORIZED,
80
+ HOOK_EVENTS_INSERT_SQL,
81
+ HOOK_EVENTS_TRUNCATE_SQL,
82
+ HOOK_NAME_TOOL_SEPARATOR,
83
+ HOOKS_DIRECTORY_TOKEN,
84
+ INSERT_BATCH_SIZE,
85
+ INVALID_QUERY_NAME_MESSAGE_PREFIX,
86
+ JSONL_FILE_GLOB,
87
+ KNOWN_HOOK_CATEGORIES,
88
+ LEGACY_OFFSETS_FORMAT_WARNING_LABEL,
89
+ LINE_NUMBER_KEY,
90
+ LOCK_MAXIMUM_RETRY_COUNT,
91
+ LOCK_RETRY_SLEEP_SECONDS,
92
+ MISSING_NEON_DATABASE_URL_WARNING_LABEL,
93
+ MISSING_PSYCOPG_WARNING_LABEL,
94
+ NEON_DATABASE_URL_ENVIRONMENT_VARIABLE,
95
+ NEWLINE_JOINER,
96
+ OFFLINE_WARNING_LOG,
97
+ OFFSET_STATE_FILE,
98
+ OFFSETS_JSON_INDENT,
99
+ OUTCOME_BY_ATTACHMENT_TYPE,
100
+ PROJECTS_TRANSCRIPT_ROOT,
101
+ QUERIES_DIRECTORY_NAME,
102
+ QUERY_NAME_PATTERN,
103
+ QUERY_NO_ROWS_RETURNED_MESSAGE,
104
+ SCRIPT_PATH_PYTHON_PREFIXES,
105
+ SQL_FILE_EXTENSION,
106
+ STDERR_EXCERPT_MAX_CHARACTERS,
107
+ STDOUT_EXCERPT_MAX_CHARACTERS,
108
+ SUMMARY_COLUMN_HEADINGS,
109
+ SUMMARY_NO_NEW_BLOCKS_MESSAGE,
110
+ SUMMARY_TABLE_COLUMN_GAP,
111
+ TOP_BLOCKED_COMMAND_PREVIEW_MAX_CHARACTERS,
112
+ TOP_BLOCKERS_LAST_24_HOURS_SQL,
113
+ TOP_LEVEL_ATTACHMENT_TYPE,
114
+ UNKNOWN_QUERY_MESSAGE_PREFIX,
115
+ )
116
+
117
+
118
class MissingNeonDatabaseUrlError(RuntimeError):
    """Raised when the Neon connection URL environment variable is unset.

    ``connect_to_neon`` raises this when the variable named by
    ``NEON_DATABASE_URL_ENVIRONMENT_VARIABLE`` is absent or blank after
    stripping whitespace. ``is_operational_error`` reports it as an
    offline condition.
    """
120
+
121
+
122
class MissingPsycopgDependencyError(RuntimeError):
    """Raised when the psycopg driver is not installed in the interpreter.

    ``connect_to_neon`` raises this when the module-level ``psycopg``
    name is ``None`` (the guarded import failed). ``is_operational_error``
    reports it as an offline condition.
    """
124
+
125
+
126
def derive_category(script_path: Optional[str]) -> str:
    """Classify a hook script by the directory that follows the hooks token.

    Backslashes are normalised to forward slashes, the first matching
    entry of ``SCRIPT_PATH_PYTHON_PREFIXES`` is stripped, and the text
    after the last ``HOOKS_DIRECTORY_TOKEN`` occurrence is split on ``/``.
    The first segment is returned when it is a member of
    ``KNOWN_HOOK_CATEGORIES`` and the remainder has at least
    ``CATEGORY_PATH_MINIMUM_PARTS`` segments; every other case yields
    ``HOOK_CATEGORY_UNCATEGORIZED``.
    """
    if not script_path:
        return HOOK_CATEGORY_UNCATEGORIZED

    forward_slash_path = script_path.replace("\\", "/")
    leading_prefix = next(
        (
            candidate_prefix
            for candidate_prefix in SCRIPT_PATH_PYTHON_PREFIXES
            if forward_slash_path.startswith(candidate_prefix)
        ),
        None,
    )
    if leading_prefix is not None:
        forward_slash_path = forward_slash_path[len(leading_prefix) :]

    token_position = forward_slash_path.rfind(HOOKS_DIRECTORY_TOKEN)
    if token_position == -1:
        return HOOK_CATEGORY_UNCATEGORIZED

    tail_after_token = forward_slash_path[
        token_position + len(HOOKS_DIRECTORY_TOKEN) :
    ]
    tail_segments = tail_after_token.split("/")
    has_enough_segments = len(tail_segments) >= CATEGORY_PATH_MINIMUM_PARTS
    if has_enough_segments and tail_segments[0] in KNOWN_HOOK_CATEGORIES:
        return tail_segments[0]
    return HOOK_CATEGORY_UNCATEGORIZED
148
+
149
+
150
def derive_outcome(attachment_type: str) -> str:
    """Look up the outcome label registered for ``attachment_type``.

    The lookup is a plain subscript on ``OUTCOME_BY_ATTACHMENT_TYPE``, so
    an unregistered type propagates the mapping's lookup error instead of
    returning a default.
    """
    outcome_label = OUTCOME_BY_ATTACHMENT_TYPE[attachment_type]
    return outcome_label
153
+
154
+
155
def extract_script_path(attachment: dict[str, object]) -> Optional[str]:
    """Pull the hook script path out of an attachment block.

    Success attachments carry the command at the top level; blocking-error
    attachments carry it inside the nested ``blockingError`` dict. Any
    other attachment type, or a non-dict ``blockingError`` value, yields
    ``None``. The command is passed through ``_strip_python_prefix``.
    """
    kind = attachment.get("type", EMPTY_STRING)
    if kind == ATTACHMENT_TYPE_HOOK_SUCCESS:
        raw_command = attachment.get("command")
    elif kind == ATTACHMENT_TYPE_HOOK_BLOCKING_ERROR:
        nested_error = attachment.get("blockingError") or {}
        if not isinstance(nested_error, dict):
            return None
        raw_command = nested_error.get("command")
    else:
        return None
    return _strip_python_prefix(raw_command)
165
+
166
+
167
def _strip_python_prefix(command_string: Optional[str]) -> Optional[str]:
    """Drop the first matching interpreter prefix from a hook command.

    Returns ``None`` for a missing or empty command. When no entry of
    ``SCRIPT_PATH_PYTHON_PREFIXES`` matches, the command is returned
    unchanged.
    """
    if not command_string:
        return None
    leading_prefix = next(
        (
            candidate_prefix
            for candidate_prefix in SCRIPT_PATH_PYTHON_PREFIXES
            if command_string.startswith(candidate_prefix)
        ),
        None,
    )
    if leading_prefix is None:
        return command_string
    return command_string[len(leading_prefix) :]
174
+
175
+
176
def extract_tool_name(hook_name: Optional[str]) -> Optional[str]:
    """Return the part of ``hook_name`` after the first tool separator.

    ``None`` is returned when the name is missing, empty, or does not
    contain ``HOOK_NAME_TOOL_SEPARATOR``. Only the first separator splits
    the name; later occurrences stay in the returned tool name.
    """
    if hook_name and HOOK_NAME_TOOL_SEPARATOR in hook_name:
        _event_part, tool_part = hook_name.split(HOOK_NAME_TOOL_SEPARATOR, 1)
        return tool_part
    return None
183
+
184
+
185
def truncate_command_excerpt(command_text: Optional[str]) -> Optional[str]:
    """Cap a command string at ``COMMAND_EXCERPT_MAX_CHARACTERS`` characters.

    ``None`` passes through unchanged.
    """
    return _truncate_to_length(
        command_text,
        maximum_length=COMMAND_EXCERPT_MAX_CHARACTERS,
    )
188
+
189
+
190
def truncate_stdout_excerpt(stdout_text: Optional[str]) -> Optional[str]:
    """Cap a stdout string at ``STDOUT_EXCERPT_MAX_CHARACTERS`` characters.

    ``None`` passes through unchanged.
    """
    return _truncate_to_length(
        stdout_text,
        maximum_length=STDOUT_EXCERPT_MAX_CHARACTERS,
    )
193
+
194
+
195
def truncate_stderr_excerpt(stderr_text: Optional[str]) -> Optional[str]:
    """Cap a stderr string at ``STDERR_EXCERPT_MAX_CHARACTERS`` characters.

    ``None`` passes through unchanged.
    """
    return _truncate_to_length(
        stderr_text,
        maximum_length=STDERR_EXCERPT_MAX_CHARACTERS,
    )
198
+
199
+
200
def _truncate_to_length(
    text_or_none: Optional[str], maximum_length: int
) -> Optional[str]:
    """Return ``text_or_none`` cut to at most ``maximum_length`` characters.

    ``None`` and text already within the limit are returned as-is; longer
    text is sliced from the front.
    """
    exceeds_limit = text_or_none is not None and len(text_or_none) > maximum_length
    return text_or_none[:maximum_length] if exceeds_limit else text_or_none
208
+
209
+
210
def _normalize_content_to_text(content_or_none: object) -> Optional[str]:
    """Flatten an attachment ``content`` value into a single string.

    ``None`` and plain strings are returned unchanged. A list keeps only
    its string items, joined with ``NEWLINE_JOINER``; a list without any
    string items yields ``None``. Anything else is converted with ``str``.
    """
    if content_or_none is None or isinstance(content_or_none, str):
        return content_or_none
    if not isinstance(content_or_none, list):
        return str(content_or_none)
    text_fragments = [
        fragment for fragment in content_or_none if isinstance(fragment, str)
    ]
    if not text_fragments:
        return None
    return NEWLINE_JOINER.join(text_fragments)
221
+
222
+
223
def build_row_from_attachment(
    parsed_record: dict[str, object],
    source_jsonl_path: str,
    source_line_number: int,
) -> dict[str, object]:
    """Translate one parsed transcript record into a ``hook_events`` row.

    Top-level record fields (timestamp, session id, cwd, git branch) are
    copied across; everything else comes from the nested ``attachment``
    block. Blocking-error attachments take their command, and when
    present their error message (stored as the stderr excerpt), from the
    nested ``blockingError`` dict. System-message and additional-context
    attachments store their normalised ``content`` as the stdout excerpt.
    Command, stdout and stderr are truncated by the ``truncate_*`` helpers.

    An attachment type missing from ``OUTCOME_BY_ATTACHMENT_TYPE`` makes
    ``derive_outcome`` raise.
    """
    attachment = parsed_record.get("attachment") or {}
    kind = attachment.get("type", EMPTY_STRING)
    outcome = derive_outcome(kind)
    script_path = extract_script_path(attachment)
    category = derive_category(script_path)
    hook_name = attachment.get("hookName")
    tool_name = extract_tool_name(hook_name)

    command_text = attachment.get("command")
    stdout_text: Optional[str] = attachment.get("stdout")
    stderr_text: Optional[str] = attachment.get("stderr")

    if kind == ATTACHMENT_TYPE_HOOK_BLOCKING_ERROR:
        nested_error = attachment.get("blockingError") or {}
        if isinstance(nested_error, dict):
            command_text = nested_error.get("command")
            # A falsy message keeps whatever stderr the attachment carried.
            stderr_text = nested_error.get("blockingError") or stderr_text
    elif kind in (
        ATTACHMENT_TYPE_HOOK_SYSTEM_MESSAGE,
        ATTACHMENT_TYPE_HOOK_ADDITIONAL_CONTEXT,
    ):
        stdout_text = _normalize_content_to_text(attachment.get("content"))

    row: dict[str, object] = {
        "event_timestamp": parsed_record.get("timestamp"),
        "session_id": parsed_record.get("sessionId", EMPTY_STRING),
        "cwd": parsed_record.get("cwd"),
        "git_branch": parsed_record.get("gitBranch"),
        "hook_event": attachment.get("hookEvent", EMPTY_STRING),
        "hook_name": hook_name or EMPTY_STRING,
        "hook_category": category,
        "script_path": script_path,
        "tool_name": tool_name,
        "tool_use_id": attachment.get("toolUseID"),
        "outcome": outcome,
        "exit_code": attachment.get("exitCode"),
        "duration_ms": attachment.get("durationMs"),
        "command_excerpt": truncate_command_excerpt(command_text),
        "stdout_excerpt": truncate_stdout_excerpt(stdout_text),
        "stderr_excerpt": truncate_stderr_excerpt(stderr_text),
        "source_jsonl_path": source_jsonl_path,
        "source_line_number": source_line_number,
    }
    return row
281
+
282
+
283
class AttachmentRecordIterator:
    """Iterates hook attachment records and tracks bytes actually consumed.

    Each iteration yields ``(parsed_record, line_number, byte_offset_after)``
    for every line that is a top-level attachment record whose inner
    ``attachment.type`` is a key of ``OUTCOME_BY_ATTACHMENT_TYPE``.

    ``final_line_number`` is the count of lines consumed (malformed and
    non-attachment lines included), not just the line number of the last
    yielded record. ``final_byte_offset`` is the byte position after the
    last consumed line, or ``start_offset`` when the file could not be
    opened. ``drained`` becomes True once iteration reached the end of the
    consumable data.

    A final line that has no trailing newline AND fails to decode or
    parse is treated as a write still in progress: it is left unconsumed
    so the next run re-reads it from its first byte once the writer has
    finished it. Consuming it would move the offset into the middle of
    the record and lose it permanently. An unterminated final line that
    parses cleanly is still consumed.
    """

    def __init__(
        self,
        jsonl_file_path: str,
        start_offset: int,
        start_line_number: int = 0,
    ) -> None:
        self._jsonl_file_path = jsonl_file_path
        self._start_offset = start_offset
        self._start_line_number = start_line_number
        self.final_line_number = start_line_number
        self.final_byte_offset = start_offset
        self.drained = False

    @staticmethod
    def _is_supported_hook_attachment(parsed_record: object) -> bool:
        """Return True when a parsed line is an ingestible hook attachment."""
        if not isinstance(parsed_record, dict):
            return False
        if parsed_record.get("type") != TOP_LEVEL_ATTACHMENT_TYPE:
            return False
        attachment_block = parsed_record.get("attachment") or {}
        if not isinstance(attachment_block, dict):
            return False
        attachment_type = attachment_block.get("type", EMPTY_STRING)
        if not isinstance(attachment_type, str):
            return False
        if not attachment_type.startswith(ATTACHMENT_TYPE_PREFIX):
            return False
        return attachment_type in OUTCOME_BY_ATTACHMENT_TYPE

    def __iter__(self) -> Iterator[tuple[dict[str, object], int, int]]:
        try:
            jsonl_file_handle = io.open(self._jsonl_file_path, "rb")
        except OSError:
            # FileNotFoundError is an OSError; an unopenable file counts as
            # fully drained at the starting position.
            self.final_line_number = self._start_line_number
            self.final_byte_offset = self._start_offset
            self.drained = True
            return
        with jsonl_file_handle:
            if self._start_offset > 0:
                jsonl_file_handle.seek(self._start_offset)
            current_line_number = self._start_line_number
            current_byte_offset = jsonl_file_handle.tell()
            self.final_byte_offset = current_byte_offset
            while True:
                raw_bytes = jsonl_file_handle.readline()
                if not raw_bytes:
                    break
                try:
                    parsed_record = json.loads(raw_bytes.decode("utf-8"))
                except (UnicodeDecodeError, json.JSONDecodeError):
                    if not raw_bytes.endswith(b"\n"):
                        # Unterminated and unparsable: a partial write at
                        # the end of the file. Stop without consuming it.
                        break
                    parsed_record = None
                current_line_number += 1
                current_byte_offset += len(raw_bytes)
                # Publish progress before yielding so callers that
                # checkpoint mid-iteration see the current position.
                self.final_line_number = current_line_number
                self.final_byte_offset = current_byte_offset
                if not self._is_supported_hook_attachment(parsed_record):
                    continue
                yield parsed_record, current_line_number, current_byte_offset
            self.final_line_number = current_line_number
            self.final_byte_offset = current_byte_offset
            self.drained = True
354
+
355
+
356
def iter_attachment_records_from_file(
    jsonl_file_path: str,
    start_offset: int,
    start_line_number: int = 0,
) -> AttachmentRecordIterator:
    """Build an ``AttachmentRecordIterator`` for one JSONL transcript.

    No file I/O happens here; the file is opened when the returned object
    is iterated. After iteration the object exposes ``final_line_number``
    (all lines consumed, including malformed and non-attachment ones),
    ``final_byte_offset`` and ``drained``.
    """
    record_iterator = AttachmentRecordIterator(
        jsonl_file_path,
        start_offset,
        start_line_number,
    )
    return record_iterator
374
+
375
+
376
def load_offsets(state_file_path: str) -> dict[str, dict[str, int]]:
    """Load per-file ``{byte_offset, line_number}`` entries from disk.

    Returns an empty dict when the state file is missing, unreadable, not
    valid UTF-8, not valid JSON, or not a JSON object. Returning empty
    makes the caller re-extract from the start of each file.

    Entries that are not a dict with integer values under both
    ``BYTE_OFFSET_KEY`` and ``LINE_NUMBER_KEY`` (for example legacy
    bare-integer entries) are dropped, and one migration warning line is
    appended to the warning log for the whole load.
    """
    try:
        with io.open(state_file_path, "r", encoding="utf-8") as state_file_handle:
            loaded_content = json.load(state_file_handle)
    except (OSError, ValueError):
        # OSError: file absent or unreadable; opening directly avoids the
        # race between an existence check and the open.
        # ValueError: covers json.JSONDecodeError and UnicodeDecodeError,
        # so a corrupt state file cannot crash the run.
        return {}
    if not isinstance(loaded_content, dict):
        return {}

    migrated_offsets: dict[str, dict[str, int]] = {}
    has_legacy_entries = False
    for each_path, each_entry in loaded_content.items():
        byte_offset_value = None
        line_number_value = None
        if isinstance(each_entry, dict):
            byte_offset_value = each_entry.get(BYTE_OFFSET_KEY)
            line_number_value = each_entry.get(LINE_NUMBER_KEY)
        if isinstance(byte_offset_value, int) and isinstance(line_number_value, int):
            migrated_offsets[str(each_path)] = {
                BYTE_OFFSET_KEY: byte_offset_value,
                LINE_NUMBER_KEY: line_number_value,
            }
        else:
            has_legacy_entries = True
    if has_legacy_entries:
        _append_legacy_offsets_warning_line()
    return migrated_offsets
412
+
413
+
414
def save_offsets(
    state_file_path: str,
    offset_by_jsonl_path: dict[str, dict[str, int]],
) -> None:
    """Write the offsets mapping to ``state_file_path`` via a temp file.

    The JSON is written to a temporary file in the same directory,
    flushed and fsynced, then moved over the target with ``os.replace``.
    The parent directory is created when missing. On any failure the
    temporary file is removed (best effort) and the exception re-raised.
    """
    target_directory = os.path.dirname(state_file_path)
    if target_directory:
        os.makedirs(target_directory, exist_ok=True)
    scratch_handle = tempfile.NamedTemporaryFile(
        mode="w",
        encoding="utf-8",
        dir=target_directory or None,
        delete=False,
    )
    scratch_path = scratch_handle.name
    try:
        # Leaving the ``with`` closes the handle on success and on error,
        # which must happen before the file is replaced or unlinked.
        with scratch_handle:
            json.dump(
                offset_by_jsonl_path,
                scratch_handle,
                indent=OFFSETS_JSON_INDENT,
                sort_keys=True,
            )
            scratch_handle.flush()
            os.fsync(scratch_handle.fileno())
        os.replace(scratch_path, state_file_path)
    except Exception:
        with contextlib.suppress(OSError):
            os.unlink(scratch_path)
        raise
448
+
449
+
450
@contextlib.contextmanager
def _acquire_offsets_lock(state_file_path: str) -> Iterator[None]:
    """Hold the advisory offsets lock for the duration of the ``with`` body.

    The lock lives on a sidecar file at ``state_file_path + ".lock"``,
    whose parent directory is created when missing. Locking and unlocking
    are delegated to ``_lock_file_handle_blocking`` and
    ``_unlock_file_handle``. The lock is released and the handle closed
    even when the body raises.

    The sidecar file is never deleted: it is reused on every run, and
    unlinking it after release would open a race with another process
    that still has it open.
    """
    sidecar_path = state_file_path + ".lock"
    sidecar_directory = os.path.dirname(sidecar_path)
    if sidecar_directory:
        os.makedirs(sidecar_directory, exist_ok=True)
    with io.open(sidecar_path, "a+", encoding="utf-8") as sidecar_handle:
        _lock_file_handle_blocking(sidecar_handle)
        try:
            yield
        finally:
            _unlock_file_handle(sidecar_handle)
480
+
481
+
482
def _lock_file_handle_blocking(lock_file_handle: IO[str]) -> None:
    """Take an exclusive lock on the handle, retrying a bounded number of times.

    Uses a non-blocking ``msvcrt.locking`` (``LK_NBLCK``, one byte) when
    ``msvcrt`` is available, otherwise a non-blocking ``fcntl.flock``
    (``LOCK_EX | LOCK_NB``); when neither module is available the call is
    a no-op. Each attempt that fails with a contention errno is followed
    by ``time.sleep(LOCK_RETRY_SLEEP_SECONDS)``, so the sleep is the only
    pacing between attempts.

    Raises:
        OSError: immediately for a non-contention errno, or with
            "offsets lock retry budget exhausted" once
            ``LOCK_MAXIMUM_RETRY_COUNT`` attempts have all hit contention.
    """
    if msvcrt is not None:

        def attempt_lock() -> None:
            msvcrt.locking(lock_file_handle.fileno(), msvcrt.LK_NBLCK, 1)

        contention_errnos: tuple[int, ...] = (errno.EACCES,)
        exhausted_errno = errno.EACCES
    elif fcntl is not None:

        def attempt_lock() -> None:
            fcntl.flock(lock_file_handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)

        contention_errnos = (errno.EAGAIN, errno.EWOULDBLOCK)
        exhausted_errno = errno.EWOULDBLOCK
    else:
        return

    for _attempt_number in range(LOCK_MAXIMUM_RETRY_COUNT):
        try:
            attempt_lock()
        except OSError as lock_exception:
            if lock_exception.errno not in contention_errnos:
                raise
        else:
            return
        time.sleep(LOCK_RETRY_SLEEP_SECONDS)
    raise OSError(
        exhausted_errno,
        "offsets lock retry budget exhausted",
    )
526
+
527
+
528
def _unlock_file_handle(lock_file_handle: IO[str]) -> None:
    """Release the lock taken by ``_lock_file_handle_blocking``.

    With ``msvcrt`` available, one byte is unlocked and any ``OSError``
    from the unlock is ignored. Otherwise, with ``fcntl`` available, the
    ``flock`` is released and errors propagate. With neither module the
    call does nothing.
    """
    if msvcrt is not None:
        with contextlib.suppress(OSError):
            msvcrt.locking(lock_file_handle.fileno(), msvcrt.LK_UNLCK, 1)
    elif fcntl is not None:
        fcntl.flock(lock_file_handle.fileno(), fcntl.LOCK_UN)
540
+
541
+
542
def is_operational_error(exception_instance: BaseException) -> bool:
    """Return True when an exception should trigger the offline fallback.

    An exception qualifies when its class, or any class it inherits from,
    is named ``OperationalError``, ``InterfaceError`` or ``TimeoutError``,
    or when it is one of this module's missing-environment errors.

    The whole inheritance chain is inspected, not just the concrete
    class name, because database drivers raise subclasses of
    ``OperationalError`` (a connect timeout, for instance) whose own
    ``__name__`` differs. Matching by name avoids needing the driver
    module at runtime.
    """
    offline_class_names = {"OperationalError", "InterfaceError", "TimeoutError"}
    if any(
        each_class.__name__ in offline_class_names
        for each_class in type(exception_instance).__mro__
    ):
        return True
    return isinstance(
        exception_instance,
        (MissingNeonDatabaseUrlError, MissingPsycopgDependencyError),
    )
551
+
552
+
553
def connect_to_neon() -> object:
    """Connect to Neon using the URL held in the configured environment variable.

    The URL is read from ``NEON_DATABASE_URL_ENVIRONMENT_VARIABLE`` and
    stripped of surrounding whitespace; the connection is opened with
    ``connect_timeout=CONNECT_TIMEOUT_SECONDS``.

    Raises:
        MissingPsycopgDependencyError: psycopg could not be imported.
        MissingNeonDatabaseUrlError: the variable is unset or blank.

    ``is_operational_error`` treats both as offline conditions.
    """
    if psycopg is None:
        raise MissingPsycopgDependencyError(MISSING_PSYCOPG_WARNING_LABEL)
    configured_url = os.environ.get(NEON_DATABASE_URL_ENVIRONMENT_VARIABLE)
    if configured_url is not None:
        configured_url = configured_url.strip()
    if not configured_url:
        raise MissingNeonDatabaseUrlError(MISSING_NEON_DATABASE_URL_WARNING_LABEL)
    return psycopg.connect(configured_url, connect_timeout=CONNECT_TIMEOUT_SECONDS)
568
+
569
+
570
def insert_rows_batch(
    neon_connection: object,
    all_rows: Sequence[dict[str, object]],
) -> None:
    """Send one batch of row dicts through ``HOOK_EVENTS_INSERT_SQL`` and commit.

    An empty batch returns immediately without touching the connection.
    The rows are copied into a list before being handed to
    ``executemany``.
    """
    if all_rows:
        with neon_connection.cursor() as neon_cursor:
            neon_cursor.executemany(HOOK_EVENTS_INSERT_SQL, list(all_rows))
        neon_connection.commit()
580
+
581
+
582
def _append_offline_warning_line(exception_instance: BaseException) -> None:
    """Record an offline event in ``OFFLINE_WARNING_LOG``; never raise ``OSError``.

    One tab-separated line is appended: a UTC ISO-8601 timestamp, the
    literal ``offline``, and the exception's class name. The log's parent
    directory is created when missing. Any ``OSError`` (read-only
    filesystem, permission denied, ...) is swallowed so a failed log
    write cannot change what the caller returns.
    """
    with contextlib.suppress(OSError):
        log_directory = os.path.dirname(OFFLINE_WARNING_LOG)
        if log_directory:
            os.makedirs(log_directory, exist_ok=True)
        timestamp_iso = datetime.datetime.now(datetime.timezone.utc).isoformat()
        exception_class_name = type(exception_instance).__name__
        with io.open(OFFLINE_WARNING_LOG, "a", encoding="utf-8") as log_handle:
            log_handle.write(
                f"{timestamp_iso}\toffline\t{exception_class_name}" + "\n"
            )
602
+
603
+
604
def _append_legacy_offsets_warning_line() -> None:
    """Record a legacy-offsets migration notice in ``OFFLINE_WARNING_LOG``.

    One tab-separated line is appended: a UTC ISO-8601 timestamp, the
    literal ``migration``, and ``LEGACY_OFFSETS_FORMAT_WARNING_LABEL``.
    The log's parent directory is created when missing, and any
    ``OSError`` is swallowed.
    """
    with contextlib.suppress(OSError):
        log_directory = os.path.dirname(OFFLINE_WARNING_LOG)
        if log_directory:
            os.makedirs(log_directory, exist_ok=True)
        timestamp_iso = datetime.datetime.now(datetime.timezone.utc).isoformat()
        with io.open(OFFLINE_WARNING_LOG, "a", encoding="utf-8") as log_handle:
            log_handle.write(
                f"{timestamp_iso}\tmigration\t{LEGACY_OFFSETS_FORMAT_WARNING_LABEL}"
                + "\n"
            )
617
+
618
+
619
def run_full_extraction(
    transcripts_root: str,
    state_file_path: str,
    full_rebuild: bool,
) -> int:
    """Run one extraction pass over every transcript under ``transcripts_root``.

    For each discovered JSONL file, reading resumes at the saved byte
    offset and line number (zero when the file has no entry). Rows are
    buffered and inserted in batches of ``INSERT_BATCH_SIZE``. Offsets
    are persisted after every full batch and again once the file has
    been drained, so an interrupted run resumes close to where it
    stopped.

    The offsets lock is only held inside ``_load_starting_offsets`` and
    ``_merge_and_save_offsets_under_lock``; database inserts and file
    iteration happen outside it.

    Returns:
        ``EXIT_CODE_EXTRACTOR_ENVIRONMENT_MISSING`` when connecting fails
        with an error ``is_operational_error`` accepts (a warning line is
        logged first), otherwise ``EXIT_CODE_SUCCESS``.

    Raises:
        Any connect-time exception that is not an offline condition, and
        any exception raised while extracting; the connection is closed
        either way.
    """
    try:
        neon_connection = connect_to_neon()
    except Exception as connect_exception:
        if not is_operational_error(connect_exception):
            raise
        _append_offline_warning_line(connect_exception)
        return EXIT_CODE_EXTRACTOR_ENVIRONMENT_MISSING

    try:
        resume_entry_by_path = _load_starting_offsets(
            neon_connection=neon_connection,
            state_file_path=state_file_path,
            full_rebuild=full_rebuild,
        )

        for jsonl_path in _discover_jsonl_files(transcripts_root):
            resume_entry = resume_entry_by_path.get(jsonl_path)
            if resume_entry is None:
                resume_offset = 0
                resume_line_number = 0
            else:
                resume_offset = resume_entry[BYTE_OFFSET_KEY]
                resume_line_number = resume_entry[LINE_NUMBER_KEY]

            pending_rows: list[dict[str, object]] = []
            record_iterator = iter_attachment_records_from_file(
                jsonl_path,
                start_offset=resume_offset,
                start_line_number=resume_line_number,
            )
            for record, line_number, offset_after_line in record_iterator:
                pending_rows.append(
                    build_row_from_attachment(
                        parsed_record=record,
                        source_jsonl_path=jsonl_path,
                        source_line_number=line_number,
                    )
                )
                if len(pending_rows) < INSERT_BATCH_SIZE:
                    continue
                # Full batch: write the rows first, then checkpoint the
                # position just past the last row that was written.
                insert_rows_batch(neon_connection, pending_rows)
                pending_rows.clear()
                checkpoint_entry = {
                    BYTE_OFFSET_KEY: offset_after_line,
                    LINE_NUMBER_KEY: record_iterator.final_line_number,
                }
                _merge_and_save_offsets_under_lock(
                    state_file_path=state_file_path,
                    pending_updates={jsonl_path: checkpoint_entry},
                )

            if pending_rows:
                insert_rows_batch(neon_connection, pending_rows)
            if record_iterator.drained:
                # Includes trailing lines that produced no rows.
                drained_entry = {
                    BYTE_OFFSET_KEY: record_iterator.final_byte_offset,
                    LINE_NUMBER_KEY: record_iterator.final_line_number,
                }
                _merge_and_save_offsets_under_lock(
                    state_file_path=state_file_path,
                    pending_updates={jsonl_path: drained_entry},
                )
    finally:
        # Best-effort close; a failure here must not mask the real outcome.
        with contextlib.suppress(Exception):
            neon_connection.close()
    return EXIT_CODE_SUCCESS
709
+
710
+
711
def _load_starting_offsets(
    neon_connection: object,
    state_file_path: str,
    full_rebuild: bool,
) -> dict[str, dict[str, int]]:
    """Return the offsets a run starts from, holding the offsets lock.

    An incremental run returns what ``load_offsets`` reads from disk. A
    full rebuild executes ``HOOK_EVENTS_TRUNCATE_SQL``, commits, saves an
    empty offsets file and returns an empty dict.
    """
    with _acquire_offsets_lock(state_file_path):
        if not full_rebuild:
            return load_offsets(state_file_path)
        with neon_connection.cursor() as neon_cursor:
            neon_cursor.execute(HOOK_EVENTS_TRUNCATE_SQL)
        neon_connection.commit()
        save_offsets(state_file_path, {})
        return {}
724
+
725
+
726
def _merge_and_save_offsets_under_lock(
    state_file_path: str,
    pending_updates: dict[str, dict[str, int]],
) -> None:
    """Fold ``pending_updates`` into the on-disk offsets while holding the lock.

    The file is re-read inside the lock so that entries written in the
    meantime are merged (per-field maximum) and not overwritten.
    """
    with _acquire_offsets_lock(state_file_path):
        save_offsets(
            state_file_path,
            _merge_offsets_taking_max(load_offsets(state_file_path), pending_updates),
        )
736
+
737
+
738
def _merge_offsets_taking_max(
    disk_offsets: dict[str, dict[str, int]],
    pending_updates: dict[str, dict[str, int]],
) -> dict[str, dict[str, int]]:
    """Combine two offset maps, keeping the larger value of each field.

    Paths present only on disk are carried over unchanged. Paths present
    only in ``pending_updates`` are copied in. Paths present in both get
    the maximum byte offset and the maximum line number, each chosen
    independently. Neither input mapping is modified.
    """
    combined: dict[str, dict[str, int]] = {**disk_offsets}
    for jsonl_path, pending_entry in pending_updates.items():
        current_entry = combined.get(jsonl_path)
        if current_entry is None:
            combined[jsonl_path] = dict(pending_entry)
        else:
            combined[jsonl_path] = {
                field_key: max(current_entry[field_key], pending_entry[field_key])
                for field_key in (BYTE_OFFSET_KEY, LINE_NUMBER_KEY)
            }
    return combined
759
+
760
+
761
def _discover_jsonl_files(transcripts_root: str) -> list[str]:
    """List every transcript path under ``transcripts_root``, sorted and unique.

    Two globs are combined: a recursive ``**`` search and a plain
    top-level search, both using ``JSONL_FILE_GLOB``. Duplicates between
    them collapse because results are collected in a set.
    """
    glob_requests = (
        (os.path.join(transcripts_root, "**", JSONL_FILE_GLOB), True),
        (os.path.join(transcripts_root, JSONL_FILE_GLOB), False),
    )
    unique_paths: set[str] = set()
    for glob_pattern, is_recursive in glob_requests:
        unique_paths.update(glob.glob(glob_pattern, recursive=is_recursive))
    return sorted(unique_paths)
767
+
768
+
769
def run_summary() -> int:
    """Run ``TOP_BLOCKERS_LAST_24_HOURS_SQL`` and print the result.

    Returns:
        ``EXIT_CODE_EXTRACTOR_ENVIRONMENT_MISSING`` when ``connect_to_neon``
        raises an exception that ``is_operational_error`` recognises (an
        offline warning line is appended first); ``EXIT_CODE_SUCCESS``
        otherwise, whether or not any rows came back.

    Raises:
        Exception: any connection failure that is not an operational error
            is re-raised unchanged.
    """
    try:
        connection = connect_to_neon()
    except Exception as connect_error:
        if not is_operational_error(connect_error):
            raise
        _append_offline_warning_line(connect_error)
        return EXIT_CODE_EXTRACTOR_ENVIRONMENT_MISSING

    try:
        with connection.cursor() as cursor:
            cursor.execute(TOP_BLOCKERS_LAST_24_HOURS_SQL)
            fetched_rows = cursor.fetchall()
    finally:
        # Closing is best-effort: a failure here must not mask the query
        # outcome (or an exception already propagating from the query).
        try:
            connection.close()
        except Exception:
            pass

    if fetched_rows:
        _print_summary_table(fetched_rows)
    else:
        print(SUMMARY_NO_NEW_BLOCKS_MESSAGE)
    return EXIT_CODE_SUCCESS
792
+
793
+
794
def _print_summary_table(all_result_rows: Sequence[tuple[object, ...]]) -> None:
    """Print the summary rows as a left-aligned, padded text table.

    ``SUMMARY_COLUMN_HEADINGS`` is printed as the first line, followed by
    one line per result row. Columns are padded to the widest cell in that
    column and joined with ``SUMMARY_TABLE_COLUMN_GAP``.

    Args:
        all_result_rows: Rows that each unpack into exactly four values:
            hook name, hook category, block count, and a command preview.
            The preview may be ``None`` (or otherwise falsy), in which case
            ``EMPTY_STRING`` is shown.

    Raises:
        ValueError: if a row does not contain exactly four values.
    """
    all_preview_rows: list[tuple[str, str, str, str]] = []
    for each_result_row in all_result_rows:
        (
            hook_name,
            hook_category,
            block_count,
            top_command_preview,
        ) = each_result_row
        # Coerce to str *before* slicing, like the other three columns. The
        # rows are typed as tuple[object, ...], so a non-str preview would
        # otherwise fail at the slice, at ljust(), or at the final join.
        truncated_preview = str(top_command_preview or EMPTY_STRING)[
            :TOP_BLOCKED_COMMAND_PREVIEW_MAX_CHARACTERS
        ]
        all_preview_rows.append(
            (
                str(hook_name),
                str(hook_category),
                str(block_count),
                truncated_preview,
            ),
        )
    all_display_rows = [SUMMARY_COLUMN_HEADINGS, *all_preview_rows]
    # Width of a column = its longest cell, the heading included.
    all_column_widths = [
        max(len(each_row[each_column_index]) for each_row in all_display_rows)
        for each_column_index in range(len(SUMMARY_COLUMN_HEADINGS))
    ]
    for each_display_row in all_display_rows:
        formatted_columns = [
            each_cell.ljust(all_column_widths[each_column_index])
            for each_column_index, each_cell in enumerate(each_display_row)
        ]
        print(SUMMARY_TABLE_COLUMN_GAP.join(formatted_columns))
825
+
826
+
827
def run_query(named_query: str) -> int:
    """Run the SQL file ``queries/<named_query>`` and print its rows.

    The name must fully match ``QUERY_NAME_PATTERN`` before it is used to
    build a file path under the ``QUERIES_DIRECTORY_NAME`` directory that
    sits next to this module.

    Returns:
        ``EXIT_CODE_UNKNOWN_QUERY`` when the name is rejected by the pattern
        or no matching file exists (a message goes to stderr);
        ``EXIT_CODE_EXTRACTOR_ENVIRONMENT_MISSING`` when connecting fails
        with an error ``is_operational_error`` recognises;
        ``EXIT_CODE_SUCCESS`` otherwise.

    Raises:
        Exception: any connection failure that is not an operational error
            is re-raised unchanged.
    """
    if re.fullmatch(QUERY_NAME_PATTERN, named_query) is None:
        print(
            f"{INVALID_QUERY_NAME_MESSAGE_PREFIX}{named_query}",
            file=sys.stderr,
        )
        return EXIT_CODE_UNKNOWN_QUERY

    sql_path = (
        Path(__file__).resolve().parent
        / QUERIES_DIRECTORY_NAME
        / f"{named_query}{SQL_FILE_EXTENSION}"
    )
    if not sql_path.exists():
        print(f"{UNKNOWN_QUERY_MESSAGE_PREFIX}{named_query}", file=sys.stderr)
        return EXIT_CODE_UNKNOWN_QUERY
    sql_text = sql_path.read_text(encoding="utf-8")

    try:
        connection = connect_to_neon()
    except Exception as connect_error:
        if not is_operational_error(connect_error):
            raise
        _append_offline_warning_line(connect_error)
        return EXIT_CODE_EXTRACTOR_ENVIRONMENT_MISSING

    try:
        with connection.cursor() as cursor:
            cursor.execute(sql_text)
            fetched_rows = cursor.fetchall()
            # description may be None; treat that as "no columns".
            column_names = [
                column_description[0]
                for column_description in (cursor.description or [])
            ]
    finally:
        # Closing is best-effort and must not mask the query outcome.
        try:
            connection.close()
        except Exception:
            pass

    if not fetched_rows:
        print(QUERY_NO_ROWS_RETURNED_MESSAGE)
        return EXIT_CODE_SUCCESS

    print(SUMMARY_TABLE_COLUMN_GAP.join(column_names))
    for row in fetched_rows:
        print(SUMMARY_TABLE_COLUMN_GAP.join(map(str, row)))
    return EXIT_CODE_SUCCESS
872
+
873
+
874
def main() -> int:
    """Dispatch the hook-log extractor CLI and return its exit code.

    Flags are checked in this order; the first match wins:

    * ``FLAG_SUMMARY`` -- delegate to ``run_summary``.
    * ``FLAG_QUERY`` -- delegate to ``run_query`` with the argument that
      follows the flag, or ``DEFAULT_QUERY_FOR_SUMMARY`` when the flag is
      the last argument.
    * otherwise -- delegate to ``run_full_extraction`` over
      ``PROJECTS_TRANSCRIPT_ROOT`` using ``OFFSET_STATE_FILE``.
      ``FLAG_FULL_REBUILD`` requests a full rebuild, unless
      ``FLAG_INCREMENTAL`` is also present, in which case the incremental
      path is used. ``FLAG_INCREMENTAL`` alone selects the same path as
      passing no flags.
    """
    cli_arguments = sys.argv[1:]

    if FLAG_SUMMARY in cli_arguments:
        return run_summary()

    if FLAG_QUERY in cli_arguments:
        name_position = cli_arguments.index(FLAG_QUERY) + 1
        if name_position < len(cli_arguments):
            return run_query(cli_arguments[name_position])
        # The flag was given without a query name.
        return run_query(DEFAULT_QUERY_FOR_SUMMARY)

    # An explicit incremental request overrides a full-rebuild request.
    wants_full_rebuild = (
        FLAG_FULL_REBUILD in cli_arguments
        and FLAG_INCREMENTAL not in cli_arguments
    )
    return run_full_extraction(
        transcripts_root=PROJECTS_TRANSCRIPT_ROOT,
        state_file_path=OFFSET_STATE_FILE,
        full_rebuild=wants_full_rebuild,
    )
904
+
905
+
906
if __name__ == "__main__":
    # Use main()'s integer return value as the process exit status.
    raise SystemExit(main())