claude-dev-env 1.30.1 → 1.32.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/clean-coder.md +275 -111
- package/agents/code-quality-agent.md +196 -209
- package/bin/install.mjs +81 -0
- package/bin/install.test.mjs +158 -0
- package/bin/install_mypy_ini.mjs +51 -0
- package/bin/install_mypy_ini.test.mjs +121 -0
- package/commands/hook-log-extract.md +70 -0
- package/commands/hook-log-init.md +76 -0
- package/hooks/blocking/code_rules_enforcer.py +5 -3
- package/hooks/blocking/destructive_command_blocker.py +187 -0
- package/hooks/blocking/question_to_user_enforcer.py +140 -0
- package/hooks/blocking/test_code_rules_enforcer_file_global_constants.py +39 -0
- package/hooks/blocking/test_destructive_command_blocker.py +397 -0
- package/hooks/blocking/test_question_to_user_enforcer.py +163 -0
- package/hooks/blocking/test_windows_rmtree_blocker.py +148 -0
- package/hooks/blocking/windows_rmtree_blocker.py +106 -0
- package/hooks/config/hook_log_extractor_constants.py +234 -0
- package/hooks/config/messages.py +3 -0
- package/hooks/config/session_env_cleanup_constants.py +18 -0
- package/hooks/config/test_hook_log_extractor_constants.py +123 -0
- package/hooks/config/test_messages.py +5 -0
- package/hooks/config/test_session_env_cleanup_constants.py +55 -0
- package/hooks/diagnostic/hook_log_extractor.py +907 -0
- package/hooks/diagnostic/hook_log_init.py +202 -0
- package/hooks/diagnostic/hook_log_stop_wrapper.py +172 -0
- package/hooks/diagnostic/migrations/2026-04-25-drop-themes-hook-events.sql +3 -0
- package/hooks/diagnostic/migrations/README.md +77 -0
- package/hooks/diagnostic/queries/block_details_for_hook.sql +26 -0
- package/hooks/diagnostic/queries/blocks_by_category.sql +10 -0
- package/hooks/diagnostic/queries/blocks_by_tool.sql +9 -0
- package/hooks/diagnostic/queries/blocks_last_7_days.sql +11 -0
- package/hooks/diagnostic/queries/top_blockers_last_24_hours.sql +12 -0
- package/hooks/diagnostic/queries/top_blockers_overall.sql +12 -0
- package/hooks/diagnostic/requirements-hook-logs-dev.txt +2 -0
- package/hooks/diagnostic/requirements-hook-logs.txt +1 -0
- package/hooks/diagnostic/schema.sql +51 -0
- package/hooks/diagnostic/test_hook_log_extractor.py +1531 -0
- package/hooks/diagnostic/test_hook_log_init.py +227 -0
- package/hooks/diagnostic/test_hook_log_stop_wrapper.py +345 -0
- package/hooks/hooks.json +25 -0
- package/hooks/session/session_env_cleanup.py +129 -0
- package/hooks/session/test_session_env_cleanup.py +278 -0
- package/package.json +1 -1
- package/rules/ask-user-question-required.md +44 -0
- package/rules/windows-filesystem-safe.md +93 -0
- package/scripts/config/test_spec_implementer_prompt.py +0 -4
- package/scripts/test_groq_bugteam_spec.py +0 -8
- package/skills/bugteam/SKILL.md +15 -1
- package/skills/bugteam/SKILL_EVALS.md +1 -1
- package/skills/bugteam/reference/teardown-publish-permissions.md +1 -1
- package/skills/bugteam/scripts/README.md +17 -0
- package/skills/bugteam/scripts/bugteam_fix_hookspath.py +238 -0
- package/skills/bugteam/scripts/test_bugteam_fix_hookspath.py +267 -0
- package/skills/logifix/SKILL.md +69 -0
- package/skills/logifix/scripts/logifix.ps1 +205 -0
- package/skills/rebase/SKILL.md +157 -0
|
@@ -0,0 +1,907 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Extract hook-firing records from per-session JSONL transcripts into Neon.
|
|
3
|
+
|
|
4
|
+
Reads JSONL transcripts at ``PROJECTS_TRANSCRIPT_ROOT`` and ingests only
|
|
5
|
+
``attachment`` records whose inner ``attachment.type`` is one of the
|
|
6
|
+
five variants enumerated in ``OUTCOME_BY_ATTACHMENT_TYPE``
|
|
7
|
+
(``hook_success``, ``hook_blocking_error``, ``hook_non_blocking_error``,
|
|
8
|
+
``hook_system_message``, ``hook_additional_context``). Unknown
|
|
9
|
+
``hook_``-prefixed variants are skipped until
|
|
10
|
+
``OUTCOME_BY_ATTACHMENT_TYPE`` is extended to cover them. Each ingested
|
|
11
|
+
record becomes one row in the ``hook_events`` table. Idempotence is
|
|
12
|
+
enforced at the database layer via a UNIQUE constraint on
|
|
13
|
+
``(source_jsonl_path, source_line_number)`` combined with
|
|
14
|
+
``ON CONFLICT DO NOTHING``. Per-file byte offsets in
|
|
15
|
+
``OFFSET_STATE_FILE`` skip re-reading unchanged bytes.
|
|
16
|
+
|
|
17
|
+
Offline graceful behavior: ``psycopg.OperationalError`` or any
|
|
18
|
+
connect-time failure appends one ISO-8601 line to
|
|
19
|
+
``OFFLINE_WARNING_LOG`` and exits 0, so the Stop hook never blocks
|
|
20
|
+
session end on a missing network.
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import contextlib
|
|
26
|
+
import datetime
|
|
27
|
+
import errno
|
|
28
|
+
import glob
|
|
29
|
+
import io
|
|
30
|
+
import json
|
|
31
|
+
import os
|
|
32
|
+
import re
|
|
33
|
+
import sys
|
|
34
|
+
import tempfile
|
|
35
|
+
import time
|
|
36
|
+
from pathlib import Path
|
|
37
|
+
from typing import IO, Iterator, Optional, Sequence
|
|
38
|
+
|
|
39
|
+
if os.name == "nt":
|
|
40
|
+
try:
|
|
41
|
+
import msvcrt
|
|
42
|
+
except ImportError:
|
|
43
|
+
msvcrt = None
|
|
44
|
+
fcntl = None
|
|
45
|
+
else:
|
|
46
|
+
try:
|
|
47
|
+
import fcntl
|
|
48
|
+
except ImportError:
|
|
49
|
+
fcntl = None
|
|
50
|
+
msvcrt = None
|
|
51
|
+
|
|
52
|
+
if str(Path(__file__).resolve().parent.parent) not in sys.path:
|
|
53
|
+
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
|
54
|
+
|
|
55
|
+
try:
|
|
56
|
+
import psycopg
|
|
57
|
+
except ImportError:
|
|
58
|
+
psycopg = None
|
|
59
|
+
|
|
60
|
+
from config.hook_log_extractor_constants import (
|
|
61
|
+
ATTACHMENT_TYPE_HOOK_ADDITIONAL_CONTEXT,
|
|
62
|
+
ATTACHMENT_TYPE_HOOK_BLOCKING_ERROR,
|
|
63
|
+
ATTACHMENT_TYPE_HOOK_SUCCESS,
|
|
64
|
+
ATTACHMENT_TYPE_HOOK_SYSTEM_MESSAGE,
|
|
65
|
+
ATTACHMENT_TYPE_PREFIX,
|
|
66
|
+
BYTE_OFFSET_KEY,
|
|
67
|
+
CATEGORY_PATH_MINIMUM_PARTS,
|
|
68
|
+
COMMAND_EXCERPT_MAX_CHARACTERS,
|
|
69
|
+
CONNECT_TIMEOUT_SECONDS,
|
|
70
|
+
DEFAULT_QUERY_FOR_SUMMARY,
|
|
71
|
+
EMPTY_STRING,
|
|
72
|
+
EXIT_CODE_EXTRACTOR_ENVIRONMENT_MISSING,
|
|
73
|
+
EXIT_CODE_SUCCESS,
|
|
74
|
+
EXIT_CODE_UNKNOWN_QUERY,
|
|
75
|
+
FLAG_FULL_REBUILD,
|
|
76
|
+
FLAG_INCREMENTAL,
|
|
77
|
+
FLAG_QUERY,
|
|
78
|
+
FLAG_SUMMARY,
|
|
79
|
+
HOOK_CATEGORY_UNCATEGORIZED,
|
|
80
|
+
HOOK_EVENTS_INSERT_SQL,
|
|
81
|
+
HOOK_EVENTS_TRUNCATE_SQL,
|
|
82
|
+
HOOK_NAME_TOOL_SEPARATOR,
|
|
83
|
+
HOOKS_DIRECTORY_TOKEN,
|
|
84
|
+
INSERT_BATCH_SIZE,
|
|
85
|
+
INVALID_QUERY_NAME_MESSAGE_PREFIX,
|
|
86
|
+
JSONL_FILE_GLOB,
|
|
87
|
+
KNOWN_HOOK_CATEGORIES,
|
|
88
|
+
LEGACY_OFFSETS_FORMAT_WARNING_LABEL,
|
|
89
|
+
LINE_NUMBER_KEY,
|
|
90
|
+
LOCK_MAXIMUM_RETRY_COUNT,
|
|
91
|
+
LOCK_RETRY_SLEEP_SECONDS,
|
|
92
|
+
MISSING_NEON_DATABASE_URL_WARNING_LABEL,
|
|
93
|
+
MISSING_PSYCOPG_WARNING_LABEL,
|
|
94
|
+
NEON_DATABASE_URL_ENVIRONMENT_VARIABLE,
|
|
95
|
+
NEWLINE_JOINER,
|
|
96
|
+
OFFLINE_WARNING_LOG,
|
|
97
|
+
OFFSET_STATE_FILE,
|
|
98
|
+
OFFSETS_JSON_INDENT,
|
|
99
|
+
OUTCOME_BY_ATTACHMENT_TYPE,
|
|
100
|
+
PROJECTS_TRANSCRIPT_ROOT,
|
|
101
|
+
QUERIES_DIRECTORY_NAME,
|
|
102
|
+
QUERY_NAME_PATTERN,
|
|
103
|
+
QUERY_NO_ROWS_RETURNED_MESSAGE,
|
|
104
|
+
SCRIPT_PATH_PYTHON_PREFIXES,
|
|
105
|
+
SQL_FILE_EXTENSION,
|
|
106
|
+
STDERR_EXCERPT_MAX_CHARACTERS,
|
|
107
|
+
STDOUT_EXCERPT_MAX_CHARACTERS,
|
|
108
|
+
SUMMARY_COLUMN_HEADINGS,
|
|
109
|
+
SUMMARY_NO_NEW_BLOCKS_MESSAGE,
|
|
110
|
+
SUMMARY_TABLE_COLUMN_GAP,
|
|
111
|
+
TOP_BLOCKED_COMMAND_PREVIEW_MAX_CHARACTERS,
|
|
112
|
+
TOP_BLOCKERS_LAST_24_HOURS_SQL,
|
|
113
|
+
TOP_LEVEL_ATTACHMENT_TYPE,
|
|
114
|
+
UNKNOWN_QUERY_MESSAGE_PREFIX,
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class MissingNeonDatabaseUrlError(RuntimeError):
    """Signal that no usable Neon connection URL was found in the environment.

    Raised by ``connect_to_neon`` when the URL environment variable is
    unset or blank.
    """
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class MissingPsycopgDependencyError(RuntimeError):
    """Signal that the ``psycopg`` driver could not be imported.

    Raised by ``connect_to_neon`` when the module-level ``psycopg`` name
    is ``None`` because the import failed.
    """
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def derive_category(script_path: Optional[str]) -> str:
    """Classify a hook script by the directory that follows the hooks token.

    Backslashes are normalized to forward slashes, the first matching
    entry of ``SCRIPT_PATH_PYTHON_PREFIXES`` is stripped, and the path
    segment immediately after the last ``HOOKS_DIRECTORY_TOKEN`` is
    returned when it is listed in ``KNOWN_HOOK_CATEGORIES``. Every other
    case (empty path, token absent, too few segments, unknown directory)
    yields ``HOOK_CATEGORY_UNCATEGORIZED``.
    """
    if not script_path:
        return HOOK_CATEGORY_UNCATEGORIZED

    forward_slash_path = script_path.replace("\\", "/")
    leading_prefix = next(
        (
            each_prefix
            for each_prefix in SCRIPT_PATH_PYTHON_PREFIXES
            if forward_slash_path.startswith(each_prefix)
        ),
        None,
    )
    if leading_prefix is not None:
        forward_slash_path = forward_slash_path[len(leading_prefix) :]

    # rfind: the last occurrence of the token wins when it appears twice.
    token_position = forward_slash_path.rfind(HOOKS_DIRECTORY_TOKEN)
    if token_position < 0:
        return HOOK_CATEGORY_UNCATEGORIZED

    tail_after_token = forward_slash_path[
        token_position + len(HOOKS_DIRECTORY_TOKEN) :
    ]
    all_tail_segments = tail_after_token.split("/")
    if (
        len(all_tail_segments) >= CATEGORY_PATH_MINIMUM_PARTS
        and all_tail_segments[0] in KNOWN_HOOK_CATEGORIES
    ):
        return all_tail_segments[0]
    return HOOK_CATEGORY_UNCATEGORIZED
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def derive_outcome(attachment_type: str) -> str:
    """Look up the outcome label for an ``attachment.type`` value.

    The lookup is a plain subscript of ``OUTCOME_BY_ATTACHMENT_TYPE``, so
    an unmapped type propagates the mapping's lookup error rather than
    returning a default.
    """
    outcome_label = OUTCOME_BY_ATTACHMENT_TYPE[attachment_type]
    return outcome_label
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def extract_script_path(attachment: dict[str, object]) -> Optional[str]:
    """Pull the hook script path out of an attachment, when one is recorded.

    ``hook_success`` attachments carry the command at the top level;
    ``hook_blocking_error`` attachments carry it inside the nested
    ``blockingError`` mapping. Any other attachment type, or a nested
    block that is not a mapping, yields ``None``. The interpreter prefix
    is removed via ``_strip_python_prefix``.
    """
    kind_of_attachment = attachment.get("type", EMPTY_STRING)

    if kind_of_attachment == ATTACHMENT_TYPE_HOOK_SUCCESS:
        return _strip_python_prefix(attachment.get("command"))

    if kind_of_attachment != ATTACHMENT_TYPE_HOOK_BLOCKING_ERROR:
        return None

    nested_error_block = attachment.get("blockingError") or {}
    if not isinstance(nested_error_block, dict):
        return None
    return _strip_python_prefix(nested_error_block.get("command"))
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _strip_python_prefix(command_string: Optional[str]) -> Optional[str]:
|
|
168
|
+
if not command_string:
|
|
169
|
+
return None
|
|
170
|
+
for each_prefix in SCRIPT_PATH_PYTHON_PREFIXES:
|
|
171
|
+
if command_string.startswith(each_prefix):
|
|
172
|
+
return command_string[len(each_prefix) :]
|
|
173
|
+
return command_string
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def extract_tool_name(hook_name: Optional[str]) -> Optional[str]:
    """Return everything after the first separator in a hook name.

    Yields ``None`` when the hook name is missing or empty, or when it
    does not contain ``HOOK_NAME_TOOL_SEPARATOR``.
    """
    if not hook_name or HOOK_NAME_TOOL_SEPARATOR not in hook_name:
        return None
    # partition splits on the first occurrence only, like split(sep, 1).
    return hook_name.partition(HOOK_NAME_TOOL_SEPARATOR)[2]
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def truncate_command_excerpt(command_text: Optional[str]) -> Optional[str]:
    """Cap a command string at ``COMMAND_EXCERPT_MAX_CHARACTERS`` characters.

    ``None`` passes through unchanged.
    """
    return _truncate_to_length(
        text_or_none=command_text,
        maximum_length=COMMAND_EXCERPT_MAX_CHARACTERS,
    )
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def truncate_stdout_excerpt(stdout_text: Optional[str]) -> Optional[str]:
    """Cap a stdout string at ``STDOUT_EXCERPT_MAX_CHARACTERS`` characters.

    ``None`` passes through unchanged.
    """
    return _truncate_to_length(
        text_or_none=stdout_text,
        maximum_length=STDOUT_EXCERPT_MAX_CHARACTERS,
    )
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def truncate_stderr_excerpt(stderr_text: Optional[str]) -> Optional[str]:
    """Cap a stderr string at ``STDERR_EXCERPT_MAX_CHARACTERS`` characters.

    ``None`` passes through unchanged.
    """
    return _truncate_to_length(
        text_or_none=stderr_text,
        maximum_length=STDERR_EXCERPT_MAX_CHARACTERS,
    )
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _truncate_to_length(
|
|
201
|
+
text_or_none: Optional[str], maximum_length: int
|
|
202
|
+
) -> Optional[str]:
|
|
203
|
+
if text_or_none is None:
|
|
204
|
+
return None
|
|
205
|
+
if len(text_or_none) <= maximum_length:
|
|
206
|
+
return text_or_none
|
|
207
|
+
return text_or_none[:maximum_length]
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _normalize_content_to_text(content_or_none: object) -> Optional[str]:
|
|
211
|
+
if content_or_none is None:
|
|
212
|
+
return None
|
|
213
|
+
if isinstance(content_or_none, str):
|
|
214
|
+
return content_or_none
|
|
215
|
+
if isinstance(content_or_none, list):
|
|
216
|
+
all_string_items = [
|
|
217
|
+
each_entry for each_entry in content_or_none if isinstance(each_entry, str)
|
|
218
|
+
]
|
|
219
|
+
return NEWLINE_JOINER.join(all_string_items) if all_string_items else None
|
|
220
|
+
return str(content_or_none)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def build_row_from_attachment(
    parsed_record: dict[str, object],
    source_jsonl_path: str,
    source_line_number: int,
) -> dict[str, object]:
    """Translate one parsed transcript record into a ``hook_events`` row.

    Top-level record fields (timestamp, session, cwd, branch) are copied
    directly. Hook fields come from the nested ``attachment`` mapping,
    with two type-specific overrides:

    * blocking errors take the command from the nested ``blockingError``
      mapping, and use its ``blockingError`` message as stderr when that
      message is non-empty;
    * system-message and additional-context attachments take stdout from
      the normalized ``content`` value.

    Command, stdout and stderr are truncated to their excerpt budgets.
    ``source_jsonl_path`` and ``source_line_number`` are stored verbatim
    to identify where the row came from.
    """
    hook_attachment = parsed_record.get("attachment") or {}
    kind_of_attachment = hook_attachment.get("type", EMPTY_STRING)

    outcome = derive_outcome(kind_of_attachment)
    script_path = extract_script_path(hook_attachment)
    category = derive_category(script_path)
    hook_name = hook_attachment.get("hookName")
    tool_name = extract_tool_name(hook_name)

    # Defaults read straight from the attachment; overridden per type below.
    command_text = hook_attachment.get("command")
    stdout_text: Optional[str] = hook_attachment.get("stdout")
    stderr_text: Optional[str] = hook_attachment.get("stderr")

    if kind_of_attachment == ATTACHMENT_TYPE_HOOK_BLOCKING_ERROR:
        nested_error_block = hook_attachment.get("blockingError") or {}
        if isinstance(nested_error_block, dict):
            command_text = nested_error_block.get("command")
            # Keep the attachment-level stderr when the message is empty.
            stderr_text = nested_error_block.get("blockingError") or stderr_text
    elif kind_of_attachment in (
        ATTACHMENT_TYPE_HOOK_SYSTEM_MESSAGE,
        ATTACHMENT_TYPE_HOOK_ADDITIONAL_CONTEXT,
    ):
        stdout_text = _normalize_content_to_text(hook_attachment.get("content"))

    row: dict[str, object] = {}
    row["event_timestamp"] = parsed_record.get("timestamp")
    row["session_id"] = parsed_record.get("sessionId", EMPTY_STRING)
    row["cwd"] = parsed_record.get("cwd")
    row["git_branch"] = parsed_record.get("gitBranch")
    row["hook_event"] = hook_attachment.get("hookEvent", EMPTY_STRING)
    row["hook_name"] = hook_name or EMPTY_STRING
    row["hook_category"] = category
    row["script_path"] = script_path
    row["tool_name"] = tool_name
    row["tool_use_id"] = hook_attachment.get("toolUseID")
    row["outcome"] = outcome
    row["exit_code"] = hook_attachment.get("exitCode")
    row["duration_ms"] = hook_attachment.get("durationMs")
    row["command_excerpt"] = truncate_command_excerpt(command_text)
    row["stdout_excerpt"] = truncate_stdout_excerpt(stdout_text)
    row["stderr_excerpt"] = truncate_stderr_excerpt(stderr_text)
    row["source_jsonl_path"] = source_jsonl_path
    row["source_line_number"] = source_line_number
    return row
|
|
281
|
+
|
|
282
|
+
|
|
283
|
+
class AttachmentRecordIterator:
    """Iterates hook attachment records and tracks bytes actually consumed.

    Each iteration yields ``(parsed_record, line_number, byte_offset)``
    for every line that is a top-level attachment record whose inner
    ``attachment.type`` is a key of ``OUTCOME_BY_ATTACHMENT_TYPE``.

    ``final_line_number`` is the number of lines consumed from the file
    (malformed and non-attachment lines included), not the line number of
    the last yielded record. ``final_byte_offset`` is the byte position
    after the last consumed line, or ``start_offset`` when the file could
    not be opened. ``drained`` becomes True once iteration stops at the
    end of the consumable data; it stays False if the consumer abandons
    iteration early.

    A trailing line that has no terminating newline and cannot be decoded
    or parsed is treated as a record still being written: it is NOT
    consumed, so neither counter advances past it and the next run
    re-reads it from its first byte once the writer has finished it.
    """

    def __init__(
        self,
        jsonl_file_path: str,
        start_offset: int,
        start_line_number: int = 0,
    ) -> None:
        self._jsonl_file_path = jsonl_file_path
        self._start_offset = start_offset
        self._start_line_number = start_line_number
        self.final_line_number = start_line_number
        self.final_byte_offset = start_offset
        self.drained = False

    def __iter__(self) -> Iterator[tuple[dict[str, object], int, int]]:
        try:
            jsonl_file_handle = io.open(self._jsonl_file_path, "rb")
        except OSError:
            # Missing or unreadable file (FileNotFoundError is an OSError):
            # nothing was consumed, so report the starting position.
            self.final_line_number = self._start_line_number
            self.final_byte_offset = self._start_offset
            self.drained = True
            return
        with jsonl_file_handle:
            if self._start_offset > 0:
                jsonl_file_handle.seek(self._start_offset)
            current_line_number = self._start_line_number
            current_byte_offset = jsonl_file_handle.tell()
            self.final_byte_offset = current_byte_offset
            while True:
                raw_bytes = jsonl_file_handle.readline()
                if not raw_bytes:
                    break
                try:
                    parsed_record = json.loads(raw_bytes.decode("utf-8"))
                except (UnicodeDecodeError, json.JSONDecodeError):
                    if not raw_bytes.endswith(b"\n"):
                        # Unterminated and unparseable: most likely a line
                        # another process is still appending. Stop without
                        # consuming it so the offset never lands mid-record.
                        break
                    parsed_record = None
                current_line_number += 1
                current_byte_offset += len(raw_bytes)
                # Publish progress per line so a consumer that stops early
                # still sees how far the file was read.
                self.final_line_number = current_line_number
                self.final_byte_offset = current_byte_offset
                if not isinstance(parsed_record, dict):
                    continue
                if parsed_record.get("type") != TOP_LEVEL_ATTACHMENT_TYPE:
                    continue
                attachment_block = parsed_record.get("attachment") or {}
                if not isinstance(attachment_block, dict):
                    continue
                attachment_type = attachment_block.get("type", EMPTY_STRING)
                if not isinstance(attachment_type, str):
                    continue
                if not attachment_type.startswith(ATTACHMENT_TYPE_PREFIX):
                    continue
                if attachment_type not in OUTCOME_BY_ATTACHMENT_TYPE:
                    continue
                yield parsed_record, current_line_number, current_byte_offset
            self.final_line_number = current_line_number
            self.final_byte_offset = current_byte_offset
            self.drained = True
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def iter_attachment_records_from_file(
    jsonl_file_path: str,
    start_offset: int,
    start_line_number: int = 0,
) -> AttachmentRecordIterator:
    """Build an ``AttachmentRecordIterator`` for one JSONL transcript.

    The returned object is iterable and exposes ``final_line_number``
    once iteration has finished. That value counts every line consumed,
    including malformed and non-attachment lines, so it can exceed the
    line number of the last yielded record when other lines follow the
    final attachment.
    """
    record_iterator = AttachmentRecordIterator(
        jsonl_file_path, start_offset, start_line_number
    )
    return record_iterator
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def load_offsets(state_file_path: str) -> dict[str, dict[str, int]]:
    """Load per-file ``{byte_offset, line_number}`` entries from disk.

    Returns an empty dict when the state file is missing, is not valid
    UTF-8, contains malformed JSON, or holds a top-level value that is
    not an object. Entries that are not a mapping of two non-negative
    integers (legacy bare-integer entries, booleans, negative numbers)
    are dropped, so the caller re-extracts those files from the start;
    one warning line is appended for the whole load when any entry is
    dropped.
    """
    if not os.path.exists(state_file_path):
        return {}
    try:
        with io.open(state_file_path, "r", encoding="utf-8") as state_file_handle:
            loaded_content = json.load(state_file_handle)
    except FileNotFoundError:
        # The file vanished between the existence check and the open.
        return {}
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Corrupt state, whether bad JSON or bad bytes, means "no state".
        return {}
    if not isinstance(loaded_content, dict):
        return {}
    migrated_offsets: dict[str, dict[str, int]] = {}
    has_legacy_entries = False
    for each_path, each_entry in loaded_content.items():
        if isinstance(each_entry, dict):
            byte_offset_value = each_entry.get(BYTE_OFFSET_KEY)
            line_number_value = each_entry.get(LINE_NUMBER_KEY)
            if _is_valid_offset_value(byte_offset_value) and _is_valid_offset_value(
                line_number_value
            ):
                migrated_offsets[str(each_path)] = {
                    BYTE_OFFSET_KEY: byte_offset_value,
                    LINE_NUMBER_KEY: line_number_value,
                }
                continue
        has_legacy_entries = True
    if has_legacy_entries:
        _append_legacy_offsets_warning_line()
    return migrated_offsets


def _is_valid_offset_value(candidate_value: object) -> bool:
    """Return True for a non-negative ``int`` that is not a ``bool``."""
    # bool is a subclass of int, so it must be excluded explicitly.
    if isinstance(candidate_value, bool) or not isinstance(candidate_value, int):
        return False
    return candidate_value >= 0
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def save_offsets(
    state_file_path: str,
    offset_by_jsonl_path: dict[str, dict[str, int]],
) -> None:
    """Write the offsets mapping to disk by staging it and swapping it in.

    The JSON is written to a temporary file in the same directory as the
    target, flushed and fsynced, closed, and then moved over
    ``state_file_path`` with ``os.replace``. On any failure the staged
    file is removed (best effort) and the original exception re-raised.
    """
    target_directory = os.path.dirname(state_file_path)
    if target_directory:
        os.makedirs(target_directory, exist_ok=True)
    # delete=False: the staged file must outlive close() to be renamed.
    staging_handle = tempfile.NamedTemporaryFile(
        mode="w",
        encoding="utf-8",
        dir=target_directory or None,
        delete=False,
    )
    staging_path = staging_handle.name
    try:
        with staging_handle:
            json.dump(
                offset_by_jsonl_path,
                staging_handle,
                indent=OFFSETS_JSON_INDENT,
                sort_keys=True,
            )
            staging_handle.flush()
            os.fsync(staging_handle.fileno())
        os.replace(staging_path, state_file_path)
    except Exception:
        with contextlib.suppress(OSError):
            os.unlink(staging_path)
        raise
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
@contextlib.contextmanager
def _acquire_offsets_lock(state_file_path: str) -> Iterator[None]:
    """Guard an offsets read-modify-write with an advisory file lock.

    The lock is taken on a sidecar file at ``state_file_path + ".lock"``
    through ``_lock_file_handle_blocking`` and released through
    ``_unlock_file_handle`` when the ``with`` body exits, whether or not
    it raised. This keeps concurrent extractor runs from overwriting
    each other's offset updates.

    The sidecar is created on demand and is never removed here. The
    original author's rationale: unlinking after release would open a
    TOCTOU window on Windows, where another process may still hold the
    file open. Its presence in the state directory is therefore expected.
    """
    sidecar_path = state_file_path + ".lock"
    sidecar_directory = os.path.dirname(sidecar_path)
    if sidecar_directory:
        os.makedirs(sidecar_directory, exist_ok=True)
    # The with-statement closes the handle even when locking itself fails.
    with io.open(sidecar_path, "a+", encoding="utf-8") as sidecar_handle:
        _lock_file_handle_blocking(sidecar_handle)
        try:
            yield
        finally:
            _unlock_file_handle(sidecar_handle)
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
def _lock_file_handle_blocking(lock_file_handle: IO[str]) -> None:
    """Acquire an exclusive byte-range lock with a bounded retry budget.

    Both the Windows (``msvcrt.locking``) and POSIX (``fcntl.flock``)
    branches deliberately fail fast: Windows uses ``LK_NBLCK`` instead
    of ``LK_LOCK`` so the kernel never blocks ~10s internally, and
    POSIX pairs ``LOCK_EX`` with ``LOCK_NB`` so ``EWOULDBLOCK`` bubbles
    up immediately. The Python ``time.sleep(LOCK_RETRY_SLEEP_SECONDS)``
    between attempts is the sole pacing mechanism, keeping the total
    retry budget within the intended ``LOCK_MAXIMUM_RETRY_COUNT *
    LOCK_RETRY_SLEEP_SECONDS`` window so the caller never exceeds the
    30s Stop hook timeout under sustained contention.

    Returns without locking when neither ``msvcrt`` nor ``fcntl`` is
    available.

    Raises:
        OSError: when every attempt hit contention (``errno.EACCES`` on
            the ``msvcrt`` branch, ``errno.EWOULDBLOCK`` on the ``fcntl``
            branch), or immediately for any non-contention error.
    """
    if msvcrt is not None:
        lock_byte_count = 1
        for _each_attempt_index in range(LOCK_MAXIMUM_RETRY_COUNT):
            try:
                msvcrt.locking(
                    lock_file_handle.fileno(), msvcrt.LK_NBLCK, lock_byte_count
                )
                return
            except OSError as lock_exception:
                # Only EACCES is treated as contention; anything else is fatal.
                if lock_exception.errno != errno.EACCES:
                    raise
                time.sleep(LOCK_RETRY_SLEEP_SECONDS)
        # Loop finished without returning: every attempt was contended.
        raise OSError(
            errno.EACCES,
            "offsets lock retry budget exhausted",
        )
    if fcntl is not None:
        for _each_attempt_index in range(LOCK_MAXIMUM_RETRY_COUNT):
            try:
                fcntl.flock(
                    lock_file_handle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB
                )
                return
            except OSError as lock_exception:
                # EAGAIN/EWOULDBLOCK are treated as contention; others are fatal.
                if lock_exception.errno not in (errno.EAGAIN, errno.EWOULDBLOCK):
                    raise
                time.sleep(LOCK_RETRY_SLEEP_SECONDS)
        # Loop finished without returning: every attempt was contended.
        raise OSError(
            errno.EWOULDBLOCK,
            "offsets lock retry budget exhausted",
        )
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
def _unlock_file_handle(lock_file_handle: IO[str]) -> None:
    """Release the lock taken by ``_lock_file_handle_blocking``.

    On the ``msvcrt`` branch an ``OSError`` from the unlock call is
    ignored; on the ``fcntl`` branch errors propagate. Does nothing when
    neither module is available.
    """
    if msvcrt is not None:
        single_byte_range = 1
        with contextlib.suppress(OSError):
            msvcrt.locking(
                lock_file_handle.fileno(), msvcrt.LK_UNLCK, single_byte_range
            )
        return
    if fcntl is None:
        return
    fcntl.flock(lock_file_handle.fileno(), fcntl.LOCK_UN)
|
|
540
|
+
|
|
541
|
+
|
|
542
|
+
def is_operational_error(exception_instance: BaseException) -> bool:
    """Decide whether an exception should take the offline fallback path.

    True for the two missing-environment errors defined in this module,
    and for any exception whose class is named ``OperationalError``,
    ``InterfaceError`` or ``TimeoutError``. The match is on the class
    name only, so it does not depend on the driver being importable.
    """
    missing_environment_types = (
        MissingNeonDatabaseUrlError,
        MissingPsycopgDependencyError,
    )
    if isinstance(exception_instance, missing_environment_types):
        return True
    offline_class_names = {"OperationalError", "InterfaceError", "TimeoutError"}
    return type(exception_instance).__name__ in offline_class_names
|
|
551
|
+
|
|
552
|
+
|
|
553
|
+
def connect_to_neon() -> object:
    """Connect to Neon with the URL taken from the environment.

    Raises ``MissingPsycopgDependencyError`` when the ``psycopg`` import
    failed, and ``MissingNeonDatabaseUrlError`` when the URL environment
    variable is unset or blank after stripping whitespace. Both are
    classified as offline by ``is_operational_error``. The connection is
    opened with ``CONNECT_TIMEOUT_SECONDS`` as its connect timeout.
    """
    if psycopg is None:
        raise MissingPsycopgDependencyError(MISSING_PSYCOPG_WARNING_LABEL)
    # Unset and whitespace-only values both collapse to an empty string.
    database_url = (
        os.environ.get(NEON_DATABASE_URL_ENVIRONMENT_VARIABLE) or ""
    ).strip()
    if database_url:
        return psycopg.connect(
            database_url, connect_timeout=CONNECT_TIMEOUT_SECONDS
        )
    raise MissingNeonDatabaseUrlError(MISSING_NEON_DATABASE_URL_WARNING_LABEL)
|
|
568
|
+
|
|
569
|
+
|
|
570
|
+
def insert_rows_batch(
    neon_connection: object,
    all_rows: Sequence[dict[str, object]],
) -> None:
    """Write a batch of ``hook_events`` rows and commit.

    Runs ``HOOK_EVENTS_INSERT_SQL`` once per row via ``executemany`` and
    then commits the connection. An empty batch is a no-op: no cursor is
    opened and nothing is committed.
    """
    if all_rows:
        with neon_connection.cursor() as neon_cursor:
            neon_cursor.executemany(HOOK_EVENTS_INSERT_SQL, list(all_rows))
        neon_connection.commit()
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
def _append_offline_warning_line(exception_instance: BaseException) -> None:
    """Record an offline event in the warning log, ignoring disk errors.

    Appends one tab-separated line (UTC ISO-8601 timestamp, the word
    ``offline``, the exception class name) to ``OFFLINE_WARNING_LOG``,
    creating the parent directory when needed. Any ``OSError`` raised
    while doing so (read-only filesystem, missing parent, permission
    denied) is suppressed, so the caller's offline fallback can still
    return its exit code instead of failing on the log write.
    """
    with contextlib.suppress(OSError):
        log_directory = os.path.dirname(OFFLINE_WARNING_LOG)
        if log_directory:
            os.makedirs(log_directory, exist_ok=True)
        timestamp_iso = datetime.datetime.now(datetime.timezone.utc).isoformat()
        exception_class_name = type(exception_instance).__name__
        warning_line_text = f"{timestamp_iso}\toffline\t{exception_class_name}"
        with io.open(OFFLINE_WARNING_LOG, "a", encoding="utf-8") as log_handle:
            log_handle.write(warning_line_text + "\n")
|
|
602
|
+
|
|
603
|
+
|
|
604
|
+
def _append_legacy_offsets_warning_line() -> None:
    """Record a legacy-offsets migration notice in the warning log.

    Appends one tab-separated line (UTC ISO-8601 timestamp, the word
    ``migration``, ``LEGACY_OFFSETS_FORMAT_WARNING_LABEL``) to
    ``OFFLINE_WARNING_LOG``, creating the parent directory when needed.
    Any ``OSError`` raised while writing is suppressed.
    """
    with contextlib.suppress(OSError):
        log_directory = os.path.dirname(OFFLINE_WARNING_LOG)
        if log_directory:
            os.makedirs(log_directory, exist_ok=True)
        timestamp_iso = datetime.datetime.now(datetime.timezone.utc).isoformat()
        warning_line_text = (
            f"{timestamp_iso}\tmigration\t{LEGACY_OFFSETS_FORMAT_WARNING_LABEL}"
        )
        with io.open(OFFLINE_WARNING_LOG, "a", encoding="utf-8") as log_handle:
            log_handle.write(warning_line_text + "\n")
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
def run_full_extraction(
    transcripts_root: str,
    state_file_path: str,
    full_rebuild: bool,
) -> int:
    """Run one extraction pass over every JSONL file under ``transcripts_root``.

    The pass is incremental unless ``full_rebuild`` is set: each file is
    resumed from the byte offset and line number recorded for it in the
    offsets state file, or from zero when it has no entry.

    Locking: the offsets lock is taken only inside
    ``_load_starting_offsets`` and inside each
    ``_merge_and_save_offsets_under_lock`` call (which re-reads the
    on-disk offsets and merges this process's update with a per-file
    max). Database inserts and JSONL iteration happen outside the lock
    so concurrent Stop hooks do not serialize on each other's slow work.

    Args:
        transcripts_root: Directory searched for JSONL transcripts.
        state_file_path: Path of the offsets state file.
        full_rebuild: Forwarded to ``_load_starting_offsets``; when true
            every file is read from the beginning.

    Returns:
        ``EXIT_CODE_SUCCESS`` after a completed pass, or
        ``EXIT_CODE_EXTRACTOR_ENVIRONMENT_MISSING`` when the connection
        attempt fails with an operational error (offline fallback).

    Raises:
        Exception: Any non-operational connection error is re-raised, as
            is any error raised during the pass itself.
    """
    try:
        neon_connection = connect_to_neon()
    except Exception as connect_exception:
        if not is_operational_error(connect_exception):
            raise
        # Offline fallback: leave a marker line and exit quietly.
        _append_offline_warning_line(connect_exception)
        return EXIT_CODE_EXTRACTOR_ENVIRONMENT_MISSING

    try:
        known_offsets = _load_starting_offsets(
            neon_connection=neon_connection,
            state_file_path=state_file_path,
            full_rebuild=full_rebuild,
        )

        for jsonl_path in _discover_jsonl_files(transcripts_root):
            resume_entry = known_offsets.get(jsonl_path)
            if resume_entry is None:
                resume_byte_offset = 0
                resume_line_number = 0
            else:
                resume_byte_offset = resume_entry[BYTE_OFFSET_KEY]
                resume_line_number = resume_entry[LINE_NUMBER_KEY]

            pending_rows: list[dict[str, object]] = []
            record_stream = iter_attachment_records_from_file(
                jsonl_path,
                start_offset=resume_byte_offset,
                start_line_number=resume_line_number,
            )
            for parsed_record, line_number, byte_offset_after in record_stream:
                pending_rows.append(
                    build_row_from_attachment(
                        parsed_record=parsed_record,
                        source_jsonl_path=jsonl_path,
                        source_line_number=line_number,
                    )
                )
                if len(pending_rows) < INSERT_BATCH_SIZE:
                    continue
                # Batch is full: flush it, then checkpoint the offsets so
                # a later run resumes after the rows just inserted.
                insert_rows_batch(neon_connection, pending_rows)
                pending_rows.clear()
                mid_file_checkpoint = {
                    BYTE_OFFSET_KEY: byte_offset_after,
                    LINE_NUMBER_KEY: record_stream.final_line_number,
                }
                _merge_and_save_offsets_under_lock(
                    state_file_path=state_file_path,
                    pending_updates={jsonl_path: mid_file_checkpoint},
                )

            # Flush whatever is left of the final, partially filled batch.
            if pending_rows:
                insert_rows_batch(neon_connection, pending_rows)
            # Record the end-of-file position only when the iterator
            # reports that it consumed the file completely.
            if record_stream.drained:
                end_of_file_checkpoint = {
                    BYTE_OFFSET_KEY: record_stream.final_byte_offset,
                    LINE_NUMBER_KEY: record_stream.final_line_number,
                }
                _merge_and_save_offsets_under_lock(
                    state_file_path=state_file_path,
                    pending_updates={jsonl_path: end_of_file_checkpoint},
                )
    finally:
        # Closing is best-effort; a failure here must not mask the
        # outcome of the pass.
        try:
            neon_connection.close()
        except Exception:
            pass
    return EXIT_CODE_SUCCESS
|
|
709
|
+
|
|
710
|
+
|
|
711
|
+
def _load_starting_offsets(
    neon_connection: object,
    state_file_path: str,
    full_rebuild: bool,
) -> dict[str, dict[str, int]]:
    """Return the per-file starting offsets for an extraction pass.

    Runs entirely under the offsets lock for ``state_file_path``.

    Args:
        neon_connection: Open database connection; used only when
            ``full_rebuild`` is true.
        state_file_path: Path of the offsets state file.
        full_rebuild: When true, ``HOOK_EVENTS_TRUNCATE_SQL`` is executed
            and committed, the state file is overwritten with an empty
            mapping, and an empty mapping is returned.

    Returns:
        The offsets loaded from the state file, or ``{}`` on full rebuild.
    """
    with _acquire_offsets_lock(state_file_path):
        if not full_rebuild:
            return load_offsets(state_file_path)
        # Full rebuild: wipe the table and the saved offsets together
        # while the lock is still held.
        with neon_connection.cursor() as truncate_cursor:
            truncate_cursor.execute(HOOK_EVENTS_TRUNCATE_SQL)
        neon_connection.commit()
        save_offsets(state_file_path, {})
        return {}
|
|
724
|
+
|
|
725
|
+
|
|
726
|
+
def _merge_and_save_offsets_under_lock(
    state_file_path: str,
    pending_updates: dict[str, dict[str, int]],
) -> None:
    """Merge ``pending_updates`` into the on-disk offsets and save them.

    While holding the offsets lock, the state file is re-read, combined
    with ``pending_updates`` through ``_merge_offsets_taking_max``, and
    written back.

    Args:
        state_file_path: Path of the offsets state file.
        pending_updates: Offsets produced by this process, keyed by
            JSONL path.
    """
    with _acquire_offsets_lock(state_file_path):
        # Re-read inside the lock so the merge sees the latest file.
        save_offsets(
            state_file_path,
            _merge_offsets_taking_max(
                load_offsets(state_file_path),
                pending_updates,
            ),
        )
|
|
736
|
+
|
|
737
|
+
|
|
738
|
+
def _merge_offsets_taking_max(
    disk_offsets: dict[str, dict[str, int]],
    pending_updates: dict[str, dict[str, int]],
) -> dict[str, dict[str, int]]:
    """Combine two offset mappings, keeping the larger value per file.

    Args:
        disk_offsets: Offsets currently stored on disk, keyed by path.
        pending_updates: Offsets to fold in, keyed by path.

    Returns:
        A new mapping. Paths present only in ``pending_updates`` get a
        copy of the pending entry. Paths present in both get a fresh
        entry holding the maximum byte offset and the maximum line
        number of the two. Paths present only in ``disk_offsets`` keep
        their existing entry object. Neither input mapping is modified.
    """
    tracked_keys = (BYTE_OFFSET_KEY, LINE_NUMBER_KEY)
    merged: dict[str, dict[str, int]] = dict(disk_offsets)
    for path, pending_entry in pending_updates.items():
        current_entry = merged.get(path)
        if current_entry is not None:
            merged[path] = {
                key: max(current_entry[key], pending_entry[key])
                for key in tracked_keys
            }
        else:
            merged[path] = dict(pending_entry)
    return merged
|
|
759
|
+
|
|
760
|
+
|
|
761
|
+
def _discover_jsonl_files(transcripts_root: str) -> list[str]:
    """Return the sorted, de-duplicated JSONL paths under ``transcripts_root``.

    Two globs are combined: a recursive one through ``**`` and a
    top-level one directly inside ``transcripts_root``. Both use
    ``JSONL_FILE_GLOB`` as the file pattern.

    Args:
        transcripts_root: Directory to search.

    Returns:
        Matching paths in ascending string order, each listed once.
    """
    nested_matches = glob.glob(
        os.path.join(transcripts_root, "**", JSONL_FILE_GLOB),
        recursive=True,
    )
    top_level_matches = glob.glob(os.path.join(transcripts_root, JSONL_FILE_GLOB))
    # A set removes paths matched by both patterns.
    return sorted({*nested_matches, *top_level_matches})
|
|
767
|
+
|
|
768
|
+
|
|
769
|
+
def run_summary() -> int:
    """Print the top-blockers summary and return a process exit code.

    Runs ``TOP_BLOCKERS_LAST_24_HOURS_SQL`` and prints the rows as a
    table, or prints ``SUMMARY_NO_NEW_BLOCKS_MESSAGE`` when the query
    returns nothing.

    Returns:
        ``EXIT_CODE_SUCCESS`` once output has been printed, or
        ``EXIT_CODE_EXTRACTOR_ENVIRONMENT_MISSING`` when the connection
        attempt fails with an operational error.

    Raises:
        Exception: Non-operational connection errors and query errors
            propagate to the caller.
    """
    try:
        neon_connection = connect_to_neon()
    except Exception as connect_exception:
        if not is_operational_error(connect_exception):
            raise
        _append_offline_warning_line(connect_exception)
        return EXIT_CODE_EXTRACTOR_ENVIRONMENT_MISSING

    try:
        with neon_connection.cursor() as summary_cursor:
            summary_cursor.execute(TOP_BLOCKERS_LAST_24_HOURS_SQL)
            blocker_rows = summary_cursor.fetchall()
    finally:
        # Closing is best-effort; never let it mask a query error.
        try:
            neon_connection.close()
        except Exception:
            pass

    if blocker_rows:
        _print_summary_table(blocker_rows)
    else:
        print(SUMMARY_NO_NEW_BLOCKS_MESSAGE)
    return EXIT_CODE_SUCCESS
|
|
792
|
+
|
|
793
|
+
|
|
794
|
+
def _print_summary_table(all_result_rows: Sequence[tuple[object, ...]]) -> None:
    """Print the summary rows as a left-aligned, column-padded table.

    Each row is unpacked into four values: hook name, hook category,
    block count and a command preview. The first three are converted
    with ``str``. The preview falls back to ``EMPTY_STRING`` when falsy
    and is cut to ``TOP_BLOCKED_COMMAND_PREVIEW_MAX_CHARACTERS``.
    ``SUMMARY_COLUMN_HEADINGS`` is printed as the first line and columns
    are joined with ``SUMMARY_TABLE_COLUMN_GAP``.

    Args:
        all_result_rows: Four-element rows to display.
    """
    display_rows = [SUMMARY_COLUMN_HEADINGS]
    for result_row in all_result_rows:
        hook_name, hook_category, block_count, command_preview = result_row
        shortened_preview = (command_preview or EMPTY_STRING)[
            :TOP_BLOCKED_COMMAND_PREVIEW_MAX_CHARACTERS
        ]
        display_rows.append(
            (
                str(hook_name),
                str(hook_category),
                str(block_count),
                shortened_preview,
            )
        )

    # Each column is as wide as its longest cell, heading included.
    column_widths = [
        max(len(row[column_index]) for row in display_rows)
        for column_index in range(len(SUMMARY_COLUMN_HEADINGS))
    ]
    for row in display_rows:
        padded_cells = (
            cell.ljust(column_widths[column_index])
            for column_index, cell in enumerate(row)
        )
        print(SUMMARY_TABLE_COLUMN_GAP.join(padded_cells))
|
|
825
|
+
|
|
826
|
+
|
|
827
|
+
def run_query(named_query: str) -> int:
    """Run one of the pre-baked SQL files under ``queries/`` and print its rows.

    ``named_query`` must fully match ``QUERY_NAME_PATTERN`` and resolve
    to an existing ``<name><SQL_FILE_EXTENSION>`` file in the queries
    directory next to this module. Otherwise a message is printed to
    stderr and nothing is executed.

    Args:
        named_query: Base name of the query file, without extension.

    Returns:
        ``EXIT_CODE_UNKNOWN_QUERY`` for an invalid or missing query name,
        ``EXIT_CODE_EXTRACTOR_ENVIRONMENT_MISSING`` when the connection
        attempt fails with an operational error, and
        ``EXIT_CODE_SUCCESS`` after results (or the no-rows message) have
        been printed.

    Raises:
        Exception: Non-operational connection errors and query errors
            propagate to the caller.
    """
    # Validate the name before it is used to build a filesystem path.
    if re.fullmatch(QUERY_NAME_PATTERN, named_query) is None:
        print(
            f"{INVALID_QUERY_NAME_MESSAGE_PREFIX}{named_query}",
            file=sys.stderr,
        )
        return EXIT_CODE_UNKNOWN_QUERY

    module_directory = Path(__file__).resolve().parent
    query_file_path = (
        module_directory / QUERIES_DIRECTORY_NAME / f"{named_query}{SQL_FILE_EXTENSION}"
    )
    if not query_file_path.exists():
        print(f"{UNKNOWN_QUERY_MESSAGE_PREFIX}{named_query}", file=sys.stderr)
        return EXIT_CODE_UNKNOWN_QUERY
    query_text = query_file_path.read_text(encoding="utf-8")

    try:
        neon_connection = connect_to_neon()
    except Exception as connect_exception:
        if not is_operational_error(connect_exception):
            raise
        _append_offline_warning_line(connect_exception)
        return EXIT_CODE_EXTRACTOR_ENVIRONMENT_MISSING

    try:
        with neon_connection.cursor() as query_cursor:
            query_cursor.execute(query_text)
            fetched_rows = query_cursor.fetchall()
            # ``description`` may be None; treat that as "no columns".
            column_names = [
                column_description[0]
                for column_description in (query_cursor.description or [])
            ]
    finally:
        # Closing is best-effort; never let it mask a query error.
        try:
            neon_connection.close()
        except Exception:
            pass

    if not fetched_rows:
        print(QUERY_NO_ROWS_RETURNED_MESSAGE)
        return EXIT_CODE_SUCCESS

    print(SUMMARY_TABLE_COLUMN_GAP.join(column_names))
    for fetched_row in fetched_rows:
        rendered_cells = (str(cell) for cell in fetched_row)
        print(SUMMARY_TABLE_COLUMN_GAP.join(rendered_cells))
    return EXIT_CODE_SUCCESS
|
|
872
|
+
|
|
873
|
+
|
|
874
|
+
def main() -> int:
    """Dispatch the hook-log extractor CLI and return its exit code.

    Flags are looked up anywhere in ``sys.argv[1:]`` and handled in this
    order:

    * ``--summary`` prints the top blockers of the last twenty-four hours.
    * ``--query <name>`` runs a pre-baked SQL file under ``queries/``;
      when no value follows the flag, ``DEFAULT_QUERY_FOR_SUMMARY`` is
      run instead.
    * ``--full-rebuild`` truncates ``hook_events`` and re-reads every
      JSONL from byte zero.
    * ``--incremental`` is a documented no-op selecting the default
      byte-offset resumption path, which the Stop hook also uses when no
      flags are passed. If it is given together with ``--full-rebuild``,
      the incremental path wins and no rebuild happens.
    """
    cli_arguments = sys.argv[1:]

    if FLAG_SUMMARY in cli_arguments:
        return run_summary()

    if FLAG_QUERY in cli_arguments:
        query_name_index = cli_arguments.index(FLAG_QUERY) + 1
        if query_name_index < len(cli_arguments):
            return run_query(cli_arguments[query_name_index])
        # The flag was the last argument: fall back to the default query.
        return run_query(DEFAULT_QUERY_FOR_SUMMARY)

    # An explicit --incremental overrides --full-rebuild.
    should_rebuild_everything = (
        FLAG_FULL_REBUILD in cli_arguments
        and FLAG_INCREMENTAL not in cli_arguments
    )
    return run_full_extraction(
        transcripts_root=PROJECTS_TRANSCRIPT_ROOT,
        state_file_path=OFFSET_STATE_FILE,
        full_rebuild=should_rebuild_everything,
    )
|
|
904
|
+
|
|
905
|
+
|
|
906
|
+
# Script entry point: the value returned by main() becomes the process
# exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
|