claude-dev-env 1.30.0 → 1.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAUDE.md +8 -0
- package/agents/clean-coder.md +275 -111
- package/agents/code-quality-agent.md +196 -209
- package/bin/install.mjs +81 -0
- package/bin/install.test.mjs +158 -0
- package/bin/install_mypy_ini.mjs +51 -0
- package/bin/install_mypy_ini.test.mjs +121 -0
- package/commands/hook-log-extract.md +70 -0
- package/commands/hook-log-init.md +76 -0
- package/docs/CODE_RULES.md +40 -0
- package/hooks/blocking/code_rules_enforcer.py +5 -3
- package/hooks/blocking/destructive_command_blocker.py +187 -0
- package/hooks/blocking/question_to_user_enforcer.py +140 -0
- package/hooks/blocking/test_code_rules_enforcer_file_global_constants.py +39 -0
- package/hooks/blocking/test_destructive_command_blocker.py +397 -0
- package/hooks/blocking/test_question_to_user_enforcer.py +163 -0
- package/hooks/config/hook_log_extractor_constants.py +221 -0
- package/hooks/config/messages.py +3 -0
- package/hooks/config/test_hook_log_extractor_constants.py +96 -0
- package/hooks/config/test_messages.py +5 -0
- package/hooks/diagnostic/hook_log_extractor.py +907 -0
- package/hooks/diagnostic/hook_log_init.py +202 -0
- package/hooks/diagnostic/hook_log_stop_wrapper.py +84 -0
- package/hooks/diagnostic/migrations/2026-04-25-drop-themes-hook-events.sql +3 -0
- package/hooks/diagnostic/migrations/README.md +77 -0
- package/hooks/diagnostic/queries/block_details_for_hook.sql +26 -0
- package/hooks/diagnostic/queries/blocks_by_category.sql +10 -0
- package/hooks/diagnostic/queries/blocks_by_tool.sql +9 -0
- package/hooks/diagnostic/queries/blocks_last_7_days.sql +11 -0
- package/hooks/diagnostic/queries/top_blockers_last_24_hours.sql +12 -0
- package/hooks/diagnostic/queries/top_blockers_overall.sql +12 -0
- package/hooks/diagnostic/requirements-hook-logs-dev.txt +2 -0
- package/hooks/diagnostic/requirements-hook-logs.txt +1 -0
- package/hooks/diagnostic/schema.sql +51 -0
- package/hooks/diagnostic/test_hook_log_extractor.py +1531 -0
- package/hooks/diagnostic/test_hook_log_init.py +227 -0
- package/hooks/diagnostic/test_hook_log_stop_wrapper.py +98 -0
- package/hooks/hooks.json +10 -0
- package/package.json +1 -1
- package/rules/ask-user-question-required.md +44 -0
- package/scripts/config/test_spec_implementer_prompt.py +0 -4
- package/scripts/test_groq_bugteam_spec.py +0 -8
|
@@ -0,0 +1,1531 @@
|
|
|
1
|
+
"""Failing-first tests for hook_log_extractor.
|
|
2
|
+
|
|
3
|
+
Covers category derivation (15 known + uncategorized fallback), outcome
|
|
4
|
+
mapping (5 attachment types), excerpt truncation, offset advance,
|
|
5
|
+
idempotence via ON CONFLICT, offline graceful fallback, and batched
|
|
6
|
+
INSERT shape. psycopg is mocked at the connect boundary.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import contextlib
|
|
12
|
+
import errno
|
|
13
|
+
import json
|
|
14
|
+
import sys
|
|
15
|
+
import threading
|
|
16
|
+
import time
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Any
|
|
19
|
+
from unittest.mock import MagicMock, patch
|
|
20
|
+
|
|
21
|
+
import pytest
|
|
22
|
+
|
|
23
|
+
_HOOKS_ROOT = Path(__file__).resolve().parent.parent
|
|
24
|
+
if str(_HOOKS_ROOT) not in sys.path:
|
|
25
|
+
sys.path.insert(0, str(_HOOKS_ROOT))
|
|
26
|
+
|
|
27
|
+
from diagnostic import hook_log_extractor
|
|
28
|
+
from config.hook_log_extractor_constants import (
|
|
29
|
+
COMMAND_EXCERPT_MAX_CHARACTERS,
|
|
30
|
+
EXIT_CODE_UNKNOWN_QUERY,
|
|
31
|
+
HOOK_CATEGORY_UNCATEGORIZED,
|
|
32
|
+
KNOWN_HOOK_CATEGORIES,
|
|
33
|
+
NEON_DATABASE_URL_ENVIRONMENT_VARIABLE,
|
|
34
|
+
OUTCOME_ADDED_CONTEXT,
|
|
35
|
+
OUTCOME_BLOCKED,
|
|
36
|
+
OUTCOME_NON_BLOCKING_ERROR,
|
|
37
|
+
OUTCOME_SUCCESS,
|
|
38
|
+
OUTCOME_SYSTEM_MESSAGE,
|
|
39
|
+
STDERR_EXCERPT_MAX_CHARACTERS,
|
|
40
|
+
STDOUT_EXCERPT_MAX_CHARACTERS,
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _make_success_line(
    session_id: str = "session-alpha",
    hook_name: str = "PreToolUse:Bash",
    hook_event: str = "PreToolUse",
    tool_use_id: str = "toolu_001",
    command: str = "python C:/Users/jon/.claude/hooks/blocking/destructive_command_blocker.py",
    stdout: str = "ok\n",
    stderr: str = "",
    exit_code: int = 0,
    duration_ms: int = 42,
    timestamp: str = "2026-04-24T13:32:07.978Z",
    cwd: str = "Y:\\Projects\\repo",
    git_branch: str = "main",
) -> str:
    """Return one transcript line (JSON, no trailing newline) for a ``hook_success`` attachment.

    Every field of the record can be overridden through the keyword
    arguments; the defaults describe a PreToolUse:Bash hook that exited 0.
    """
    attachment_payload = {
        "type": "hook_success",
        "hookName": hook_name,
        "hookEvent": hook_event,
        "toolUseID": tool_use_id,
        "command": command,
        "stdout": stdout,
        "stderr": stderr,
        "exitCode": exit_code,
        "durationMs": duration_ms,
    }
    # Key order is kept stable so the serialized line is byte-for-byte reproducible.
    envelope = {
        "type": "attachment",
        "attachment": attachment_payload,
        "timestamp": timestamp,
        "sessionId": session_id,
        "cwd": cwd,
        "gitBranch": git_branch,
    }
    return json.dumps(envelope)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _make_blocking_line(
    session_id: str = "session-alpha",
    hook_name: str = "PreToolUse:Bash",
    hook_event: str = "PreToolUse",
    tool_use_id: str = "toolu_002",
    blocking_message: str = "blocked for reason",
    command: str = "python C:/Users/jon/.claude/hooks/blocking/content_search_to_zoekt_redirector.py",
    timestamp: str = "2026-04-24T13:32:54.293Z",
    cwd: str = "Y:\\Projects\\repo",
    git_branch: str = "main",
) -> str:
    """Return one transcript line (JSON, no trailing newline) for a ``hook_blocking_error`` attachment.

    The blocking message and the hook command live in the nested
    ``blockingError`` object rather than directly on the attachment.
    """
    blocking_error_details = {
        "blockingError": blocking_message,
        "command": command,
    }
    attachment_payload = {
        "type": "hook_blocking_error",
        "hookName": hook_name,
        "hookEvent": hook_event,
        "toolUseID": tool_use_id,
        "blockingError": blocking_error_details,
    }
    envelope = {
        "type": "attachment",
        "attachment": attachment_payload,
        "timestamp": timestamp,
        "sessionId": session_id,
        "cwd": cwd,
        "gitBranch": git_branch,
    }
    return json.dumps(envelope)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _make_system_message_line(
    session_id: str = "session-alpha",
    hook_name: str = "PreToolUse:Bash",
    hook_event: str = "PreToolUse",
    tool_use_id: str = "toolu_003",
    content: str = "[destructive-gate] blocked",
    timestamp: str = "2026-04-24T13:32:54.293Z",
    cwd: str = "Y:\\Projects\\repo",
    git_branch: str = "main",
) -> str:
    """Return one transcript line (JSON, no trailing newline) for a ``hook_system_message`` attachment.

    The attachment carries a plain-string ``content`` field and no command.
    """
    attachment_payload = {
        "type": "hook_system_message",
        "hookName": hook_name,
        "hookEvent": hook_event,
        "toolUseID": tool_use_id,
        "content": content,
    }
    envelope = {
        "type": "attachment",
        "attachment": attachment_payload,
        "timestamp": timestamp,
        "sessionId": session_id,
        "cwd": cwd,
        "gitBranch": git_branch,
    }
    return json.dumps(envelope)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _make_additional_context_line(
    session_id: str = "session-alpha",
    hook_name: str = "PreToolUse:Bash",
    hook_event: str = "PreToolUse",
    tool_use_id: str = "toolu_004",
    content: list[str] | None = None,
    timestamp: str = "2026-04-24T13:32:54.293Z",
    cwd: str = "Y:\\Projects\\repo",
    git_branch: str = "main",
) -> str:
    """Return one transcript line (JSON, no trailing newline) for a ``hook_additional_context`` attachment.

    Args:
        content: List of context strings. ``None`` (the default) selects
            ``["extra context"]``; any list that is passed explicitly,
            including an empty one, is serialized as given.
    """
    # Compare against None rather than relying on truthiness, so a caller
    # can build a record whose content list is deliberately empty.
    resolved_content = ["extra context"] if content is None else content
    record = {
        "type": "attachment",
        "attachment": {
            "type": "hook_additional_context",
            "hookName": hook_name,
            "hookEvent": hook_event,
            "toolUseID": tool_use_id,
            "content": resolved_content,
        },
        "timestamp": timestamp,
        "sessionId": session_id,
        "cwd": cwd,
        "gitBranch": git_branch,
    }
    return json.dumps(record)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
@pytest.mark.parametrize("expected_category", sorted(KNOWN_HOOK_CATEGORIES))
def test_derive_category_accepts_each_known_category(expected_category: str) -> None:
    """Each known category, used as the script's parent directory, is returned as-is."""
    hooks_directory = "C:/Users/jon/.claude/hooks"
    command_line = f"python {hooks_directory}/{expected_category}/some_hook.py"
    derived_category = hook_log_extractor.derive_category(command_line)
    assert derived_category == expected_category
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def test_derive_category_returns_uncategorized_for_unknown_parent() -> None:
    """A parent directory outside the known set yields the uncategorized fallback."""
    derived_category = hook_log_extractor.derive_category(
        "python C:/Users/jon/.claude/hooks/unheard_of_bucket/some_hook.py",
    )
    assert derived_category == HOOK_CATEGORY_UNCATEGORIZED
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def test_derive_category_returns_uncategorized_for_empty_path() -> None:
    """Both ``None`` and the empty string yield the uncategorized fallback."""
    for each_empty_path in (None, ""):
        derived_category = hook_log_extractor.derive_category(each_empty_path)
        assert derived_category == HOOK_CATEGORY_UNCATEGORIZED
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def test_derive_category_handles_windows_backslash_paths() -> None:
    """Backslash-separated script paths resolve to the same category as forward-slash ones."""
    derived_category = hook_log_extractor.derive_category(
        "python C:\\Users\\jon\\.claude\\hooks\\blocking\\destructive_command_blocker.py",
    )
    assert derived_category == "blocking"
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def test_derive_category_strips_python_launcher_prefix() -> None:
    """A leading ``python3`` launcher token does not affect the derived category."""
    derived_category = hook_log_extractor.derive_category(
        "python3 /home/jon/.claude/hooks/session/code_rules_reminder.py",
    )
    assert derived_category == "session"
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def test_derive_outcome_maps_hook_success() -> None:
    """``hook_success`` maps to ``OUTCOME_SUCCESS``."""
    mapped_outcome = hook_log_extractor.derive_outcome("hook_success")
    assert mapped_outcome == OUTCOME_SUCCESS
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def test_derive_outcome_maps_hook_blocking_error() -> None:
    """``hook_blocking_error`` maps to ``OUTCOME_BLOCKED``."""
    mapped_outcome = hook_log_extractor.derive_outcome("hook_blocking_error")
    assert mapped_outcome == OUTCOME_BLOCKED
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def test_derive_outcome_maps_hook_system_message() -> None:
    """``hook_system_message`` maps to ``OUTCOME_SYSTEM_MESSAGE``."""
    mapped_outcome = hook_log_extractor.derive_outcome("hook_system_message")
    assert mapped_outcome == OUTCOME_SYSTEM_MESSAGE
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def test_derive_outcome_maps_hook_additional_context() -> None:
    """``hook_additional_context`` maps to ``OUTCOME_ADDED_CONTEXT``."""
    mapped_outcome = hook_log_extractor.derive_outcome("hook_additional_context")
    assert mapped_outcome == OUTCOME_ADDED_CONTEXT
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def test_derive_outcome_maps_hook_non_blocking_error() -> None:
    """``hook_non_blocking_error`` maps to ``OUTCOME_NON_BLOCKING_ERROR``."""
    mapped_outcome = hook_log_extractor.derive_outcome("hook_non_blocking_error")
    assert mapped_outcome == OUTCOME_NON_BLOCKING_ERROR
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def test_iter_attachment_records_skips_unknown_hook_attachment_type(
    tmp_path: Path,
) -> None:
    """An attachment whose hook type is not recognized is not yielded by the iterator."""
    jsonl_path = tmp_path / "session-with-unknown-hook-type.jsonl"
    unrecognized_attachment = {
        "type": "hook_future_unknown_variant",
        "hookName": "PreToolUse:Bash",
        "hookEvent": "PreToolUse",
    }
    unrecognized_line = json.dumps(
        {
            "type": "attachment",
            "attachment": unrecognized_attachment,
            "timestamp": "2026-04-24T13:32:54.293Z",
            "sessionId": "session-alpha",
            "cwd": "Y:/Projects/repo",
            "gitBranch": "main",
        },
    )
    file_lines = (_make_success_line(), unrecognized_line)
    jsonl_path.write_text(
        "".join(each_line + "\n" for each_line in file_lines),
        encoding="utf-8",
    )

    record_iterator = hook_log_extractor.iter_attachment_records_from_file(
        str(jsonl_path),
        start_offset=0,
    )
    yielded_records = list(record_iterator)

    # Only the hook_success line survives; the unknown variant is dropped.
    assert len(yielded_records) == 1
    surviving_record, _line_number, _offset = yielded_records[0]
    assert surviving_record["attachment"]["type"] == "hook_success"
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def test_derive_outcome_raises_on_unknown_type() -> None:
    """An attachment type with no outcome mapping raises ``KeyError``."""
    unmapped_attachment_type = "hook_something_else"
    with pytest.raises(KeyError):
        hook_log_extractor.derive_outcome(unmapped_attachment_type)
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def test_extract_script_path_from_success_record() -> None:
    """For a success attachment, the script path is the command without its launcher token."""
    attachment = json.loads(
        _make_success_line(command="python C:/Users/jon/.claude/hooks/blocking/foo.py"),
    )["attachment"]
    extracted_path = hook_log_extractor.extract_script_path(attachment)
    assert extracted_path == "C:/Users/jon/.claude/hooks/blocking/foo.py"
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def test_extract_script_path_from_blocking_record() -> None:
    """For a blocking attachment, the script path is taken from the nested command."""
    attachment = json.loads(
        _make_blocking_line(command="python3 /home/jon/.claude/hooks/blocking/bar.py"),
    )["attachment"]
    extracted_path = hook_log_extractor.extract_script_path(attachment)
    assert extracted_path == "/home/jon/.claude/hooks/blocking/bar.py"
|
|
280
|
+
|
|
281
|
+
|
|
282
|
+
def test_extract_script_path_returns_none_for_system_message() -> None:
    """A system-message attachment carries no command, so no script path is extracted."""
    attachment = json.loads(_make_system_message_line())["attachment"]
    extracted_path = hook_log_extractor.extract_script_path(attachment)
    assert extracted_path is None
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def test_excerpt_truncation_respects_command_limit() -> None:
    """A command 50 characters over the limit is cut to exactly the limit."""
    oversized_length = COMMAND_EXCERPT_MAX_CHARACTERS + 50
    excerpt = hook_log_extractor.truncate_command_excerpt("x" * oversized_length)
    assert len(excerpt) == COMMAND_EXCERPT_MAX_CHARACTERS
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
def test_excerpt_truncation_preserves_short_command() -> None:
    """A command under the limit is returned unchanged."""
    short_command = "python foo.py"
    excerpt = hook_log_extractor.truncate_command_excerpt(short_command)
    assert excerpt == short_command
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def test_excerpt_truncation_handles_none_command() -> None:
    """A missing command (``None``) passes through as ``None``."""
    excerpt = hook_log_extractor.truncate_command_excerpt(None)
    assert excerpt is None
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def test_excerpt_truncation_respects_stdout_limit() -> None:
    """Stdout 100 characters over the limit is cut to exactly the limit."""
    oversized_length = STDOUT_EXCERPT_MAX_CHARACTERS + 100
    excerpt = hook_log_extractor.truncate_stdout_excerpt("y" * oversized_length)
    assert len(excerpt) == STDOUT_EXCERPT_MAX_CHARACTERS
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def test_excerpt_truncation_respects_stderr_limit() -> None:
    """Stderr 100 characters over the limit is cut to exactly the limit."""
    oversized_length = STDERR_EXCERPT_MAX_CHARACTERS + 100
    excerpt = hook_log_extractor.truncate_stderr_excerpt("z" * oversized_length)
    assert len(excerpt) == STDERR_EXCERPT_MAX_CHARACTERS
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def test_build_row_from_success_attachment() -> None:
    """A default success record populates the identifying and metric columns of the row."""
    parsed_record = json.loads(_make_success_line())
    row = hook_log_extractor.build_row_from_attachment(
        parsed_record=parsed_record,
        source_jsonl_path="C:/fake/path.jsonl",
        source_line_number=1,
    )
    expected_value_by_column = {
        "session_id": "session-alpha",
        "hook_event": "PreToolUse",
        "hook_name": "PreToolUse:Bash",
        "tool_name": "Bash",
        "tool_use_id": "toolu_001",
        "outcome": OUTCOME_SUCCESS,
        "exit_code": 0,
        "duration_ms": 42,
        "hook_category": "blocking",
        "source_jsonl_path": "C:/fake/path.jsonl",
        "source_line_number": 1,
    }
    for each_column, each_expected_value in expected_value_by_column.items():
        assert row[each_column] == each_expected_value, each_column
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def test_build_row_from_blocking_attachment_has_no_exit_code_or_duration() -> None:
    """A blocking record has no exit code or duration; its message lands in ``stderr_excerpt``."""
    parsed_record = json.loads(_make_blocking_line())
    row = hook_log_extractor.build_row_from_attachment(
        parsed_record=parsed_record,
        source_jsonl_path="C:/fake/path.jsonl",
        source_line_number=2,
    )
    assert row["outcome"] == OUTCOME_BLOCKED
    for each_absent_column in ("exit_code", "duration_ms"):
        assert row[each_absent_column] is None
    stderr_excerpt = row["stderr_excerpt"]
    assert stderr_excerpt is not None
    assert "blocked for reason" in stderr_excerpt
    assert row["hook_category"] == "blocking"
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def test_build_row_from_system_message_uses_content_as_stdout_excerpt() -> None:
    """A system message's ``content`` string becomes the row's ``stdout_excerpt``."""
    message_text = "[gate] blocked Bash(grep)"
    parsed_record = json.loads(_make_system_message_line(content=message_text))
    row = hook_log_extractor.build_row_from_attachment(
        parsed_record=parsed_record,
        source_jsonl_path="C:/fake/path.jsonl",
        source_line_number=3,
    )
    assert row["outcome"] == OUTCOME_SYSTEM_MESSAGE
    assert row["stdout_excerpt"] == message_text
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def test_build_row_from_additional_context_joins_list_content() -> None:
    """Every entry of a list-valued ``content`` appears in the row's ``stdout_excerpt``."""
    all_notes = ["first note", "second note"]
    parsed_record = json.loads(_make_additional_context_line(content=all_notes))
    row = hook_log_extractor.build_row_from_attachment(
        parsed_record=parsed_record,
        source_jsonl_path="C:/fake/path.jsonl",
        source_line_number=4,
    )
    assert row["outcome"] == OUTCOME_ADDED_CONTEXT
    stdout_excerpt = row["stdout_excerpt"]
    assert stdout_excerpt is not None
    for each_note in all_notes:
        assert each_note in stdout_excerpt
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def test_iter_attachment_records_skips_non_attachment_rows(tmp_path: Path) -> None:
    """User and assistant rows are skipped; line numbers still count every line."""
    jsonl_file = tmp_path / "session.jsonl"
    user_row = json.dumps({"type": "user", "content": "hi"})
    assistant_row = json.dumps({"type": "assistant", "content": "hello"})
    file_lines = [user_row, _make_success_line(), assistant_row, _make_blocking_line()]
    jsonl_file.write_text(
        "".join(each_line + "\n" for each_line in file_lines),
        encoding="utf-8",
    )

    record_iterator = hook_log_extractor.iter_attachment_records_from_file(
        str(jsonl_file),
        start_offset=0,
    )
    yielded_records = list(record_iterator)

    assert len(yielded_records) == 2
    first_parsed_record, first_line_number, _first_offset = yielded_records[0]
    assert first_parsed_record["attachment"]["type"] == "hook_success"
    # The success record is the second line of the file (1-based).
    assert first_line_number == 2
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def test_iter_attachment_records_resumes_from_offset(tmp_path: Path) -> None:
    """Starting at the byte offset of the second line yields only the second record."""
    jsonl_file = tmp_path / "session.jsonl"
    first_line_bytes = (_make_success_line(tool_use_id="toolu_a") + "\n").encode("utf-8")
    second_line_bytes = (_make_success_line(tool_use_id="toolu_b") + "\n").encode("utf-8")
    # Write raw bytes: text-mode writes translate "\n" to the platform line
    # separator, which would make the on-disk first line longer than the
    # offset computed here on Windows.
    jsonl_file.write_bytes(first_line_bytes + second_line_bytes)
    first_line_byte_length = len(first_line_bytes)

    all_parsed_records = list(
        hook_log_extractor.iter_attachment_records_from_file(
            str(jsonl_file),
            start_offset=first_line_byte_length,
        ),
    )

    assert len(all_parsed_records) == 1
    assert all_parsed_records[0][0]["attachment"]["toolUseID"] == "toolu_b"
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def test_iter_attachment_records_ignores_malformed_json(tmp_path: Path) -> None:
    """A line that is not valid JSON is skipped without stopping the iteration."""
    jsonl_file = tmp_path / "session.jsonl"
    file_lines = ("{this is not json", _make_success_line())
    jsonl_file.write_text(
        "".join(each_line + "\n" for each_line in file_lines),
        encoding="utf-8",
    )

    record_iterator = hook_log_extractor.iter_attachment_records_from_file(
        str(jsonl_file),
        start_offset=0,
    )

    assert len(list(record_iterator)) == 1
|
|
435
|
+
|
|
436
|
+
|
|
437
|
+
def test_load_offsets_returns_empty_when_file_missing(tmp_path: Path) -> None:
    """Loading offsets from a state file that does not exist gives an empty mapping."""
    absent_state_path = str(tmp_path / "does_not_exist.json")
    loaded_offsets = hook_log_extractor.load_offsets(absent_state_path)
    assert loaded_offsets == {}
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
def test_save_and_load_offsets_round_trips(tmp_path: Path) -> None:
    """Offsets saved to a state file under a not-yet-existing directory load back unchanged."""
    state_path = str(tmp_path / "nested" / "state.json")
    original_offset_by_path = {
        "C:/foo.jsonl": {"byte_offset": 100, "line_number": 3},
        "C:/bar.jsonl": {"byte_offset": 250, "line_number": 8},
    }
    hook_log_extractor.save_offsets(state_path, original_offset_by_path)
    assert hook_log_extractor.load_offsets(state_path) == original_offset_by_path
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
def test_insert_rows_batches_uses_execute_values_or_executemany() -> None:
    """Inserting a batch of rows goes through the cursor's ``executemany`` or ``execute``."""
    fake_cursor = MagicMock()
    fake_connection = MagicMock()
    fake_connection.cursor.return_value.__enter__.return_value = fake_cursor

    # Columns shared by every row; only the source line number varies.
    shared_columns = {
        "event_timestamp": "2026-04-24T13:32:07.978Z",
        "session_id": "s1",
        "cwd": "c",
        "git_branch": "b",
        "hook_event": "PreToolUse",
        "hook_name": "PreToolUse:Bash",
        "hook_category": "blocking",
        "script_path": "s",
        "tool_name": "Bash",
        "tool_use_id": "t",
        "outcome": OUTCOME_SUCCESS,
        "exit_code": 0,
        "duration_ms": 1,
        "command_excerpt": "cmd",
        "stdout_excerpt": "out",
        "stderr_excerpt": "",
        "source_jsonl_path": "/p.jsonl",
    }
    all_rows = [
        dict(shared_columns, source_line_number=each_line_number)
        for each_line_number in range(1, 4)
    ]

    hook_log_extractor.insert_rows_batch(fake_connection, all_rows)

    assert any((fake_cursor.executemany.called, fake_cursor.execute.called))
|
|
485
|
+
|
|
486
|
+
|
|
487
|
+
def test_run_full_extraction_advances_offset(tmp_path: Path) -> None:
    """A successful run records a positive byte offset and line number for the transcript."""
    jsonl_file = tmp_path / "session.jsonl"
    jsonl_file.write_text(_make_success_line() + "\n", encoding="utf-8")
    state_file = tmp_path / "offsets.json"

    fake_connection = MagicMock()
    fake_connection.cursor.return_value.__enter__.return_value = MagicMock()

    neon_connect_patch = patch.object(
        hook_log_extractor, "connect_to_neon", return_value=fake_connection
    )
    with neon_connect_patch:
        exit_code = hook_log_extractor.run_full_extraction(
            transcripts_root=str(tmp_path),
            state_file_path=str(state_file),
            full_rebuild=False,
        )

    assert exit_code == 0
    saved_offsets = hook_log_extractor.load_offsets(str(state_file))
    transcript_key = str(jsonl_file)
    assert transcript_key in saved_offsets
    transcript_entry = saved_offsets[transcript_key]
    assert transcript_entry["byte_offset"] > 0
    assert transcript_entry["line_number"] >= 1
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
def test_run_full_extraction_idempotent_when_offset_at_end(tmp_path: Path) -> None:
    """When the saved offset already equals the file size, the run inserts nothing."""
    jsonl_file = tmp_path / "session.jsonl"
    success_line_bytes = (_make_success_line() + "\n").encode("utf-8")
    # Write raw bytes so the file size equals the saved offset on every
    # platform; a text-mode write would expand "\n" on Windows and leave
    # the offset one byte short of the end.
    jsonl_file.write_bytes(success_line_bytes)

    state_file = tmp_path / "offsets.json"
    hook_log_extractor.save_offsets(
        str(state_file),
        {
            str(jsonl_file): {
                "byte_offset": len(success_line_bytes),
                "line_number": 1,
            },
        },
    )

    fake_cursor = MagicMock()
    fake_connection = MagicMock()
    fake_connection.cursor.return_value.__enter__.return_value = fake_cursor

    with patch.object(
        hook_log_extractor, "connect_to_neon", return_value=fake_connection
    ):
        exit_code = hook_log_extractor.run_full_extraction(
            transcripts_root=str(tmp_path),
            state_file_path=str(state_file),
            full_rebuild=False,
        )

    assert exit_code == 0
    # NOTE(review): only executemany is checked here, while the batch-insert
    # test accepts either executemany or execute — confirm which one
    # insert_rows_batch actually uses.
    assert not fake_cursor.executemany.called
|
|
543
|
+
|
|
544
|
+
|
|
545
|
+
def test_run_full_rebuild_clears_offsets_and_truncates(tmp_path: Path) -> None:
    """A full rebuild issues a TRUNCATE and recomputes offsets, ignoring the stale state."""
    jsonl_file = tmp_path / "session.jsonl"
    jsonl_file.write_text(_make_success_line() + "\n", encoding="utf-8")
    transcript_key = str(jsonl_file)

    # Seed the state file with an offset far past the end of the transcript.
    state_file = tmp_path / "offsets.json"
    stale_offsets = {transcript_key: {"byte_offset": 99999, "line_number": 100}}
    hook_log_extractor.save_offsets(str(state_file), stale_offsets)

    fake_cursor = MagicMock()
    fake_connection = MagicMock()
    fake_connection.cursor.return_value.__enter__.return_value = fake_cursor

    neon_connect_patch = patch.object(
        hook_log_extractor, "connect_to_neon", return_value=fake_connection
    )
    with neon_connect_patch:
        exit_code = hook_log_extractor.run_full_extraction(
            transcripts_root=str(tmp_path),
            state_file_path=str(state_file),
            full_rebuild=True,
        )

    assert exit_code == 0
    executed_statements = [
        each_call.args[0] for each_call in fake_cursor.execute.call_args_list
    ]
    assert any(
        "TRUNCATE" in each_statement.upper()
        for each_statement in executed_statements
    )
    offsets_after_rebuild = hook_log_extractor.load_offsets(str(state_file))
    rebuilt_entry = offsets_after_rebuild.get(transcript_key, {})
    assert rebuilt_entry.get("byte_offset", 0) > 0
    assert rebuilt_entry.get("line_number", 0) >= 1
|
|
580
|
+
|
|
581
|
+
|
|
582
|
+
def test_offline_fallback_writes_one_log_line_when_connect_fails(
    tmp_path: Path,
) -> None:
    """A connect failure classed as operational exits 0 and logs exactly one warning line."""
    jsonl_file = tmp_path / "session.jsonl"
    jsonl_file.write_text(_make_success_line() + "\n", encoding="utf-8")

    state_file = tmp_path / "offsets.json"
    warning_log = tmp_path / "hook-extractor.log"

    class _FakeOperationalError(Exception):
        pass

    # An exception instance as side_effect is raised on every call of the mock.
    with (
        patch.object(
            hook_log_extractor,
            "connect_to_neon",
            side_effect=_FakeOperationalError("boom"),
        ),
        patch.object(hook_log_extractor, "is_operational_error", return_value=True),
        patch.object(hook_log_extractor, "OFFLINE_WARNING_LOG", str(warning_log)),
    ):
        exit_code = hook_log_extractor.run_full_extraction(
            transcripts_root=str(tmp_path),
            state_file_path=str(state_file),
            full_rebuild=False,
        )

    assert exit_code == 0
    logged_lines = warning_log.read_text(encoding="utf-8").strip().splitlines()
    assert len(logged_lines) == 1
|
|
611
|
+
|
|
612
|
+
|
|
613
|
+
def test_tool_name_extracted_from_hook_name_prefix() -> None:
    """The text after the colon is the tool name; hook names with no colon give ``None``."""
    expected_tool_name_by_hook_name = {
        "PreToolUse:Bash": "Bash",
        "PreToolUse:Write|Edit": "Write|Edit",
    }
    for each_hook_name, each_tool_name in expected_tool_name_by_hook_name.items():
        assert hook_log_extractor.extract_tool_name(each_hook_name) == each_tool_name
    for each_hook_name in ("SessionStart", "UserPromptSubmit"):
        assert hook_log_extractor.extract_tool_name(each_hook_name) is None
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
def test_run_summary_prints_no_new_blocks_when_cursor_empty(
    capsys: pytest.CaptureFixture[str],
) -> None:
    """With no rows from the cursor, the summary prints the no-new-blocks notice."""
    fake_cursor = MagicMock()
    fake_cursor.fetchall.return_value = []
    fake_connection = MagicMock()
    fake_connection.cursor.return_value.__enter__.return_value = fake_cursor

    neon_connect_patch = patch.object(
        hook_log_extractor, "connect_to_neon", return_value=fake_connection
    )
    with neon_connect_patch:
        exit_code = hook_log_extractor.run_summary()

    printed_output = capsys.readouterr().out
    assert exit_code == 0
    assert "No new blocks since last run." in printed_output
|
|
636
|
+
|
|
637
|
+
|
|
638
|
+
def test_run_summary_prints_table_when_rows_returned(
    capsys: pytest.CaptureFixture[str],
) -> None:
    """With one row from the cursor, its script, category and count appear in the output."""
    summary_row = ("content_search_to_zoekt_redirector.py", "blocking", 7, "Bash(grep foo)")
    fake_cursor = MagicMock()
    fake_cursor.fetchall.return_value = [summary_row]
    fake_connection = MagicMock()
    fake_connection.cursor.return_value.__enter__.return_value = fake_cursor

    neon_connect_patch = patch.object(
        hook_log_extractor, "connect_to_neon", return_value=fake_connection
    )
    with neon_connect_patch:
        exit_code = hook_log_extractor.run_summary()

    printed_output = capsys.readouterr().out
    assert exit_code == 0
    for each_fragment in ("content_search_to_zoekt_redirector.py", "blocking", "7"):
        assert each_fragment in printed_output
|
|
658
|
+
|
|
659
|
+
|
|
660
|
+
def test_run_full_extraction_returns_zero_when_database_url_missing(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """C1: with the Neon URL variable unset, extraction exits 0 and logs the error class."""
    monkeypatch.delenv(NEON_DATABASE_URL_ENVIRONMENT_VARIABLE, raising=False)

    (tmp_path / "session.jsonl").write_text(
        _make_success_line() + "\n", encoding="utf-8"
    )
    state_file = tmp_path / "offsets.json"
    warning_log = tmp_path / "hook-extractor.log"

    warning_log_patch = patch.object(
        hook_log_extractor, "OFFLINE_WARNING_LOG", str(warning_log)
    )
    with warning_log_patch:
        exit_code = hook_log_extractor.run_full_extraction(
            transcripts_root=str(tmp_path),
            state_file_path=str(state_file),
            full_rebuild=False,
        )

    assert exit_code == 0
    assert warning_log.exists()
    assert "MissingNeonDatabaseUrlError" in warning_log.read_text(encoding="utf-8")
|
|
683
|
+
|
|
684
|
+
|
|
685
|
+
def test_run_full_extraction_returns_zero_when_psycopg_not_installed(
    tmp_path: Path,
) -> None:
    """C10: with the module's ``psycopg`` set to ``None``, extraction exits 0 and logs the error class."""
    (tmp_path / "session.jsonl").write_text(
        _make_success_line() + "\n", encoding="utf-8"
    )
    state_file = tmp_path / "offsets.json"
    warning_log = tmp_path / "hook-extractor.log"

    with (
        patch.object(hook_log_extractor, "psycopg", None),
        patch.object(hook_log_extractor, "OFFLINE_WARNING_LOG", str(warning_log)),
    ):
        exit_code = hook_log_extractor.run_full_extraction(
            transcripts_root=str(tmp_path),
            state_file_path=str(state_file),
            full_rebuild=False,
        )

    assert exit_code == 0
    assert warning_log.exists()
    assert "MissingPsycopgDependencyError" in warning_log.read_text(encoding="utf-8")
|
|
708
|
+
|
|
709
|
+
|
|
710
|
+
def test_offline_warning_line_does_not_leak_exception_message(
    tmp_path: Path,
) -> None:
    """C12: Offline warning log must record only timestamp + class name.

    ``connect_to_neon`` is patched to raise an exception whose message
    embeds a credential-bearing connection URL, and
    ``is_operational_error`` is patched to return ``True`` so the failure
    is routed through the offline-warning path. The log written to the
    patched ``OFFLINE_WARNING_LOG`` must contain neither the password nor
    the URL scheme.
    """
    warning_log = tmp_path / "hook-extractor.log"

    class _FakeOperationalError(Exception):
        pass

    def _raise_with_sensitive_url(*_args: Any, **_kwargs: Any) -> None:
        raise _FakeOperationalError(
            "connection failed to postgres://user:secret@host/db",
        )

    jsonl_file = tmp_path / "session.jsonl"
    jsonl_file.write_text(_make_success_line() + "\n", encoding="utf-8")
    state_file = tmp_path / "offsets.json"

    with (
        patch.object(
            hook_log_extractor,
            "connect_to_neon",
            side_effect=_raise_with_sensitive_url,
        ),
        patch.object(hook_log_extractor, "is_operational_error", return_value=True),
        patch.object(hook_log_extractor, "OFFLINE_WARNING_LOG", str(warning_log)),
    ):
        hook_log_extractor.run_full_extraction(
            transcripts_root=str(tmp_path),
            state_file_path=str(state_file),
            full_rebuild=False,
        )

    # Guard against a vacuous pass: the "not in" checks below are trivially
    # true for a missing or empty log, so first prove a warning was recorded.
    assert warning_log.exists(), "offline warning log was never written"
    warning_text = warning_log.read_text(encoding="utf-8")
    assert warning_text.strip(), "offline warning log is empty"
    assert "secret" not in warning_text
    assert "postgres://" not in warning_text
|
|
746
|
+
|
|
747
|
+
|
|
748
|
+
def test_offline_fallback_still_exits_zero_when_warning_log_write_raises(
    tmp_path: Path,
) -> None:
    """Failing to write the warning log must not change the offline exit code.

    ``connect_to_neon`` is forced to raise and ``is_operational_error`` is
    forced to report ``True``. ``io.open`` is wrapped so that only the
    patched ``OFFLINE_WARNING_LOG`` path raises ``OSError(EACCES)``; every
    other path is delegated to the real ``io.open``. Patching ``io.open``
    (instead of replacing ``_append_offline_warning_line``) is meant to
    exercise that function's own ``try/except OSError`` guard. The run must
    still return 0.
    """
    blocked_log_path = str(tmp_path / "hook-extractor.log")
    offsets_path = tmp_path / "offsets.json"
    transcript_path = tmp_path / "session.jsonl"
    transcript_path.write_text(_make_success_line() + "\n", encoding="utf-8")

    class _FakeOperationalError(Exception):
        pass

    def _raise_connection_failure(*_args: Any, **_kwargs: Any) -> None:
        raise _FakeOperationalError("connect failed")

    untouched_io_open = hook_log_extractor.io.open

    def _io_open_blocking_warning_log(
        path_argument: Any, *args: Any, **kwargs: Any
    ) -> Any:
        # Everything except the warning log goes to the genuine io.open.
        if str(path_argument) != blocked_log_path:
            return untouched_io_open(path_argument, *args, **kwargs)
        raise OSError(errno.EACCES, "permission denied")

    failing_connect = patch.object(
        hook_log_extractor,
        "connect_to_neon",
        side_effect=_raise_connection_failure,
    )
    always_operational = patch.object(
        hook_log_extractor, "is_operational_error", return_value=True
    )
    redirected_log = patch.object(
        hook_log_extractor, "OFFLINE_WARNING_LOG", blocked_log_path
    )
    guarded_io_open = patch.object(
        hook_log_extractor.io,
        "open",
        side_effect=_io_open_blocking_warning_log,
    )

    with failing_connect, always_operational, redirected_log, guarded_io_open:
        returned_status = hook_log_extractor.run_full_extraction(
            transcripts_root=str(tmp_path),
            state_file_path=str(offsets_path),
            full_rebuild=False,
        )

    assert returned_status == 0
|
|
806
|
+
|
|
807
|
+
|
|
808
|
+
def test_main_accepts_incremental_flag_as_noop(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """C8: ``--incremental`` is accepted and routed to the default extraction.

    ``run_full_extraction`` is replaced by a recorder returning 0; ``main``
    must return 0 and must have passed ``full_rebuild=False``.
    """
    recorded_call: dict[str, object] = {}

    def _fake_run_full_extraction(
        transcripts_root: str,
        state_file_path: str,
        full_rebuild: bool,
    ) -> int:
        recorded_call.update(
            transcripts_root=transcripts_root,
            state_file_path=state_file_path,
            full_rebuild=full_rebuild,
        )
        return 0

    monkeypatch.setattr(sys, "argv", ["hook_log_extractor.py", "--incremental"])
    monkeypatch.setattr(
        hook_log_extractor, "run_full_extraction", _fake_run_full_extraction
    )

    returned_status = hook_log_extractor.main()

    assert returned_status == 0
    assert recorded_call["full_rebuild"] is False
|
|
834
|
+
|
|
835
|
+
|
|
836
|
+
def test_run_query_returns_nonzero_for_unknown_query(
    capsys: pytest.CaptureFixture[str],
) -> None:
    """A name that matches no query yields the unknown-query exit code."""
    returned_status = hook_log_extractor.run_query("definitely_not_a_query_name")

    stderr_text = capsys.readouterr().err
    assert returned_status == EXIT_CODE_UNKNOWN_QUERY
    assert "Unknown query" in stderr_text
|
|
844
|
+
|
|
845
|
+
|
|
846
|
+
def test_run_query_returns_nonzero_for_invalid_query_name(
    capsys: pytest.CaptureFixture[str],
) -> None:
    """A path-traversal style name is rejected as an invalid query name."""
    returned_status = hook_log_extractor.run_query("../../../etc/passwd")

    stderr_text = capsys.readouterr().err
    assert returned_status == EXIT_CODE_UNKNOWN_QUERY
    assert "Invalid query name" in stderr_text
|
|
854
|
+
|
|
855
|
+
|
|
856
|
+
def test_run_query_rejects_uppercase_and_hyphen_names(
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Upper-case and hyphenated names each produce one invalid-name error."""
    status_for_uppercase = hook_log_extractor.run_query("UPPER_CASE")
    status_for_hyphen = hook_log_extractor.run_query("has-hyphen")

    stderr_text = capsys.readouterr().err
    assert status_for_uppercase == EXIT_CODE_UNKNOWN_QUERY
    assert status_for_hyphen == EXIT_CODE_UNKNOWN_QUERY
    assert stderr_text.count("Invalid query name") == 2
|
|
866
|
+
|
|
867
|
+
|
|
868
|
+
def test_save_offsets_cleans_up_temp_file_when_replace_fails(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``save_offsets`` re-raises a failed ``os.replace`` and leaves no temp file.

    NOTE(review): the cleanup check globs ``tmp*`` in ``tmp_path``, which
    presumes the temporary file name starts with ``tmp`` -- confirm against
    ``save_offsets``.
    """
    offsets_path = tmp_path / "state.json"
    offsets_to_save = {"C:/foo.jsonl": {"byte_offset": 100, "line_number": 2}}

    def _fail_replace(*_args: Any, **_kwargs: Any) -> None:
        raise OSError("replace failed")

    monkeypatch.setattr(hook_log_extractor.os, "replace", _fail_replace)

    with pytest.raises(OSError):
        hook_log_extractor.save_offsets(str(offsets_path), offsets_to_save)

    assert list(tmp_path.glob("tmp*")) == []
|
|
887
|
+
|
|
888
|
+
|
|
889
|
+
def test_save_offsets_cleans_up_temp_file_when_json_dump_fails(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """``save_offsets`` re-raises a failed ``json.dump`` and leaves no temp file.

    NOTE(review): the cleanup check globs ``tmp*`` in ``tmp_path``, which
    presumes the temporary file name starts with ``tmp`` -- confirm against
    ``save_offsets``.
    """
    offsets_path = tmp_path / "state.json"
    offsets_to_save = {"C:/foo.jsonl": {"byte_offset": 100, "line_number": 2}}

    def _fail_dump(*_args: Any, **_kwargs: Any) -> None:
        raise ValueError("dump failed")

    monkeypatch.setattr(hook_log_extractor.json, "dump", _fail_dump)

    with pytest.raises(ValueError):
        hook_log_extractor.save_offsets(str(offsets_path), offsets_to_save)

    assert list(tmp_path.glob("tmp*")) == []
|
|
908
|
+
|
|
909
|
+
|
|
910
|
+
def test_load_offsets_propagates_os_error_other_than_missing_file(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A ``PermissionError`` from ``io.open`` must surface from ``load_offsets``."""
    # The state file is created before io.open is replaced, so only the
    # read performed by load_offsets sees the failure.
    offsets_path = tmp_path / "state.json"
    offsets_path.write_text("{}", encoding="utf-8")

    def _raise_permission(*_args: Any, **_kwargs: Any) -> None:
        raise PermissionError("denied")

    monkeypatch.setattr(hook_log_extractor.io, "open", _raise_permission)

    with pytest.raises(PermissionError):
        hook_log_extractor.load_offsets(str(offsets_path))
|
|
924
|
+
|
|
925
|
+
|
|
926
|
+
def test_load_offsets_returns_empty_for_malformed_json(tmp_path: Path) -> None:
    """Unparseable state-file content loads as an empty mapping."""
    corrupt_state_path = tmp_path / "state.json"
    corrupt_state_path.write_text("not valid json {", encoding="utf-8")

    loaded_offsets = hook_log_extractor.load_offsets(str(corrupt_state_path))

    assert loaded_offsets == {}
|
|
931
|
+
|
|
932
|
+
|
|
933
|
+
def test_iter_attachment_records_accepts_start_line_number(tmp_path: Path) -> None:
    """``start_line_number`` shifts the reported line numbers by that amount.

    The same file is read twice from the byte offset of its second line,
    once with ``start_line_number=0`` and once with ``start_line_number=10``;
    the line number reported for the single yielded record must differ by
    exactly 10.
    """
    jsonl_file = tmp_path / "session.jsonl"
    first_line = _make_success_line(tool_use_id="toolu_a")
    second_line = _make_success_line(tool_use_id="toolu_b")
    first_line_bytes = (first_line + "\n").encode("utf-8")
    second_line_bytes = (second_line + "\n").encode("utf-8")
    # Write bytes rather than text: text mode may translate "\n" to the
    # platform line separator, which would make the offset computed below
    # point inside the first line terminator instead of at the second line.
    jsonl_file.write_bytes(first_line_bytes + second_line_bytes)
    first_line_byte_length = len(first_line_bytes)

    all_parsed_records_with_zero_start = list(
        hook_log_extractor.iter_attachment_records_from_file(
            str(jsonl_file),
            start_offset=first_line_byte_length,
            start_line_number=0,
        ),
    )
    all_parsed_records_with_offset_start = list(
        hook_log_extractor.iter_attachment_records_from_file(
            str(jsonl_file),
            start_offset=first_line_byte_length,
            start_line_number=10,
        ),
    )

    # Check both reads before indexing so a regression reports a clear
    # length mismatch instead of an IndexError.
    assert len(all_parsed_records_with_zero_start) == 1
    assert len(all_parsed_records_with_offset_start) == 1
    _, zero_start_line_number, _ = all_parsed_records_with_zero_start[0]
    _, offset_start_line_number, _ = all_parsed_records_with_offset_start[0]
    assert offset_start_line_number == zero_start_line_number + 10
|
|
959
|
+
|
|
960
|
+
|
|
961
|
+
def test_load_offsets_migrates_bare_int_legacy_entries_to_empty(
    tmp_path: Path,
) -> None:
    """A legacy ``{path: int}`` state file loads as empty and logs a warning.

    The warning is expected in the patched ``OFFLINE_WARNING_LOG`` and must
    mention ``legacy_offsets_format``.
    """
    legacy_state_path = tmp_path / "state.json"
    legacy_state_path.write_text(
        json.dumps({"C:/legacy.jsonl": 1234}), encoding="utf-8"
    )
    offline_log_path = tmp_path / "hook-extractor.log"

    redirected_log = patch.object(
        hook_log_extractor, "OFFLINE_WARNING_LOG", str(offline_log_path)
    )
    with redirected_log:
        migrated_offsets = hook_log_extractor.load_offsets(str(legacy_state_path))

    assert migrated_offsets == {}
    assert offline_log_path.exists()
    logged_text = offline_log_path.read_text(encoding="utf-8")
    assert "legacy_offsets_format" in logged_text
|
|
975
|
+
|
|
976
|
+
|
|
977
|
+
def test_load_offsets_ignores_legacy_warning_write_failure(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A failed legacy-format warning write must not break ``load_offsets``.

    The state file holds a legacy ``{path: int}`` entry. ``io.open`` is
    wrapped so that opening the patched ``OFFLINE_WARNING_LOG`` path raises
    ``OSError`` while every other path is delegated to the real ``io.open``.
    ``load_offsets`` must still return an empty mapping.
    """
    state_file = tmp_path / "state.json"
    legacy_content = json.dumps({"C:/legacy.jsonl": 1234})
    state_file.write_text(legacy_content, encoding="utf-8")
    warning_log = tmp_path / "hook-extractor.log"
    warning_log_path_string = str(warning_log)

    real_io_open = hook_log_extractor.io.open

    def _io_open_fails_only_for_warning_log(
        opened_file_path: Any,
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        # Normalize before comparing: a path-like argument never equals a
        # plain string, which would bypass the injected failure and let the
        # test pass without exercising the error path.
        if str(opened_file_path) == warning_log_path_string:
            raise OSError("read-only filesystem")
        return real_io_open(opened_file_path, *args, **kwargs)

    monkeypatch.setattr(
        hook_log_extractor.io, "open", _io_open_fails_only_for_warning_log
    )

    with patch.object(
        hook_log_extractor, "OFFLINE_WARNING_LOG", warning_log_path_string
    ):
        loaded_offsets = hook_log_extractor.load_offsets(str(state_file))

    assert loaded_offsets == {}
|
|
1005
|
+
|
|
1006
|
+
|
|
1007
|
+
def test_save_and_load_offsets_round_trips_new_shape(tmp_path: Path) -> None:
    """Offsets saved in the ``byte_offset``/``line_number`` shape load back equal.

    The state file lives in a sub-directory of ``tmp_path`` that does not
    exist before ``save_offsets`` is called.
    """
    nested_state_path = str(tmp_path.joinpath("nested", "state.json"))
    offsets_before_save = {
        "C:/foo.jsonl": {"byte_offset": 100, "line_number": 2},
        "C:/bar.jsonl": {"byte_offset": 250, "line_number": 5},
    }

    hook_log_extractor.save_offsets(nested_state_path, offsets_before_save)

    assert hook_log_extractor.load_offsets(nested_state_path) == offsets_before_save
|
|
1016
|
+
|
|
1017
|
+
|
|
1018
|
+
def test_run_full_extraction_skips_transcripts_deleted_mid_run(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Extraction returns 0 when a transcript is reported as no longer existing.

    ``os.path.exists`` is wrapped to answer ``False`` for the transcript
    path only; all other paths use the real implementation.
    """
    transcript_path = tmp_path / "session.jsonl"
    transcript_path.write_text(_make_success_line() + "\n", encoding="utf-8")
    offsets_path = tmp_path / "offsets.json"

    genuine_exists = hook_log_extractor.os.path.exists

    def _return_false_for_target(each_path: str) -> bool:
        if each_path != str(transcript_path):
            return genuine_exists(each_path)
        return False

    stub_connection = MagicMock()
    stub_connection.cursor.return_value.__enter__.return_value = MagicMock()

    stubbed_connect = patch.object(
        hook_log_extractor, "connect_to_neon", return_value=stub_connection
    )
    vanishing_transcript = patch.object(
        hook_log_extractor.os.path,
        "exists",
        side_effect=_return_false_for_target,
    )
    with stubbed_connect, vanishing_transcript:
        returned_status = hook_log_extractor.run_full_extraction(
            transcripts_root=str(tmp_path),
            state_file_path=str(offsets_path),
            full_rebuild=False,
        )

    assert returned_status == 0
|
|
1053
|
+
|
|
1054
|
+
|
|
1055
|
+
def test_iter_attachment_records_exposes_final_line_number_after_trailing_non_attachment(
    tmp_path: Path,
) -> None:
    """``final_line_number`` counts lines that follow the last yielded record.

    The file has one success line followed by two non-attachment lines; one
    record is yielded and the iterator must report three lines.
    """
    transcript_path = tmp_path / "session.jsonl"
    transcript_text = "\n".join(
        (
            _make_success_line(tool_use_id="toolu_a"),
            json.dumps({"type": "user", "content": "noise"}),
            json.dumps({"type": "assistant", "content": "more noise"}),
        )
    )
    transcript_path.write_text(transcript_text + "\n", encoding="utf-8")

    record_iterator = hook_log_extractor.iter_attachment_records_from_file(
        str(transcript_path),
        start_offset=0,
    )
    yielded_records = list(record_iterator)

    assert len(yielded_records) == 1
    assert record_iterator.final_line_number == 3
|
|
1075
|
+
|
|
1076
|
+
|
|
1077
|
+
def test_run_full_extraction_persists_lines_consumed_with_trailing_noise(
    tmp_path: Path,
) -> None:
    """The saved ``line_number`` includes a non-attachment line after the last yield.

    The transcript is one success line plus one trailing noise line, so the
    persisted line number must be 2.
    """
    transcript_path = tmp_path / "session.jsonl"
    transcript_text = "\n".join(
        (
            _make_success_line(tool_use_id="toolu_a"),
            json.dumps({"type": "user", "content": "trailing noise"}),
        )
    )
    transcript_path.write_text(transcript_text + "\n", encoding="utf-8")
    offsets_path = tmp_path / "offsets.json"

    stub_connection = MagicMock()
    stub_cursor = MagicMock()
    stub_connection.cursor.return_value.__enter__.return_value = stub_cursor

    stubbed_connect = patch.object(
        hook_log_extractor, "connect_to_neon", return_value=stub_connection
    )
    with stubbed_connect:
        hook_log_extractor.run_full_extraction(
            transcripts_root=str(tmp_path),
            state_file_path=str(offsets_path),
            full_rebuild=False,
        )

    persisted = hook_log_extractor.load_offsets(str(offsets_path))
    assert persisted[str(transcript_path)]["line_number"] == 2
|
|
1105
|
+
|
|
1106
|
+
|
|
1107
|
+
def test_iter_attachment_records_final_line_number_when_no_yields(tmp_path: Path) -> None:
    """``final_line_number`` reflects consumed lines even if nothing is yielded.

    Both lines in the file are non-attachment records, so the iterator
    yields nothing and must still report two lines.
    """
    transcript_path = tmp_path / "session.jsonl"
    transcript_text = "\n".join(
        (
            json.dumps({"type": "user", "content": "a"}),
            json.dumps({"type": "assistant", "content": "b"}),
        )
    )
    transcript_path.write_text(transcript_text + "\n", encoding="utf-8")

    record_iterator = hook_log_extractor.iter_attachment_records_from_file(
        str(transcript_path),
        start_offset=0,
    )
    yielded_records = list(record_iterator)

    assert yielded_records == []
    assert record_iterator.final_line_number == 2
|
|
1124
|
+
|
|
1125
|
+
|
|
1126
|
+
def test_iter_attachment_records_exposes_final_byte_offset_after_drain(
    tmp_path: Path,
) -> None:
    """After exhaustion ``final_byte_offset`` equals the file's byte length.

    The file contains only non-attachment lines, so this holds even when
    the iterator yields no records.
    """
    transcript_path = tmp_path / "session.jsonl"
    noise_lines = (
        json.dumps({"type": "user", "content": "a"}),
        json.dumps({"type": "assistant", "content": "b"}),
    )
    transcript_bytes = ("\n".join(noise_lines) + "\n").encode("utf-8")
    transcript_path.write_bytes(transcript_bytes)

    record_iterator = hook_log_extractor.iter_attachment_records_from_file(
        str(transcript_path),
        start_offset=0,
    )
    # Drain the iterator; the yielded records themselves are not inspected.
    list(record_iterator)

    assert record_iterator.final_byte_offset == len(transcript_bytes)
|
|
1145
|
+
|
|
1146
|
+
|
|
1147
|
+
def test_run_full_extraction_persists_offset_with_only_non_hook_attachments(
    tmp_path: Path,
) -> None:
    """The saved byte offset advances to end-of-file when no hook records exist.

    The transcript holds only non-attachment lines; after extraction the
    state file must contain an entry for it whose ``byte_offset`` equals
    the file size.
    """
    transcript_path = tmp_path / "session.jsonl"
    transcript_text = "\n".join(
        (
            json.dumps({"type": "user", "content": "noise"}),
            json.dumps({"type": "assistant", "content": "more noise"}),
        )
    )
    transcript_path.write_text(transcript_text + "\n", encoding="utf-8")
    offsets_path = tmp_path / "offsets.json"

    stub_connection = MagicMock()
    stub_cursor = MagicMock()
    stub_connection.cursor.return_value.__enter__.return_value = stub_cursor

    stubbed_connect = patch.object(
        hook_log_extractor, "connect_to_neon", return_value=stub_connection
    )
    with stubbed_connect:
        hook_log_extractor.run_full_extraction(
            transcripts_root=str(tmp_path),
            state_file_path=str(offsets_path),
            full_rebuild=False,
        )

    persisted = hook_log_extractor.load_offsets(str(offsets_path))
    transcript_key = str(transcript_path)
    assert transcript_key in persisted
    assert persisted[transcript_key]["byte_offset"] == transcript_path.stat().st_size
|
|
1177
|
+
|
|
1178
|
+
|
|
1179
|
+
def test_run_full_extraction_persists_final_offset_not_file_size(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The persisted ``byte_offset`` must be the iterator's ``final_byte_offset``.

    ``iter_attachment_records_from_file`` is wrapped so the iterator used
    by the run can be inspected afterwards. Two equalities are checked: the
    iterator's final offset equals the known byte length of the transcript,
    and the saved offset equals that iterator-reported value. Together they
    show the save path takes its number from the iterator rather than from
    ``os.path.getsize``, which the production code is stated to no longer
    call.
    """
    transcript_path = tmp_path / "session.jsonl"
    transcript_bytes = (_make_success_line() + "\n").encode("utf-8")
    transcript_path.write_bytes(transcript_bytes)
    expected_byte_length = len(transcript_bytes)
    offsets_path = tmp_path / "offsets.json"

    observed_iterators: list[hook_log_extractor.AttachmentRecordIterator] = []
    genuine_factory = hook_log_extractor.iter_attachment_records_from_file

    def _capturing_iterator_factory(
        jsonl_file_path: str,
        start_offset: int,
        start_line_number: int = 0,
    ) -> hook_log_extractor.AttachmentRecordIterator:
        # Delegate to the real factory and remember what it produced.
        new_iterator = genuine_factory(
            jsonl_file_path,
            start_offset=start_offset,
            start_line_number=start_line_number,
        )
        observed_iterators.append(new_iterator)
        return new_iterator

    monkeypatch.setattr(
        hook_log_extractor,
        "iter_attachment_records_from_file",
        _capturing_iterator_factory,
    )

    stub_connection = MagicMock()
    stub_cursor = MagicMock()
    stub_connection.cursor.return_value.__enter__.return_value = stub_cursor

    stubbed_connect = patch.object(
        hook_log_extractor, "connect_to_neon", return_value=stub_connection
    )
    with stubbed_connect:
        hook_log_extractor.run_full_extraction(
            transcripts_root=str(tmp_path),
            state_file_path=str(offsets_path),
            full_rebuild=False,
        )

    persisted = hook_log_extractor.load_offsets(str(offsets_path))
    assert observed_iterators, "iterator was never produced"
    offset_from_iterator = observed_iterators[0].final_byte_offset
    assert offset_from_iterator == expected_byte_length
    assert persisted[str(transcript_path)]["byte_offset"] == offset_from_iterator
|
|
1243
|
+
|
|
1244
|
+
|
|
1245
|
+
def test_save_offsets_is_serialized_across_threads(tmp_path: Path) -> None:
    """Five threads doing locked load-modify-save must all leave their entry.

    Each thread takes ``_acquire_offsets_lock``, loads the state, adds its
    own key and saves; afterwards the state file must hold all five keys.
    """
    state_path_string = str(tmp_path / "offsets.json")
    hook_log_extractor.save_offsets(state_path_string, {})

    def _writer_for_path(writer_path: str) -> None:
        with hook_log_extractor._acquire_offsets_lock(state_path_string):
            current_offsets = hook_log_extractor.load_offsets(state_path_string)
            current_offsets[writer_path] = {"byte_offset": 100, "line_number": 1}
            hook_log_extractor.save_offsets(state_path_string, current_offsets)

    writer_threads = []
    for each_index in range(5):
        writer_threads.append(
            threading.Thread(
                target=_writer_for_path,
                args=(f"C:/file_{each_index}.jsonl",),
            )
        )
    for each_thread in writer_threads:
        each_thread.start()
    for each_thread in writer_threads:
        each_thread.join()

    offsets_after_writers = hook_log_extractor.load_offsets(state_path_string)
    assert len(offsets_after_writers) == 5
    for each_index in range(5):
        assert f"C:/file_{each_index}.jsonl" in offsets_after_writers
|
|
1269
|
+
|
|
1270
|
+
|
|
1271
|
+
def test_run_full_extraction_holds_lock_across_load_and_save(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """An extraction run must call ``_acquire_offsets_lock`` at least once.

    The lock helper is wrapped with a counting pass-through; the test only
    checks that the counter moved, not where the lock was taken.
    """
    transcript_path = tmp_path / "session.jsonl"
    transcript_path.write_text(_make_success_line() + "\n", encoding="utf-8")
    offsets_path = tmp_path / "offsets.json"

    genuine_lock_helper = hook_log_extractor._acquire_offsets_lock
    acquisitions = 0

    def _counting_lock_helper(state_file_path: str) -> Any:
        nonlocal acquisitions
        acquisitions += 1
        return genuine_lock_helper(state_file_path)

    monkeypatch.setattr(
        hook_log_extractor, "_acquire_offsets_lock", _counting_lock_helper
    )

    stub_connection = MagicMock()
    stub_cursor = MagicMock()
    stub_connection.cursor.return_value.__enter__.return_value = stub_cursor

    stubbed_connect = patch.object(
        hook_log_extractor, "connect_to_neon", return_value=stub_connection
    )
    with stubbed_connect:
        hook_log_extractor.run_full_extraction(
            transcripts_root=str(tmp_path),
            state_file_path=str(offsets_path),
            full_rebuild=False,
        )

    assert acquisitions >= 1
|
|
1306
|
+
|
|
1307
|
+
|
|
1308
|
+
def test_lock_file_handle_blocking_reraises_permanent_oserror_quickly(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A permanent ``OSError`` from ``msvcrt.locking`` is re-raised without retrying.

    ``EACCES`` is the contention errno documented for ``LK_NBLCK`` in the
    Microsoft ``_locking`` spec, so ``EBADF`` (invalid file descriptor)
    stands in for a failure that can never succeed on retry. The error must
    propagate in under a second, i.e. without spending the retry budget.
    Skipped when ``hook_log_extractor.msvcrt`` is ``None``.
    """
    if hook_log_extractor.msvcrt is None:
        pytest.skip("msvcrt retry loop only exists on Windows runtimes")

    def _raise_permanent_oserror(
        file_descriptor: int,
        mode_flag: int,
        byte_count: int,
    ) -> None:
        raise OSError(errno.EBADF, "invalid file descriptor")

    lock_path = tmp_path / "offsets.json.lock"
    with lock_path.open("a+", encoding="utf-8") as lock_file_handle:
        monkeypatch.setattr(
            hook_log_extractor.msvcrt, "locking", _raise_permanent_oserror
        )

        clock_before = time.monotonic()
        with pytest.raises(OSError) as excinfo:
            hook_log_extractor._lock_file_handle_blocking(lock_file_handle)
        seconds_spent = time.monotonic() - clock_before

        assert excinfo.value.errno == errno.EBADF
        assert seconds_spent < 1.0
|
|
1344
|
+
|
|
1345
|
+
|
|
1346
|
+
def test_lock_file_handle_blocking_caps_retries_on_contention_errno(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Contention (``EACCES``) is retried a bounded number of times, then raised.

    Under ``LK_NBLCK`` the Microsoft ``_locking`` spec reports contention as
    ``EACCES``. ``msvcrt.locking`` is replaced with a stub that always
    raises it and counts calls; ``time.sleep`` is replaced with a no-op.
    The call count must equal ``LOCK_MAXIMUM_RETRY_COUNT``. Skipped when
    ``hook_log_extractor.msvcrt`` is ``None``.
    """
    if hook_log_extractor.msvcrt is None:
        pytest.skip("msvcrt retry loop only exists on Windows runtimes")

    locking_calls = 0

    def _raise_contention_oserror(
        file_descriptor: int,
        mode_flag: int,
        byte_count: int,
    ) -> None:
        nonlocal locking_calls
        locking_calls += 1
        raise OSError(errno.EACCES, "retries exhausted")

    lock_path = tmp_path / "offsets.json.lock"
    with lock_path.open("a+", encoding="utf-8") as lock_file_handle:
        monkeypatch.setattr(
            hook_log_extractor.msvcrt, "locking", _raise_contention_oserror
        )
        monkeypatch.setattr(hook_log_extractor.time, "sleep", lambda _seconds: None)

        with pytest.raises(OSError) as excinfo:
            hook_log_extractor._lock_file_handle_blocking(lock_file_handle)

        assert excinfo.value.errno == errno.EACCES
        assert locking_calls == hook_log_extractor.LOCK_MAXIMUM_RETRY_COUNT
|
|
1386
|
+
|
|
1387
|
+
|
|
1388
|
+
def test_lock_file_handle_blocking_uses_nonblocking_mode_flag(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The Windows branch must pass ``LK_NBLCK`` (not ``LK_LOCK``) to ``msvcrt.locking``.

    Per the Microsoft ``_locking`` spec, ``LK_LOCK`` blocks internally for
    about 10 seconds per attempt; stacked on the retry loop that gives a
    worst case of roughly 303s under sustained contention. ``LK_NBLCK``
    raises ``OSError(EACCES)`` immediately, so the Python-level
    ``time.sleep`` is the only pacing and the retry budget stays near its
    intended ~3s. Skipped when ``hook_log_extractor.msvcrt`` is ``None``.
    """
    if hook_log_extractor.msvcrt is None:
        pytest.skip("msvcrt mode-flag check only applies on Windows runtimes")

    seen_mode_flags: list[int] = []

    def _record_mode_flag(
        file_descriptor: int,
        mode_flag: int,
        byte_count: int,
    ) -> None:
        seen_mode_flags.append(mode_flag)

    lock_path = tmp_path / "offsets.json.lock"
    with lock_path.open("a+", encoding="utf-8") as lock_file_handle:
        monkeypatch.setattr(
            hook_log_extractor.msvcrt, "locking", _record_mode_flag
        )

        hook_log_extractor._lock_file_handle_blocking(lock_file_handle)

        assert seen_mode_flags == [hook_log_extractor.msvcrt.LK_NBLCK]
|
|
1424
|
+
|
|
1425
|
+
|
|
1426
|
+
def test_run_full_extraction_does_not_hold_lock_across_db_io(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """The DB insert must run while the offsets lock is not held.

    ``_acquire_offsets_lock`` is wrapped by a context manager that raises a
    flag for as long as it is active, and the cursor's ``executemany``
    records whether that flag was up when it was called.
    """
    transcript_path = tmp_path / "session.jsonl"
    transcript_path.write_text(_make_success_line() + "\n", encoding="utf-8")
    offsets_path = tmp_path / "offsets.json"

    genuine_lock_helper = hook_log_extractor._acquire_offsets_lock
    lock_is_held = False
    insert_saw_lock_held = False

    @contextlib.contextmanager
    def _tracking_lock_helper(passed_state_file_path: str) -> Any:
        nonlocal lock_is_held
        # The flag goes up before the real lock is requested and comes down
        # however the wrapped block exits.
        lock_is_held = True
        try:
            with genuine_lock_helper(passed_state_file_path):
                yield
        finally:
            lock_is_held = False

    def _observe_lock_during_insert(*_args: Any, **_kwargs: Any) -> None:
        nonlocal insert_saw_lock_held
        if lock_is_held:
            insert_saw_lock_held = True

    monkeypatch.setattr(
        hook_log_extractor, "_acquire_offsets_lock", _tracking_lock_helper
    )

    stub_cursor = MagicMock()
    stub_cursor.executemany.side_effect = _observe_lock_during_insert
    stub_connection = MagicMock()
    stub_connection.cursor.return_value.__enter__.return_value = stub_cursor

    stubbed_connect = patch.object(
        hook_log_extractor, "connect_to_neon", return_value=stub_connection
    )
    with stubbed_connect:
        hook_log_extractor.run_full_extraction(
            transcripts_root=str(tmp_path),
            state_file_path=str(offsets_path),
            full_rebuild=False,
        )

    assert stub_cursor.executemany.called, (
        "Test setup failed: DB insert never ran"
    )
    assert not insert_saw_lock_held, (
        "Offsets lock must not be held during DB insert calls"
    )
|
|
1477
|
+
|
|
1478
|
+
|
|
1479
|
+
def test_run_full_extraction_preserves_external_offset_updates(
|
|
1480
|
+
tmp_path: Path,
|
|
1481
|
+
) -> None:
|
|
1482
|
+
"""Narrow-scope save must merge with concurrent writers, not clobber."""
|
|
1483
|
+
jsonl_file = tmp_path / "session.jsonl"
|
|
1484
|
+
jsonl_file.write_text(_make_success_line() + "\n", encoding="utf-8")
|
|
1485
|
+
state_file = tmp_path / "offsets.json"
|
|
1486
|
+
|
|
1487
|
+
fake_cursor = MagicMock()
|
|
1488
|
+
fake_connection = MagicMock()
|
|
1489
|
+
fake_connection.cursor.return_value.__enter__.return_value = fake_cursor
|
|
1490
|
+
|
|
1491
|
+
other_path_entry = {
|
|
1492
|
+
"C:/other_session.jsonl": {"byte_offset": 777, "line_number": 9},
|
|
1493
|
+
}
|
|
1494
|
+
|
|
1495
|
+
original_save_offsets = hook_log_extractor.save_offsets
|
|
1496
|
+
|
|
1497
|
+
def _save_then_inject_external_writer(
|
|
1498
|
+
passed_state_file_path: str,
|
|
1499
|
+
passed_offsets: dict[str, dict[str, int]],
|
|
1500
|
+
) -> None:
|
|
1501
|
+
original_save_offsets(passed_state_file_path, passed_offsets)
|
|
1502
|
+
if other_path_entry["C:/other_session.jsonl"][
|
|
1503
|
+
"byte_offset"
|
|
1504
|
+
] == 777 and "C:/other_session.jsonl" not in passed_offsets:
|
|
1505
|
+
loaded_from_disk = hook_log_extractor.load_offsets(passed_state_file_path)
|
|
1506
|
+
loaded_from_disk["C:/other_session.jsonl"] = other_path_entry[
|
|
1507
|
+
"C:/other_session.jsonl"
|
|
1508
|
+
]
|
|
1509
|
+
original_save_offsets(passed_state_file_path, loaded_from_disk)
|
|
1510
|
+
|
|
1511
|
+
with (
|
|
1512
|
+
patch.object(
|
|
1513
|
+
hook_log_extractor, "connect_to_neon", return_value=fake_connection
|
|
1514
|
+
),
|
|
1515
|
+
patch.object(
|
|
1516
|
+
hook_log_extractor, "save_offsets", _save_then_inject_external_writer
|
|
1517
|
+
),
|
|
1518
|
+
):
|
|
1519
|
+
hook_log_extractor.run_full_extraction(
|
|
1520
|
+
transcripts_root=str(tmp_path),
|
|
1521
|
+
state_file_path=str(state_file),
|
|
1522
|
+
full_rebuild=False,
|
|
1523
|
+
)
|
|
1524
|
+
|
|
1525
|
+
final_offsets = hook_log_extractor.load_offsets(str(state_file))
|
|
1526
|
+
assert "C:/other_session.jsonl" in final_offsets
|
|
1527
|
+
assert final_offsets["C:/other_session.jsonl"] == {
|
|
1528
|
+
"byte_offset": 777,
|
|
1529
|
+
"line_number": 9,
|
|
1530
|
+
}
|
|
1531
|
+
assert str(jsonl_file) in final_offsets
|