codespine 1.0.6__tar.gz → 1.0.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. {codespine-1.0.6 → codespine-1.0.7}/PKG-INFO +1 -1
  2. {codespine-1.0.6 → codespine-1.0.7}/codespine/__init__.py +1 -1
  3. {codespine-1.0.6 → codespine-1.0.7}/codespine/cli.py +56 -2
  4. {codespine-1.0.6 → codespine-1.0.7}/codespine/indexer/engine.py +151 -18
  5. {codespine-1.0.6 → codespine-1.0.7}/codespine.egg-info/PKG-INFO +1 -1
  6. {codespine-1.0.6 → codespine-1.0.7}/codespine.egg-info/SOURCES.txt +1 -0
  7. {codespine-1.0.6 → codespine-1.0.7}/pyproject.toml +1 -1
  8. codespine-1.0.7/tests/test_parse_resilience.py +194 -0
  9. {codespine-1.0.6 → codespine-1.0.7}/LICENSE +0 -0
  10. {codespine-1.0.6 → codespine-1.0.7}/README.md +0 -0
  11. {codespine-1.0.6 → codespine-1.0.7}/codespine/analysis/__init__.py +0 -0
  12. {codespine-1.0.6 → codespine-1.0.7}/codespine/analysis/community.py +0 -0
  13. {codespine-1.0.6 → codespine-1.0.7}/codespine/analysis/context.py +0 -0
  14. {codespine-1.0.6 → codespine-1.0.7}/codespine/analysis/coupling.py +0 -0
  15. {codespine-1.0.6 → codespine-1.0.7}/codespine/analysis/crossmodule.py +0 -0
  16. {codespine-1.0.6 → codespine-1.0.7}/codespine/analysis/deadcode.py +0 -0
  17. {codespine-1.0.6 → codespine-1.0.7}/codespine/analysis/flow.py +0 -0
  18. {codespine-1.0.6 → codespine-1.0.7}/codespine/analysis/impact.py +0 -0
  19. {codespine-1.0.6 → codespine-1.0.7}/codespine/cache/__init__.py +0 -0
  20. {codespine-1.0.6 → codespine-1.0.7}/codespine/cache/result_cache.py +0 -0
  21. {codespine-1.0.6 → codespine-1.0.7}/codespine/config.py +0 -0
  22. {codespine-1.0.6 → codespine-1.0.7}/codespine/db/__init__.py +0 -0
  23. {codespine-1.0.6 → codespine-1.0.7}/codespine/db/_cypher_compat.py +0 -0
  24. {codespine-1.0.6 → codespine-1.0.7}/codespine/db/duckdb_store.py +0 -0
  25. {codespine-1.0.6 → codespine-1.0.7}/codespine/db/schema.py +0 -0
  26. {codespine-1.0.6 → codespine-1.0.7}/codespine/db/store.py +0 -0
  27. {codespine-1.0.6 → codespine-1.0.7}/codespine/diff/__init__.py +0 -0
  28. {codespine-1.0.6 → codespine-1.0.7}/codespine/diff/branch_diff.py +0 -0
  29. {codespine-1.0.6 → codespine-1.0.7}/codespine/guide.py +0 -0
  30. {codespine-1.0.6 → codespine-1.0.7}/codespine/indexer/__init__.py +0 -0
  31. {codespine-1.0.6 → codespine-1.0.7}/codespine/indexer/call_resolver.py +0 -0
  32. {codespine-1.0.6 → codespine-1.0.7}/codespine/indexer/di_resolver.py +0 -0
  33. {codespine-1.0.6 → codespine-1.0.7}/codespine/indexer/java_parser.py +0 -0
  34. {codespine-1.0.6 → codespine-1.0.7}/codespine/indexer/symbol_builder.py +0 -0
  35. {codespine-1.0.6 → codespine-1.0.7}/codespine/mcp/__init__.py +0 -0
  36. {codespine-1.0.6 → codespine-1.0.7}/codespine/mcp/server.py +0 -0
  37. {codespine-1.0.6 → codespine-1.0.7}/codespine/noise/__init__.py +0 -0
  38. {codespine-1.0.6 → codespine-1.0.7}/codespine/noise/blocklist.py +0 -0
  39. {codespine-1.0.6 → codespine-1.0.7}/codespine/overlay/__init__.py +0 -0
  40. {codespine-1.0.6 → codespine-1.0.7}/codespine/overlay/git_state.py +0 -0
  41. {codespine-1.0.6 → codespine-1.0.7}/codespine/overlay/merge.py +0 -0
  42. {codespine-1.0.6 → codespine-1.0.7}/codespine/overlay/store.py +0 -0
  43. {codespine-1.0.6 → codespine-1.0.7}/codespine/search/__init__.py +0 -0
  44. {codespine-1.0.6 → codespine-1.0.7}/codespine/search/bm25.py +0 -0
  45. {codespine-1.0.6 → codespine-1.0.7}/codespine/search/fuzzy.py +0 -0
  46. {codespine-1.0.6 → codespine-1.0.7}/codespine/search/hybrid.py +0 -0
  47. {codespine-1.0.6 → codespine-1.0.7}/codespine/search/rrf.py +0 -0
  48. {codespine-1.0.6 → codespine-1.0.7}/codespine/search/vector.py +0 -0
  49. {codespine-1.0.6 → codespine-1.0.7}/codespine/sharding/__init__.py +0 -0
  50. {codespine-1.0.6 → codespine-1.0.7}/codespine/sharding/router.py +0 -0
  51. {codespine-1.0.6 → codespine-1.0.7}/codespine/sharding/store.py +0 -0
  52. {codespine-1.0.6 → codespine-1.0.7}/codespine/watch/__init__.py +0 -0
  53. {codespine-1.0.6 → codespine-1.0.7}/codespine/watch/git_hook.py +0 -0
  54. {codespine-1.0.6 → codespine-1.0.7}/codespine/watch/watcher.py +0 -0
  55. {codespine-1.0.6 → codespine-1.0.7}/codespine.egg-info/dependency_links.txt +0 -0
  56. {codespine-1.0.6 → codespine-1.0.7}/codespine.egg-info/entry_points.txt +0 -0
  57. {codespine-1.0.6 → codespine-1.0.7}/codespine.egg-info/requires.txt +0 -0
  58. {codespine-1.0.6 → codespine-1.0.7}/codespine.egg-info/top_level.txt +0 -0
  59. {codespine-1.0.6 → codespine-1.0.7}/gindex.py +0 -0
  60. {codespine-1.0.6 → codespine-1.0.7}/setup.cfg +0 -0
  61. {codespine-1.0.6 → codespine-1.0.7}/tests/test_branch_diff_normalize.py +0 -0
  62. {codespine-1.0.6 → codespine-1.0.7}/tests/test_call_resolver.py +0 -0
  63. {codespine-1.0.6 → codespine-1.0.7}/tests/test_community_detection.py +0 -0
  64. {codespine-1.0.6 → codespine-1.0.7}/tests/test_cypher_compat.py +0 -0
  65. {codespine-1.0.6 → codespine-1.0.7}/tests/test_deadcode.py +0 -0
  66. {codespine-1.0.6 → codespine-1.0.7}/tests/test_duckdb_store.py +0 -0
  67. {codespine-1.0.6 → codespine-1.0.7}/tests/test_index_and_hybrid.py +0 -0
  68. {codespine-1.0.6 → codespine-1.0.7}/tests/test_java_parser.py +0 -0
  69. {codespine-1.0.6 → codespine-1.0.7}/tests/test_multimodule_index.py +0 -0
  70. {codespine-1.0.6 → codespine-1.0.7}/tests/test_overlay.py +0 -0
  71. {codespine-1.0.6 → codespine-1.0.7}/tests/test_result_cache.py +0 -0
  72. {codespine-1.0.6 → codespine-1.0.7}/tests/test_search_ranking.py +0 -0
  73. {codespine-1.0.6 → codespine-1.0.7}/tests/test_sharding.py +0 -0
  74. {codespine-1.0.6 → codespine-1.0.7}/tests/test_store_recovery.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 1.0.6
3
+ Version: 1.0.7
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "1.0.6"
4
+ __version__ = "1.0.7"
@@ -137,8 +137,12 @@ def _index_shard_group(
137
137
 
138
138
  for mod_path, project_id in modules:
139
139
  # Per-module progress state (local — no shared mutation).
140
- parse_state: dict = {"shown": False, "indexed": 0, "total": 0,
141
- "last_ts": 0.0, "printed_zero": False}
140
+ parse_state: dict = {
141
+ "shown": False, "indexed": 0, "total": 0,
142
+ "last_ts": 0.0, "printed_zero": False,
143
+ "current_file": "", "elapsed": 0.0,
144
+ "last_done": -1, "frozen_since": 0.0, "stall_warned": False,
145
+ }
142
146
  call_state: dict = {"shown": False, "count": 0, "last_ts": 0.0,
143
147
  "started_at": 0.0}
144
148
 
@@ -160,11 +164,61 @@ def _index_shard_group(
160
164
  _phase(f"{prefix}Parsing code...", "0/0")
161
165
  parse_state["printed_zero"] = True
162
166
  return
167
+ if event == "parse_heartbeat":
168
+ # Fires every 2s from a daemon thread — keeps spinner alive
169
+ # even when all worker threads are busy or one is hanging.
170
+ done = int(payload.get("done", 0))
171
+ total = int(payload.get("total", 0))
172
+ current = str(payload.get("current_file", ""))
173
+ elapsed_s = float(payload.get("elapsed", 0.0))
174
+ parse_state["indexed"] = done
175
+ parse_state["total"] = total
176
+ parse_state["current_file"] = current
177
+ parse_state["elapsed"] = elapsed_s
178
+ if total > 0 and not parallel:
179
+ basename = os.path.basename(current) if current else ""
180
+ click.echo(
181
+ f"\r{_spinner_char()} {prefix}Parsing code... "
182
+ f"{_bar(done, total)} {done}/{total} "
183
+ f"{basename[:38]:<38} {elapsed_s:.0f}s ",
184
+ nl=False,
185
+ )
186
+ parse_state["shown"] = True
187
+ parse_state["last_ts"] = now
188
+
189
+ # ── Stall detection ──────────────────────────────────────
190
+ if done == parse_state["last_done"]:
191
+ if parse_state["frozen_since"] == 0.0:
192
+ parse_state["frozen_since"] = now
193
+ stalled_for = now - parse_state["frozen_since"]
194
+ if stalled_for >= 15.0 and not parse_state["stall_warned"]:
195
+ parse_state["stall_warned"] = True
196
+ basename = os.path.basename(current) if current else "unknown"
197
+ with output_lock:
198
+ click.echo() # break out of \r line
199
+ click.secho(
200
+ f" ⚠ Parsing stalled on {basename} for "
201
+ f"{stalled_for:.0f}s — file may be pathological.\n"
202
+ f" Timeout at {os.environ.get('CODESPINE_PARSE_TIMEOUT_SECS', '60')}s. "
203
+ f"To skip large files: "
204
+ f"export CODESPINE_MAX_FILE_BYTES=2097152",
205
+ fg="yellow",
206
+ )
207
+ else:
208
+ parse_state["last_done"] = done
209
+ parse_state["frozen_since"] = 0.0
210
+ parse_state["stall_warned"] = False
211
+ return
163
212
  if event == "parse_progress":
164
213
  indexed = int(payload.get("indexed", 0))
165
214
  total = int(payload.get("total", 0))
166
215
  parse_state["indexed"] = indexed
167
216
  parse_state["total"] = total
217
+ # Reset stall tracker on actual progress
218
+ if indexed != parse_state["last_done"]:
219
+ parse_state["last_done"] = indexed
220
+ parse_state["frozen_since"] = 0.0
221
+ parse_state["stall_warned"] = False
168
222
  if total == 0:
169
223
  return
170
224
  if indexed == total or (now - parse_state["last_ts"]) >= 0.2:
@@ -17,12 +17,50 @@ from codespine.search.vector import embed_text
17
17
 
18
18
  LOGGER = logging.getLogger(__name__)
19
19
 
20
+ # Per-file parse size guard: files larger than this are skipped entirely.
21
+ # Large generated Java files (proto, JAXB, etc.) can cause tree-sitter to
22
+ # spin. Override with env CODESPINE_MAX_FILE_BYTES (default 2 MB).
23
+ _MAX_FILE_BYTES: int = int(os.environ.get("CODESPINE_MAX_FILE_BYTES", str(2 * 1024 * 1024)))
24
+
25
+ # Per-file parse timeout in seconds. Override with CODESPINE_PARSE_TIMEOUT_SECS.
26
+ _PARSE_TIMEOUT_SECS: int = int(os.environ.get("CODESPINE_PARSE_TIMEOUT_SECS", "60"))
27
+
28
+ # Heartbeat period for the parse-phase heartbeat thread (seconds).
29
+ _PARSE_HEARTBEAT_PERIOD: float = 2.0
30
+
20
31
 
21
32
  def _parse_file_worker(file_path: str, root_path: str, project_id: str) -> dict:
22
- """Pure CPU/IO work – no DB access. Safe to run in a thread pool."""
33
+ """Pure CPU/IO work – no DB access. Safe to run in a thread pool.
34
+
35
+ Returns a result dict. When the file is skipped (oversized), the dict
36
+ has ``parsed=None`` and ``skipped_reason`` set — callers must check and
37
+ skip DB writes for those entries.
38
+ """
23
39
  rel_path = os.path.relpath(file_path, root_path)
24
40
  is_test = "src/test/java" in file_path.replace("\\", "/")
25
41
  scope = JavaIndexer._scope_from_rel_path(rel_path)
42
+ # ── Size guard: skip files that are likely to hang tree-sitter ───────
43
+ try:
44
+ file_size = os.path.getsize(file_path)
45
+ except OSError as exc:
46
+ raise RuntimeError(f"stat failed: {exc}") from exc
47
+ if file_size > _MAX_FILE_BYTES:
48
+ LOGGER.warning(
49
+ "Skipping oversized file (%d bytes > %d): %s "
50
+ "(raise CODESPINE_MAX_FILE_BYTES to include it)",
51
+ file_size, _MAX_FILE_BYTES, rel_path,
52
+ )
53
+ return {
54
+ "file_path": file_path,
55
+ "rel_path": rel_path,
56
+ "source": b"",
57
+ "parsed": None,
58
+ "f_id": file_id(project_id, rel_path),
59
+ "digest": "",
60
+ "is_test": is_test,
61
+ "scope": scope,
62
+ "skipped_reason": "oversized",
63
+ }
26
64
  with open(file_path, "rb") as fh:
27
65
  source = fh.read()
28
66
  parsed = parse_java_source(source)
@@ -241,30 +279,119 @@ class JavaIndexer:
241
279
 
242
280
  # ── Parallel parse (CPU/IO) ──────────────────────────────────────────
243
281
  # tree-sitter releases the GIL so ThreadPoolExecutor gives real speedup.
282
+ # A daemon heartbeat thread emits parse_heartbeat events every 2 s so
283
+ # the CLI spinner keeps ticking even when all workers are busy.
284
+ # Per-future timeouts skip files that hang (e.g. huge generated source).
244
285
  _workers = max(1, min(8, len(to_reindex), os.cpu_count() or 4))
245
286
  parse_results: list[dict] = []
246
287
  if to_reindex:
247
- with concurrent.futures.ThreadPoolExecutor(max_workers=_workers) as ex:
248
- futs = {
249
- ex.submit(_parse_file_worker, fp, root_path, project_id): fp
250
- for fp in to_reindex
251
- }
252
- done = 0
253
- for fut in concurrent.futures.as_completed(futs):
254
- done += 1
255
- fp = futs[fut]
256
- try:
257
- parse_results.append(fut.result())
258
- except Exception as exc:
259
- LOGGER.warning("Skipping %s: %s", fp, exc)
288
+ _parse_done_holder: list[int] = [0]
289
+ _parse_current_holder: list[str] = [""]
290
+ _parse_hb_stop = threading.Event()
291
+ _parse_start = time.perf_counter()
292
+ _total = len(to_reindex)
293
+
294
+ def _parse_heartbeat_worker() -> None:
295
+ while not _parse_hb_stop.wait(_PARSE_HEARTBEAT_PERIOD):
260
296
  self._emit(
261
297
  progress,
262
- "parse_progress",
263
- indexed=done,
264
- total=len(to_reindex),
265
- file_path=fp,
298
+ "parse_heartbeat",
299
+ done=_parse_done_holder[0],
300
+ total=_total,
301
+ current_file=_parse_current_holder[0],
302
+ elapsed=time.perf_counter() - _parse_start,
266
303
  )
267
304
 
305
+ _parse_hb_thread = threading.Thread(
306
+ target=_parse_heartbeat_worker,
307
+ daemon=True,
308
+ name="codespine-parse-heartbeat",
309
+ )
310
+ _parse_hb_thread.start()
311
+
312
+ try:
313
+ with concurrent.futures.ThreadPoolExecutor(max_workers=_workers) as ex:
314
+ futs: dict[concurrent.futures.Future, str] = {}
315
+ submitted_at: dict[concurrent.futures.Future, float] = {}
316
+ for fp in to_reindex:
317
+ fut = ex.submit(_parse_file_worker, fp, root_path, project_id)
318
+ futs[fut] = fp
319
+ submitted_at[fut] = time.perf_counter()
320
+
321
+ pending = set(futs)
322
+ done_count = 0
323
+
324
+ while pending:
325
+ # Wait up to heartbeat period for any future to finish.
326
+ try:
327
+ for fut in concurrent.futures.as_completed(
328
+ pending, timeout=_PARSE_HEARTBEAT_PERIOD
329
+ ):
330
+ pending.discard(fut)
331
+ done_count += 1
332
+ fp = futs[fut]
333
+ _parse_done_holder[0] = done_count
334
+ _parse_current_holder[0] = fp
335
+ try:
336
+ parse_results.append(fut.result(timeout=0))
337
+ except concurrent.futures.TimeoutError:
338
+ # Shouldn't happen (future is done), but guard anyway
339
+ pass
340
+ except Exception as exc:
341
+ LOGGER.warning("Skipping %s: %s", fp, exc)
342
+ self._emit(
343
+ progress,
344
+ "parse_progress",
345
+ indexed=done_count,
346
+ total=_total,
347
+ file_path=fp,
348
+ )
349
+ except concurrent.futures.TimeoutError:
350
+ pass # heartbeat tick — proceed to deadline scan
351
+
352
+ # Abandon futures stuck past the per-file timeout.
353
+ now = time.perf_counter()
354
+ expired = [
355
+ f for f in pending
356
+ if now - submitted_at[f] > _PARSE_TIMEOUT_SECS
357
+ ]
358
+ for fut in expired:
359
+ fp = futs[fut]
360
+ LOGGER.warning(
361
+ "Parse timeout after %ds, skipping: %s "
362
+ "(thread may continue briefly in background)",
363
+ _PARSE_TIMEOUT_SECS, fp,
364
+ )
365
+ fut.cancel() # no-op if already running; cleans up pending ones
366
+ # Insert a sentinel so the file is counted but has no symbols.
367
+ parse_results.append({
368
+ "file_path": fp,
369
+ "rel_path": os.path.relpath(fp, root_path),
370
+ "source": b"",
371
+ "parsed": None,
372
+ "f_id": file_id(project_id, os.path.relpath(fp, root_path)),
373
+ "digest": "",
374
+ "is_test": "src/test/java" in fp.replace("\\", "/"),
375
+ "scope": JavaIndexer._scope_from_rel_path(
376
+ os.path.relpath(fp, root_path)
377
+ ),
378
+ "skipped_reason": "timeout",
379
+ })
380
+ pending.discard(fut)
381
+ done_count += 1
382
+ _parse_done_holder[0] = done_count
383
+ self._emit(
384
+ progress,
385
+ "parse_progress",
386
+ indexed=done_count,
387
+ total=_total,
388
+ file_path=fp,
389
+ timed_out=True,
390
+ )
391
+ finally:
392
+ _parse_hb_stop.set()
393
+ _parse_hb_thread.join(timeout=3.0)
394
+
268
395
  # ── Chunked DB writes ─────────────────────────────────────────────────
269
396
  if full:
270
397
  self.store.clear_project(project_id)
@@ -297,6 +424,12 @@ class JavaIndexer:
297
424
  symbol_rows: list[dict] = []
298
425
 
299
426
  for pr in parse_chunk:
427
+ # Skipped files (oversized, timeout) carry parsed=None.
428
+ # Still count as indexed for accurate reporting, but skip
429
+ # class/method/symbol extraction.
430
+ if pr.get("parsed") is None:
431
+ files_indexed += 1
432
+ continue
300
433
  file_path = pr["file_path"]
301
434
  parsed = pr["parsed"]
302
435
  f_id = pr["f_id"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 1.0.6
3
+ Version: 1.0.7
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -65,6 +65,7 @@ tests/test_index_and_hybrid.py
65
65
  tests/test_java_parser.py
66
66
  tests/test_multimodule_index.py
67
67
  tests/test_overlay.py
68
+ tests/test_parse_resilience.py
68
69
  tests/test_result_cache.py
69
70
  tests/test_search_ranking.py
70
71
  tests/test_sharding.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codespine"
7
- version = "1.0.6"
7
+ version = "1.0.7"
8
8
  description = "Local Java code intelligence indexer backed by a graph database"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -0,0 +1,194 @@
1
+ """Regression tests for parse-phase hang recovery (v1.0.7).
2
+
3
+ Covers:
4
+ - Oversized file skip (_MAX_FILE_BYTES guard)
5
+ - Parse heartbeat thread lifecycle
6
+ - Per-future timeout with sentinel insertion
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import concurrent.futures
11
+ import os
12
+ import threading
13
+ import time
14
+
15
+ import pytest
16
+
17
+
18
+ # ---------------------------------------------------------------------------
19
+ # Test A: oversized-file skip
20
+ # ---------------------------------------------------------------------------
21
+
22
+
23
+ def test_parse_worker_skips_oversized(tmp_path, monkeypatch):
24
+ """Files larger than _MAX_FILE_BYTES return parsed=None without calling
25
+ parse_java_source, preventing tree-sitter from hanging on giant files."""
26
+ import codespine.indexer.engine as eng
27
+
28
+ monkeypatch.setattr(eng, "_MAX_FILE_BYTES", 100)
29
+
30
+ java_file = tmp_path / "Big.java"
31
+ java_file.write_bytes(b"public class Big {}\n" + b" " * 200)
32
+
33
+ result = eng._parse_file_worker(str(java_file), str(tmp_path), "test-proj")
34
+
35
+ assert result["parsed"] is None
36
+ assert result["skipped_reason"] == "oversized"
37
+ assert result["rel_path"] == "Big.java"
38
+ assert result["source"] == b""
39
+
40
+
41
+ def test_parse_worker_normal_file(tmp_path):
42
+ """Files within the size limit are parsed normally."""
43
+ import codespine.indexer.engine as eng
44
+
45
+ java_file = tmp_path / "Small.java"
46
+ java_file.write_bytes(b"public class Small {}\n")
47
+
48
+ result = eng._parse_file_worker(str(java_file), str(tmp_path), "test-proj")
49
+
50
+ assert result["parsed"] is not None
51
+ assert result.get("skipped_reason") is None  # skip marker must be absent for in-size files
52
+ assert "skipped_reason" not in result or result["skipped_reason"] != "oversized"
53
+
54
+
55
+ def test_parse_worker_max_file_bytes_env(tmp_path, monkeypatch):
56
+ """CODESPINE_MAX_FILE_BYTES env var controls the threshold."""
57
+ monkeypatch.setenv("CODESPINE_MAX_FILE_BYTES", "50")
58
+ # Re-import to pick up new env (the constant is read at import time,
59
+ # so we must reload or patch it directly).
60
+ import codespine.indexer.engine as eng
61
+ import importlib
62
+ # Patch the already-imported constant directly; reloading the module is unnecessary.
63
+ monkeypatch.setattr(eng, "_MAX_FILE_BYTES", 50)
64
+
65
+ java_file = tmp_path / "Medium.java"
66
+ java_file.write_bytes(b"x" * 100)
67
+
68
+ result = eng._parse_file_worker(str(java_file), str(tmp_path), "test-proj")
69
+ assert result["parsed"] is None
70
+ assert result["skipped_reason"] == "oversized"
71
+
72
+
73
+ # ---------------------------------------------------------------------------
74
+ # Test B: parse heartbeat thread lifecycle
75
+ # ---------------------------------------------------------------------------
76
+
77
+
78
+ def test_parse_heartbeat_emits_and_stops():
79
+ """The heartbeat thread fires events every _PARSE_HEARTBEAT_PERIOD seconds
80
+ and stops cleanly when signalled."""
81
+ events: list[dict] = []
82
+ stop_event = threading.Event()
83
+
84
+ _done_holder: list[int] = [0]
85
+ _current_holder: list[str] = ["Foo.java"]
86
+ _start = time.perf_counter()
87
+ _total = 10
88
+ _period = 0.1 # fast for testing
89
+
90
+ def _worker() -> None:
91
+ while not stop_event.wait(_period):
92
+ events.append({
93
+ "event": "parse_heartbeat",
94
+ "done": _done_holder[0],
95
+ "total": _total,
96
+ "current_file": _current_holder[0],
97
+ "elapsed": time.perf_counter() - _start,
98
+ })
99
+
100
+ t = threading.Thread(target=_worker, daemon=True)
101
+ t.start()
102
+ time.sleep(0.35) # allow ~3 ticks
103
+ stop_event.set()
104
+ t.join(timeout=1.0)
105
+
106
+ assert not t.is_alive(), "Heartbeat thread did not stop"
107
+ assert len(events) >= 2, f"Expected ≥2 heartbeat events, got {len(events)}"
108
+ assert all(e["event"] == "parse_heartbeat" for e in events)
109
+ assert all(e["total"] == _total for e in events)
110
+
111
+
112
+ def test_parse_heartbeat_reflects_state_updates():
113
+ """The heartbeat reads live state from the shared holders."""
114
+ events: list[dict] = []
115
+ stop_event = threading.Event()
116
+ _done_holder: list[int] = [0]
117
+ _period = 0.05
118
+
119
+ def _worker() -> None:
120
+ while not stop_event.wait(_period):
121
+ events.append({"done": _done_holder[0]})
122
+
123
+ t = threading.Thread(target=_worker, daemon=True)
124
+ t.start()
125
+ time.sleep(0.08)
126
+ _done_holder[0] = 42
127
+ time.sleep(0.08)
128
+ stop_event.set()
129
+ t.join(timeout=1.0)
130
+
131
+ done_values = [e["done"] for e in events]
132
+ assert 42 in done_values, "State update not reflected in heartbeat"
133
+
134
+
135
+ # ---------------------------------------------------------------------------
136
+ # Test C: per-future timeout produces sentinel result
137
+ # ---------------------------------------------------------------------------
138
+
139
+
140
+ def test_timeout_sentinel_has_correct_shape(tmp_path):
141
+ """When a future times out, the sentinel dict must have parsed=None and
142
+ skipped_reason='timeout' so the DB-write loop skips it safely."""
143
+ import codespine.indexer.engine as eng
144
+
145
+ # Construct a sentinel the same way the engine does it.
146
+ fp = str(tmp_path / "Slow.java")
147
+ root_path = str(tmp_path)
148
+ project_id = "myproj"
149
+
150
+ sentinel = {
151
+ "file_path": fp,
152
+ "rel_path": os.path.relpath(fp, root_path),
153
+ "source": b"",
154
+ "parsed": None,
155
+ "f_id": eng.file_id(project_id, os.path.relpath(fp, root_path)),
156
+ "digest": "",
157
+ "is_test": False,
158
+ "scope": "main",
159
+ "skipped_reason": "timeout",
160
+ }
161
+
162
+ assert sentinel["parsed"] is None
163
+ assert sentinel["skipped_reason"] == "timeout"
164
+ assert sentinel["source"] == b""
165
+
166
+
167
+ def test_parse_loop_skips_none_parsed_in_db_write(tmp_path):
168
+ """The DB-write loop guard (parsed is None → continue) must not raise AttributeError when
169
+ a skipped sentinel is in parse_results."""
170
+ # We verify the guard logic directly without a real store.
171
+ parse_results = [
172
+ {
173
+ "file_path": str(tmp_path / "Skip.java"),
174
+ "rel_path": "Skip.java",
175
+ "source": b"",
176
+ "parsed": None,
177
+ "f_id": "fid-skip",
178
+ "digest": "",
179
+ "is_test": False,
180
+ "scope": "main",
181
+ "skipped_reason": "oversized",
182
+ }
183
+ ]
184
+
185
+ # Simulate what the engine's DB-write loop does.
186
+ files_indexed = 0
187
+ for pr in parse_results:
188
+ if pr.get("parsed") is None:
189
+ files_indexed += 1
190
+ continue
191
+ # This line would raise AttributeError if reached with parsed=None:
192
+ _ = pr["parsed"].classes
193
+
194
+ assert files_indexed == 1, "Skipped sentinel should increment files_indexed"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes