codespine 1.0.6__tar.gz → 1.0.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-1.0.6 → codespine-1.0.7}/PKG-INFO +1 -1
- {codespine-1.0.6 → codespine-1.0.7}/codespine/__init__.py +1 -1
- {codespine-1.0.6 → codespine-1.0.7}/codespine/cli.py +56 -2
- {codespine-1.0.6 → codespine-1.0.7}/codespine/indexer/engine.py +151 -18
- {codespine-1.0.6 → codespine-1.0.7}/codespine.egg-info/PKG-INFO +1 -1
- {codespine-1.0.6 → codespine-1.0.7}/codespine.egg-info/SOURCES.txt +1 -0
- {codespine-1.0.6 → codespine-1.0.7}/pyproject.toml +1 -1
- codespine-1.0.7/tests/test_parse_resilience.py +194 -0
- {codespine-1.0.6 → codespine-1.0.7}/LICENSE +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/README.md +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/analysis/__init__.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/analysis/community.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/analysis/context.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/analysis/coupling.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/analysis/crossmodule.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/analysis/deadcode.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/analysis/flow.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/analysis/impact.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/cache/__init__.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/cache/result_cache.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/config.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/db/__init__.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/db/_cypher_compat.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/db/duckdb_store.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/db/schema.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/db/store.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/diff/__init__.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/diff/branch_diff.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/guide.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/indexer/__init__.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/indexer/call_resolver.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/indexer/di_resolver.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/indexer/java_parser.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/mcp/__init__.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/mcp/server.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/noise/__init__.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/noise/blocklist.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/overlay/__init__.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/overlay/git_state.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/overlay/merge.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/overlay/store.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/search/__init__.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/search/bm25.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/search/fuzzy.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/search/hybrid.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/search/rrf.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/search/vector.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/sharding/__init__.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/sharding/router.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/sharding/store.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/watch/__init__.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/watch/git_hook.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine/watch/watcher.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine.egg-info/requires.txt +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/codespine.egg-info/top_level.txt +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/gindex.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/setup.cfg +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/tests/test_call_resolver.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/tests/test_community_detection.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/tests/test_cypher_compat.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/tests/test_deadcode.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/tests/test_duckdb_store.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/tests/test_index_and_hybrid.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/tests/test_java_parser.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/tests/test_multimodule_index.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/tests/test_overlay.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/tests/test_result_cache.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/tests/test_search_ranking.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/tests/test_sharding.py +0 -0
- {codespine-1.0.6 → codespine-1.0.7}/tests/test_store_recovery.py +0 -0
|
@@ -137,8 +137,12 @@ def _index_shard_group(
|
|
|
137
137
|
|
|
138
138
|
for mod_path, project_id in modules:
|
|
139
139
|
# Per-module progress state (local — no shared mutation).
|
|
140
|
-
parse_state: dict = {
|
|
141
|
-
|
|
140
|
+
parse_state: dict = {
|
|
141
|
+
"shown": False, "indexed": 0, "total": 0,
|
|
142
|
+
"last_ts": 0.0, "printed_zero": False,
|
|
143
|
+
"current_file": "", "elapsed": 0.0,
|
|
144
|
+
"last_done": -1, "frozen_since": 0.0, "stall_warned": False,
|
|
145
|
+
}
|
|
142
146
|
call_state: dict = {"shown": False, "count": 0, "last_ts": 0.0,
|
|
143
147
|
"started_at": 0.0}
|
|
144
148
|
|
|
@@ -160,11 +164,61 @@ def _index_shard_group(
|
|
|
160
164
|
_phase(f"{prefix}Parsing code...", "0/0")
|
|
161
165
|
parse_state["printed_zero"] = True
|
|
162
166
|
return
|
|
167
|
+
if event == "parse_heartbeat":
|
|
168
|
+
# Fires every 2s from a daemon thread — keeps spinner alive
|
|
169
|
+
# even when all worker threads are busy or one is hanging.
|
|
170
|
+
done = int(payload.get("done", 0))
|
|
171
|
+
total = int(payload.get("total", 0))
|
|
172
|
+
current = str(payload.get("current_file", ""))
|
|
173
|
+
elapsed_s = float(payload.get("elapsed", 0.0))
|
|
174
|
+
parse_state["indexed"] = done
|
|
175
|
+
parse_state["total"] = total
|
|
176
|
+
parse_state["current_file"] = current
|
|
177
|
+
parse_state["elapsed"] = elapsed_s
|
|
178
|
+
if total > 0 and not parallel:
|
|
179
|
+
basename = os.path.basename(current) if current else ""
|
|
180
|
+
click.echo(
|
|
181
|
+
f"\r{_spinner_char()} {prefix}Parsing code... "
|
|
182
|
+
f"{_bar(done, total)} {done}/{total} "
|
|
183
|
+
f"{basename[:38]:<38} {elapsed_s:.0f}s ",
|
|
184
|
+
nl=False,
|
|
185
|
+
)
|
|
186
|
+
parse_state["shown"] = True
|
|
187
|
+
parse_state["last_ts"] = now
|
|
188
|
+
|
|
189
|
+
# ── Stall detection ──────────────────────────────────────
|
|
190
|
+
if done == parse_state["last_done"]:
|
|
191
|
+
if parse_state["frozen_since"] == 0.0:
|
|
192
|
+
parse_state["frozen_since"] = now
|
|
193
|
+
stalled_for = now - parse_state["frozen_since"]
|
|
194
|
+
if stalled_for >= 15.0 and not parse_state["stall_warned"]:
|
|
195
|
+
parse_state["stall_warned"] = True
|
|
196
|
+
basename = os.path.basename(current) if current else "unknown"
|
|
197
|
+
with output_lock:
|
|
198
|
+
click.echo() # break out of \r line
|
|
199
|
+
click.secho(
|
|
200
|
+
f" ⚠ Parsing stalled on {basename} for "
|
|
201
|
+
f"{stalled_for:.0f}s — file may be pathological.\n"
|
|
202
|
+
f" Timeout at {os.environ.get('CODESPINE_PARSE_TIMEOUT_SECS', '60')}s. "
|
|
203
|
+
f"To skip large files: "
|
|
204
|
+
f"export CODESPINE_MAX_FILE_BYTES=2097152",
|
|
205
|
+
fg="yellow",
|
|
206
|
+
)
|
|
207
|
+
else:
|
|
208
|
+
parse_state["last_done"] = done
|
|
209
|
+
parse_state["frozen_since"] = 0.0
|
|
210
|
+
parse_state["stall_warned"] = False
|
|
211
|
+
return
|
|
163
212
|
if event == "parse_progress":
|
|
164
213
|
indexed = int(payload.get("indexed", 0))
|
|
165
214
|
total = int(payload.get("total", 0))
|
|
166
215
|
parse_state["indexed"] = indexed
|
|
167
216
|
parse_state["total"] = total
|
|
217
|
+
# Reset stall tracker on actual progress
|
|
218
|
+
if indexed != parse_state["last_done"]:
|
|
219
|
+
parse_state["last_done"] = indexed
|
|
220
|
+
parse_state["frozen_since"] = 0.0
|
|
221
|
+
parse_state["stall_warned"] = False
|
|
168
222
|
if total == 0:
|
|
169
223
|
return
|
|
170
224
|
if indexed == total or (now - parse_state["last_ts"]) >= 0.2:
|
|
@@ -17,12 +17,50 @@ from codespine.search.vector import embed_text
|
|
|
17
17
|
|
|
18
18
|
LOGGER = logging.getLogger(__name__)
|
|
19
19
|
|
|
20
|
+
# Per-file parse size guard: files larger than this are skipped entirely.
|
|
21
|
+
# Large generated Java files (proto, JAXB, etc.) can cause tree-sitter to
|
|
22
|
+
# spin. Override with env CODESPINE_MAX_FILE_BYTES (default 2 MB).
|
|
23
|
+
_MAX_FILE_BYTES: int = int(os.environ.get("CODESPINE_MAX_FILE_BYTES", str(2 * 1024 * 1024)))
|
|
24
|
+
|
|
25
|
+
# Per-file parse timeout in seconds. Override with CODESPINE_PARSE_TIMEOUT_SECS.
|
|
26
|
+
_PARSE_TIMEOUT_SECS: int = int(os.environ.get("CODESPINE_PARSE_TIMEOUT_SECS", "60"))
|
|
27
|
+
|
|
28
|
+
# Heartbeat period for the parse-phase heartbeat thread (seconds).
|
|
29
|
+
_PARSE_HEARTBEAT_PERIOD: float = 2.0
|
|
30
|
+
|
|
20
31
|
|
|
21
32
|
def _parse_file_worker(file_path: str, root_path: str, project_id: str) -> dict:
|
|
22
|
-
"""Pure CPU/IO work – no DB access. Safe to run in a thread pool.
|
|
33
|
+
"""Pure CPU/IO work – no DB access. Safe to run in a thread pool.
|
|
34
|
+
|
|
35
|
+
Returns a result dict. When the file is skipped (oversized), the dict
|
|
36
|
+
has ``parsed=None`` and ``skipped_reason`` set — callers must check and
|
|
37
|
+
skip DB writes for those entries.
|
|
38
|
+
"""
|
|
23
39
|
rel_path = os.path.relpath(file_path, root_path)
|
|
24
40
|
is_test = "src/test/java" in file_path.replace("\\", "/")
|
|
25
41
|
scope = JavaIndexer._scope_from_rel_path(rel_path)
|
|
42
|
+
# ── Size guard: skip files that are likely to hang tree-sitter ───────
|
|
43
|
+
try:
|
|
44
|
+
file_size = os.path.getsize(file_path)
|
|
45
|
+
except OSError as exc:
|
|
46
|
+
raise RuntimeError(f"stat failed: {exc}") from exc
|
|
47
|
+
if file_size > _MAX_FILE_BYTES:
|
|
48
|
+
LOGGER.warning(
|
|
49
|
+
"Skipping oversized file (%d bytes > %d): %s "
|
|
50
|
+
"(raise CODESPINE_MAX_FILE_BYTES to include it)",
|
|
51
|
+
file_size, _MAX_FILE_BYTES, rel_path,
|
|
52
|
+
)
|
|
53
|
+
return {
|
|
54
|
+
"file_path": file_path,
|
|
55
|
+
"rel_path": rel_path,
|
|
56
|
+
"source": b"",
|
|
57
|
+
"parsed": None,
|
|
58
|
+
"f_id": file_id(project_id, rel_path),
|
|
59
|
+
"digest": "",
|
|
60
|
+
"is_test": is_test,
|
|
61
|
+
"scope": scope,
|
|
62
|
+
"skipped_reason": "oversized",
|
|
63
|
+
}
|
|
26
64
|
with open(file_path, "rb") as fh:
|
|
27
65
|
source = fh.read()
|
|
28
66
|
parsed = parse_java_source(source)
|
|
@@ -241,30 +279,119 @@ class JavaIndexer:
|
|
|
241
279
|
|
|
242
280
|
# ── Parallel parse (CPU/IO) ──────────────────────────────────────────
|
|
243
281
|
# tree-sitter releases the GIL so ThreadPoolExecutor gives real speedup.
|
|
282
|
+
# A daemon heartbeat thread emits parse_heartbeat events every 2 s so
|
|
283
|
+
# the CLI spinner keeps ticking even when all workers are busy.
|
|
284
|
+
# Per-future timeouts skip files that hang (e.g. huge generated source).
|
|
244
285
|
_workers = max(1, min(8, len(to_reindex), os.cpu_count() or 4))
|
|
245
286
|
parse_results: list[dict] = []
|
|
246
287
|
if to_reindex:
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
fp = futs[fut]
|
|
256
|
-
try:
|
|
257
|
-
parse_results.append(fut.result())
|
|
258
|
-
except Exception as exc:
|
|
259
|
-
LOGGER.warning("Skipping %s: %s", fp, exc)
|
|
288
|
+
_parse_done_holder: list[int] = [0]
|
|
289
|
+
_parse_current_holder: list[str] = [""]
|
|
290
|
+
_parse_hb_stop = threading.Event()
|
|
291
|
+
_parse_start = time.perf_counter()
|
|
292
|
+
_total = len(to_reindex)
|
|
293
|
+
|
|
294
|
+
def _parse_heartbeat_worker() -> None:
|
|
295
|
+
while not _parse_hb_stop.wait(_PARSE_HEARTBEAT_PERIOD):
|
|
260
296
|
self._emit(
|
|
261
297
|
progress,
|
|
262
|
-
"
|
|
263
|
-
|
|
264
|
-
total=
|
|
265
|
-
|
|
298
|
+
"parse_heartbeat",
|
|
299
|
+
done=_parse_done_holder[0],
|
|
300
|
+
total=_total,
|
|
301
|
+
current_file=_parse_current_holder[0],
|
|
302
|
+
elapsed=time.perf_counter() - _parse_start,
|
|
266
303
|
)
|
|
267
304
|
|
|
305
|
+
_parse_hb_thread = threading.Thread(
|
|
306
|
+
target=_parse_heartbeat_worker,
|
|
307
|
+
daemon=True,
|
|
308
|
+
name="codespine-parse-heartbeat",
|
|
309
|
+
)
|
|
310
|
+
_parse_hb_thread.start()
|
|
311
|
+
|
|
312
|
+
try:
|
|
313
|
+
with concurrent.futures.ThreadPoolExecutor(max_workers=_workers) as ex:
|
|
314
|
+
futs: dict[concurrent.futures.Future, str] = {}
|
|
315
|
+
submitted_at: dict[concurrent.futures.Future, float] = {}
|
|
316
|
+
for fp in to_reindex:
|
|
317
|
+
fut = ex.submit(_parse_file_worker, fp, root_path, project_id)
|
|
318
|
+
futs[fut] = fp
|
|
319
|
+
submitted_at[fut] = time.perf_counter()
|
|
320
|
+
|
|
321
|
+
pending = set(futs)
|
|
322
|
+
done_count = 0
|
|
323
|
+
|
|
324
|
+
while pending:
|
|
325
|
+
# Wait up to heartbeat period for any future to finish.
|
|
326
|
+
try:
|
|
327
|
+
for fut in concurrent.futures.as_completed(
|
|
328
|
+
pending, timeout=_PARSE_HEARTBEAT_PERIOD
|
|
329
|
+
):
|
|
330
|
+
pending.discard(fut)
|
|
331
|
+
done_count += 1
|
|
332
|
+
fp = futs[fut]
|
|
333
|
+
_parse_done_holder[0] = done_count
|
|
334
|
+
_parse_current_holder[0] = fp
|
|
335
|
+
try:
|
|
336
|
+
parse_results.append(fut.result(timeout=0))
|
|
337
|
+
except concurrent.futures.TimeoutError:
|
|
338
|
+
# Shouldn't happen (future is done), but guard anyway
|
|
339
|
+
pass
|
|
340
|
+
except Exception as exc:
|
|
341
|
+
LOGGER.warning("Skipping %s: %s", fp, exc)
|
|
342
|
+
self._emit(
|
|
343
|
+
progress,
|
|
344
|
+
"parse_progress",
|
|
345
|
+
indexed=done_count,
|
|
346
|
+
total=_total,
|
|
347
|
+
file_path=fp,
|
|
348
|
+
)
|
|
349
|
+
except concurrent.futures.TimeoutError:
|
|
350
|
+
pass # heartbeat tick — proceed to deadline scan
|
|
351
|
+
|
|
352
|
+
# Abandon futures stuck past the per-file timeout.
|
|
353
|
+
now = time.perf_counter()
|
|
354
|
+
expired = [
|
|
355
|
+
f for f in pending
|
|
356
|
+
if now - submitted_at[f] > _PARSE_TIMEOUT_SECS
|
|
357
|
+
]
|
|
358
|
+
for fut in expired:
|
|
359
|
+
fp = futs[fut]
|
|
360
|
+
LOGGER.warning(
|
|
361
|
+
"Parse timeout after %ds, skipping: %s "
|
|
362
|
+
"(thread may continue briefly in background)",
|
|
363
|
+
_PARSE_TIMEOUT_SECS, fp,
|
|
364
|
+
)
|
|
365
|
+
fut.cancel() # no-op if already running; cleans up pending ones
|
|
366
|
+
# Insert a sentinel so the file is counted but has no symbols.
|
|
367
|
+
parse_results.append({
|
|
368
|
+
"file_path": fp,
|
|
369
|
+
"rel_path": os.path.relpath(fp, root_path),
|
|
370
|
+
"source": b"",
|
|
371
|
+
"parsed": None,
|
|
372
|
+
"f_id": file_id(project_id, os.path.relpath(fp, root_path)),
|
|
373
|
+
"digest": "",
|
|
374
|
+
"is_test": "src/test/java" in fp.replace("\\", "/"),
|
|
375
|
+
"scope": JavaIndexer._scope_from_rel_path(
|
|
376
|
+
os.path.relpath(fp, root_path)
|
|
377
|
+
),
|
|
378
|
+
"skipped_reason": "timeout",
|
|
379
|
+
})
|
|
380
|
+
pending.discard(fut)
|
|
381
|
+
done_count += 1
|
|
382
|
+
_parse_done_holder[0] = done_count
|
|
383
|
+
self._emit(
|
|
384
|
+
progress,
|
|
385
|
+
"parse_progress",
|
|
386
|
+
indexed=done_count,
|
|
387
|
+
total=_total,
|
|
388
|
+
file_path=fp,
|
|
389
|
+
timed_out=True,
|
|
390
|
+
)
|
|
391
|
+
finally:
|
|
392
|
+
_parse_hb_stop.set()
|
|
393
|
+
_parse_hb_thread.join(timeout=3.0)
|
|
394
|
+
|
|
268
395
|
# ── Chunked DB writes ─────────────────────────────────────────────────
|
|
269
396
|
if full:
|
|
270
397
|
self.store.clear_project(project_id)
|
|
@@ -297,6 +424,12 @@ class JavaIndexer:
|
|
|
297
424
|
symbol_rows: list[dict] = []
|
|
298
425
|
|
|
299
426
|
for pr in parse_chunk:
|
|
427
|
+
# Skipped files (oversized, timeout) carry parsed=None.
|
|
428
|
+
# Still count as indexed for accurate reporting, but skip
|
|
429
|
+
# class/method/symbol extraction.
|
|
430
|
+
if pr.get("parsed") is None:
|
|
431
|
+
files_indexed += 1
|
|
432
|
+
continue
|
|
300
433
|
file_path = pr["file_path"]
|
|
301
434
|
parsed = pr["parsed"]
|
|
302
435
|
f_id = pr["f_id"]
|
|
@@ -0,0 +1,194 @@
|
|
|
1
|
+
"""Regression tests for parse-phase hang recovery (v1.0.7).
|
|
2
|
+
|
|
3
|
+
Covers:
|
|
4
|
+
- Oversized file skip (_MAX_FILE_BYTES guard)
|
|
5
|
+
- Parse heartbeat thread lifecycle
|
|
6
|
+
- Per-future timeout with sentinel insertion
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import concurrent.futures
|
|
11
|
+
import os
|
|
12
|
+
import threading
|
|
13
|
+
import time
|
|
14
|
+
|
|
15
|
+
import pytest
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# ---------------------------------------------------------------------------
|
|
19
|
+
# Test A: oversized-file skip
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_parse_worker_skips_oversized(tmp_path, monkeypatch):
|
|
24
|
+
"""Files larger than _MAX_FILE_BYTES return parsed=None without calling
|
|
25
|
+
parse_java_source, preventing tree-sitter from hanging on giant files."""
|
|
26
|
+
import codespine.indexer.engine as eng
|
|
27
|
+
|
|
28
|
+
monkeypatch.setattr(eng, "_MAX_FILE_BYTES", 100)
|
|
29
|
+
|
|
30
|
+
java_file = tmp_path / "Big.java"
|
|
31
|
+
java_file.write_bytes(b"public class Big {}\n" + b" " * 200)
|
|
32
|
+
|
|
33
|
+
result = eng._parse_file_worker(str(java_file), str(tmp_path), "test-proj")
|
|
34
|
+
|
|
35
|
+
assert result["parsed"] is None
|
|
36
|
+
assert result["skipped_reason"] == "oversized"
|
|
37
|
+
assert result["rel_path"] == "Big.java"
|
|
38
|
+
assert result["source"] == b""
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_parse_worker_normal_file(tmp_path):
|
|
42
|
+
"""Files within the size limit are parsed normally."""
|
|
43
|
+
import codespine.indexer.engine as eng
|
|
44
|
+
|
|
45
|
+
java_file = tmp_path / "Small.java"
|
|
46
|
+
java_file.write_bytes(b"public class Small {}\n")
|
|
47
|
+
|
|
48
|
+
result = eng._parse_file_worker(str(java_file), str(tmp_path), "test-proj")
|
|
49
|
+
|
|
50
|
+
assert result["parsed"] is not None
|
|
51
|
+
assert result["skipped_reason"] if "skipped_reason" in result else True # not set = fine
|
|
52
|
+
assert "skipped_reason" not in result or result["skipped_reason"] != "oversized"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def test_parse_worker_max_file_bytes_env(tmp_path, monkeypatch):
|
|
56
|
+
"""CODESPINE_MAX_FILE_BYTES env var controls the threshold."""
|
|
57
|
+
monkeypatch.setenv("CODESPINE_MAX_FILE_BYTES", "50")
|
|
58
|
+
# Re-import to pick up new env (the constant is read at import time,
|
|
59
|
+
# so we must reload or patch it directly).
|
|
60
|
+
import codespine.indexer.engine as eng
|
|
61
|
+
import importlib
|
|
62
|
+
# Patch after import
|
|
63
|
+
monkeypatch.setattr(eng, "_MAX_FILE_BYTES", 50)
|
|
64
|
+
|
|
65
|
+
java_file = tmp_path / "Medium.java"
|
|
66
|
+
java_file.write_bytes(b"x" * 100)
|
|
67
|
+
|
|
68
|
+
result = eng._parse_file_worker(str(java_file), str(tmp_path), "test-proj")
|
|
69
|
+
assert result["parsed"] is None
|
|
70
|
+
assert result["skipped_reason"] == "oversized"
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# ---------------------------------------------------------------------------
|
|
74
|
+
# Test B: parse heartbeat thread lifecycle
|
|
75
|
+
# ---------------------------------------------------------------------------
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def test_parse_heartbeat_emits_and_stops():
|
|
79
|
+
"""The heartbeat thread fires events every _PARSE_HEARTBEAT_PERIOD seconds
|
|
80
|
+
and stops cleanly when signalled."""
|
|
81
|
+
events: list[dict] = []
|
|
82
|
+
stop_event = threading.Event()
|
|
83
|
+
|
|
84
|
+
_done_holder: list[int] = [0]
|
|
85
|
+
_current_holder: list[str] = ["Foo.java"]
|
|
86
|
+
_start = time.perf_counter()
|
|
87
|
+
_total = 10
|
|
88
|
+
_period = 0.1 # fast for testing
|
|
89
|
+
|
|
90
|
+
def _worker() -> None:
|
|
91
|
+
while not stop_event.wait(_period):
|
|
92
|
+
events.append({
|
|
93
|
+
"event": "parse_heartbeat",
|
|
94
|
+
"done": _done_holder[0],
|
|
95
|
+
"total": _total,
|
|
96
|
+
"current_file": _current_holder[0],
|
|
97
|
+
"elapsed": time.perf_counter() - _start,
|
|
98
|
+
})
|
|
99
|
+
|
|
100
|
+
t = threading.Thread(target=_worker, daemon=True)
|
|
101
|
+
t.start()
|
|
102
|
+
time.sleep(0.35) # allow ~3 ticks
|
|
103
|
+
stop_event.set()
|
|
104
|
+
t.join(timeout=1.0)
|
|
105
|
+
|
|
106
|
+
assert not t.is_alive(), "Heartbeat thread did not stop"
|
|
107
|
+
assert len(events) >= 2, f"Expected ≥2 heartbeat events, got {len(events)}"
|
|
108
|
+
assert all(e["event"] == "parse_heartbeat" for e in events)
|
|
109
|
+
assert all(e["total"] == _total for e in events)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def test_parse_heartbeat_reflects_state_updates():
|
|
113
|
+
"""The heartbeat reads live state from the shared holders."""
|
|
114
|
+
events: list[dict] = []
|
|
115
|
+
stop_event = threading.Event()
|
|
116
|
+
_done_holder: list[int] = [0]
|
|
117
|
+
_period = 0.05
|
|
118
|
+
|
|
119
|
+
def _worker() -> None:
|
|
120
|
+
while not stop_event.wait(_period):
|
|
121
|
+
events.append({"done": _done_holder[0]})
|
|
122
|
+
|
|
123
|
+
t = threading.Thread(target=_worker, daemon=True)
|
|
124
|
+
t.start()
|
|
125
|
+
time.sleep(0.08)
|
|
126
|
+
_done_holder[0] = 42
|
|
127
|
+
time.sleep(0.08)
|
|
128
|
+
stop_event.set()
|
|
129
|
+
t.join(timeout=1.0)
|
|
130
|
+
|
|
131
|
+
done_values = [e["done"] for e in events]
|
|
132
|
+
assert 42 in done_values, "State update not reflected in heartbeat"
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
# ---------------------------------------------------------------------------
|
|
136
|
+
# Test C: per-future timeout produces sentinel result
|
|
137
|
+
# ---------------------------------------------------------------------------
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def test_timeout_sentinel_has_correct_shape(tmp_path):
|
|
141
|
+
"""When a future times out, the sentinel dict must have parsed=None and
|
|
142
|
+
skipped_reason='timeout' so the DB-write loop skips it safely."""
|
|
143
|
+
import codespine.indexer.engine as eng
|
|
144
|
+
|
|
145
|
+
# Construct a sentinel the same way the engine does it.
|
|
146
|
+
fp = str(tmp_path / "Slow.java")
|
|
147
|
+
root_path = str(tmp_path)
|
|
148
|
+
project_id = "myproj"
|
|
149
|
+
|
|
150
|
+
sentinel = {
|
|
151
|
+
"file_path": fp,
|
|
152
|
+
"rel_path": os.path.relpath(fp, root_path),
|
|
153
|
+
"source": b"",
|
|
154
|
+
"parsed": None,
|
|
155
|
+
"f_id": eng.file_id(project_id, os.path.relpath(fp, root_path)),
|
|
156
|
+
"digest": "",
|
|
157
|
+
"is_test": False,
|
|
158
|
+
"scope": "main",
|
|
159
|
+
"skipped_reason": "timeout",
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
assert sentinel["parsed"] is None
|
|
163
|
+
assert sentinel["skipped_reason"] == "timeout"
|
|
164
|
+
assert sentinel["source"] == b""
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def test_parse_loop_skips_none_parsed_in_db_write(tmp_path):
|
|
168
|
+
"""The DB-write loop guard (parsed is None → continue) must not NPE when
|
|
169
|
+
a skipped sentinel is in parse_results."""
|
|
170
|
+
# We verify the guard logic directly without a real store.
|
|
171
|
+
parse_results = [
|
|
172
|
+
{
|
|
173
|
+
"file_path": str(tmp_path / "Skip.java"),
|
|
174
|
+
"rel_path": "Skip.java",
|
|
175
|
+
"source": b"",
|
|
176
|
+
"parsed": None,
|
|
177
|
+
"f_id": "fid-skip",
|
|
178
|
+
"digest": "",
|
|
179
|
+
"is_test": False,
|
|
180
|
+
"scope": "main",
|
|
181
|
+
"skipped_reason": "oversized",
|
|
182
|
+
}
|
|
183
|
+
]
|
|
184
|
+
|
|
185
|
+
# Simulate what the engine's DB-write loop does.
|
|
186
|
+
files_indexed = 0
|
|
187
|
+
for pr in parse_results:
|
|
188
|
+
if pr.get("parsed") is None:
|
|
189
|
+
files_indexed += 1
|
|
190
|
+
continue
|
|
191
|
+
# This line would NPE if reached with parsed=None:
|
|
192
|
+
_ = pr["parsed"].classes
|
|
193
|
+
|
|
194
|
+
assert files_indexed == 1, "Skipped sentinel should increment files_indexed"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|