opencode-bridge 0.3.0__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {opencode_bridge-0.3.0 → opencode_bridge-0.4.1}/PKG-INFO +1 -1
- {opencode_bridge-0.3.0 → opencode_bridge-0.4.1}/opencode_bridge/server.py +383 -17
- {opencode_bridge-0.3.0 → opencode_bridge-0.4.1}/pyproject.toml +1 -1
- opencode_bridge-0.4.1/tests/test_chunking.py +630 -0
- {opencode_bridge-0.3.0 → opencode_bridge-0.4.1}/.claude-plugin/plugin.json +0 -0
- {opencode_bridge-0.3.0 → opencode_bridge-0.4.1}/.github/workflows/ci.yml +0 -0
- {opencode_bridge-0.3.0 → opencode_bridge-0.4.1}/.github/workflows/release.yml +0 -0
- {opencode_bridge-0.3.0 → opencode_bridge-0.4.1}/.gitignore +0 -0
- {opencode_bridge-0.3.0 → opencode_bridge-0.4.1}/README.md +0 -0
- {opencode_bridge-0.3.0 → opencode_bridge-0.4.1}/opencode_bridge/__init__.py +0 -0
- {opencode_bridge-0.3.0 → opencode_bridge-0.4.1}/opencode_bridge/install.py +0 -0
- {opencode_bridge-0.3.0 → opencode_bridge-0.4.1}/skills/opencode.md +0 -0
- {opencode_bridge-0.3.0 → opencode_bridge-0.4.1}/tests/__init__.py +0 -0
- {opencode_bridge-0.3.0 → opencode_bridge-0.4.1}/tests/test_companion.py +0 -0
- {opencode_bridge-0.3.0 → opencode_bridge-0.4.1}/uv.lock +0 -0
--- opencode_bridge-0.3.0/opencode_bridge/server.py
+++ opencode_bridge-0.4.1/opencode_bridge/server.py
@@ -16,6 +16,7 @@ Configuration:
 """
 
 import os
+import re
 import json
 import asyncio
 import shutil
@@ -35,6 +36,13 @@ SMALL_FILE = 500 # lines
 MEDIUM_FILE = 1500 # lines
 LARGE_FILE = 5000 # lines
 
+# Chunked processing thresholds
+CHUNK_THRESHOLD = 2000  # lines — files above this get chunked
+CHUNK_SIZE = 800  # lines per chunk
+CHUNK_OVERLAP = 20  # overlap between adjacent chunks
+MAX_PARALLEL_CHUNKS = 6  # concurrency limit
+MAX_TOTAL_CHUNKS = 20  # safety cap
+
 # Language detection by extension
 LANG_MAP = {
     ".py": "Python", ".js": "JavaScript", ".ts": "TypeScript", ".tsx": "TypeScript/React",
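Aside: a rough sense of how the new constants compose. Ignoring boundary snapping, each chunk after the first contributes CHUNK_SIZE - CHUNK_OVERLAP new lines, so chunk counts are easy to estimate. A minimal sketch (estimate_chunks is a hypothetical helper, not part of the package):

    import math

    def estimate_chunks(total_lines: int, chunk_size: int = 800, overlap: int = 20) -> int:
        # Hypothetical helper: rough chunk count, ignoring boundary snapping.
        if total_lines <= chunk_size:
            return 1
        stride = chunk_size - overlap  # new lines contributed by each later chunk
        return 1 + math.ceil((total_lines - chunk_size) / stride)

    print(estimate_chunks(5000))  # 7, comfortably under MAX_TOTAL_CHUNKS (20)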
@@ -304,6 +312,174 @@ def build_companion_prompt(
     return "\n".join(parts)
 
 
+# ---------------------------------------------------------------------------
+# Chunked Processing — map-reduce for large files
+# ---------------------------------------------------------------------------
+
+# Regex for natural code boundaries (language-agnostic)
+_BOUNDARY_RE = re.compile(
+    r"^(?:\s*$"  # blank line
+    r"|(?:def |class |function |func |fn |pub fn |impl |module |package )"  # definitions
+    r"|(?:})\s*$"  # closing brace on its own line
+    r"|(?://|#|/\*|\*/).{0,80}$"  # comment lines
+    r")",
+    re.MULTILINE,
+)
+
+
+def chunk_file(
+    filepath: str,
+    chunk_size: int = CHUNK_SIZE,
+    overlap: int = CHUNK_OVERLAP,
+) -> list[dict]:
+    """Split a file into overlapping chunks with boundary snapping.
+
+    Returns a list of dicts with keys:
+        chunk_index, total_chunks, start_line, end_line, content, filepath
+    """
+    p = Path(filepath)
+    try:
+        lines = p.read_text(errors="replace").splitlines(keepends=True)
+    except Exception:
+        return []
+
+    total = len(lines)
+    if total == 0:
+        return []
+    if total <= chunk_size:
+        return [{
+            "chunk_index": 0,
+            "total_chunks": 1,
+            "start_line": 1,
+            "end_line": total,
+            "content": "".join(lines),
+            "filepath": str(p),
+        }]
+
+    chunks: list[dict] = []
+    pos = 0
+    while pos < total:
+        end = min(pos + chunk_size, total)
+
+        # Snap to a natural boundary within ±50 lines of the cut point
+        if end < total:
+            best = end
+            scan_start = max(end - 50, pos + chunk_size // 2)
+            scan_end = min(end + 50, total)
+            for i in range(scan_start, scan_end):
+                if _BOUNDARY_RE.match(lines[i]):
+                    best = i + 1  # include the boundary line in this chunk
+                    break
+            end = best
+
+        chunk_content = "".join(lines[pos:end])
+        chunks.append({
+            "chunk_index": len(chunks),
+            "total_chunks": -1,  # filled in below
+            "start_line": pos + 1,  # 1-indexed
+            "end_line": end,
+            "content": chunk_content,
+            "filepath": str(p),
+        })
+
+        # Advance: overlap with previous chunk, but stop if we've reached the end
+        if end >= total:
+            break
+        pos = max(end - overlap, pos + 1)
+
+    # Fill in total_chunks
+    for c in chunks:
+        c["total_chunks"] = len(chunks)
+
+    return chunks
+
+
+def build_chunk_prompt(
+    user_prompt: str,
+    chunk_info: dict,
+    file_info: dict,
+    mode: str = "discuss",
+) -> str:
+    """Build a focused prompt for analyzing a single file chunk."""
+    name = file_info.get("name", Path(chunk_info["filepath"]).name)
+    language = file_info.get("language", "Unknown")
+    total_lines = file_info.get("lines", "?")
+    idx = chunk_info["chunk_index"] + 1
+    total = chunk_info["total_chunks"]
+    start = chunk_info["start_line"]
+    end = chunk_info["end_line"]
+
+    parts = [
+        f"You are analyzing **chunk {idx} of {total}** from `{name}` "
+        f"({language}, {total_lines} total lines).",
+        f"This chunk covers **lines {start}–{end}**.",
+        "",
+        "## Task",
+        user_prompt,
+        "",
+        "## Instructions",
+        "- Focus ONLY on the code in this chunk",
+        "- Note any references to code that might exist outside this chunk",
+        "- Be concise — your output will be combined with analyses of other chunks",
+        "- Include line numbers for any issues found",
+    ]
+
+    if mode == "review":
+        parts.append("- Categorize findings as: bug, security, design, performance, or style")
+
+    return "\n".join(parts)
+
+
+def build_synthesis_prompt(
+    user_prompt: str,
+    chunk_results: list[dict],
+    file_infos: list[dict],
+    mode: str = "discuss",
+) -> str:
+    """Build a prompt that merges chunk analyses into one coherent response."""
+    file_desc = ", ".join(
+        f"`{i.get('name', '?')}` ({i.get('lines', '?')} lines)"
+        for i in file_infos
+    )
+    n = len(chunk_results)
+
+    parts = [
+        f"You analyzed a large file in **{n} chunks**. "
+        "Synthesize the chunk analyses below into one coherent response.",
+        "",
+        "## Original Request",
+        user_prompt,
+        "",
+        "## Files Analyzed",
+        file_desc,
+        "",
+        "## Chunk Analyses",
+    ]
+
+    for cr in sorted(chunk_results, key=lambda c: c.get("chunk_index", 0)):
+        idx = cr.get("chunk_index", 0) + 1
+        fp = Path(cr.get("file", "")).name
+        response = cr.get("response", "[analysis failed]")
+        if cr.get("error"):
+            response = f"[analysis failed: {cr['error']}]"
+        parts.append(f"\n### Chunk {idx} — `{fp}`")
+        parts.append(response)
+
+    parts.extend([
+        "",
+        "## Instructions",
+        "- Combine findings and remove duplicates (chunks overlap slightly)",
+        "- Organize by importance, not by chunk order",
+        "- Preserve line number references from the original analyses",
+        "- Provide an overall assessment at the top",
+    ])
+
+    if mode == "review":
+        parts.append("- Group findings by category: bugs, security, design, performance, style")
+
+    return "\n".join(parts)
+
+
 # Default configuration
 DEFAULT_MODEL = "openai/gpt-5.2-codex"
 DEFAULT_AGENT = "plan"
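Aside: to make the chunk metadata concrete, a small usage sketch of the new chunk_file API (the filename is illustrative; the dict keys match the docstring above):

    from opencode_bridge.server import chunk_file

    # Path is illustrative; any text file over 800 lines splits into several chunks.
    for c in chunk_file("big_module.py"):
        print(f"chunk {c['chunk_index'] + 1}/{c['total_chunks']}: "
              f"lines {c['start_line']}-{c['end_line']}")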
@@ -470,6 +646,182 @@ class OpenCodeBridge:
         except Exception as e:
             return f"Error: {e}", 1
 
+    @staticmethod
+    def _parse_opencode_response(output: str) -> tuple[str, Optional[str]]:
+        """Parse JSON-lines output from opencode CLI.
+
+        Returns (reply_text, session_id).
+        """
+        reply_parts: list[str] = []
+        session_id: Optional[str] = None
+        for line in output.split("\n"):
+            if not line:
+                continue
+            try:
+                event = json.loads(line)
+                if not session_id and "sessionID" in event:
+                    session_id = event["sessionID"]
+                if event.get("type") == "text":
+                    text = event.get("part", {}).get("text", "")
+                    if text:
+                        reply_parts.append(text)
+            except json.JSONDecodeError:
+                continue
+        return "".join(reply_parts), session_id
+
+    async def _run_chunk(
+        self,
+        chunk_info: dict,
+        file_info: dict,
+        user_prompt: str,
+        session: "Session",
+        mode: str = "discuss",
+    ) -> dict:
+        """Process a single file chunk through OpenCode (stateless)."""
+        result = {
+            "chunk_index": chunk_info["chunk_index"],
+            "file": chunk_info["filepath"],
+            "response": "",
+            "error": None,
+        }
+
+        # Write chunk to a temp file preserving the original extension
+        ext = Path(chunk_info["filepath"]).suffix or ".txt"
+        tmp = None
+        try:
+            tmp = tempfile.NamedTemporaryFile(
+                mode="w", suffix=ext, delete=False, prefix="opencode_chunk_"
+            )
+            tmp.write(chunk_info["content"])
+            tmp.close()
+
+            prompt = build_chunk_prompt(user_prompt, chunk_info, file_info, mode)
+
+            args = [
+                "run", prompt,
+                "--model", session.model,
+                "--agent", session.agent,
+                "--file", tmp.name,
+                "--format", "json",
+            ]
+            if session.variant:
+                args.extend(["--variant", session.variant])
+
+            output, code = await self._run_opencode(*args, timeout=300)
+
+            if code != 0:
+                result["error"] = output[:500]
+                return result
+
+            reply, _ = self._parse_opencode_response(output)
+            result["response"] = reply or "[no response]"
+
+        except Exception as e:
+            result["error"] = str(e)
+        finally:
+            if tmp:
+                try:
+                    os.unlink(tmp.name)
+                except OSError:
+                    pass
+        return result
+
+    async def _run_chunked(
+        self,
+        user_prompt: str,
+        files: list[str],
+        session: "Session",
+        mode: str = "discuss",
+    ) -> str:
+        """Map-reduce orchestrator: chunk large files, process in parallel, synthesize."""
+        small_files: list[str] = []
+        all_chunks: list[tuple[dict, dict]] = []  # (chunk_info, file_info)
+
+        for f in files:
+            info = get_file_info(f)
+            line_count = info.get("lines", 0)
+            if line_count > CHUNK_THRESHOLD:
+                chunks = chunk_file(f, CHUNK_SIZE, CHUNK_OVERLAP)
+                for c in chunks:
+                    all_chunks.append((c, info))
+            else:
+                small_files.append(f)
+
+        # Safety: if too many chunks, increase chunk size and re-chunk
+        if len(all_chunks) > MAX_TOTAL_CHUNKS:
+            all_chunks = []
+            bigger = CHUNK_SIZE * 2
+            for f in files:
+                info = get_file_info(f)
+                if info.get("lines", 0) > CHUNK_THRESHOLD:
+                    chunks = chunk_file(f, bigger, CHUNK_OVERLAP)
+                    for c in chunks:
+                        all_chunks.append((c, info))
+            # small_files already collected above
+
+        if not all_chunks:
+            return "No chunks to process."
+
+        # --- Map phase: run chunks in parallel ---
+        sem = asyncio.Semaphore(MAX_PARALLEL_CHUNKS)
+
+        async def _limited(chunk_info: dict, file_info: dict) -> dict:
+            async with sem:
+                return await self._run_chunk(chunk_info, file_info, user_prompt, session, mode)
+
+        tasks = [_limited(ci, fi) for ci, fi in all_chunks]
+        chunk_results: list[dict] = await asyncio.gather(*tasks)
+
+        # Check failure rate
+        failed = sum(1 for cr in chunk_results if cr.get("error"))
+        if failed > len(chunk_results) / 2:
+            return (
+                f"Chunked analysis failed: {failed}/{len(chunk_results)} chunks errored. "
+                "Try with a smaller file or increase the chunk size."
+            )
+
+        # --- Reduce phase: synthesize ---
+        file_infos = []
+        seen_paths: set[str] = set()
+        for _, fi in all_chunks:
+            fp = fi.get("path", "")
+            if fp not in seen_paths:
+                seen_paths.add(fp)
+                file_infos.append(fi)
+
+        synthesis_prompt = build_synthesis_prompt(user_prompt, chunk_results, file_infos, mode)
+
+        # Attach small files for reference context (not the large ones)
+        args = [
+            "run", synthesis_prompt,
+            "--model", session.model,
+            "--agent", session.agent,
+            "--format", "json",
+        ]
+        if session.variant:
+            args.extend(["--variant", session.variant])
+        for sf in small_files:
+            args.extend(["--file", sf])
+
+        # Longer timeout for synthesis
+        output, code = await self._run_opencode(*args, timeout=600)
+
+        if code != 0:
+            # Fallback: concatenate raw chunk results
+            parts = ["*Synthesis failed — showing raw chunk analyses:*\n"]
+            for cr in sorted(chunk_results, key=lambda c: c.get("chunk_index", 0)):
+                idx = cr.get("chunk_index", 0) + 1
+                fp = Path(cr.get("file", "")).name
+                parts.append(f"\n### Chunk {idx} — `{fp}`")
+                if cr.get("error"):
+                    parts.append(f"[error: {cr['error']}]")
+                else:
+                    parts.append(cr.get("response", "[no response]"))
+            return "\n".join(parts)
+
+        reply, _ = self._parse_opencode_response(output)
+        return reply or "No response from synthesis."
+
     async def list_models(self, provider: Optional[str] = None) -> str:
         """List available models from OpenCode."""
         args = ["models"]
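Aside: the extracted parser is easy to exercise directly; this round-trip mirrors the JSON-lines format the new test suite (below) builds its mocks from:

    import json
    from opencode_bridge.server import OpenCodeBridge

    output = "\n".join([
        json.dumps({"sessionID": "sess-123"}),
        json.dumps({"type": "text", "part": {"text": "Hello "}}),
        json.dumps({"type": "text", "part": {"text": "World"}}),
    ])
    # Static method: no bridge instance needed to parse CLI output.
    text, sid = OpenCodeBridge._parse_opencode_response(output)
    assert (text, sid) == ("Hello World", "sess-123")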
@@ -599,6 +951,27 @@ Set via:
         temp_file.close()
         files = (files or []) + [temp_file.name]
 
+        # --- Chunking gate: large user files get map-reduce processing ---
+        user_files = [f for f in files if not Path(f).name.startswith("opencode_msg_")]
+        needs_chunking = any(
+            get_file_info(f).get("lines", 0) > CHUNK_THRESHOLD
+            for f in user_files
+        )
+
+        if needs_chunking:
+            reply = await self._run_chunked(message, user_files, session, mode="discuss")
+            # Cleanup temp file
+            try:
+                os.unlink(temp_file.name)
+            except OSError:
+                pass
+            if reply:
+                session.add_message("assistant", reply)
+                session.save(self.sessions_dir / f"{sid}.json")
+            return reply or "No response received"
+
+        # --- Normal (non-chunked) path ---
+
         # Build prompt: companion system unless _raw is set
         if _raw:
             run_prompt = build_message_prompt(message, files)
@@ -631,7 +1004,6 @@ Set via:
         args.extend(["--format", "json"])
 
         # Scale timeout based on attached file size
-        user_files = [f for f in files if not Path(f).name.startswith("opencode_msg_")]
         total_lines = sum(get_file_info(f).get("lines", 0) for f in user_files)
         # Base 300s, +60s per 1000 lines above threshold, capped at 900s
         timeout = min(900, 300 + max(0, (total_lines - MEDIUM_FILE) * 60 // 1000))
@@ -649,22 +1021,10 @@ Set via:
             return f"Error: {output}"
 
         # Parse JSON events for session ID and text
-        reply_parts = []
-        for line in output.split("\n"):
-            if not line:
-                continue
-            try:
-                event = json.loads(line)
-                if not session.opencode_session_id and "sessionID" in event:
-                    session.opencode_session_id = event["sessionID"]
-                if event.get("type") == "text":
-                    text = event.get("part", {}).get("text", "")
-                    if text:
-                        reply_parts.append(text)
-            except json.JSONDecodeError:
-                continue
+        reply, new_session_id = self._parse_opencode_response(output)
+        if new_session_id and not session.opencode_session_id:
+            session.opencode_session_id = new_session_id
 
-        reply = "".join(reply_parts)
         if reply:
             session.add_message("assistant", reply)
 
@@ -737,10 +1097,16 @@ Set via:
         files = file_paths
         file_infos = [get_file_info(f) for f in file_paths]
         file_infos = [i for i in file_infos if i]
+        total_lines = sum(i.get("lines", 0) for i in file_infos)
+
+        # Chunking gate for large reviews
+        if total_lines > CHUNK_THRESHOLD:
+            prompt = build_review_prompt(file_infos, focus)
+            return await self._run_chunked(prompt, file_paths, self.sessions[sid], mode="review")
+
         prompt = build_review_prompt(file_infos, focus)
 
         # Increase timeout for large files
-        total_lines = sum(i.get("lines", 0) for i in file_infos)
         if total_lines > LARGE_FILE:
             # Use variant=high for large reviews if not already high+
             session = self.sessions[sid]
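Aside: end to end, the review path now routes large files through the map-reduce pipeline automatically. A usage sketch, assuming the opencode CLI is installed and on PATH (the session id and filename are illustrative):

    import asyncio
    from opencode_bridge.server import OpenCodeBridge

    async def main():
        bridge = OpenCodeBridge()
        await bridge.start_session("review")  # illustrative session id
        # Files over CHUNK_THRESHOLD (2000) lines are chunked and reviewed in parallel.
        print(await bridge.review_code("big_module.py"))

    asyncio.run(main())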
--- /dev/null
+++ opencode_bridge-0.4.1/tests/test_chunking.py
@@ -0,0 +1,630 @@
+"""Tests for chunked subagent processing of large files."""
+
+import json
+import tempfile
+import textwrap
+from pathlib import Path
+from unittest.mock import AsyncMock, patch
+
+import pytest
+
+from opencode_bridge.server import (
+    CHUNK_OVERLAP,
+    CHUNK_SIZE,
+    CHUNK_THRESHOLD,
+    MAX_TOTAL_CHUNKS,
+    OpenCodeBridge,
+    build_chunk_prompt,
+    build_synthesis_prompt,
+    chunk_file,
+    get_file_info,
+    _file_info_cache,
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _make_file(lines: int, *, with_boundaries: bool = False) -> str:
+    """Create a temp file with the given number of lines. Returns its path."""
+    tmp = tempfile.NamedTemporaryFile(
+        mode="w", suffix=".py", delete=False, prefix="test_chunk_"
+    )
+    for i in range(1, lines + 1):
+        if with_boundaries and i % 200 == 0:
+            tmp.write(f"\ndef function_{i}():\n")
+        else:
+            tmp.write(f"# line {i}\n")
+    tmp.close()
+    return tmp.name
+
+
+# ---------------------------------------------------------------------------
+# chunk_file
+# ---------------------------------------------------------------------------
+
+class TestChunkFile:
+    def test_small_file_single_chunk(self):
+        path = _make_file(100)
+        chunks = chunk_file(path, chunk_size=CHUNK_SIZE)
+        assert len(chunks) == 1
+        assert chunks[0]["chunk_index"] == 0
+        assert chunks[0]["total_chunks"] == 1
+        assert chunks[0]["start_line"] == 1
+        assert chunks[0]["end_line"] == 100
+
+    def test_large_file_multiple_chunks(self):
+        path = _make_file(2500)
+        chunks = chunk_file(path, chunk_size=800, overlap=20)
+        assert len(chunks) > 1
+        # All chunks should reference the same file
+        for c in chunks:
+            assert c["filepath"] == path
+            assert c["total_chunks"] == len(chunks)
+
+    def test_chunks_cover_full_file(self):
+        path = _make_file(3000)
+        chunks = chunk_file(path, chunk_size=800, overlap=20)
+        # First chunk starts at line 1
+        assert chunks[0]["start_line"] == 1
+        # Last chunk ends at the file's last line
+        assert chunks[-1]["end_line"] == 3000
+
+    def test_chunk_overlap(self):
+        path = _make_file(2000)
+        chunks = chunk_file(path, chunk_size=800, overlap=20)
+        for i in range(1, len(chunks)):
+            prev_end = chunks[i - 1]["end_line"]
+            cur_start = chunks[i]["start_line"]
+            # The start of the next chunk should overlap with the end of the previous
+            assert cur_start <= prev_end, (
+                f"Chunk {i} starts at {cur_start} but previous ends at {prev_end}"
+            )
+
+    def test_boundary_snapping(self):
+        """Chunks should prefer cutting near function definitions."""
+        path = _make_file(2000, with_boundaries=True)
+        chunks = chunk_file(path, chunk_size=800, overlap=20)
+        # With boundaries every 200 lines, cuts should snap near those points
+        assert len(chunks) >= 2
+
+    def test_chunk_indices_sequential(self):
+        path = _make_file(3000)
+        chunks = chunk_file(path, chunk_size=800, overlap=20)
+        for i, c in enumerate(chunks):
+            assert c["chunk_index"] == i
+
+    def test_empty_file(self):
+        tmp = tempfile.NamedTemporaryFile(
+            mode="w", suffix=".py", delete=False, prefix="test_empty_"
+        )
+        tmp.close()
+        chunks = chunk_file(tmp.name)
+        assert chunks == []
+
+    def test_nonexistent_file(self):
+        chunks = chunk_file("/nonexistent/file.py")
+        assert chunks == []
+
+    def test_content_preserved(self):
+        path = _make_file(100)
+        chunks = chunk_file(path, chunk_size=800)
+        original = Path(path).read_text()
+        assert chunks[0]["content"] == original
+
+
+# ---------------------------------------------------------------------------
+# build_chunk_prompt
+# ---------------------------------------------------------------------------
+
+class TestBuildChunkPrompt:
+    def test_includes_chunk_metadata(self):
+        chunk_info = {
+            "chunk_index": 2,
+            "total_chunks": 5,
+            "start_line": 1601,
+            "end_line": 2400,
+            "content": "...",
+            "filepath": "/tmp/test.py",
+        }
+        file_info = {"name": "test.py", "language": "Python", "lines": 5000}
+        prompt = build_chunk_prompt("Review this code", chunk_info, file_info)
+        assert "chunk 3 of 5" in prompt
+        assert "lines 1601" in prompt
+        assert "2400" in prompt
+        assert "test.py" in prompt
+        assert "Python" in prompt
+        assert "5000" in prompt
+
+    def test_includes_user_prompt(self):
+        chunk_info = {
+            "chunk_index": 0, "total_chunks": 1,
+            "start_line": 1, "end_line": 100,
+            "content": "...", "filepath": "/tmp/x.py",
+        }
+        file_info = {"name": "x.py", "language": "Python", "lines": 100}
+        prompt = build_chunk_prompt("Find security bugs", chunk_info, file_info)
+        assert "Find security bugs" in prompt
+
+    def test_review_mode_adds_categories(self):
+        chunk_info = {
+            "chunk_index": 0, "total_chunks": 1,
+            "start_line": 1, "end_line": 100,
+            "content": "...", "filepath": "/tmp/x.py",
+        }
+        file_info = {"name": "x.py", "language": "Python", "lines": 100}
+        prompt = build_chunk_prompt("Review", chunk_info, file_info, mode="review")
+        assert "bug" in prompt.lower()
+        assert "security" in prompt.lower()
+
+    def test_discuss_mode_no_categories(self):
+        chunk_info = {
+            "chunk_index": 0, "total_chunks": 1,
+            "start_line": 1, "end_line": 100,
+            "content": "...", "filepath": "/tmp/x.py",
+        }
+        file_info = {"name": "x.py", "language": "Python", "lines": 100}
+        prompt = build_chunk_prompt("Explain this", chunk_info, file_info, mode="discuss")
+        assert "Categorize findings" not in prompt
+
+
+# ---------------------------------------------------------------------------
+# build_synthesis_prompt
+# ---------------------------------------------------------------------------
+
+class TestBuildSynthesisPrompt:
+    def test_includes_all_chunk_responses(self):
+        results = [
+            {"chunk_index": 0, "file": "/tmp/a.py", "response": "Found bug on line 10", "error": None},
+            {"chunk_index": 1, "file": "/tmp/a.py", "response": "Performance issue at line 900", "error": None},
+        ]
+        file_infos = [{"name": "a.py", "lines": 2000}]
+        prompt = build_synthesis_prompt("Review code", results, file_infos)
+        assert "Found bug on line 10" in prompt
+        assert "Performance issue at line 900" in prompt
+        assert "Chunk 1" in prompt
+        assert "Chunk 2" in prompt
+
+    def test_marks_failed_chunks(self):
+        results = [
+            {"chunk_index": 0, "file": "/tmp/a.py", "response": "OK", "error": None},
+            {"chunk_index": 1, "file": "/tmp/a.py", "response": "", "error": "timeout"},
+        ]
+        file_infos = [{"name": "a.py", "lines": 2000}]
+        prompt = build_synthesis_prompt("Review", results, file_infos)
+        assert "analysis failed" in prompt
+        assert "timeout" in prompt
+
+    def test_includes_original_request(self):
+        results = [{"chunk_index": 0, "file": "/tmp/a.py", "response": "OK", "error": None}]
+        file_infos = [{"name": "a.py", "lines": 100}]
+        prompt = build_synthesis_prompt("Find SQL injections", results, file_infos)
+        assert "Find SQL injections" in prompt
+
+    def test_review_mode_adds_grouping(self):
+        results = [{"chunk_index": 0, "file": "/tmp/a.py", "response": "OK", "error": None}]
+        file_infos = [{"name": "a.py", "lines": 100}]
+        prompt = build_synthesis_prompt("Review", results, file_infos, mode="review")
+        assert "Group findings by category" in prompt
+
+
+# ---------------------------------------------------------------------------
+# _parse_opencode_response
+# ---------------------------------------------------------------------------
+
+class TestParseOpenCodeResponse:
+    def test_extracts_text_and_session_id(self):
+        lines = [
+            json.dumps({"sessionID": "sess-123"}),
+            json.dumps({"type": "text", "part": {"text": "Hello "}}),
+            json.dumps({"type": "text", "part": {"text": "World"}}),
+        ]
+        output = "\n".join(lines)
+        text, sid = OpenCodeBridge._parse_opencode_response(output)
+        assert text == "Hello World"
+        assert sid == "sess-123"
+
+    def test_no_session_id(self):
+        lines = [
+            json.dumps({"type": "text", "part": {"text": "Just text"}}),
+        ]
+        output = "\n".join(lines)
+        text, sid = OpenCodeBridge._parse_opencode_response(output)
+        assert text == "Just text"
+        assert sid is None
+
+    def test_skips_invalid_json(self):
+        output = "not json\n" + json.dumps({"type": "text", "part": {"text": "OK"}})
+        text, sid = OpenCodeBridge._parse_opencode_response(output)
+        assert text == "OK"
+
+    def test_empty_output(self):
+        text, sid = OpenCodeBridge._parse_opencode_response("")
+        assert text == ""
+        assert sid is None
+
+
+# ---------------------------------------------------------------------------
+# Integration: chunking gate in send_message / review_code
+# ---------------------------------------------------------------------------
+
+def _mock_opencode_response(text: str, session_id: str = "mock-sess") -> str:
+    """Build a mock JSON-lines response from opencode."""
+    lines = [
+        json.dumps({"sessionID": session_id}),
+        json.dumps({"type": "text", "part": {"text": text}}),
+    ]
+    return "\n".join(lines)
+
+
+@pytest.fixture
+def bridge():
+    b = OpenCodeBridge()
+    return b
+
+
+class TestChunkingGateIntegration:
+    @pytest.mark.anyio
+    async def test_small_file_bypasses_chunking(self, bridge, tmp_path):
+        """Files under CHUNK_THRESHOLD should NOT trigger chunking."""
+        small = tmp_path / "small.py"
+        small.write_text("\n".join(f"# line {i}" for i in range(500)))
+
+        await bridge.start_session("test-small")
+
+        with patch.object(bridge, "_run_opencode", new_callable=AsyncMock) as mock_run:
+            mock_run.return_value = (_mock_opencode_response("review done"), 0)
+            with patch.object(bridge, "_run_chunked", new_callable=AsyncMock) as mock_chunked:
+                result = await bridge.send_message(
+                    "Review this", files=[str(small)]
+                )
+                mock_chunked.assert_not_called()
+                assert "review done" in result
+
+    @pytest.mark.anyio
+    async def test_large_file_triggers_chunking(self, bridge, tmp_path):
+        """Files over CHUNK_THRESHOLD should trigger _run_chunked."""
+        large = tmp_path / "large.py"
+        large.write_text("\n".join(f"# line {i}" for i in range(CHUNK_THRESHOLD + 100)))
+
+        await bridge.start_session("test-large")
+
+        with patch.object(bridge, "_run_chunked", new_callable=AsyncMock) as mock_chunked:
+            mock_chunked.return_value = "chunked analysis done"
+            result = await bridge.send_message(
+                "Review this", files=[str(large)]
+            )
+            mock_chunked.assert_called_once()
+            assert "chunked analysis done" in result
+
+    @pytest.mark.anyio
+    async def test_review_code_large_file_triggers_chunking(self, bridge, tmp_path):
+        """review_code should also route large files through chunking."""
+        large = tmp_path / "big.py"
+        large.write_text("\n".join(f"# line {i}" for i in range(CHUNK_THRESHOLD + 100)))
+
+        await bridge.start_session("test-review-large")
+
+        with patch.object(bridge, "_run_chunked", new_callable=AsyncMock) as mock_chunked:
+            mock_chunked.return_value = "chunked review done"
+            result = await bridge.review_code(str(large))
+            mock_chunked.assert_called_once()
+            assert "chunked review done" in result
+
+    @pytest.mark.anyio
+    async def test_review_code_small_file_no_chunking(self, bridge, tmp_path):
+        """Small files in review_code should not trigger chunking."""
+        small = tmp_path / "tiny.py"
+        small.write_text("\n".join(f"# line {i}" for i in range(200)))
+
+        await bridge.start_session("test-review-small")
+
+        with patch.object(bridge, "_run_opencode", new_callable=AsyncMock) as mock_run:
+            mock_run.return_value = (_mock_opencode_response("looks good"), 0)
+            with patch.object(bridge, "_run_chunked", new_callable=AsyncMock) as mock_chunked:
+                result = await bridge.review_code(str(small))
+                mock_chunked.assert_not_called()
+
+
+# ---------------------------------------------------------------------------
+# Integration: _run_chunked map-reduce
+# ---------------------------------------------------------------------------
+
+class TestRunChunked:
+    @pytest.mark.anyio
+    async def test_successful_chunked_processing(self, bridge, tmp_path):
+        """Full map-reduce pipeline with mocked _run_opencode."""
+        large = tmp_path / "big.py"
+        large.write_text("\n".join(f"# line {i}" for i in range(2500)))
+
+        await bridge.start_session("test-chunked")
+        session = bridge.sessions["test-chunked"]
+
+        call_count = 0
+
+        async def mock_run(*args, timeout=300):
+            nonlocal call_count
+            call_count += 1
+            # Chunk calls return chunk analysis; synthesis call returns final
+            return (_mock_opencode_response(f"analysis-{call_count}"), 0)
+
+        with patch.object(bridge, "_run_opencode", side_effect=mock_run):
+            result = await bridge._run_chunked(
+                "Review this code", [str(large)], session, mode="review"
+            )
+
+        # Should have made multiple calls (chunks + synthesis)
+        assert call_count > 2
+        assert result  # non-empty
+
+    @pytest.mark.anyio
+    async def test_majority_failure_returns_error(self, bridge, tmp_path):
+        """If >50% of chunks fail, return an error message."""
+        large = tmp_path / "big.py"
+        large.write_text("\n".join(f"# line {i}" for i in range(2500)))
+
+        await bridge.start_session("test-fail")
+        session = bridge.sessions["test-fail"]
+
+        # All calls fail
+        async def mock_fail(*args, timeout=300):
+            return ("error: context_length_exceeded", 1)
+
+        with patch.object(bridge, "_run_opencode", side_effect=mock_fail):
+            result = await bridge._run_chunked(
+                "Review", [str(large)], session
+            )
+
+        assert "failed" in result.lower()
+
+    @pytest.mark.anyio
+    async def test_synthesis_failure_falls_back(self, bridge, tmp_path):
+        """If synthesis fails, fall back to concatenated chunk results."""
+        large = tmp_path / "big.py"
+        large.write_text("\n".join(f"# line {i}" for i in range(2500)))
+
+        await bridge.start_session("test-fallback")
+        session = bridge.sessions["test-fallback"]
+
+        chunks = chunk_file(str(large), CHUNK_SIZE, CHUNK_OVERLAP)
+        num_chunks = len(chunks)
+
+        # Track calls to distinguish chunk calls from synthesis call.
+        # Chunk calls have a --file arg pointing to a temp chunk file;
+        # the synthesis call does NOT attach a chunk temp file.
+        async def mock_run(*args, timeout=300):
+            # The synthesis prompt includes "Synthesize" — detect it
+            prompt_arg = args[1] if len(args) > 1 else ""
+            if "Synthesize" in prompt_arg or "Chunk Analyses" in prompt_arg:
+                return ("synthesis error", 1)
+            return (_mock_opencode_response("chunk-ok"), 0)
+
+        with patch.object(bridge, "_run_opencode", side_effect=mock_run):
+            result = await bridge._run_chunked(
+                "Review", [str(large)], session
+            )
+
+        # Should contain raw chunk results as fallback
+        assert "Synthesis failed" in result
+        assert "chunk-ok" in result
+
+    @pytest.mark.anyio
+    async def test_partial_chunk_failure_still_synthesizes(self, bridge, tmp_path):
+        """If some chunks fail but not majority, synthesis should still run."""
+        large = tmp_path / "big.py"
+        large.write_text("\n".join(f"# line {i}" for i in range(2500)))
+
+        await bridge.start_session("test-partial")
+        session = bridge.sessions["test-partial"]
+
+        first_call_done = False
+
+        async def mock_run(*args, timeout=300):
+            nonlocal first_call_done
+            prompt_arg = args[1] if len(args) > 1 else ""
+            # Synthesis call — always succeed
+            if "Synthesize" in prompt_arg or "Chunk Analyses" in prompt_arg:
+                return (_mock_opencode_response("synthesized"), 0)
+            # Fail just the first chunk call
+            if not first_call_done:
+                first_call_done = True
+                return ("error", 1)
+            return (_mock_opencode_response("chunk-ok"), 0)
+
+        with patch.object(bridge, "_run_opencode", side_effect=mock_run):
+            result = await bridge._run_chunked(
+                "Review", [str(large)], session
+            )
+
+        # Should have gotten a synthesis result (not a total failure)
+        assert "synthesized" in result or "chunk-ok" in result
+
+
+# ---------------------------------------------------------------------------
+# Edge cases
+# ---------------------------------------------------------------------------
+
+class TestChunkEdgeCases:
+    def test_exactly_at_threshold(self):
+        """A file with exactly CHUNK_THRESHOLD lines still splits into multiple chunks."""
+        path = _make_file(CHUNK_THRESHOLD)
+        chunks = chunk_file(path, chunk_size=CHUNK_SIZE)
+        # CHUNK_THRESHOLD (2000) > CHUNK_SIZE (800) → should produce multiple chunks
+        assert len(chunks) > 1
+
+    def test_one_line_over_threshold(self):
+        """CHUNK_THRESHOLD+1 lines should chunk properly."""
+        path = _make_file(CHUNK_THRESHOLD + 1)
+        chunks = chunk_file(path, chunk_size=CHUNK_SIZE)
+        assert len(chunks) > 1
+        assert chunks[-1]["end_line"] == CHUNK_THRESHOLD + 1
+
+    def test_single_line_file(self):
+        tmp = tempfile.NamedTemporaryFile(
+            mode="w", suffix=".py", delete=False, prefix="test_one_"
+        )
+        tmp.write("x = 1\n")
+        tmp.close()
+        chunks = chunk_file(tmp.name)
+        assert len(chunks) == 1
+        assert chunks[0]["start_line"] == 1
+        assert chunks[0]["end_line"] == 1
+
+    def test_very_large_file_chunk_count(self):
+        """A huge file should produce a reasonable number of chunks."""
+        path = _make_file(10000)
+        chunks = chunk_file(path, chunk_size=800, overlap=20)
+        # ~14 chunks (boundary snapping may reduce effective chunk size)
+        assert 10 <= len(chunks) <= 25
+
+    def test_binary_file_does_not_crash(self):
+        """Binary file with replacement errors should not crash."""
+        tmp = tempfile.NamedTemporaryFile(
+            mode="wb", suffix=".bin", delete=False, prefix="test_bin_"
+        )
+        tmp.write(b"\x00\xff" * 5000 + b"\n" * 100)
+        tmp.close()
+        chunks = chunk_file(tmp.name)
+        # Should produce something without crashing
+        assert isinstance(chunks, list)
+
+    def test_file_info_cache_not_stale(self, tmp_path):
+        """get_file_info cache shouldn't interfere with chunk decisions."""
+        f = tmp_path / "grow.py"
+        f.write_text("\n".join(f"# {i}" for i in range(100)))
+        info1 = get_file_info(str(f))
+        assert info1["lines"] == 100
+
+        # Clear cache so re-read picks up new size
+        _file_info_cache.pop(str(f.resolve()), None)
+
+        f.write_text("\n".join(f"# {i}" for i in range(3000)))
+        info2 = get_file_info(str(f))
+        assert info2["lines"] == 3000
+
+    def test_chunk_content_has_correct_lines(self):
+        """Verify each chunk's content actually matches its line range."""
+        path = _make_file(2000)
+        all_lines = Path(path).read_text().splitlines(keepends=True)
+        chunks = chunk_file(path, chunk_size=800, overlap=20)
+        for c in chunks:
+            start = c["start_line"] - 1  # 0-indexed
+            end = c["end_line"]
+            expected = "".join(all_lines[start:end])
+            assert c["content"] == expected, (
+                f"Chunk {c['chunk_index']} content mismatch: "
+                f"lines {c['start_line']}-{c['end_line']}"
+            )
+
+    def test_no_content_loss_across_chunks(self):
+        """Union of all chunk line ranges should cover every line in the file."""
+        path = _make_file(3000)
+        chunks = chunk_file(path, chunk_size=800, overlap=20)
+        covered = set()
+        for c in chunks:
+            for line in range(c["start_line"], c["end_line"] + 1):
+                covered.add(line)
+        assert covered == set(range(1, 3001))
+
+
+class TestChunkGateBoundary:
+    """Test the exact boundary where chunking kicks in."""
+
+    @pytest.mark.anyio
+    async def test_file_at_threshold_no_chunking_in_send(self, bridge, tmp_path):
+        """File with exactly CHUNK_THRESHOLD lines should NOT trigger chunking
+        in send_message (gate is strictly >)."""
+        f = tmp_path / "exact.py"
+        f.write_text("\n".join(f"# {i}" for i in range(CHUNK_THRESHOLD)))
+
+        await bridge.start_session("test-exact")
+
+        with patch.object(bridge, "_run_opencode", new_callable=AsyncMock) as mock_run:
+            mock_run.return_value = (_mock_opencode_response("normal"), 0)
+            with patch.object(bridge, "_run_chunked", new_callable=AsyncMock) as mock_chunked:
+                result = await bridge.send_message("Review", files=[str(f)])
+                mock_chunked.assert_not_called()
+                assert "normal" in result
+
+    @pytest.mark.anyio
+    async def test_file_one_over_threshold_triggers_chunking(self, bridge, tmp_path):
+        """File with CHUNK_THRESHOLD+1 lines SHOULD trigger chunking."""
+        f = tmp_path / "over.py"
+        f.write_text("\n".join(f"# {i}" for i in range(CHUNK_THRESHOLD + 1)))
+
+        await bridge.start_session("test-over")
+
+        with patch.object(bridge, "_run_chunked", new_callable=AsyncMock) as mock_chunked:
+            mock_chunked.return_value = "chunked"
+            result = await bridge.send_message("Review", files=[str(f)])
+            mock_chunked.assert_called_once()
+
+    @pytest.mark.anyio
+    async def test_multiple_small_files_no_chunking(self, bridge, tmp_path):
+        """Multiple small files (each under threshold) should NOT trigger chunking,
+        even if total lines exceed threshold."""
+        f1 = tmp_path / "a.py"
+        f2 = tmp_path / "b.py"
+        f1.write_text("\n".join(f"# {i}" for i in range(1500)))
+        f2.write_text("\n".join(f"# {i}" for i in range(1500)))
+
+        await bridge.start_session("test-multi-small")
+
+        with patch.object(bridge, "_run_opencode", new_callable=AsyncMock) as mock_run:
+            mock_run.return_value = (_mock_opencode_response("ok"), 0)
+            with patch.object(bridge, "_run_chunked", new_callable=AsyncMock) as mock_chunked:
+                result = await bridge.send_message(
+                    "Review", files=[str(f1), str(f2)]
+                )
+                mock_chunked.assert_not_called()
+
+    @pytest.mark.anyio
+    async def test_mix_small_and_large_triggers_chunking(self, bridge, tmp_path):
+        """If ANY file exceeds threshold, chunking should trigger."""
+        small = tmp_path / "small.py"
+        large = tmp_path / "large.py"
+        small.write_text("\n".join(f"# {i}" for i in range(100)))
+        large.write_text("\n".join(f"# {i}" for i in range(CHUNK_THRESHOLD + 1)))
+
+        await bridge.start_session("test-mix")
+
+        with patch.object(bridge, "_run_chunked", new_callable=AsyncMock) as mock_chunked:
+            mock_chunked.return_value = "chunked"
+            result = await bridge.send_message(
+                "Review", files=[str(small), str(large)]
+            )
+            mock_chunked.assert_called_once()
+
+
+class TestRunChunkedSmallFilesPassthrough:
+    """Verify small files are passed to synthesis call as context."""
+
+    @pytest.mark.anyio
+    async def test_small_files_attached_to_synthesis(self, bridge, tmp_path):
+        small = tmp_path / "helper.py"
+        large = tmp_path / "main.py"
+        small.write_text("\n".join(f"# {i}" for i in range(100)))
+        large.write_text("\n".join(f"# {i}" for i in range(2500)))
+
+        await bridge.start_session("test-passthrough")
+        session = bridge.sessions["test-passthrough"]
+
+        synthesis_args = []
+
+        async def mock_run(*args, timeout=300):
+            prompt_arg = args[1] if len(args) > 1 else ""
+            if "Synthesize" in prompt_arg or "Chunk Analyses" in prompt_arg:
+                synthesis_args.extend(args)
+                return (_mock_opencode_response("final"), 0)
+            return (_mock_opencode_response("chunk-ok"), 0)
+
+        with patch.object(bridge, "_run_opencode", side_effect=mock_run):
+            result = await bridge._run_chunked(
+                "Review", [str(small), str(large)], session
+            )
+
+        # The synthesis call should have --file pointing to the small file
+        assert str(small) in synthesis_args, (
+            f"Small file not passed to synthesis. Args: {synthesis_args}"
+        )
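Aside: the new test module runs on its own. A likely invocation, assuming pytest with the anyio plugin (which the @pytest.mark.anyio markers require) and uv as suggested by the bundled uv.lock:

    uv run pytest tests/test_chunking.py -q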