opencode-bridge 0.3.0.tar.gz → 0.4.1.tar.gz

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: opencode-bridge
- Version: 0.3.0
+ Version: 0.4.1
  Summary: MCP server for continuous OpenCode discussion sessions
  Project-URL: Repository, https://github.com/genomewalker/opencode-bridge
  Author: Antonio Fernandez-Guerra
@@ -16,6 +16,7 @@ Configuration:
  """

  import os
+ import re
  import json
  import asyncio
  import shutil
@@ -35,6 +36,13 @@ SMALL_FILE = 500 # lines
  MEDIUM_FILE = 1500  # lines
  LARGE_FILE = 5000  # lines

+ # Chunked processing thresholds
+ CHUNK_THRESHOLD = 2000  # lines — files above this get chunked
+ CHUNK_SIZE = 800  # lines per chunk
+ CHUNK_OVERLAP = 20  # overlap between adjacent chunks
+ MAX_PARALLEL_CHUNKS = 6  # concurrency limit
+ MAX_TOTAL_CHUNKS = 20  # safety cap
+
  # Language detection by extension
  LANG_MAP = {
      ".py": "Python", ".js": "JavaScript", ".ts": "TypeScript", ".tsx": "TypeScript/React",
@@ -304,6 +312,174 @@ def build_companion_prompt(
      return "\n".join(parts)


+ # ---------------------------------------------------------------------------
+ # Chunked Processing — map-reduce for large files
+ # ---------------------------------------------------------------------------
+
+ # Regex for natural code boundaries (language-agnostic)
+ _BOUNDARY_RE = re.compile(
+     r"^(?:\s*$"  # blank line
+     r"|(?:def |class |function |func |fn |pub fn |impl |module |package )"  # definitions
+     r"|(?:})\s*$"  # closing brace on its own line
+     r"|(?://|#|/\*|\*/).{0,80}$"  # comment lines
+     r")",
+     re.MULTILINE,
+ )
+
+
+ def chunk_file(
+     filepath: str,
+     chunk_size: int = CHUNK_SIZE,
+     overlap: int = CHUNK_OVERLAP,
+ ) -> list[dict]:
+     """Split a file into overlapping chunks with boundary snapping.
+
+     Returns a list of dicts with keys:
+         chunk_index, total_chunks, start_line, end_line, content, filepath
+     """
+     p = Path(filepath)
+     try:
+         lines = p.read_text(errors="replace").splitlines(keepends=True)
+     except Exception:
+         return []
+
+     total = len(lines)
+     if total == 0:
+         return []
+     if total <= chunk_size:
+         return [{
+             "chunk_index": 0,
+             "total_chunks": 1,
+             "start_line": 1,
+             "end_line": total,
+             "content": "".join(lines),
+             "filepath": str(p),
+         }]
+
+     chunks: list[dict] = []
+     pos = 0
+     while pos < total:
+         end = min(pos + chunk_size, total)
+
+         # Snap to a natural boundary within ±50 lines of the cut point
+         if end < total:
+             best = end
+             scan_start = max(end - 50, pos + chunk_size // 2)
+             scan_end = min(end + 50, total)
+             for i in range(scan_start, scan_end):
+                 if _BOUNDARY_RE.match(lines[i]):
+                     best = i + 1  # include the boundary line in this chunk
+                     break
+             end = best
+
+         chunk_content = "".join(lines[pos:end])
+         chunks.append({
+             "chunk_index": len(chunks),
+             "total_chunks": -1,  # filled in below
+             "start_line": pos + 1,  # 1-indexed
+             "end_line": end,
+             "content": chunk_content,
+             "filepath": str(p),
+         })
+
+         # Advance: overlap with previous chunk, but stop if we've reached the end
+         if end >= total:
+             break
+         pos = max(end - overlap, pos + 1)
+
+     # Fill in total_chunks
+     for c in chunks:
+         c["total_chunks"] = len(chunks)
+
+     return chunks
+
+
+ def build_chunk_prompt(
+     user_prompt: str,
+     chunk_info: dict,
+     file_info: dict,
+     mode: str = "discuss",
+ ) -> str:
+     """Build a focused prompt for analyzing a single file chunk."""
+     name = file_info.get("name", Path(chunk_info["filepath"]).name)
+     language = file_info.get("language", "Unknown")
+     total_lines = file_info.get("lines", "?")
+     idx = chunk_info["chunk_index"] + 1
+     total = chunk_info["total_chunks"]
+     start = chunk_info["start_line"]
+     end = chunk_info["end_line"]
+
+     parts = [
+         f"You are analyzing **chunk {idx} of {total}** from `{name}` "
+         f"({language}, {total_lines} total lines).",
+         f"This chunk covers **lines {start}–{end}**.",
+         "",
+         "## Task",
+         user_prompt,
+         "",
+         "## Instructions",
+         "- Focus ONLY on the code in this chunk",
+         "- Note any references to code that might exist outside this chunk",
+         "- Be concise — your output will be combined with analyses of other chunks",
+         "- Include line numbers for any issues found",
+     ]
+
+     if mode == "review":
+         parts.append("- Categorize findings as: bug, security, design, performance, or style")
+
+     return "\n".join(parts)
+
+
+ def build_synthesis_prompt(
+     user_prompt: str,
+     chunk_results: list[dict],
+     file_infos: list[dict],
+     mode: str = "discuss",
+ ) -> str:
+     """Build a prompt that merges chunk analyses into one coherent response."""
+     file_desc = ", ".join(
+         f"`{i.get('name', '?')}` ({i.get('lines', '?')} lines)"
+         for i in file_infos
+     )
+     n = len(chunk_results)
+
+     parts = [
+         f"You analyzed a large file in **{n} chunks**. "
+         "Synthesize the chunk analyses below into one coherent response.",
+         "",
+         "## Original Request",
+         user_prompt,
+         "",
+         "## Files Analyzed",
+         file_desc,
+         "",
+         "## Chunk Analyses",
+     ]
+
+     for cr in sorted(chunk_results, key=lambda c: c.get("chunk_index", 0)):
+         idx = cr.get("chunk_index", 0) + 1
+         fp = Path(cr.get("file", "")).name
+         response = cr.get("response", "[analysis failed]")
+         if cr.get("error"):
+             response = f"[analysis failed: {cr['error']}]"
+         parts.append(f"\n### Chunk {idx} — `{fp}`")
+         parts.append(response)
+
+     parts.extend([
+         "",
+         "## Instructions",
+         "- Combine findings and remove duplicates (chunks overlap slightly)",
+         "- Organize by importance, not by chunk order",
+         "- Preserve line number references from the original analyses",
+         "- Provide an overall assessment at the top",
+     ])
+
+     if mode == "review":
+         parts.append("- Group findings by category: bugs, security, design, performance, style")
+
+     return "\n".join(parts)
+
+
  # Default configuration
  DEFAULT_MODEL = "openai/gpt-5.2-codex"
  DEFAULT_AGENT = "plan"
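
A minimal usage sketch of chunk_file as added above (the scratch file and printed ranges are illustrative, not from the package):

    import tempfile

    tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".py", delete=False)
    tmp.write("".join(f"x_{i} = {i}\n" for i in range(1, 2501)))  # 2,500 plain lines
    tmp.close()

    for c in chunk_file(tmp.name, chunk_size=800, overlap=20):
        print(c["start_line"], "-", c["end_line"], "of", c["total_chunks"])
    # Plain assignments match no boundary pattern, so the cuts stay put:
    # 1-800, 781-1580, 1561-2360, 2341-2500; adjacent chunks share 20 lines.
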
@@ -470,6 +646,182 @@ class OpenCodeBridge:
          except Exception as e:
              return f"Error: {e}", 1

+     @staticmethod
+     def _parse_opencode_response(output: str) -> tuple[str, Optional[str]]:
+         """Parse JSON-lines output from opencode CLI.
+
+         Returns (reply_text, session_id).
+         """
+         reply_parts: list[str] = []
+         session_id: Optional[str] = None
+         for line in output.split("\n"):
+             if not line:
+                 continue
+             try:
+                 event = json.loads(line)
+                 if not session_id and "sessionID" in event:
+                     session_id = event["sessionID"]
+                 if event.get("type") == "text":
+                     text = event.get("part", {}).get("text", "")
+                     if text:
+                         reply_parts.append(text)
+             except json.JSONDecodeError:
+                 continue
+         return "".join(reply_parts), session_id
+
+     async def _run_chunk(
+         self,
+         chunk_info: dict,
+         file_info: dict,
+         user_prompt: str,
+         session: "Session",
+         mode: str = "discuss",
+     ) -> dict:
+         """Process a single file chunk through OpenCode (stateless)."""
+         result = {
+             "chunk_index": chunk_info["chunk_index"],
+             "file": chunk_info["filepath"],
+             "response": "",
+             "error": None,
+         }
+
+         # Write chunk to a temp file preserving the original extension
+         ext = Path(chunk_info["filepath"]).suffix or ".txt"
+         tmp = None
+         try:
+             tmp = tempfile.NamedTemporaryFile(
+                 mode="w", suffix=ext, delete=False, prefix="opencode_chunk_"
+             )
+             tmp.write(chunk_info["content"])
+             tmp.close()
+
+             prompt = build_chunk_prompt(user_prompt, chunk_info, file_info, mode)
+
+             args = [
+                 "run", prompt,
+                 "--model", session.model,
+                 "--agent", session.agent,
+                 "--file", tmp.name,
+                 "--format", "json",
+             ]
+             if session.variant:
+                 args.extend(["--variant", session.variant])
+
+             output, code = await self._run_opencode(*args, timeout=300)
+
+             if code != 0:
+                 result["error"] = output[:500]
+                 return result
+
+             reply, _ = self._parse_opencode_response(output)
+             result["response"] = reply or "[no response]"
+
+         except Exception as e:
+             result["error"] = str(e)
+         finally:
+             if tmp:
+                 try:
+                     os.unlink(tmp.name)
+                 except OSError:
+                     pass
+         return result
+
+     async def _run_chunked(
+         self,
+         user_prompt: str,
+         files: list[str],
+         session: "Session",
+         mode: str = "discuss",
+     ) -> str:
+         """Map-reduce orchestrator: chunk large files, process in parallel, synthesize."""
+         small_files: list[str] = []
+         all_chunks: list[tuple[dict, dict]] = []  # (chunk_info, file_info)
+
+         for f in files:
+             info = get_file_info(f)
+             line_count = info.get("lines", 0)
+             if line_count > CHUNK_THRESHOLD:
+                 chunks = chunk_file(f, CHUNK_SIZE, CHUNK_OVERLAP)
+                 for c in chunks:
+                     all_chunks.append((c, info))
+             else:
+                 small_files.append(f)
+
+         # Safety: if too many chunks, increase chunk size and re-chunk
+         if len(all_chunks) > MAX_TOTAL_CHUNKS:
+             all_chunks = []
+             bigger = CHUNK_SIZE * 2
+             for f in files:
+                 info = get_file_info(f)
+                 if info.get("lines", 0) > CHUNK_THRESHOLD:
+                     chunks = chunk_file(f, bigger, CHUNK_OVERLAP)
+                     for c in chunks:
+                         all_chunks.append((c, info))
+             # small_files already collected above
+
+         if not all_chunks:
+             return "No chunks to process."
+
+         # --- Map phase: run chunks in parallel ---
+         sem = asyncio.Semaphore(MAX_PARALLEL_CHUNKS)
+
+         async def _limited(chunk_info: dict, file_info: dict) -> dict:
+             async with sem:
+                 return await self._run_chunk(chunk_info, file_info, user_prompt, session, mode)
+
+         tasks = [_limited(ci, fi) for ci, fi in all_chunks]
+         chunk_results: list[dict] = await asyncio.gather(*tasks)
+
+         # Check failure rate
+         failed = sum(1 for cr in chunk_results if cr.get("error"))
+         if failed > len(chunk_results) / 2:
+             return (
+                 f"Chunked analysis failed: {failed}/{len(chunk_results)} chunks errored. "
+                 "Try with a smaller file or increase the chunk size."
+             )
+
+         # --- Reduce phase: synthesize ---
+         file_infos = []
+         seen_paths: set[str] = set()
+         for _, fi in all_chunks:
+             fp = fi.get("path", "")
+             if fp not in seen_paths:
+                 seen_paths.add(fp)
+                 file_infos.append(fi)
+
+         synthesis_prompt = build_synthesis_prompt(user_prompt, chunk_results, file_infos, mode)
+
+         # Attach small files for reference context (not the large ones)
+         args = [
+             "run", synthesis_prompt,
+             "--model", session.model,
+             "--agent", session.agent,
+             "--format", "json",
+         ]
+         if session.variant:
+             args.extend(["--variant", session.variant])
+         for sf in small_files:
+             args.extend(["--file", sf])
+
+         # Longer timeout for synthesis
+         output, code = await self._run_opencode(*args, timeout=600)
+
+         if code != 0:
+             # Fallback: concatenate raw chunk results
+             parts = ["*Synthesis failed — showing raw chunk analyses:*\n"]
+             for cr in sorted(chunk_results, key=lambda c: c.get("chunk_index", 0)):
+                 idx = cr.get("chunk_index", 0) + 1
+                 fp = Path(cr.get("file", "")).name
+                 parts.append(f"\n### Chunk {idx} — `{fp}`")
+                 if cr.get("error"):
+                     parts.append(f"[error: {cr['error']}]")
+                 else:
+                     parts.append(cr.get("response", "[no response]"))
+             return "\n".join(parts)
+
+         reply, _ = self._parse_opencode_response(output)
+         return reply or "No response from synthesis."
+
      async def list_models(self, provider: Optional[str] = None) -> str:
          """List available models from OpenCode."""
          args = ["models"]
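
As a quick sanity check of the extracted helper: _parse_opencode_response concatenates the text events, picks up the first sessionID it sees, and skips anything that is not valid JSON. A small illustrative run (mine, not from the package):

    import json

    output = "\n".join([
        json.dumps({"sessionID": "sess-123"}),
        json.dumps({"type": "text", "part": {"text": "Hello "}}),
        "not json",  # malformed lines are silently skipped
        json.dumps({"type": "text", "part": {"text": "world"}}),
    ])
    text, sid = OpenCodeBridge._parse_opencode_response(output)
    assert (text, sid) == ("Hello world", "sess-123")
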
@@ -599,6 +951,27 @@ Set via:
          temp_file.close()
          files = (files or []) + [temp_file.name]

+         # --- Chunking gate: large user files get map-reduce processing ---
+         user_files = [f for f in files if not Path(f).name.startswith("opencode_msg_")]
+         needs_chunking = any(
+             get_file_info(f).get("lines", 0) > CHUNK_THRESHOLD
+             for f in user_files
+         )
+
+         if needs_chunking:
+             reply = await self._run_chunked(message, user_files, session, mode="discuss")
+             # Cleanup temp file
+             try:
+                 os.unlink(temp_file.name)
+             except OSError:
+                 pass
+             if reply:
+                 session.add_message("assistant", reply)
+                 session.save(self.sessions_dir / f"{sid}.json")
+             return reply or "No response received"
+
+         # --- Normal (non-chunked) path ---
+
          # Build prompt: companion system unless _raw is set
          if _raw:
              run_prompt = build_message_prompt(message, files)
@@ -631,7 +1004,6 @@ Set via:
          args.extend(["--format", "json"])

          # Scale timeout based on attached file size
-         user_files = [f for f in files if not Path(f).name.startswith("opencode_msg_")]
          total_lines = sum(get_file_info(f).get("lines", 0) for f in user_files)
          # Base 300s, +60s per 1000 lines above threshold, capped at 900s
          timeout = min(900, 300 + max(0, (total_lines - MEDIUM_FILE) * 60 // 1000))
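
The timeout formula is plain integer arithmetic; a few worked values (my sketch, using the MEDIUM_FILE constant defined earlier in this release):

    MEDIUM_FILE = 1500  # lines, as defined at the top of the module

    def scaled_timeout(total_lines: int) -> int:
        return min(900, 300 + max(0, (total_lines - MEDIUM_FILE) * 60 // 1000))

    assert scaled_timeout(1000) == 300    # at or under threshold: base 300 s
    assert scaled_timeout(5000) == 510    # +60 s per 1,000 lines over 1,500
    assert scaled_timeout(20000) == 900   # hard cap at 900 s
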
@@ -649,22 +1021,10 @@ Set via:
              return f"Error: {output}"

          # Parse JSON events for session ID and text
-         reply_parts = []
-         for line in output.split("\n"):
-             if not line:
-                 continue
-             try:
-                 event = json.loads(line)
-                 if not session.opencode_session_id and "sessionID" in event:
-                     session.opencode_session_id = event["sessionID"]
-                 if event.get("type") == "text":
-                     text = event.get("part", {}).get("text", "")
-                     if text:
-                         reply_parts.append(text)
-             except json.JSONDecodeError:
-                 continue
+         reply, new_session_id = self._parse_opencode_response(output)
+         if new_session_id and not session.opencode_session_id:
+             session.opencode_session_id = new_session_id

-         reply = "".join(reply_parts)
          if reply:
              session.add_message("assistant", reply)

@@ -737,10 +1097,16 @@ Set via:
          files = file_paths
          file_infos = [get_file_info(f) for f in file_paths]
          file_infos = [i for i in file_infos if i]
+         total_lines = sum(i.get("lines", 0) for i in file_infos)
+
+         # Chunking gate for large reviews
+         if total_lines > CHUNK_THRESHOLD:
+             prompt = build_review_prompt(file_infos, focus)
+             return await self._run_chunked(prompt, file_paths, self.sessions[sid], mode="review")
+
          prompt = build_review_prompt(file_infos, focus)

          # Increase timeout for large files
-         total_lines = sum(i.get("lines", 0) for i in file_infos)
          if total_lines > LARGE_FILE:
              # Use variant=high for large reviews if not already high+
              session = self.sessions[sid]
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"

  [project]
  name = "opencode-bridge"
- version = "0.3.0"
+ version = "0.4.1"
  description = "MCP server for continuous OpenCode discussion sessions"
  readme = "README.md"
  license = "MIT"
@@ -0,0 +1,630 @@
+ """Tests for chunked subagent processing of large files."""
+
+ import json
+ import tempfile
+ import textwrap
+ from pathlib import Path
+ from unittest.mock import AsyncMock, patch
+
+ import pytest
+
+ from opencode_bridge.server import (
+     CHUNK_OVERLAP,
+     CHUNK_SIZE,
+     CHUNK_THRESHOLD,
+     MAX_TOTAL_CHUNKS,
+     OpenCodeBridge,
+     build_chunk_prompt,
+     build_synthesis_prompt,
+     chunk_file,
+     get_file_info,
+     _file_info_cache,
+ )
+
+
+ # ---------------------------------------------------------------------------
+ # Helpers
+ # ---------------------------------------------------------------------------
+
+ def _make_file(lines: int, *, with_boundaries: bool = False) -> str:
+     """Create a temp file with the given number of lines. Returns its path."""
+     tmp = tempfile.NamedTemporaryFile(
+         mode="w", suffix=".py", delete=False, prefix="test_chunk_"
+     )
+     for i in range(1, lines + 1):
+         if with_boundaries and i % 200 == 0:
+             tmp.write(f"\ndef function_{i}():\n")
+         else:
+             tmp.write(f"# line {i}\n")
+     tmp.close()
+     return tmp.name
+
+
+ # ---------------------------------------------------------------------------
+ # chunk_file
+ # ---------------------------------------------------------------------------
+
+ class TestChunkFile:
+     def test_small_file_single_chunk(self):
+         path = _make_file(100)
+         chunks = chunk_file(path, chunk_size=CHUNK_SIZE)
+         assert len(chunks) == 1
+         assert chunks[0]["chunk_index"] == 0
+         assert chunks[0]["total_chunks"] == 1
+         assert chunks[0]["start_line"] == 1
+         assert chunks[0]["end_line"] == 100
+
+     def test_large_file_multiple_chunks(self):
+         path = _make_file(2500)
+         chunks = chunk_file(path, chunk_size=800, overlap=20)
+         assert len(chunks) > 1
+         # All chunks should reference the same file
+         for c in chunks:
+             assert c["filepath"] == path
+             assert c["total_chunks"] == len(chunks)
+
+     def test_chunks_cover_full_file(self):
+         path = _make_file(3000)
+         chunks = chunk_file(path, chunk_size=800, overlap=20)
+         # First chunk starts at line 1
+         assert chunks[0]["start_line"] == 1
+         # Last chunk ends at the file's last line
+         assert chunks[-1]["end_line"] == 3000
+
+     def test_chunk_overlap(self):
+         path = _make_file(2000)
+         chunks = chunk_file(path, chunk_size=800, overlap=20)
+         for i in range(1, len(chunks)):
+             prev_end = chunks[i - 1]["end_line"]
+             cur_start = chunks[i]["start_line"]
+             # The start of the next chunk should overlap with the end of the previous
+             assert cur_start <= prev_end, (
+                 f"Chunk {i} starts at {cur_start} but previous ends at {prev_end}"
+             )
+
+     def test_boundary_snapping(self):
+         """Chunks should prefer cutting near function definitions."""
+         path = _make_file(2000, with_boundaries=True)
+         chunks = chunk_file(path, chunk_size=800, overlap=20)
+         # With boundaries every 200 lines, cuts should snap near those points
+         assert len(chunks) >= 2
+
+     def test_chunk_indices_sequential(self):
+         path = _make_file(3000)
+         chunks = chunk_file(path, chunk_size=800, overlap=20)
+         for i, c in enumerate(chunks):
+             assert c["chunk_index"] == i
+
+     def test_empty_file(self):
+         tmp = tempfile.NamedTemporaryFile(
+             mode="w", suffix=".py", delete=False, prefix="test_empty_"
+         )
+         tmp.close()
+         chunks = chunk_file(tmp.name)
+         assert chunks == []
+
+     def test_nonexistent_file(self):
+         chunks = chunk_file("/nonexistent/file.py")
+         assert chunks == []
+
+     def test_content_preserved(self):
+         path = _make_file(100)
+         chunks = chunk_file(path, chunk_size=800)
+         original = Path(path).read_text()
+         assert chunks[0]["content"] == original
+
+
+ # ---------------------------------------------------------------------------
+ # build_chunk_prompt
+ # ---------------------------------------------------------------------------
+
+ class TestBuildChunkPrompt:
+     def test_includes_chunk_metadata(self):
+         chunk_info = {
+             "chunk_index": 2,
+             "total_chunks": 5,
+             "start_line": 1601,
+             "end_line": 2400,
+             "content": "...",
+             "filepath": "/tmp/test.py",
+         }
+         file_info = {"name": "test.py", "language": "Python", "lines": 5000}
+         prompt = build_chunk_prompt("Review this code", chunk_info, file_info)
+         assert "chunk 3 of 5" in prompt
+         assert "lines 1601" in prompt
+         assert "2400" in prompt
+         assert "test.py" in prompt
+         assert "Python" in prompt
+         assert "5000" in prompt
+
+     def test_includes_user_prompt(self):
+         chunk_info = {
+             "chunk_index": 0, "total_chunks": 1,
+             "start_line": 1, "end_line": 100,
+             "content": "...", "filepath": "/tmp/x.py",
+         }
+         file_info = {"name": "x.py", "language": "Python", "lines": 100}
+         prompt = build_chunk_prompt("Find security bugs", chunk_info, file_info)
+         assert "Find security bugs" in prompt
+
+     def test_review_mode_adds_categories(self):
+         chunk_info = {
+             "chunk_index": 0, "total_chunks": 1,
+             "start_line": 1, "end_line": 100,
+             "content": "...", "filepath": "/tmp/x.py",
+         }
+         file_info = {"name": "x.py", "language": "Python", "lines": 100}
+         prompt = build_chunk_prompt("Review", chunk_info, file_info, mode="review")
+         assert "bug" in prompt.lower()
+         assert "security" in prompt.lower()
+
+     def test_discuss_mode_no_categories(self):
+         chunk_info = {
+             "chunk_index": 0, "total_chunks": 1,
+             "start_line": 1, "end_line": 100,
+             "content": "...", "filepath": "/tmp/x.py",
+         }
+         file_info = {"name": "x.py", "language": "Python", "lines": 100}
+         prompt = build_chunk_prompt("Explain this", chunk_info, file_info, mode="discuss")
+         assert "Categorize findings" not in prompt
+
+
+ # ---------------------------------------------------------------------------
+ # build_synthesis_prompt
+ # ---------------------------------------------------------------------------
+
+ class TestBuildSynthesisPrompt:
+     def test_includes_all_chunk_responses(self):
+         results = [
+             {"chunk_index": 0, "file": "/tmp/a.py", "response": "Found bug on line 10", "error": None},
+             {"chunk_index": 1, "file": "/tmp/a.py", "response": "Performance issue at line 900", "error": None},
+         ]
+         file_infos = [{"name": "a.py", "lines": 2000}]
+         prompt = build_synthesis_prompt("Review code", results, file_infos)
+         assert "Found bug on line 10" in prompt
+         assert "Performance issue at line 900" in prompt
+         assert "Chunk 1" in prompt
+         assert "Chunk 2" in prompt
+
+     def test_marks_failed_chunks(self):
+         results = [
+             {"chunk_index": 0, "file": "/tmp/a.py", "response": "OK", "error": None},
+             {"chunk_index": 1, "file": "/tmp/a.py", "response": "", "error": "timeout"},
+         ]
+         file_infos = [{"name": "a.py", "lines": 2000}]
+         prompt = build_synthesis_prompt("Review", results, file_infos)
+         assert "analysis failed" in prompt
+         assert "timeout" in prompt
+
+     def test_includes_original_request(self):
+         results = [{"chunk_index": 0, "file": "/tmp/a.py", "response": "OK", "error": None}]
+         file_infos = [{"name": "a.py", "lines": 100}]
+         prompt = build_synthesis_prompt("Find SQL injections", results, file_infos)
+         assert "Find SQL injections" in prompt
+
+     def test_review_mode_adds_grouping(self):
+         results = [{"chunk_index": 0, "file": "/tmp/a.py", "response": "OK", "error": None}]
+         file_infos = [{"name": "a.py", "lines": 100}]
+         prompt = build_synthesis_prompt("Review", results, file_infos, mode="review")
+         assert "Group findings by category" in prompt
+
+
+ # ---------------------------------------------------------------------------
+ # _parse_opencode_response
+ # ---------------------------------------------------------------------------
+
+ class TestParseOpenCodeResponse:
+     def test_extracts_text_and_session_id(self):
+         lines = [
+             json.dumps({"sessionID": "sess-123"}),
+             json.dumps({"type": "text", "part": {"text": "Hello "}}),
+             json.dumps({"type": "text", "part": {"text": "World"}}),
+         ]
+         output = "\n".join(lines)
+         text, sid = OpenCodeBridge._parse_opencode_response(output)
+         assert text == "Hello World"
+         assert sid == "sess-123"
+
+     def test_no_session_id(self):
+         lines = [
+             json.dumps({"type": "text", "part": {"text": "Just text"}}),
+         ]
+         output = "\n".join(lines)
+         text, sid = OpenCodeBridge._parse_opencode_response(output)
+         assert text == "Just text"
+         assert sid is None
+
+     def test_skips_invalid_json(self):
+         output = "not json\n" + json.dumps({"type": "text", "part": {"text": "OK"}})
+         text, sid = OpenCodeBridge._parse_opencode_response(output)
+         assert text == "OK"
+
+     def test_empty_output(self):
+         text, sid = OpenCodeBridge._parse_opencode_response("")
+         assert text == ""
+         assert sid is None
+
+
+ # ---------------------------------------------------------------------------
+ # Integration: chunking gate in send_message / review_code
+ # ---------------------------------------------------------------------------
+
+ def _mock_opencode_response(text: str, session_id: str = "mock-sess") -> str:
+     """Build a mock JSON-lines response from opencode."""
+     lines = [
+         json.dumps({"sessionID": session_id}),
+         json.dumps({"type": "text", "part": {"text": text}}),
+     ]
+     return "\n".join(lines)
+
+
+ @pytest.fixture
+ def bridge():
+     b = OpenCodeBridge()
+     return b
+
+
+ class TestChunkingGateIntegration:
+     @pytest.mark.anyio
+     async def test_small_file_bypasses_chunking(self, bridge, tmp_path):
+         """Files under CHUNK_THRESHOLD should NOT trigger chunking."""
+         small = tmp_path / "small.py"
+         small.write_text("\n".join(f"# line {i}" for i in range(500)))
+
+         await bridge.start_session("test-small")
+
+         with patch.object(bridge, "_run_opencode", new_callable=AsyncMock) as mock_run:
+             mock_run.return_value = (_mock_opencode_response("review done"), 0)
+             with patch.object(bridge, "_run_chunked", new_callable=AsyncMock) as mock_chunked:
+                 result = await bridge.send_message(
+                     "Review this", files=[str(small)]
+                 )
+                 mock_chunked.assert_not_called()
+         assert "review done" in result
+
+     @pytest.mark.anyio
+     async def test_large_file_triggers_chunking(self, bridge, tmp_path):
+         """Files over CHUNK_THRESHOLD should trigger _run_chunked."""
+         large = tmp_path / "large.py"
+         large.write_text("\n".join(f"# line {i}" for i in range(CHUNK_THRESHOLD + 100)))
+
+         await bridge.start_session("test-large")
+
+         with patch.object(bridge, "_run_chunked", new_callable=AsyncMock) as mock_chunked:
+             mock_chunked.return_value = "chunked analysis done"
+             result = await bridge.send_message(
+                 "Review this", files=[str(large)]
+             )
+             mock_chunked.assert_called_once()
+         assert "chunked analysis done" in result
+
+     @pytest.mark.anyio
+     async def test_review_code_large_file_triggers_chunking(self, bridge, tmp_path):
+         """review_code should also route large files through chunking."""
+         large = tmp_path / "big.py"
+         large.write_text("\n".join(f"# line {i}" for i in range(CHUNK_THRESHOLD + 100)))
+
+         await bridge.start_session("test-review-large")
+
+         with patch.object(bridge, "_run_chunked", new_callable=AsyncMock) as mock_chunked:
+             mock_chunked.return_value = "chunked review done"
+             result = await bridge.review_code(str(large))
+             mock_chunked.assert_called_once()
+         assert "chunked review done" in result
+
+     @pytest.mark.anyio
+     async def test_review_code_small_file_no_chunking(self, bridge, tmp_path):
+         """Small files in review_code should not trigger chunking."""
+         small = tmp_path / "tiny.py"
+         small.write_text("\n".join(f"# line {i}" for i in range(200)))
+
+         await bridge.start_session("test-review-small")
+
+         with patch.object(bridge, "_run_opencode", new_callable=AsyncMock) as mock_run:
+             mock_run.return_value = (_mock_opencode_response("looks good"), 0)
+             with patch.object(bridge, "_run_chunked", new_callable=AsyncMock) as mock_chunked:
+                 result = await bridge.review_code(str(small))
+                 mock_chunked.assert_not_called()
+
+
+ # ---------------------------------------------------------------------------
+ # Integration: _run_chunked map-reduce
+ # ---------------------------------------------------------------------------
+
+ class TestRunChunked:
+     @pytest.mark.anyio
+     async def test_successful_chunked_processing(self, bridge, tmp_path):
+         """Full map-reduce pipeline with mocked _run_opencode."""
+         large = tmp_path / "big.py"
+         large.write_text("\n".join(f"# line {i}" for i in range(2500)))
+
+         await bridge.start_session("test-chunked")
+         session = bridge.sessions["test-chunked"]
+
+         call_count = 0
+
+         async def mock_run(*args, timeout=300):
+             nonlocal call_count
+             call_count += 1
+             # Chunk calls return chunk analysis; synthesis call returns final
+             return (_mock_opencode_response(f"analysis-{call_count}"), 0)
+
+         with patch.object(bridge, "_run_opencode", side_effect=mock_run):
+             result = await bridge._run_chunked(
+                 "Review this code", [str(large)], session, mode="review"
+             )
+
+         # Should have made multiple calls (chunks + synthesis)
+         assert call_count > 2
+         assert result  # non-empty
+
+     @pytest.mark.anyio
+     async def test_majority_failure_returns_error(self, bridge, tmp_path):
+         """If >50% of chunks fail, return an error message."""
+         large = tmp_path / "big.py"
+         large.write_text("\n".join(f"# line {i}" for i in range(2500)))
+
+         await bridge.start_session("test-fail")
+         session = bridge.sessions["test-fail"]
+
+         # All calls fail
+         async def mock_fail(*args, timeout=300):
+             return ("error: context_length_exceeded", 1)
+
+         with patch.object(bridge, "_run_opencode", side_effect=mock_fail):
+             result = await bridge._run_chunked(
+                 "Review", [str(large)], session
+             )
+
+         assert "failed" in result.lower()
+
+     @pytest.mark.anyio
+     async def test_synthesis_failure_falls_back(self, bridge, tmp_path):
+         """If synthesis fails, fall back to concatenated chunk results."""
+         large = tmp_path / "big.py"
+         large.write_text("\n".join(f"# line {i}" for i in range(2500)))
+
+         await bridge.start_session("test-fallback")
+         session = bridge.sessions["test-fallback"]
+
+         chunks = chunk_file(str(large), CHUNK_SIZE, CHUNK_OVERLAP)
+         num_chunks = len(chunks)
+
+         # Distinguish chunk calls from the synthesis call by prompt content:
+         # only the synthesis prompt contains "Synthesize" / "Chunk Analyses".
+         async def mock_run(*args, timeout=300):
+             prompt_arg = args[1] if len(args) > 1 else ""
+             if "Synthesize" in prompt_arg or "Chunk Analyses" in prompt_arg:
+                 return ("synthesis error", 1)
+             return (_mock_opencode_response("chunk-ok"), 0)
+
+         with patch.object(bridge, "_run_opencode", side_effect=mock_run):
+             result = await bridge._run_chunked(
+                 "Review", [str(large)], session
+             )
+
+         # Should contain raw chunk results as fallback
+         assert "Synthesis failed" in result
+         assert "chunk-ok" in result
+
+     @pytest.mark.anyio
+     async def test_partial_chunk_failure_still_synthesizes(self, bridge, tmp_path):
+         """If some chunks fail but not majority, synthesis should still run."""
+         large = tmp_path / "big.py"
+         large.write_text("\n".join(f"# line {i}" for i in range(2500)))
+
+         await bridge.start_session("test-partial")
+         session = bridge.sessions["test-partial"]
+
+         first_call_done = False
+
+         async def mock_run(*args, timeout=300):
+             nonlocal first_call_done
+             prompt_arg = args[1] if len(args) > 1 else ""
+             # Synthesis call — always succeed
+             if "Synthesize" in prompt_arg or "Chunk Analyses" in prompt_arg:
+                 return (_mock_opencode_response("synthesized"), 0)
+             # Fail just the first chunk call
+             if not first_call_done:
+                 first_call_done = True
+                 return ("error", 1)
+             return (_mock_opencode_response("chunk-ok"), 0)
+
+         with patch.object(bridge, "_run_opencode", side_effect=mock_run):
+             result = await bridge._run_chunked(
+                 "Review", [str(large)], session
+             )
+
+         # Should have gotten a synthesis result (not a total failure)
+         assert "synthesized" in result or "chunk-ok" in result
+
+
+ # ---------------------------------------------------------------------------
+ # Edge cases
+ # ---------------------------------------------------------------------------
+
+ class TestChunkEdgeCases:
+     def test_exactly_at_threshold(self):
+         """chunk_file splits a file of exactly CHUNK_THRESHOLD lines: CHUNK_SIZE,
+         not CHUNK_THRESHOLD, controls how many chunks come out."""
+         path = _make_file(CHUNK_THRESHOLD)
+         chunks = chunk_file(path, chunk_size=CHUNK_SIZE)
+         # CHUNK_THRESHOLD (2000) > CHUNK_SIZE (800) → should produce multiple chunks
+         assert len(chunks) > 1
+
+     def test_one_line_over_threshold(self):
+         """CHUNK_THRESHOLD+1 lines should chunk properly."""
+         path = _make_file(CHUNK_THRESHOLD + 1)
+         chunks = chunk_file(path, chunk_size=CHUNK_SIZE)
+         assert len(chunks) > 1
+         assert chunks[-1]["end_line"] == CHUNK_THRESHOLD + 1
+
+     def test_single_line_file(self):
+         tmp = tempfile.NamedTemporaryFile(
+             mode="w", suffix=".py", delete=False, prefix="test_one_"
+         )
+         tmp.write("x = 1\n")
+         tmp.close()
+         chunks = chunk_file(tmp.name)
+         assert len(chunks) == 1
+         assert chunks[0]["start_line"] == 1
+         assert chunks[0]["end_line"] == 1
+
+     def test_very_large_file_chunk_count(self):
+         """A huge file should produce a reasonable number of chunks."""
+         path = _make_file(10000)
+         chunks = chunk_file(path, chunk_size=800, overlap=20)
+         # ~14 chunks (boundary snapping may reduce effective chunk size)
+         assert 10 <= len(chunks) <= 25
+
+     def test_binary_file_does_not_crash(self):
+         """Binary file with replacement errors should not crash."""
+         tmp = tempfile.NamedTemporaryFile(
+             mode="wb", suffix=".bin", delete=False, prefix="test_bin_"
+         )
+         tmp.write(b"\x00\xff" * 5000 + b"\n" * 100)
+         tmp.close()
+         chunks = chunk_file(tmp.name)
+         # Should produce something without crashing
+         assert isinstance(chunks, list)
+
+     def test_file_info_cache_not_stale(self, tmp_path):
+         """get_file_info cache shouldn't interfere with chunk decisions."""
+         f = tmp_path / "grow.py"
+         f.write_text("\n".join(f"# {i}" for i in range(100)))
+         info1 = get_file_info(str(f))
+         assert info1["lines"] == 100
+
+         # Clear cache so re-read picks up new size
+         _file_info_cache.pop(str(f.resolve()), None)
+
+         f.write_text("\n".join(f"# {i}" for i in range(3000)))
+         info2 = get_file_info(str(f))
+         assert info2["lines"] == 3000
+
+     def test_chunk_content_has_correct_lines(self):
+         """Verify each chunk's content actually matches its line range."""
+         path = _make_file(2000)
+         all_lines = Path(path).read_text().splitlines(keepends=True)
+         chunks = chunk_file(path, chunk_size=800, overlap=20)
+         for c in chunks:
+             start = c["start_line"] - 1  # 0-indexed
+             end = c["end_line"]
+             expected = "".join(all_lines[start:end])
+             assert c["content"] == expected, (
+                 f"Chunk {c['chunk_index']} content mismatch: "
+                 f"lines {c['start_line']}-{c['end_line']}"
+             )
+
+     def test_no_content_loss_across_chunks(self):
+         """Union of all chunk line ranges should cover every line in the file."""
+         path = _make_file(3000)
+         chunks = chunk_file(path, chunk_size=800, overlap=20)
+         covered = set()
+         for c in chunks:
+             for line in range(c["start_line"], c["end_line"] + 1):
+                 covered.add(line)
+         assert covered == set(range(1, 3001))
+
+
+ class TestChunkGateBoundary:
+     """Test the exact boundary where chunking kicks in."""
+
+     @pytest.mark.anyio
+     async def test_file_at_threshold_no_chunking_in_send(self, bridge, tmp_path):
+         """File with exactly CHUNK_THRESHOLD lines should NOT trigger chunking
+         in send_message (gate is strictly >)."""
+         f = tmp_path / "exact.py"
+         f.write_text("\n".join(f"# {i}" for i in range(CHUNK_THRESHOLD)))
+
+         await bridge.start_session("test-exact")
+
+         with patch.object(bridge, "_run_opencode", new_callable=AsyncMock) as mock_run:
+             mock_run.return_value = (_mock_opencode_response("normal"), 0)
+             with patch.object(bridge, "_run_chunked", new_callable=AsyncMock) as mock_chunked:
+                 result = await bridge.send_message("Review", files=[str(f)])
+                 mock_chunked.assert_not_called()
+         assert "normal" in result
+
+     @pytest.mark.anyio
+     async def test_file_one_over_threshold_triggers_chunking(self, bridge, tmp_path):
+         """File with CHUNK_THRESHOLD+1 lines SHOULD trigger chunking."""
+         f = tmp_path / "over.py"
+         f.write_text("\n".join(f"# {i}" for i in range(CHUNK_THRESHOLD + 1)))
+
+         await bridge.start_session("test-over")
+
+         with patch.object(bridge, "_run_chunked", new_callable=AsyncMock) as mock_chunked:
+             mock_chunked.return_value = "chunked"
+             result = await bridge.send_message("Review", files=[str(f)])
+             mock_chunked.assert_called_once()
+
+     @pytest.mark.anyio
+     async def test_multiple_small_files_no_chunking(self, bridge, tmp_path):
+         """Multiple small files (each under threshold) should NOT trigger chunking,
+         even if total lines exceed threshold."""
+         f1 = tmp_path / "a.py"
+         f2 = tmp_path / "b.py"
+         f1.write_text("\n".join(f"# {i}" for i in range(1500)))
+         f2.write_text("\n".join(f"# {i}" for i in range(1500)))
+
+         await bridge.start_session("test-multi-small")
+
+         with patch.object(bridge, "_run_opencode", new_callable=AsyncMock) as mock_run:
+             mock_run.return_value = (_mock_opencode_response("ok"), 0)
+             with patch.object(bridge, "_run_chunked", new_callable=AsyncMock) as mock_chunked:
+                 result = await bridge.send_message(
+                     "Review", files=[str(f1), str(f2)]
+                 )
+                 mock_chunked.assert_not_called()
+
+     @pytest.mark.anyio
+     async def test_mix_small_and_large_triggers_chunking(self, bridge, tmp_path):
+         """If ANY file exceeds threshold, chunking should trigger."""
+         small = tmp_path / "small.py"
+         large = tmp_path / "large.py"
+         small.write_text("\n".join(f"# {i}" for i in range(100)))
+         large.write_text("\n".join(f"# {i}" for i in range(CHUNK_THRESHOLD + 1)))
+
+         await bridge.start_session("test-mix")
+
+         with patch.object(bridge, "_run_chunked", new_callable=AsyncMock) as mock_chunked:
+             mock_chunked.return_value = "chunked"
+             result = await bridge.send_message(
+                 "Review", files=[str(small), str(large)]
+             )
+             mock_chunked.assert_called_once()
+
+
+ class TestRunChunkedSmallFilesPassthrough:
+     """Verify small files are passed to the synthesis call as context."""
+
+     @pytest.mark.anyio
+     async def test_small_files_attached_to_synthesis(self, bridge, tmp_path):
+         small = tmp_path / "helper.py"
+         large = tmp_path / "main.py"
+         small.write_text("\n".join(f"# {i}" for i in range(100)))
+         large.write_text("\n".join(f"# {i}" for i in range(2500)))
+
+         await bridge.start_session("test-passthrough")
+         session = bridge.sessions["test-passthrough"]
+
+         synthesis_args = []
+
+         async def mock_run(*args, timeout=300):
+             prompt_arg = args[1] if len(args) > 1 else ""
+             if "Synthesize" in prompt_arg or "Chunk Analyses" in prompt_arg:
+                 synthesis_args.extend(args)
+                 return (_mock_opencode_response("final"), 0)
+             return (_mock_opencode_response("chunk-ok"), 0)
+
+         with patch.object(bridge, "_run_opencode", side_effect=mock_run):
+             result = await bridge._run_chunked(
+                 "Review", [str(small), str(large)], session
+             )
+
+         # The synthesis call should have --file pointing to the small file
+         assert str(small) in synthesis_args, (
+             f"Small file not passed to synthesis. Args: {synthesis_args}"
+         )
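
These tests are driven by @pytest.mark.anyio, so running them requires the anyio pytest plugin (installed with anyio). Marked tests run on the backend supplied by the anyio_backend fixture, asyncio by default; a conftest.py such as the following (a standard anyio pattern, not shipped in this diff) makes the choice explicit:

    # conftest.py — common anyio pattern; add "trio" to params to cover both backends
    import pytest

    @pytest.fixture(params=["asyncio"])
    def anyio_backend(request):
        return request.param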