opencode-bridge 0.3.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
opencode_bridge/server.py CHANGED
@@ -16,6 +16,7 @@ Configuration:
16
16
  """
17
17
 
18
18
  import os
19
+ import re
19
20
  import json
20
21
  import asyncio
21
22
  import shutil
@@ -35,6 +36,13 @@ SMALL_FILE = 500 # lines
35
36
  MEDIUM_FILE = 1500 # lines
36
37
  LARGE_FILE = 5000 # lines
37
38
 
39
# Thresholds that drive chunked (map-reduce) processing of large files
CHUNK_THRESHOLD = 2_000  # files with more lines than this are chunked
CHUNK_SIZE = 800  # target number of lines in one chunk
CHUNK_OVERLAP = 20  # lines shared by two neighbouring chunks
MAX_PARALLEL_CHUNKS = 6  # how many chunks are analyzed concurrently
MAX_TOTAL_CHUNKS = 20  # safety cap on the total number of chunks
45
+
38
46
  # Language detection by extension
39
47
  LANG_MAP = {
40
48
  ".py": "Python", ".js": "JavaScript", ".ts": "TypeScript", ".tsx": "TypeScript/React",
@@ -304,6 +312,174 @@ def build_companion_prompt(
304
312
  return "\n".join(parts)
305
313
 
306
314
 
315
+ # ---------------------------------------------------------------------------
316
+ # Chunked Processing — map-reduce for large files
317
+ # ---------------------------------------------------------------------------
318
+
319
# Lines that make a natural place to cut a file, whatever the language.
# Each alternative is anchored at the first column of the line it is
# matched against, so indented definitions, braces and comments do not count.
_BOUNDARY_ALTERNATIVES = (
    r"\s*$",  # blank line
    r"(?:def |class |function |func |fn |pub fn |impl |module |package )",  # definitions
    r"(?:})\s*$",  # closing brace on its own line
    r"(?://|#|/\*|\*/).{0,80}$",  # comment lines
)
_BOUNDARY_RE = re.compile(
    "^(?:" + "|".join(_BOUNDARY_ALTERNATIVES) + ")",
    flags=re.MULTILINE,
)
328
+
329
+
330
# Column-0 prefixes that open a new definition.  A cut is placed *before*
# such a line so the header stays attached to the body that follows it.
_DEFINITION_PREFIXES = (
    "def ", "class ", "function ", "func ", "fn ", "pub fn ",
    "impl ", "module ", "package ",
)


def _snap_to_boundary(
    lines: list[str],
    pos: int,
    end: int,
    chunk_size: int,
    window: int = 50,
) -> int:
    """Return the cut index closest to *end* that lies on a natural boundary.

    Lines in ``[end - window, end + window)`` are tested against
    ``_BOUNDARY_RE``; the scan never starts before the middle of the chunk
    so a chunk cannot shrink below half its nominal size.  A definition line
    is cut *before* (it opens the next chunk); any other boundary line is cut
    *after* (it closes this chunk).  When no boundary is found, *end* is
    returned unchanged.
    """
    scan_start = max(end - window, pos + chunk_size // 2)
    scan_end = min(end + window, len(lines))

    best = end
    best_distance: Optional[int] = None
    for i in range(scan_start, scan_end):
        line = lines[i]
        if not _BOUNDARY_RE.match(line):
            continue
        cut = i if line.startswith(_DEFINITION_PREFIXES) else i + 1
        if cut <= pos:
            # Would produce an empty chunk; ignore this candidate.
            continue
        distance = abs(cut - end)
        # Strict "<" keeps the earlier candidate on a tie.
        if best_distance is None or distance < best_distance:
            best, best_distance = cut, distance
    return best


def chunk_file(
    filepath: str,
    chunk_size: int = CHUNK_SIZE,
    overlap: int = CHUNK_OVERLAP,
) -> list[dict]:
    """Split a file into overlapping chunks with boundary snapping.

    Args:
        filepath: Path of the file to split.
        chunk_size: Nominal number of lines per chunk; values below 1 are
            treated as 1.
        overlap: Number of lines repeated at the start of the next chunk;
            clamped to the range ``0 .. chunk_size - 1`` so every step makes
            real progress.

    Returns:
        A list of dicts with keys ``chunk_index`` (0-based), ``total_chunks``,
        ``start_line`` and ``end_line`` (1-based, inclusive), ``content`` and
        ``filepath``.  The list is empty when the file cannot be read or has
        no lines.
    """
    p = Path(filepath)
    try:
        lines = p.read_text(errors="replace").splitlines(keepends=True)
    except (OSError, ValueError):
        # Unreadable file or invalid path: callers treat "no chunks" as failure.
        return []

    total = len(lines)
    if total == 0:
        return []

    # Guard against degenerate arguments that would yield empty chunks or
    # skip lines between chunks.
    chunk_size = max(1, chunk_size)
    overlap = max(0, min(overlap, chunk_size - 1))

    if total <= chunk_size:
        return [{
            "chunk_index": 0,
            "total_chunks": 1,
            "start_line": 1,
            "end_line": total,
            "content": "".join(lines),
            "filepath": str(p),
        }]

    chunks: list[dict] = []
    pos = 0
    while pos < total:
        end = min(pos + chunk_size, total)
        if end < total:
            end = _snap_to_boundary(lines, pos, end, chunk_size)

        chunks.append({
            "chunk_index": len(chunks),
            "total_chunks": -1,  # filled in once the final count is known
            "start_line": pos + 1,  # 1-indexed
            "end_line": end,  # `end` is exclusive 0-based == inclusive 1-based
            "content": "".join(lines[pos:end]),
            "filepath": str(p),
        })

        if end >= total:
            break
        # Step back by `overlap` lines, but always advance by at least one.
        pos = max(end - overlap, pos + 1)

    total_chunks = len(chunks)
    for chunk in chunks:
        chunk["total_chunks"] = total_chunks

    return chunks
395
+
396
+
397
def build_chunk_prompt(
    user_prompt: str,
    chunk_info: dict,
    file_info: dict,
    mode: str = "discuss",
) -> str:
    """Build a focused prompt for analyzing a single file chunk.

    Args:
        user_prompt: The task text, inserted verbatim under "## Task".
        chunk_info: Chunk descriptor; reads ``chunk_index``, ``total_chunks``,
            ``start_line``, ``end_line`` and ``filepath``.
        file_info: File descriptor; ``name``, ``language`` and ``lines`` are
            used when present, with fallbacks otherwise.
        mode: ``"review"`` appends an instruction to categorize findings.

    Returns:
        The prompt as one newline-joined string.
    """
    name = file_info.get("name", Path(chunk_info["filepath"]).name)
    language = file_info.get("language", "Unknown")
    total_lines = file_info.get("lines", "?")
    idx = chunk_info["chunk_index"] + 1  # shown 1-based to the model
    total = chunk_info["total_chunks"]
    start = chunk_info["start_line"]
    end = chunk_info["end_line"]

    parts = [
        f"You are analyzing **chunk {idx} of {total}** from `{name}` "
        f"({language}, {total_lines} total lines).",
        f"This chunk covers **lines {start}–{end}**.",
        "",
        "## Task",
        user_prompt,
        "",
        "## Instructions",
        "- Focus ONLY on the code in this chunk",
        "- Note any references to code that might exist outside this chunk",
        "- Be concise — your output will be combined with analyses of other chunks",
        "- Include line numbers for any issues found",
        # The chunk is attached as its own file, numbered from 1; without the
        # mapping, reported line numbers would be relative to the chunk.
        f"- Line 1 of the attached chunk is line {start} of `{name}`; "
        "report line numbers relative to the full file",
    ]

    if mode == "review":
        parts.append("- Categorize findings as: bug, security, design, performance, or style")

    return "\n".join(parts)
431
+
432
+
433
def build_synthesis_prompt(
    user_prompt: str,
    chunk_results: list[dict],
    file_infos: list[dict],
    mode: str = "discuss",
) -> str:
    """Build a prompt that merges chunk analyses into one coherent response.

    Args:
        user_prompt: The original request, inserted under "## Original Request".
        chunk_results: Per-chunk results; reads ``chunk_index``, ``file``,
            ``response`` and ``error``.
        file_infos: File descriptors; ``name`` and ``lines`` are listed under
            "## Files Analyzed".
        mode: ``"review"`` appends an instruction to group findings by category.

    Returns:
        The prompt as one newline-joined string.  Chunk analyses are grouped
        by file (in order of first appearance) and sorted by chunk index
        within each file.
    """
    file_desc = ", ".join(
        f"`{info.get('name', '?')}` ({info.get('lines', '?')} lines)"
        for info in file_infos
    )
    n = len(chunk_results)

    parts = [
        f"You analyzed a large file in **{n} chunks**. "
        "Synthesize the chunk analyses below into one coherent response.",
        "",
        "## Original Request",
        user_prompt,
        "",
        "## Files Analyzed",
        file_desc,
        "",
        "## Chunk Analyses",
    ]

    # Chunk indexes restart at 0 for every file, so sorting on the index alone
    # would interleave chunks of different files.  Rank files by first
    # appearance and sort on (file rank, chunk index) instead.
    file_rank: dict[str, int] = {}
    for cr in chunk_results:
        file_rank.setdefault(cr.get("file", ""), len(file_rank))

    ordered = sorted(
        chunk_results,
        key=lambda c: (file_rank[c.get("file", "")], c.get("chunk_index", 0)),
    )
    for cr in ordered:
        idx = cr.get("chunk_index", 0) + 1
        fp = Path(cr.get("file", "")).name
        if cr.get("error"):
            response = f"[analysis failed: {cr['error']}]"
        else:
            response = cr.get("response", "[analysis failed]")
        parts.append(f"\n### Chunk {idx} — `{fp}`")
        parts.append(response)

    parts.extend([
        "",
        "## Instructions",
        "- Combine findings and remove duplicates (chunks overlap slightly)",
        "- Organize by importance, not by chunk order",
        "- Preserve line number references from the original analyses",
        "- Provide an overall assessment at the top",
    ])

    if mode == "review":
        parts.append("- Group findings by category: bugs, security, design, performance, style")

    return "\n".join(parts)
481
+
482
+
307
483
  # Default configuration
308
484
  DEFAULT_MODEL = "openai/gpt-5.2-codex"
309
485
  DEFAULT_AGENT = "plan"
@@ -470,6 +646,182 @@ class OpenCodeBridge:
470
646
  except Exception as e:
471
647
  return f"Error: {e}", 1
472
648
 
649
@staticmethod
def _parse_opencode_response(output: str) -> tuple[str, Optional[str]]:
    """Parse JSON-lines output from opencode CLI.

    Each non-empty line is decoded as JSON.  Lines that are not valid JSON,
    or that decode to something other than an object, are ignored.  The
    first ``sessionID`` seen is kept, and the ``part.text`` of every event
    whose ``type`` is ``"text"`` is concatenated in order.

    Returns (reply_text, session_id); ``session_id`` is ``None`` when no
    event carried one.
    """
    reply_parts: list[str] = []
    session_id: Optional[str] = None
    # Split on "\n" only: str.splitlines() would also break on characters
    # such as U+2028 that may legitimately appear inside a JSON string.
    for line in output.split("\n"):
        if not line:
            continue
        try:
            event = json.loads(line)
        except json.JSONDecodeError:
            continue
        if not isinstance(event, dict):
            # Valid JSON but not an event object (e.g. a bare number or list).
            continue
        if not session_id and "sessionID" in event:
            session_id = event["sessionID"]
        if event.get("type") == "text":
            part = event.get("part")
            text = part.get("text", "") if isinstance(part, dict) else ""
            if text and isinstance(text, str):
                reply_parts.append(text)
    return "".join(reply_parts), session_id
671
+
672
async def _run_chunk(
    self,
    chunk_info: dict,
    file_info: dict,
    user_prompt: str,
    session: "Session",
    mode: str = "discuss",
) -> dict:
    """Process a single file chunk through OpenCode (stateless).

    The chunk content is written to a temporary file that keeps the original
    file's extension, attached to an ``opencode run`` invocation, and removed
    again whatever the outcome.  No session id is passed to the CLI.

    Args:
        chunk_info: Chunk descriptor; reads ``chunk_index``, ``filepath`` and
            ``content`` (plus whatever ``build_chunk_prompt`` reads).
        file_info: File descriptor forwarded to ``build_chunk_prompt``.
        user_prompt: Task text forwarded to ``build_chunk_prompt``.
        session: Supplies ``model``, ``agent`` and ``variant`` for the CLI call.
        mode: Forwarded to ``build_chunk_prompt``.

    Returns:
        A dict with ``chunk_index``, ``file``, ``response`` and ``error``.
        Failures never raise: ``error`` holds the first 500 characters of the
        CLI output on a non-zero exit, or the exception text otherwise.
    """
    result = {
        "chunk_index": chunk_info["chunk_index"],
        "file": chunk_info["filepath"],
        "response": "",
        "error": None,
    }

    # Keep the original extension on the temp file
    ext = Path(chunk_info["filepath"]).suffix or ".txt"
    tmp_path: Optional[str] = None
    try:
        # Explicit UTF-8: the content may hold characters (for instance the
        # U+FFFD replacement character) that the locale encoding cannot
        # represent.  The ``with`` block closes the handle even if the write
        # fails, so the cleanup below can always remove the file.
        with tempfile.NamedTemporaryFile(
            mode="w",
            suffix=ext,
            delete=False,
            prefix="opencode_chunk_",
            encoding="utf-8",
        ) as tmp:
            tmp_path = tmp.name
            tmp.write(chunk_info["content"])

        prompt = build_chunk_prompt(user_prompt, chunk_info, file_info, mode)

        args = [
            "run", prompt,
            "--model", session.model,
            "--agent", session.agent,
            "--file", tmp_path,
            "--format", "json",
        ]
        if session.variant:
            args.extend(["--variant", session.variant])

        output, code = await self._run_opencode(*args, timeout=300)

        if code != 0:
            result["error"] = output[:500]
        else:
            reply, _ = self._parse_opencode_response(output)
            result["response"] = reply or "[no response]"

    except Exception as e:
        # Deliberate catch-all: one failing chunk is reported in its result
        # rather than aborting the whole parallel run.
        result["error"] = str(e)
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                pass
    return result
728
+
729
async def _run_chunked(
    self,
    user_prompt: str,
    files: list[str],
    session: "Session",
    mode: str = "discuss",
) -> str:
    """Map-reduce orchestrator: chunk large files, process in parallel, synthesize.

    Files with more than ``CHUNK_THRESHOLD`` lines are split with
    ``chunk_file``; the remaining files are only attached to the final
    synthesis call.  Chunks are analyzed concurrently (at most
    ``MAX_PARALLEL_CHUNKS`` at a time) and their analyses are merged by one
    further ``opencode run`` call.

    Args:
        user_prompt: Task text used for every chunk and for the synthesis.
        files: Paths of all files attached to the request.
        session: Supplies ``model``, ``agent`` and ``variant`` for CLI calls.
        mode: Forwarded to the prompt builders (``"discuss"`` or ``"review"``).

    Returns:
        The synthesized reply, the raw per-chunk analyses when synthesis
        fails, or a short diagnostic message when nothing could be chunked or
        more than half of the chunks failed.
    """
    small_files: list[str] = []
    large_files: list[tuple[str, dict]] = []  # (path, file_info)

    # Look each file up once and reuse the info for every chunk of that file.
    for f in files:
        info = get_file_info(f)
        if info.get("lines", 0) > CHUNK_THRESHOLD:
            large_files.append((f, info))
        else:
            small_files.append(f)

    # Safety: keep doubling the chunk size until the total respects
    # MAX_TOTAL_CHUNKS.  Each file yields at most one chunk once the size
    # exceeds its length, so the count can never drop below the number of
    # large files; that floor is what guarantees the loop terminates.
    chunk_size = CHUNK_SIZE
    while True:
        all_chunks: list[tuple[dict, dict]] = [  # (chunk_info, file_info)
            (c, info)
            for f, info in large_files
            for c in chunk_file(f, chunk_size, CHUNK_OVERLAP)
        ]
        if len(all_chunks) <= max(MAX_TOTAL_CHUNKS, len(large_files)):
            break
        chunk_size *= 2

    if not all_chunks:
        return "No chunks to process."

    # --- Map phase: run chunks in parallel ---
    sem = asyncio.Semaphore(MAX_PARALLEL_CHUNKS)

    async def _limited(chunk_info: dict, file_info: dict) -> dict:
        async with sem:
            return await self._run_chunk(chunk_info, file_info, user_prompt, session, mode)

    tasks = [_limited(ci, fi) for ci, fi in all_chunks]
    # gather() returns results in task order: file by file, chunk by chunk.
    chunk_results: list[dict] = await asyncio.gather(*tasks)

    # Check failure rate
    failed = sum(1 for cr in chunk_results if cr.get("error"))
    if failed > len(chunk_results) / 2:
        return (
            f"Chunked analysis failed: {failed}/{len(chunk_results)} chunks errored. "
            "Try with a smaller file or increase the chunk size."
        )

    # --- Reduce phase: synthesize ---
    # One file_info per chunked file, keyed on the chunk's own path so the
    # de-duplication does not depend on which keys file_info carries.
    file_infos: list[dict] = []
    seen_paths: set[str] = set()
    for ci, fi in all_chunks:
        fp = ci["filepath"]
        if fp not in seen_paths:
            seen_paths.add(fp)
            file_infos.append(fi)

    synthesis_prompt = build_synthesis_prompt(user_prompt, chunk_results, file_infos, mode)

    # Attach small files for reference context (not the large ones)
    args = [
        "run", synthesis_prompt,
        "--model", session.model,
        "--agent", session.agent,
        "--format", "json",
    ]
    if session.variant:
        args.extend(["--variant", session.variant])
    for sf in small_files:
        args.extend(["--file", sf])

    # Longer timeout for synthesis
    output, code = await self._run_opencode(*args, timeout=600)

    if code != 0:
        # Fallback: concatenate raw chunk results.  They are already ordered
        # by file and chunk; re-sorting on chunk_index alone would interleave
        # chunks of different files.
        parts = ["*Synthesis failed — showing raw chunk analyses:*\n"]
        for cr in chunk_results:
            idx = cr.get("chunk_index", 0) + 1
            fp = Path(cr.get("file", "")).name
            parts.append(f"\n### Chunk {idx} — `{fp}`")
            if cr.get("error"):
                parts.append(f"[error: {cr['error']}]")
            else:
                parts.append(cr.get("response", "[no response]"))
        return "\n".join(parts)

    reply, _ = self._parse_opencode_response(output)
    return reply or "No response from synthesis."
824
+
473
825
  async def list_models(self, provider: Optional[str] = None) -> str:
474
826
  """List available models from OpenCode."""
475
827
  args = ["models"]
@@ -599,6 +951,27 @@ Set via:
599
951
  temp_file.close()
600
952
  files = (files or []) + [temp_file.name]
601
953
 
954
+ # --- Chunking gate: large user files get map-reduce processing ---
955
+ user_files = [f for f in files if not Path(f).name.startswith("opencode_msg_")]
956
+ needs_chunking = any(
957
+ get_file_info(f).get("lines", 0) > CHUNK_THRESHOLD
958
+ for f in user_files
959
+ )
960
+
961
+ if needs_chunking:
962
+ reply = await self._run_chunked(message, user_files, session, mode="discuss")
963
+ # Cleanup temp file
964
+ try:
965
+ os.unlink(temp_file.name)
966
+ except OSError:
967
+ pass
968
+ if reply:
969
+ session.add_message("assistant", reply)
970
+ session.save(self.sessions_dir / f"{sid}.json")
971
+ return reply or "No response received"
972
+
973
+ # --- Normal (non-chunked) path ---
974
+
602
975
  # Build prompt: companion system unless _raw is set
603
976
  if _raw:
604
977
  run_prompt = build_message_prompt(message, files)
@@ -631,7 +1004,6 @@ Set via:
631
1004
  args.extend(["--format", "json"])
632
1005
 
633
1006
  # Scale timeout based on attached file size
634
- user_files = [f for f in files if not Path(f).name.startswith("opencode_msg_")]
635
1007
  total_lines = sum(get_file_info(f).get("lines", 0) for f in user_files)
636
1008
  # Base 300s, +60s per 1000 lines above threshold, capped at 900s
637
1009
  timeout = min(900, 300 + max(0, (total_lines - MEDIUM_FILE) * 60 // 1000))
@@ -649,22 +1021,10 @@ Set via:
649
1021
  return f"Error: {output}"
650
1022
 
651
1023
  # Parse JSON events for session ID and text
652
- reply_parts = []
653
- for line in output.split("\n"):
654
- if not line:
655
- continue
656
- try:
657
- event = json.loads(line)
658
- if not session.opencode_session_id and "sessionID" in event:
659
- session.opencode_session_id = event["sessionID"]
660
- if event.get("type") == "text":
661
- text = event.get("part", {}).get("text", "")
662
- if text:
663
- reply_parts.append(text)
664
- except json.JSONDecodeError:
665
- continue
1024
+ reply, new_session_id = self._parse_opencode_response(output)
1025
+ if new_session_id and not session.opencode_session_id:
1026
+ session.opencode_session_id = new_session_id
666
1027
 
667
- reply = "".join(reply_parts)
668
1028
  if reply:
669
1029
  session.add_message("assistant", reply)
670
1030
 
@@ -737,10 +1097,16 @@ Set via:
737
1097
  files = file_paths
738
1098
  file_infos = [get_file_info(f) for f in file_paths]
739
1099
  file_infos = [i for i in file_infos if i]
1100
+ total_lines = sum(i.get("lines", 0) for i in file_infos)
1101
+
1102
+ # Chunking gate for large reviews
1103
+ if total_lines > CHUNK_THRESHOLD:
1104
+ prompt = build_review_prompt(file_infos, focus)
1105
+ return await self._run_chunked(prompt, file_paths, self.sessions[sid], mode="review")
1106
+
740
1107
  prompt = build_review_prompt(file_infos, focus)
741
1108
 
742
1109
  # Increase timeout for large files
743
- total_lines = sum(i.get("lines", 0) for i in file_infos)
744
1110
  if total_lines > LARGE_FILE:
745
1111
  # Use variant=high for large reviews if not already high+
746
1112
  session = self.sessions[sid]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: opencode-bridge
3
- Version: 0.3.0
3
+ Version: 0.4.1
4
4
  Summary: MCP server for continuous OpenCode discussion sessions
5
5
  Project-URL: Repository, https://github.com/genomewalker/opencode-bridge
6
6
  Author: Antonio Fernandez-Guerra
@@ -0,0 +1,7 @@
1
+ opencode_bridge/__init__.py,sha256=SkXVg907MuInd7UEYOjHjiiIIT46y4S2l20hE9cShKo,92
2
+ opencode_bridge/install.py,sha256=VOJNYUPxq88g0XizkHSQ9noM3Qcd3AfZxPUZInEKErk,1796
3
+ opencode_bridge/server.py,sha256=aQMfcDrQp4Z8ctTIldWf7ezfphaa5375yFt5UKnYkGk,57247
4
+ opencode_bridge-0.4.1.dist-info/METADATA,sha256=IaEiqeUE81_r8JvoN-Pn73Ha7RmkxrMs8mS2dYHgF28,3924
5
+ opencode_bridge-0.4.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
6
+ opencode_bridge-0.4.1.dist-info/entry_points.txt,sha256=8elAgeI-Sk7EPoV7kUr3CCgQyIAW2VfDj5ZXQ_9slCc,184
7
+ opencode_bridge-0.4.1.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- opencode_bridge/__init__.py,sha256=SkXVg907MuInd7UEYOjHjiiIIT46y4S2l20hE9cShKo,92
2
- opencode_bridge/install.py,sha256=VOJNYUPxq88g0XizkHSQ9noM3Qcd3AfZxPUZInEKErk,1796
3
- opencode_bridge/server.py,sha256=Cu7AKdUMCrSxvKpHPP3WjM4ivyqqWEcuGhKzmV1tY24,44797
4
- opencode_bridge-0.3.0.dist-info/METADATA,sha256=C-IjnIrmOC4w87ZgJjc-ZC0xZDhqQh97A9mo52WfgbI,3924
5
- opencode_bridge-0.3.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
6
- opencode_bridge-0.3.0.dist-info/entry_points.txt,sha256=8elAgeI-Sk7EPoV7kUr3CCgQyIAW2VfDj5ZXQ_9slCc,184
7
- opencode_bridge-0.3.0.dist-info/RECORD,,